Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-22 22:20:02 +00:00

Compare commits: 131 commits, v0.17.0-ni ... release/v0
| SHA1 |
|---|
| 4bb9ceb63b |
| 38456638f8 |
| 97c0b1f5c1 |
| 4fc7f12360 |
| ed17997449 |
| 849ae8ebb6 |
| a0587e2e87 |
| 1ed71169ac |
| e62f0e2b64 |
| f92e753a34 |
| a22b016f90 |
| 7a9fa99069 |
| d808e7be7e |
| 8e22fcfd5c |
| 26729c31a6 |
| b73617eaba |
| 3b909f63e3 |
| 0d4e07eddd |
| b94ce9019d |
| 3dcd40c4ba |
| a67803d0e9 |
| aa7e7942f8 |
| f1b7581dc3 |
| cd761df369 |
| 0cea6ae64d |
| 8bf772fb50 |
| 9c1240921d |
| eb52129a91 |
| a0a2b40cbe |
| 067c4458d6 |
| 4e9c31bf5c |
| 9320a6ddaa |
| 4c9fcb7dee |
| 9dc16772fe |
| 6ee91f6af4 |
| 9175fa643d |
| 0e962844ac |
| 246b832d79 |
| e62a022d76 |
| e595885dc6 |
| dd3432e6ca |
| ab96703d8f |
| 73add808a6 |
| 1234911ed3 |
| d57c0db9e6 |
| 4daf5adce5 |
| 575093f85f |
| ac82ad4549 |
| 367a25af06 |
| d585c23ba5 |
| f55023f300 |
| 9213315613 |
| e77ad8e9dc |
| 7bc669e991 |
| b84cd19145 |
| cbcfdf9d65 |
| bacd9c7d15 |
| f441598247 |
| 556c408e7b |
| ec817f6877 |
| 32e73dad12 |
| bc20b17bc5 |
| 200422313f |
| 8452a9d579 |
| 5ef4dd1743 |
| 32a3ef36f9 |
| 566a647ec7 |
| 906e1ca0bf |
| b921e41abf |
| 6782bcddfa |
| 3d1a4b56a4 |
| 8894cb5406 |
| bb334e1594 |
| ec8ff48473 |
| d99734b97b |
| eb5e627ddd |
| 69eed2c3fa |
| 48572d18a8 |
| d5575d3fa4 |
| 83a65a81c0 |
| 288f69a30f |
| d6d5dad758 |
| d82f36db6a |
| 68ac37461b |
| 4c2955b86b |
| 390aef7563 |
| c6c33d14aa |
| 3014972202 |
| 6839b5aef4 |
| a04ec07b61 |
| d774996e89 |
| 2b43ff30b6 |
| eaceae4c91 |
| cdc168e753 |
| 5d5817b851 |
| a98c48a9b2 |
| 6692957e08 |
| 896d72191e |
| 5eec3485fe |
| 7e573e497c |
| 474a689309 |
| 2995eddca5 |
| 05529387d9 |
| 819531393f |
| d6bc117408 |
| 7402320abc |
| 5420d6f7fb |
| 7af471c5aa |
| 9cdd0d8251 |
| 5a4036cc66 |
| 8fc3a9a9d7 |
| 0b29b41c17 |
| 78dca8a0d7 |
| bf191c5763 |
| b652ea52ee |
| f64fc3a57a |
| 326198162e |
| f9d2a89a0c |
| dfc29eb3b3 |
| 351826cd32 |
| 60e01c7c3d |
| 021ad09c21 |
| 2a3e4c7a82 |
| 92fd34ba22 |
| d03f85287e |
| 4d97754cb4 |
| 1b6d924169 |
| 80f3ae650c |
| c0fe800e79 |
| 56f5ccf823 |
| fb3b1d4866 |
@@ -12,7 +12,7 @@ runs:
 steps:
 - name: Install Etcd cluster
 shell: bash
 run: |
 helm upgrade \
 --install etcd oci://registry-1.docker.io/bitnamicharts/etcd \
 --set replicaCount=${{ inputs.etcd-replicas }} \
@@ -24,4 +24,9 @@ runs:
 --set auth.rbac.token.enabled=false \
 --set persistence.size=2Gi \
 --create-namespace \
+--set global.security.allowInsecureImages=true \
+--set image.registry=docker.io \
+--set image.repository=greptime/etcd \
+--set image.tag=3.6.1-debian-12-r3 \
+--version 12.0.8 \
 -n ${{ inputs.namespace }}
@@ -1,3 +1,8 @@
+logging:
+level: "info"
+format: "json"
+filters:
+- log_store=debug
 meta:
 configData: |-
 [runtime]
@@ -12,7 +12,7 @@ runs:
 steps:
 - name: Install Kafka cluster
 shell: bash
 run: |
 helm upgrade \
 --install kafka oci://registry-1.docker.io/bitnamicharts/kafka \
 --set controller.replicaCount=${{ inputs.controller-replicas }} \
@@ -23,4 +23,8 @@ runs:
 --set listeners.controller.protocol=PLAINTEXT \
 --set listeners.client.protocol=PLAINTEXT \
 --create-namespace \
+--set image.registry=docker.io \
+--set image.repository=greptime/kafka \
+--set image.tag=3.9.0-debian-12-r1 \
+--version 31.0.0 \
 -n ${{ inputs.namespace }}
@@ -6,9 +6,7 @@ inputs:
 description: "Number of PostgreSQL replicas"
 namespace:
 default: "postgres-namespace"
-postgres-version:
-default: "14.2"
-description: "PostgreSQL version"
+description: "The PostgreSQL namespace"
 storage-size:
 default: "1Gi"
 description: "Storage size for PostgreSQL"
@@ -22,7 +20,11 @@ runs:
 helm upgrade \
 --install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
 --set replicaCount=${{ inputs.postgres-replicas }} \
---set image.tag=${{ inputs.postgres-version }} \
+--set global.security.allowInsecureImages=true \
+--set image.registry=docker.io \
+--set image.repository=greptime/postgresql \
+--set image.tag=17.5.0-debian-12-r3 \
+--version 16.7.4 \
 --set persistence.size=${{ inputs.storage-size }} \
 --set postgresql.username=greptimedb \
 --set postgresql.password=admin \
.github/scripts/check-version.sh (vendored): 4 changes
@@ -35,8 +35,8 @@ HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | ta
 
 if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
 echo "Current version ($CLEAN_CURRENT) is NEWER than or EQUAL to latest ($CLEAN_LATEST)"
-echo "should-push-latest-tag=true" >> $GITHUB_OUTPUT
+echo "is-current-version-latest=true" >> $GITHUB_OUTPUT
 else
 echo "Current version ($CLEAN_CURRENT) is OLDER than latest ($CLEAN_LATEST)"
-echo "should-push-latest-tag=false" >> $GITHUB_OUTPUT
+echo "is-current-version-latest=false" >> $GITHUB_OUTPUT
 fi
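The script compares the two versions with GNU `sort -V`, keeping whichever sorts last under natural version ordering. A minimal standalone sketch of the same pattern follows; the values are illustrative, and because the hunk header above is truncated, the exact `tail` invocation (`tail -n 1` here) is an assumption.

    # Pick the higher of two versions using natural version ordering.
    CLEAN_CURRENT="0.17.2"
    CLEAN_LATEST="0.17.0"
    HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | tail -n 1)
    if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
      echo "is-current-version-latest=true"
    else
      echo "is-current-version-latest=false"
    fi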
.github/scripts/deploy-greptimedb.sh (vendored): 40 changes
@@ -3,12 +3,14 @@
 set -e
 set -o pipefail
 
-KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
+KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.32.0}"
 ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
 DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
 GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
-ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
 GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
+ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
+ETCD_CHART_VERSION="${ETCD_CHART_VERSION:-12.0.8}"
+ETCD_IMAGE_TAG="${ETCD_IMAGE_TAG:-3.6.1-debian-12-r3}"
 
 # Create a cluster with 1 control-plane node and 5 workers.
 function create_kind_cluster() {
@@ -35,10 +37,16 @@ function add_greptime_chart() {
 function deploy_etcd_cluster() {
 local namespace="$1"
 
-helm install etcd "$ETCD_CHART" \
+helm upgrade --install etcd "$ETCD_CHART" \
+--version "$ETCD_CHART_VERSION" \
+--create-namespace \
 --set replicaCount=3 \
 --set auth.rbac.create=false \
 --set auth.rbac.token.enabled=false \
+--set global.security.allowInsecureImages=true \
+--set image.registry=docker.io \
+--set image.repository=greptime/etcd \
+--set image.tag="$ETCD_IMAGE_TAG" \
 -n "$namespace"
 
 # Wait for etcd cluster to be ready.
@@ -48,7 +56,8 @@ function deploy_etcd_cluster() {
 # Deploy greptimedb-operator.
 function deploy_greptimedb_operator() {
 # Use the latest chart and image.
-helm install greptimedb-operator greptime/greptimedb-operator \
+helm upgrade --install greptimedb-operator greptime/greptimedb-operator \
+--create-namespace \
 --set image.tag=latest \
 -n "$DEFAULT_INSTALL_NAMESPACE"
 
@@ -66,9 +75,11 @@ function deploy_greptimedb_cluster() {
 
 deploy_etcd_cluster "$install_namespace"
 
-helm install "$cluster_name" greptime/greptimedb-cluster \
+helm upgrade --install "$cluster_name" greptime/greptimedb-cluster \
+--create-namespace \
 --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
 --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
+--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
 -n "$install_namespace"
 
 # Wait for greptimedb cluster to be ready.
@@ -101,15 +112,17 @@ function deploy_greptimedb_cluster_with_s3_storage() {
 
 deploy_etcd_cluster "$install_namespace"
 
-helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
+helm upgrade --install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
+--create-namespace \
 --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
 --set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
---set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
---set storage.s3.region="$AWS_REGION" \
---set storage.s3.root="$DATA_ROOT" \
---set storage.credentials.secretName=s3-credentials \
---set storage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
---set storage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
+--set meta.backendStorage.etcd.storeKeyPrefix="$cluster_name" \
+--set objectStorage.s3.bucket="$AWS_CI_TEST_BUCKET" \
+--set objectStorage.s3.region="$AWS_REGION" \
+--set objectStorage.s3.root="$DATA_ROOT" \
+--set objectStorage.credentials.secretName=s3-credentials \
+--set objectStorage.credentials.accessKeyId="$AWS_ACCESS_KEY_ID" \
+--set objectStorage.credentials.secretAccessKey="$AWS_SECRET_ACCESS_KEY"
 
 # Wait for greptimedb cluster to be ready.
 while true; do
@@ -134,7 +147,8 @@ function deploy_greptimedb_cluster_with_s3_storage() {
 # Deploy standalone greptimedb.
 # It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
 function deploy_standalone_greptimedb() {
-helm install greptimedb-standalone greptime/greptimedb-standalone \
+helm upgrade --install greptimedb-standalone greptime/greptimedb-standalone \
+--create-namespace \
 --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
 -n "$DEFAULT_INSTALL_NAMESPACE"
 
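Because every default in the script uses the `${VAR:-default}` form, CI (or a local run) can override any of them through the environment. The export lines below use the values introduced in this diff; the final invocation is an assumption, since the script's argument handling is not shown in this compare view.

    export KUBERNETES_VERSION=v1.32.0          # kind node version
    export ETCD_CHART_VERSION=12.0.8           # bitnami etcd chart version
    export ETCD_IMAGE_TAG=3.6.1-debian-12-r3   # greptime/etcd mirror image tag
    export GREPTIMEDB_IMAGE_TAG=latest
    bash .github/scripts/deploy-greptimedb.sh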
.github/scripts/pull-test-deps-images.sh (vendored, new executable file): 34 lines
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# This script is used to pull the test dependency images that are stored in public ECR one by one to avoid rate limiting.
+
+set -e
+
+MAX_RETRIES=3
+
+IMAGES=(
+"greptime/zookeeper:3.7"
+"greptime/kafka:3.9.0-debian-12-r1"
+"greptime/etcd:3.6.1-debian-12-r3"
+"greptime/minio:2024"
+"greptime/mysql:5.7"
+)
+
+for image in "${IMAGES[@]}"; do
+for ((attempt=1; attempt<=MAX_RETRIES; attempt++)); do
+if docker pull "$image"; then
+# Successfully pulled the image.
+break
+else
+# Use some simple exponential backoff to avoid rate limiting.
+if [ $attempt -lt $MAX_RETRIES ]; then
+sleep_seconds=$((attempt * 5))
+echo "Attempt $attempt failed for $image, waiting $sleep_seconds seconds"
+sleep $sleep_seconds # 5s, 10s delays
+else
+echo "Failed to pull $image after $MAX_RETRIES attempts"
+exit 1
+fi
+fi
+done
+done
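The develop.yml changes below call this script immediately before bringing up the docker compose fixtures, so a local run from the fixtures directory looks like this (paths taken from the workflow diff):

    cd tests-integration/fixtures
    # Pre-pull the mirrored dependency images with retries, then start the fixtures.
    ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait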
@@ -21,7 +21,7 @@ update_dev_builder_version() {
 
 # Commit the changes.
 git add Makefile
-git commit -m "ci: update dev-builder image tag"
+git commit -s -m "ci: update dev-builder image tag"
 git push origin $BRANCH_NAME
 
 # Create a Pull Request.
.github/workflows/develop.yml (vendored): 47 changes
@@ -12,6 +12,7 @@ on:
 - 'docker/**'
 - '.gitignore'
 - 'grafana/**'
+- 'Makefile'
 workflow_dispatch:
 
 name: CI
@@ -617,10 +618,12 @@ jobs:
 - uses: actions/checkout@v4
 with:
 persist-credentials: false
 
 - if: matrix.mode.kafka
 name: Setup kafka server
 working-directory: tests-integration/fixtures
-run: docker compose up -d --wait kafka
+run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait kafka
 
 - name: Download pre-built binaries
 uses: actions/download-artifact@v4
 with:
@@ -682,6 +685,30 @@ jobs:
 - name: Run cargo clippy
 run: make clippy
 
+check-udeps:
+if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+name: Check Unused Dependencies
+runs-on: ubuntu-latest
+timeout-minutes: 60
+steps:
+- uses: actions/checkout@v4
+with:
+persist-credentials: false
+- uses: arduino/setup-protoc@v3
+with:
+repo-token: ${{ secrets.GITHUB_TOKEN }}
+- uses: actions-rust-lang/setup-rust-toolchain@v1
+- name: Rust Cache
+uses: Swatinem/rust-cache@v2
+with:
+shared-key: "check-udeps"
+cache-all-crates: "true"
+save-if: ${{ github.ref == 'refs/heads/main' }}
+- name: Install cargo-udeps
+run: cargo install cargo-udeps --locked
+- name: Check unused dependencies
+run: make check-udeps
+
 conflict-check:
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
 name: Check for conflict
@@ -697,7 +724,7 @@ jobs:
 if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'merge_group' }}
 runs-on: ubuntu-22.04-arm
 timeout-minutes: 60
-needs: [conflict-check, clippy, fmt]
+needs: [conflict-check, clippy, fmt, check-udeps]
 steps:
 - uses: actions/checkout@v4
 with:
@@ -709,7 +736,7 @@ jobs:
 - name: Install toolchain
 uses: actions-rust-lang/setup-rust-toolchain@v1
 with:
 cache: false
 - name: Rust Cache
 uses: Swatinem/rust-cache@v2
 with:
@@ -719,9 +746,11 @@ jobs:
 save-if: ${{ github.ref == 'refs/heads/main' }}
 - name: Install latest nextest release
 uses: taiki-e/install-action@nextest
 
 - name: Setup external services
 working-directory: tests-integration/fixtures
-run: docker compose up -d --wait
+run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
 
 - name: Run nextest cases
 run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
 env:
@@ -738,8 +767,11 @@ jobs:
 GT_MINIO_ACCESS_KEY: superpower_password
 GT_MINIO_REGION: us-west-2
 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
+GT_ETCD_TLS_ENDPOINTS: https://127.0.0.1:2378
 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
 GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
+GT_POSTGRES15_ENDPOINTS: postgres://test_user:test_password@127.0.0.1:5433/postgres
+GT_POSTGRES15_SCHEMA: test_schema
 GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
@@ -772,9 +804,11 @@ jobs:
 uses: taiki-e/install-action@nextest
 - name: Install cargo-llvm-cov
 uses: taiki-e/install-action@cargo-llvm-cov
 
 - name: Setup external services
 working-directory: tests-integration/fixtures
-run: docker compose up -d --wait
+run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
 
 - name: Run nextest cases
 run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
 env:
@@ -790,8 +824,11 @@ jobs:
 GT_MINIO_ACCESS_KEY: superpower_password
 GT_MINIO_REGION: us-west-2
 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
+GT_ETCD_TLS_ENDPOINTS: https://127.0.0.1:2378
 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
 GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
+GT_POSTGRES15_ENDPOINTS: postgres://test_user:test_password@127.0.0.1:5433/postgres
+GT_POSTGRES15_SCHEMA: test_schema
 GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
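The new check-udeps job can be reproduced locally with the same two commands it runs; whether your local toolchain matches the one `actions-rust-lang/setup-rust-toolchain@v1` resolves in CI is an assumption not covered by this diff.

    # Mirror of the check-udeps job, run from the repository root.
    cargo install cargo-udeps --locked
    make check-udeps   # wraps `cargo udeps --workspace --all-targets` (see the Makefile diff)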
.github/workflows/docs.yml (vendored): 8 changes
@@ -10,6 +10,7 @@ on:
 - 'docker/**'
 - '.gitignore'
 - 'grafana/**'
+- 'Makefile'
 push:
 branches:
 - main
@@ -21,6 +22,7 @@ on:
 - 'docker/**'
 - '.gitignore'
 - 'grafana/**'
+- 'Makefile'
 workflow_dispatch:
 
 name: CI
@@ -65,6 +67,12 @@ jobs:
 steps:
 - run: 'echo "No action required"'
 
+check-udeps:
+name: Unused Dependencies
+runs-on: ubuntu-latest
+steps:
+- run: 'echo "No action required"'
+
 coverage:
 runs-on: ubuntu-latest
 steps:
.github/workflows/release.yml (vendored): 11 changes
@@ -111,7 +111,8 @@ jobs:
 # The 'version' use as the global tag name of the release workflow.
 version: ${{ steps.create-version.outputs.version }}
 
-should-push-latest-tag: ${{ steps.check-version.outputs.should-push-latest-tag }}
+# The 'is-current-version-latest' determines whether to update 'latest' Docker tags and downstream repositories.
+is-current-version-latest: ${{ steps.check-version.outputs.is-current-version-latest }}
 steps:
 - name: Checkout
 uses: actions/checkout@v4
@@ -321,7 +322,7 @@ jobs:
 image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
 image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
 version: ${{ needs.allocate-runners.outputs.version }}
-push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+push-latest-tag: ${{ needs.allocate-runners.outputs.is-current-version-latest == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
 
 - name: Set build image result
 id: set-build-image-result
@@ -368,7 +369,7 @@ jobs:
 dev-mode: false
 upload-to-s3: true
 update-version-info: true
-push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+push-latest-tag: ${{ needs.allocate-runners.outputs.is-current-version-latest == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
 
 publish-github-release:
 name: Create GitHub release and upload artifacts
@@ -476,7 +477,7 @@ jobs:
 
 bump-helm-charts-version:
 name: Bump helm charts version
-if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' && needs.allocate-runners.outputs.is-current-version-latest == 'true' }}
 needs: [allocate-runners, publish-github-release]
 runs-on: ubuntu-latest
 permissions:
@@ -497,7 +498,7 @@ jobs:
 
 bump-homebrew-greptime-version:
 name: Bump homebrew greptime version
-if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
+if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' && needs.allocate-runners.outputs.is-current-version-latest == 'true' }}
 needs: [allocate-runners, publish-github-release]
 runs-on: ubuntu-latest
 permissions:
.github/workflows/semantic-pull-request.yml (vendored): 6 changes
@@ -1,7 +1,7 @@
 name: "Semantic Pull Request"
 
 on:
-pull_request:
+pull_request_target:
 types:
 - opened
 - reopened
@@ -12,9 +12,9 @@ concurrency:
 cancel-in-progress: true
 
 permissions:
-issues: write
-contents: write
+contents: read
 pull-requests: write
+issues: write
 
 jobs:
 check:
.gitignore (vendored): 3 changes
@@ -52,6 +52,9 @@ venv/
 tests-fuzz/artifacts/
 tests-fuzz/corpus/
 
+# cargo-udeps reports
+udeps-report.json
+
 # Nix
 .direnv
 .envrc
@@ -55,14 +55,18 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
 - To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
 - Make sure all files have proper license header (running `docker run --rm -v $(pwd):/github/workspace ghcr.io/korandoru/hawkeye-native:v3 format` from the project root).
 - Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/) and [style guide](docs/style-guide.md).
-- Make sure all unit tests are passed using [nextest](https://nexte.st/index.html) `cargo nextest run`.
-- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).
+- Make sure all unit tests are passed using [nextest](https://nexte.st/index.html) `cargo nextest run --workspace --features pg_kvbackend,mysql_kvbackend` or `make test`.
+- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings` or `make clippy`).
+- Ensure there are no unused dependencies by running `make check-udeps` (clean them up with `make fix-udeps` if reported).
+- If you must keep a target-specific dependency (e.g. under `[target.'cfg(...)'.dev-dependencies]`), add a cargo-udeps ignore entry in the same `Cargo.toml`, for example:
+  `[package.metadata.cargo-udeps.ignore]` with `development = ["rexpect"]` (or `dependencies`/`build` as appropriate).
 - When modifying sample configuration files in `config/`, run `make config-docs` (which requires Docker to be installed) to update the configuration documentation and include it in your commit.
 
 #### `pre-commit` Hooks
 
 You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
 
 1. Install `pre-commit`
 
 pip install pre-commit
 
@@ -70,7 +74,7 @@ You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run
 
 brew install pre-commit
 
 2. Install the `pre-commit` hooks
 
 $ pre-commit install
 pre-commit installed at .git/hooks/pre-commit
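For the cargo-udeps ignore case described above, the snippet below appends the suggested metadata to a crate manifest and re-runs the check. The crate path `src/some-crate` and the `rexpect` dependency are placeholders for illustration, not something this diff prescribes.

    # Hypothetical example: keep a target-specific dev-dependency out of cargo-udeps reports.
    {
      echo ''
      echo '[package.metadata.cargo-udeps.ignore]'
      echo 'development = ["rexpect"]'
    } >> src/some-crate/Cargo.toml
    make check-udeps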
Cargo.lock (generated): 4983 changes. File diff suppressed because it is too large.
Cargo.toml: 65 changes
@@ -73,7 +73,7 @@ members = [
 resolver = "2"
 
 [workspace.package]
-version = "0.17.0"
+version = "0.17.2"
 edition = "2021"
 license = "Apache-2.0"
 
@@ -98,11 +98,12 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
 # See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
 ahash = { version = "0.8", features = ["compile-time-rng"] }
 aquamarine = "0.6"
-arrow = { version = "54.2", features = ["prettyprint"] }
-arrow-array = { version = "54.2", default-features = false, features = ["chrono-tz"] }
-arrow-flight = "54.2"
-arrow-ipc = { version = "54.2", default-features = false, features = ["lz4", "zstd"] }
-arrow-schema = { version = "54.2", features = ["serde"] }
+arrow = { version = "56.0", features = ["prettyprint"] }
+arrow-array = { version = "56.0", default-features = false, features = ["chrono-tz"] }
+arrow-buffer = "56.0"
+arrow-flight = "56.0"
+arrow-ipc = { version = "56.0", default-features = false, features = ["lz4", "zstd"] }
+arrow-schema = { version = "56.0", features = ["serde"] }
 async-stream = "0.3"
 async-trait = "0.1"
 # Remember to update axum-extra, axum-macros when updating axum
@@ -121,26 +122,30 @@ clap = { version = "4.4", features = ["derive"] }
 config = "0.13.0"
 crossbeam-utils = "0.8"
 dashmap = "6.1"
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-functions-aggregate-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
+datafusion = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-functions = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-functions-aggregate-common = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-optimizer = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-orc = { git = "https://github.com/GreptimeTeam/datafusion-orc", rev = "a0a5f902158f153119316eaeec868cff3fc8a99d" }
+datafusion-physical-expr = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
+datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "7d5214512740b4dfb742b6b3d91ed9affcc2c9d0" }
 deadpool = "0.12"
 deadpool-postgres = "0.14"
 derive_builder = "0.20"
 dotenv = "0.15"
 either = "1.15"
-etcd-client = "0.14"
+etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7", features = [
+"tls",
+"tls-roots",
+] }
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69680846a078aae670d93fb30511a72738345199" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "66eb089afa6baaa3ddfafabd0a4abbe317d012c3" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -151,7 +156,7 @@ itertools = "0.14"
 jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
 lazy_static = "1.4"
 local-ip-address = "0.6"
-loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
+loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "3b7cd33234358b18ece977bf689dc6fb760f29ab" }
 meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
 mockall = "0.13"
 moka = "0.12"
@@ -159,9 +164,9 @@ nalgebra = "0.33"
 nix = { version = "0.30.1", default-features = false, features = ["event", "fs", "process"] }
 notify = "8.0"
 num_cpus = "1.16"
-object_store_opendal = "0.50"
+object_store_opendal = "0.54"
 once_cell = "1.18"
-opentelemetry-proto = { version = "0.27", features = [
+opentelemetry-proto = { version = "0.30", features = [
 "gen-tonic",
 "metrics",
 "trace",
@@ -170,13 +175,14 @@ opentelemetry-proto = { version = "0.27", features = [
 ] }
 ordered-float = { version = "4.3", features = ["serde"] }
 parking_lot = "0.12"
-parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
+parquet = { version = "56.0", default-features = false, features = ["arrow", "async", "object_store"] }
 paste = "1.0"
 pin-project = "1.0"
 pretty_assertions = "1.4.0"
 prometheus = { version = "0.13.3", features = ["process"] }
 promql-parser = { version = "0.6", features = ["ser"] }
 prost = { version = "0.13", features = ["no-recursion-limit"] }
 prost-types = "0.13"
 raft-engine = { version = "0.4.1", default-features = false }
 rand = "0.9"
 ratelimit = "0.10"
@@ -188,7 +194,7 @@ reqwest = { version = "0.12", default-features = false, features = [
 "stream",
 "multipart",
 ] }
-rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "bc582e98918def613a882581a1b9331d186d9b2d", features = [
+rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "7b0f31ed39db049b4ee2e5f1e95b5a30be9baf76", features = [
 "transport-tls",
 ] }
 rstest = "0.25"
@@ -201,15 +207,14 @@ sea-query = "0.32"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
 shadow-rs = "1.1"
 simd-json = "0.15"
 similar-asserts = "1.6.0"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.8"
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "df6fcca80ce903f5beef7002cd2c1b062e7024f8", features = [
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "39e4fc94c3c741981f77e9d63b5ce8c02e0a27ea", features = [
 "visitor",
 "serde",
-] } # branch = "v0.54.x"
+] } # branch = "v0.55.x"
 sqlx = { version = "0.8", features = [
 "runtime-tokio-rustls",
 "mysql",
@@ -219,20 +224,20 @@ sqlx = { version = "0.8", features = [
 strum = { version = "0.27", features = ["derive"] }
 sysinfo = "0.33"
 tempfile = "3"
-tokio = { version = "1.40", features = ["full"] }
+tokio = { version = "1.47", features = ["full"] }
 tokio-postgres = "0.7"
 tokio-rustls = { version = "0.26.2", default-features = false }
 tokio-stream = "0.1"
 tokio-util = { version = "0.7", features = ["io-util", "compat"] }
 toml = "0.8.8"
-tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
+tonic = { version = "0.13", features = ["tls-ring", "gzip", "zstd"] }
 tower = "0.5"
 tower-http = "0.6"
 tracing = "0.1"
 tracing-appender = "0.2"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
 typetag = "0.2"
-uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
+uuid = { version = "1.17", features = ["serde", "v4", "fast-rng"] }
 vrl = "0.25"
 zstd = "0.13"
 # DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
@@ -291,7 +296,7 @@ mito-codec = { path = "src/mito-codec" }
 mito2 = { path = "src/mito2" }
 object-store = { path = "src/object-store" }
 operator = { path = "src/operator" }
-otel-arrow-rust = { git = "https://github.com/open-telemetry/otel-arrow", rev = "5d551412d2a12e689cde4d84c14ef29e36784e51", features = [
+otel-arrow-rust = { git = "https://github.com/GreptimeTeam/otel-arrow", rev = "2d64b7c0fa95642028a8205b36fe9ea0b023ec59", features = [
 "server",
 ] }
 partition = { path = "src/partition" }
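After a coordinated bump like this one (arrow 54.2 to 56.0, the DataFusion fork, tonic 0.13, and friends), a quick local sanity check is to compile the workspace against the updated lockfile. This is a generic suggestion, not a step shown in this compare view.

    cargo check --workspace --locked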
Makefile: 15 changes
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
 IMAGE_REGISTRY ?= docker.io
 IMAGE_NAMESPACE ?= greptime
 IMAGE_TAG ?= latest
-DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-b2377d4b-20250520045554
+DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-f55023f3-20250829091211
 BUILDX_MULTI_PLATFORM_BUILD ?= false
 BUILDX_BUILDER_NAME ?= gtbuilder
 BASE_IMAGE ?= ubuntu
@@ -22,7 +22,7 @@ SQLNESS_OPTS ?=
 ETCD_VERSION ?= v3.5.9
 ETCD_IMAGE ?= quay.io/coreos/etcd:${ETCD_VERSION}
 RETRY_COUNT ?= 3
-NEXTEST_OPTS := --retries ${RETRY_COUNT}
+NEXTEST_OPTS := --retries ${RETRY_COUNT} --features pg_kvbackend,mysql_kvbackend
 BUILD_JOBS ?= $(shell which nproc 1>/dev/null && expr $$(nproc) / 2) # If nproc is not available, we don't set the build jobs.
 ifeq ($(BUILD_JOBS), 0) # If the number of cores is less than 2, set the build jobs to 1.
 BUILD_JOBS := 1
@@ -193,6 +193,17 @@ clippy: ## Check clippy rules.
 fix-clippy: ## Fix clippy violations.
 cargo clippy --workspace --all-targets --all-features --fix
 
+.PHONY: check-udeps
+check-udeps: ## Check unused dependencies.
+cargo udeps --workspace --all-targets
+
+.PHONY: fix-udeps
+fix-udeps: ## Remove unused dependencies automatically.
+@echo "Running cargo-udeps to find unused dependencies..."
+@cargo udeps --workspace --all-targets --output json > udeps-report.json || true
+@echo "Removing unused dependencies..."
+@python3 scripts/fix-udeps.py udeps-report.json
+
 .PHONY: fmt-check
 fmt-check: ## Check code format.
 cargo fmt --all -- --check
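With the updated `NEXTEST_OPTS`, the Makefile's test invocation effectively expands to the command below, assuming the test target passes `$(NEXTEST_OPTS)` to `cargo nextest run` (that target is not shown in this hunk; `RETRY_COUNT` defaults to 3).

    cargo nextest run --retries 3 --features pg_kvbackend,mysql_kvbackend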
@@ -41,6 +41,7 @@
 | `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
 | `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
 | `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
+| `mysql.prepared_stmt_cache_size` | Integer | `10000` | Maximum entries in the MySQL prepared statement cache; default is 10,000. |
 | `mysql.tls` | -- | -- | -- |
 | `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
 | `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -147,7 +148,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
-| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -186,12 +187,13 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318/v1/traces` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
-| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.otlp_headers` | -- | -- | Additional OTLP headers, only valid when using OTLP http |
+| `logging.tracing_sample_ratio` | -- | Unset | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
 | `slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
@@ -243,11 +245,22 @@
 | `grpc.tls.cert_path` | String | Unset | Certificate file path. |
 | `grpc.tls.key_path` | String | Unset | Private key file path. |
 | `grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
+| `internal_grpc` | -- | -- | The internal gRPC server options. Internal gRPC port for nodes inside cluster to access frontend. |
+| `internal_grpc.bind_addr` | String | `127.0.0.1:4010` | The address to bind the gRPC server. |
+| `internal_grpc.server_addr` | String | `127.0.0.1:4010` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
+| `internal_grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
+| `internal_grpc.flight_compression` | String | `arrow_ipc` | Compression mode for frontend side Arrow IPC service. Available options:<br/>- `none`: disable all compression<br/>- `transport`: only enable gRPC transport compression (zstd)<br/>- `arrow_ipc`: only enable Arrow IPC compression (lz4)<br/>- `all`: enable all compression.<br/>Default to `none` |
+| `internal_grpc.tls` | -- | -- | internal gRPC server TLS options, see `mysql.tls` section. |
+| `internal_grpc.tls.mode` | String | `disable` | TLS mode. |
+| `internal_grpc.tls.cert_path` | String | Unset | Certificate file path. |
+| `internal_grpc.tls.key_path` | String | Unset | Private key file path. |
+| `internal_grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
 | `mysql` | -- | -- | MySQL server options. |
 | `mysql.enable` | Bool | `true` | Whether to enable. |
 | `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
 | `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
 | `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
+| `mysql.prepared_stmt_cache_size` | Integer | `10000` | Maximum entries in the MySQL prepared statement cache; default is 10,000. |
 | `mysql.tls` | -- | -- | -- |
 | `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
 | `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -293,12 +306,13 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318/v1/traces` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
-| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.otlp_headers` | -- | -- | Additional OTLP headers, only valid when using OTLP http |
+| `logging.tracing_sample_ratio` | -- | Unset | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `slow_query` | -- | -- | The slow query log options. |
 | `slow_query.enable` | Bool | `true` | Whether to enable slow query log. |
@@ -329,6 +343,7 @@
 | `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
 | `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store`<br/>- `mysql_store` |
 | `meta_table_name` | String | `greptime_metakv` | Table name in RDS to store metadata. Effect when using a RDS kvbackend.<br/>**Only used when backend is `postgres_store`.** |
+| `meta_schema_name` | String | `greptime_schema` | Optional PostgreSQL schema for metadata table and election table name qualification.<br/>When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),<br/>set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.<br/>GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.<br/>**Only used when backend is `postgres_store`.** |
 | `meta_election_lock_id` | Integer | `1` | Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend<br/>Only used when backend is `postgres_store`. |
 | `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
 | `use_memory_store` | Bool | `false` | Store data in memory. |
@@ -340,7 +355,7 @@
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
 | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
-| `backend_tls` | -- | -- | TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)<br/>When using PostgreSQL or MySQL as metadata store, you can configure TLS here |
+| `backend_tls` | -- | -- | TLS configuration for kv store backend (applicable for etcd, PostgreSQL, and MySQL backends)<br/>When using etcd, PostgreSQL, or MySQL as metadata store, you can configure TLS here |
 | `backend_tls.mode` | String | `prefer` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- "disable" - No TLS<br/>- "prefer" (default) - Try TLS, fallback to plain<br/>- "require" - Require TLS<br/>- "verify_ca" - Require TLS and verify CA<br/>- "verify_full" - Require TLS and verify hostname |
 | `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
 | `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
@@ -373,28 +388,33 @@
 | `datanode.client.tcp_nodelay` | Bool | `true` | `TCP_NODELAY` option for accepted connections. |
 | `wal` | -- | -- | -- |
 | `wal.provider` | String | `raft_engine` | -- |
-| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
-| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
-| `wal.auto_prune_interval` | String | `0s` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically. |
-| `wal.trigger_flush_threshold` | Integer | `0` | The threshold to trigger a flush operation of a region in automatically WAL pruning.<br/>Metasrv will send a flush request to flush the region when:<br/>`trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`<br/>where:<br/>- `prunable_entry_id` is the maximum entry id that can be pruned of the region.<br/>- `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.<br/>Set to `0` to disable the flush operation. |
-| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning. |
-| `wal.num_topics` | Integer | `64` | Number of topics. |
-| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
-| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
-| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
-| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
+| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster.<br/><br/>**It's only used when the provider is `kafka`**. |
+| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)`<br/>**It's only used when the provider is `kafka`**. |
+| `wal.auto_prune_interval` | String | `30m` | Interval of automatically WAL pruning.<br/>Set to `0s` to disable automatically WAL pruning which delete unused remote WAL entries periodically.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.flush_trigger_size` | String | `512MB` | Estimated size threshold to trigger a flush when using Kafka remote WAL.<br/>Since multiple regions may share a Kafka topic, the estimated size is calculated as:<br/> (latest_entry_id - flushed_entry_id) * avg_record_size<br/>MetaSrv triggers a flush for a region when this estimated size exceeds `flush_trigger_size`.<br/>- `latest_entry_id`: The latest entry ID in the topic.<br/>- `flushed_entry_id`: The last flushed entry ID for the region.<br/>Set to "0" to let the system decide the flush trigger size.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.checkpoint_trigger_size` | String | `128MB` | Estimated size threshold to trigger a checkpoint when using Kafka remote WAL.<br/>The estimated size is calculated as:<br/> (latest_entry_id - last_checkpoint_entry_id) * avg_record_size<br/>MetaSrv triggers a checkpoint for a region when this estimated size exceeds `checkpoint_trigger_size`.<br/>Set to "0" to let the system decide the checkpoint trigger size.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.auto_prune_parallelism` | Integer | `10` | Concurrent task limit for automatically WAL pruning.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.num_topics` | Integer | `64` | Number of topics used for remote WAL.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
+| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.create_topic_timeout` | String | `30s` | The timeout for creating a Kafka topic.<br/>**It's only used when the provider is `kafka`**. |
 | `event_recorder` | -- | -- | Configuration options for the event recorder. |
 | `event_recorder.ttl` | String | `90d` | TTL for the events table that will be used to store the events. Default is `90d`. |
+| `stats_persistence` | -- | -- | Configuration options for the stats persistence. |
+| `stats_persistence.ttl` | String | `0s` | TTL for the stats table that will be used to store the stats.<br/>Set to `0s` to disable stats persistence.<br/>Default is `0s`.<br/>If you want to enable stats persistence, set the TTL to a value greater than 0.<br/>It is recommended to set a small value, e.g., `3h`. |
+| `stats_persistence.interval` | String | `10m` | The interval to persist the stats. Default is `10m`.<br/>The minimum value is `10m`, if the value is less than `10m`, it will be overridden to `10m`. |
 | `logging` | -- | -- | The logging options. |
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318/v1/traces` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
-| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.otlp_headers` | -- | -- | Additional OTLP headers, only valid when using OTLP http |
+| `logging.tracing_sample_ratio` | -- | Unset | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The metasrv can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
@@ -501,6 +521,8 @@
 | `region_engine.mito.worker_channel_size` | Integer | `128` | Request channel size of each worker. |
 | `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
 | `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
+| `region_engine.mito.experimental_manifest_keep_removed_file_count` | Integer | `256` | Number of removed files to keep in manifest's `removed_files` field before also<br/>remove them from `removed_files`. Mostly for debugging purpose.<br/>If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files<br/>from `removed_files` field. |
+| `region_engine.mito.experimental_manifest_keep_removed_file_ttl` | String | `1h` | How long to keep removed files in the `removed_files` field of manifest<br/>after they are removed from manifest.<br/>files will only be removed from `removed_files` field<br/>if both `keep_removed_file_count` and `keep_removed_file_ttl` is reached. |
 | `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
 | `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
 | `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
@@ -518,7 +540,7 @@
 | `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
-| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
 | `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
 | `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
 | `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -557,12 +579,13 @@
 | `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
 | `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
 | `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4318/v1/traces` | The OTLP tracing endpoint. |
 | `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
 | `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
 | `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
 | `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
-| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
+| `logging.otlp_headers` | -- | -- | Additional OTLP headers, only valid when using OTLP http |
+| `logging.tracing_sample_ratio` | -- | Unset | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
 | `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
 | `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
 | `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
@@ -594,6 +617,11 @@
 | `flow.batching_mode.experimental_max_filter_num_per_query` | Integer | `20` | Maximum number of filters allowed in a single query |
 | `flow.batching_mode.experimental_time_window_merge_threshold` | Integer | `3` | Time window merge distance |
 | `flow.batching_mode.read_preference` | String | `Leader` | Read preference of the Frontend client. |
+| `flow.batching_mode.frontend_tls` | -- | -- | -- |
+| `flow.batching_mode.frontend_tls.enabled` | Bool | `false` | Whether to enable TLS for client. |
+| `flow.batching_mode.frontend_tls.server_ca_cert_path` | String | Unset | Server Certificate file path. |
|
||||
| `flow.batching_mode.frontend_tls.client_cert_path` | String | Unset | Client Certificate file path. |
|
||||
| `flow.batching_mode.frontend_tls.client_key_path` | String | Unset | Client Private key file path. |
|
||||
| `grpc` | -- | -- | The gRPC server options. |
|
||||
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
|
||||
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
|
||||
@@ -621,12 +649,13 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318/v1/traces` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.otlp_headers` | -- | -- | Additional OTLP headers, only valid when using OTLP http |
|
||||
| `logging.tracing_sample_ratio` | -- | Unset | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
|
||||
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
|
||||
|
||||
@@ -409,6 +409,19 @@ worker_request_batch_size = 64
|
||||
## Number of meta action updated to trigger a new checkpoint for the manifest.
|
||||
manifest_checkpoint_distance = 10
|
||||
|
||||
|
||||
## Number of removed files to keep in the manifest's `removed_files` field before also
|
||||
## removing them from `removed_files`. Mostly for debugging purposes.
|
||||
## If set to 0, it will only use `keep_removed_file_ttl` to decide when to remove files
|
||||
## from `removed_files` field.
|
||||
experimental_manifest_keep_removed_file_count = 256
|
||||
|
||||
## How long to keep removed files in the `removed_files` field of manifest
|
||||
## after they are removed from manifest.
|
||||
## Files will only be removed from the `removed_files` field
|
||||
## if both `keep_removed_file_count` and `keep_removed_file_ttl` are reached.
|
||||
experimental_manifest_keep_removed_file_ttl = "1h"
|
||||
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
|
||||
@@ -475,7 +488,7 @@ sst_write_buffer_size = "8MB"
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
## Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files = 128
|
||||
max_concurrent_scan_files = 384
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
@@ -632,7 +645,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
otlp_endpoint = "http://localhost:4318/v1/traces"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -646,6 +659,13 @@ max_log_files = 720
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## Additional OTLP headers, only valid when using OTLP http
|
||||
[logging.otlp_headers]
|
||||
## @toml2docs:none-default
|
||||
#Authorization = "Bearer my-token"
|
||||
## @toml2docs:none-default
|
||||
#Database = "My database"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -32,6 +32,18 @@ node_id = 14
|
||||
#+experimental_time_window_merge_threshold=3
|
||||
## Read preference of the Frontend client.
|
||||
#+read_preference="Leader"
|
||||
[flow.batching_mode.frontend_tls]
|
||||
## Whether to enable TLS for client.
|
||||
#+enabled=false
|
||||
## Server Certificate file path.
|
||||
## @toml2docs:none-default
|
||||
#+server_ca_cert_path=""
|
||||
## Client Certificate file path.
|
||||
## @toml2docs:none-default
|
||||
#+client_cert_path=""
|
||||
## Client Private key file path.
|
||||
## @toml2docs:none-default
|
||||
#+client_key_path=""
|
||||
|
||||
## The gRPC server options.
|
||||
[grpc]
|
||||
@@ -108,7 +120,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
otlp_endpoint = "http://localhost:4318/v1/traces"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -122,6 +134,13 @@ max_log_files = 720
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## Additional OTLP headers, only valid when using OTLP http
|
||||
[logging.otlp_headers]
|
||||
## @toml2docs:none-default
|
||||
#Authorization = "Bearer my-token"
|
||||
## @toml2docs:none-default
|
||||
#Database = "My database"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -79,6 +79,42 @@ key_path = ""
|
||||
## For now, gRPC tls config does not support auto reload.
|
||||
watch = false
|
||||
|
||||
## The internal gRPC server options. Internal gRPC port for nodes inside the cluster to access the frontend.
|
||||
[internal_grpc]
|
||||
## The address to bind the gRPC server.
|
||||
bind_addr = "127.0.0.1:4010"
|
||||
## The address advertised to the metasrv, and used for connections from outside the host.
|
||||
## If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
|
||||
server_addr = "127.0.0.1:4010"
|
||||
## The number of server worker threads.
|
||||
runtime_size = 8
|
||||
## Compression mode for frontend side Arrow IPC service. Available options:
|
||||
## - `none`: disable all compression
|
||||
## - `transport`: only enable gRPC transport compression (zstd)
|
||||
## - `arrow_ipc`: only enable Arrow IPC compression (lz4)
|
||||
## - `all`: enable all compression.
|
||||
## Default to `none`
|
||||
flight_compression = "arrow_ipc"
|
||||
|
||||
## internal gRPC server TLS options, see `mysql.tls` section.
|
||||
[internal_grpc.tls]
|
||||
## TLS mode.
|
||||
mode = "disable"
|
||||
|
||||
## Certificate file path.
|
||||
## @toml2docs:none-default
|
||||
cert_path = ""
|
||||
|
||||
## Private key file path.
|
||||
## @toml2docs:none-default
|
||||
key_path = ""
|
||||
|
||||
## Watch for Certificate and key file change and auto reload.
|
||||
## For now, gRPC tls config does not support auto reload.
|
||||
watch = false
|
||||
|
||||
|
||||
## MySQL server options.
|
||||
[mysql]
|
||||
## Whether to enable.
|
||||
@@ -90,6 +126,8 @@ runtime_size = 2
|
||||
## Server-side keep-alive time.
|
||||
## Set to 0 (default) to disable.
|
||||
keep_alive = "0s"
|
||||
## Maximum entries in the MySQL prepared statement cache; default is 10,000.
|
||||
prepared_stmt_cache_size = 10000
|
||||
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
@@ -221,7 +259,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
otlp_endpoint = "http://localhost:4318/v1/traces"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -235,6 +273,13 @@ max_log_files = 720
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## Additional OTLP headers, only valid when using OTLP http
|
||||
[logging.otlp_headers]
|
||||
## @toml2docs:none-default
|
||||
#Authorization = "Bearer my-token"
|
||||
## @toml2docs:none-default
|
||||
#Database = "My database"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -23,6 +23,14 @@ backend = "etcd_store"
|
||||
## **Only used when backend is `postgres_store`.**
|
||||
meta_table_name = "greptime_metakv"
|
||||
|
||||
## Optional PostgreSQL schema for metadata table and election table name qualification.
|
||||
## When PostgreSQL public schema is not writable (e.g., PostgreSQL 15+ with restricted public),
|
||||
## set this to a writable schema. GreptimeDB will use `meta_schema_name`.`meta_table_name`.
|
||||
## GreptimeDB will NOT create the schema automatically; please ensure it exists or the user has permission.
|
||||
## **Only used when backend is `postgres_store`.**
|
||||
|
||||
meta_schema_name = "greptime_schema"
|
||||
|
||||
## Advisory lock id in PostgreSQL for election. Effect when using PostgreSQL as kvbackend
|
||||
## Only used when backend is `postgres_store`.
|
||||
meta_election_lock_id = 1
|
||||
@@ -65,8 +73,8 @@ node_max_idle_time = "24hours"
|
||||
## The number of threads to execute the runtime for global write operations.
|
||||
#+ compact_rt_size = 4
|
||||
|
||||
## TLS configuration for kv store backend (only applicable for PostgreSQL/MySQL backends)
|
||||
## When using PostgreSQL or MySQL as metadata store, you can configure TLS here
|
||||
## TLS configuration for kv store backend (applicable for etcd, PostgreSQL, and MySQL backends)
|
||||
## When using etcd, PostgreSQL, or MySQL as metadata store, you can configure TLS here
|
||||
[backend_tls]
|
||||
## TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
|
||||
## - "disable" - No TLS
|
||||
@@ -176,50 +184,69 @@ tcp_nodelay = true
|
||||
# - `kafka`: metasrv **have to be** configured with kafka wal config when using kafka wal provider in datanode.
|
||||
provider = "raft_engine"
|
||||
|
||||
# Kafka wal config.
|
||||
|
||||
## The broker endpoints of the Kafka cluster.
|
||||
##
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
broker_endpoints = ["127.0.0.1:9092"]
|
||||
|
||||
## Automatically create topics for WAL.
|
||||
## Set to `true` to automatically create topics for WAL.
|
||||
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
auto_create_topics = true
|
||||
|
||||
## Interval of automatic WAL pruning.
|
||||
## Set to `0s` to disable automatic WAL pruning, which periodically deletes unused remote WAL entries.
|
||||
auto_prune_interval = "0s"
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
auto_prune_interval = "30m"
|
||||
|
||||
## The threshold to trigger a flush operation of a region in automatically WAL pruning.
|
||||
## Metasrv will send a flush request to flush the region when:
|
||||
## `trigger_flush_threshold` + `prunable_entry_id` < `max_prunable_entry_id`
|
||||
## where:
|
||||
## - `prunable_entry_id` is the maximum entry id that can be pruned of the region.
|
||||
## - `max_prunable_entry_id` is the maximum prunable entry id among all regions in the same topic.
|
||||
## Set to `0` to disable the flush operation.
|
||||
trigger_flush_threshold = 0
|
||||
|
||||
## Estimated size threshold to trigger a flush when using Kafka remote WAL.
|
||||
## Since multiple regions may share a Kafka topic, the estimated size is calculated as:
|
||||
## (latest_entry_id - flushed_entry_id) * avg_record_size
|
||||
## MetaSrv triggers a flush for a region when this estimated size exceeds `flush_trigger_size`.
|
||||
## - `latest_entry_id`: The latest entry ID in the topic.
|
||||
## - `flushed_entry_id`: The last flushed entry ID for the region.
|
||||
## Set to "0" to let the system decide the flush trigger size.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
flush_trigger_size = "512MB"
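## For example (illustrative numbers, not taken from the source): with an average record size of
## roughly 1KB and 600,000 unflushed entries for a region, the estimated replay size is about
## 600MB, which exceeds the 512MB default above and would trigger a flush for that region.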
|
||||
|
||||
## Estimated size threshold to trigger a checkpoint when using Kafka remote WAL.
|
||||
## The estimated size is calculated as:
|
||||
## (latest_entry_id - last_checkpoint_entry_id) * avg_record_size
|
||||
## MetaSrv triggers a checkpoint for a region when this estimated size exceeds `checkpoint_trigger_size`.
|
||||
## Set to "0" to let the system decide the checkpoint trigger size.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
checkpoint_trigger_size = "128MB"
|
||||
|
||||
## Concurrent task limit for automatic WAL pruning.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
auto_prune_parallelism = 10
|
||||
|
||||
## Number of topics.
|
||||
## Number of topics used for remote WAL.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
num_topics = 64
|
||||
|
||||
## Topic selector type.
|
||||
## Available selector types:
|
||||
## - `round_robin` (default)
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
selector_type = "round_robin"
|
||||
|
||||
|
||||
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
|
||||
## Only accepts strings that match the following regular expression pattern:
|
||||
## [a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*
|
||||
## e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
topic_name_prefix = "greptimedb_wal_topic"
|
||||
|
||||
## Expected number of replicas of each partition.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
replication_factor = 1
|
||||
|
||||
## Above which a topic creation operation will be cancelled.
|
||||
## The timeout for creating a Kafka topic.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
create_topic_timeout = "30s"
|
||||
|
||||
# The Kafka SASL configuration.
|
||||
@@ -245,6 +272,18 @@ create_topic_timeout = "30s"
|
||||
## TTL for the events table that will be used to store the events. Default is `90d`.
|
||||
ttl = "90d"
|
||||
|
||||
## Configuration options for the stats persistence.
|
||||
[stats_persistence]
|
||||
## TTL for the stats table that will be used to store the stats.
|
||||
## Set to `0s` to disable stats persistence.
|
||||
## Default is `0s`.
|
||||
## If you want to enable stats persistence, set the TTL to a value greater than 0.
|
||||
## It is recommended to set a small value, e.g., `3h`.
|
||||
ttl = "0s"
|
||||
## The interval to persist the stats. Default is `10m`.
|
||||
## The minimum value is `10m`; if the value is less than `10m`, it will be overridden to `10m`.
|
||||
interval = "10m"
|
||||
|
||||
## The logging options.
|
||||
[logging]
|
||||
## The directory to store the log files. If set to empty, logs will not be written to files.
|
||||
@@ -258,7 +297,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
otlp_endpoint = "http://localhost:4318/v1/traces"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -272,6 +311,14 @@ max_log_files = 720
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## Additional OTLP headers, only valid when using OTLP http
|
||||
[logging.otlp_headers]
|
||||
## @toml2docs:none-default
|
||||
#Authorization = "Bearer my-token"
|
||||
## @toml2docs:none-default
|
||||
#Database = "My database"
|
||||
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -85,7 +85,8 @@ runtime_size = 2
|
||||
## Server-side keep-alive time.
|
||||
## Set to 0 (default) to disable.
|
||||
keep_alive = "0s"
|
||||
|
||||
## Maximum entries in the MySQL prepared statement cache; default is 10,000.
|
||||
prepared_stmt_cache_size = 10000
|
||||
# MySQL server TLS options.
|
||||
[mysql.tls]
|
||||
|
||||
@@ -566,7 +567,7 @@ sst_write_buffer_size = "8MB"
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
## Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files = 128
|
||||
max_concurrent_scan_files = 384
|
||||
|
||||
## Whether to allow stale WAL entries read during replay.
|
||||
allow_stale_entries = false
|
||||
@@ -723,7 +724,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
otlp_endpoint = "http://localhost:4318/v1/traces"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -737,6 +738,13 @@ max_log_files = 720
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## Additional OTLP headers, only valid when using OTLP http
|
||||
[logging.otlp_headers]
|
||||
## @toml2docs:none-default
|
||||
#Authorization = "Bearer my-token"
|
||||
## @toml2docs:none-default
|
||||
#Database = "My database"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -13,7 +13,8 @@ RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
unzip \
|
||||
build-essential \
|
||||
pkg-config
|
||||
pkg-config \
|
||||
openssh-client
|
||||
|
||||
# Install protoc
|
||||
ARG PROTOBUF_VERSION=29.3
|
||||
|
||||
@@ -19,7 +19,7 @@ ARG PROTOBUF_VERSION=29.3
|
||||
|
||||
RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
|
||||
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d protoc3;
|
||||
|
||||
|
||||
RUN mv protoc3/bin/* /usr/local/bin/
|
||||
RUN mv protoc3/include/* /usr/local/include/
|
||||
|
||||
|
||||
@@ -34,6 +34,48 @@ services:
|
||||
networks:
|
||||
- greptimedb
|
||||
|
||||
etcd-tls:
|
||||
<<: *etcd_common_settings
|
||||
container_name: etcd-tls
|
||||
ports:
|
||||
- 2378:2378
|
||||
- 2381:2381
|
||||
command:
|
||||
- --name=etcd-tls
|
||||
- --data-dir=/var/lib/etcd
|
||||
- --initial-advertise-peer-urls=https://etcd-tls:2381
|
||||
- --listen-peer-urls=https://0.0.0.0:2381
|
||||
- --listen-client-urls=https://0.0.0.0:2378
|
||||
- --advertise-client-urls=https://etcd-tls:2378
|
||||
- --heartbeat-interval=250
|
||||
- --election-timeout=1250
|
||||
- --initial-cluster=etcd-tls=https://etcd-tls:2381
|
||||
- --initial-cluster-state=new
|
||||
- --initial-cluster-token=etcd-tls-cluster
|
||||
- --cert-file=/certs/server.crt
|
||||
- --key-file=/certs/server-key.pem
|
||||
- --peer-cert-file=/certs/server.crt
|
||||
- --peer-key-file=/certs/server-key.pem
|
||||
- --trusted-ca-file=/certs/ca.crt
|
||||
- --peer-trusted-ca-file=/certs/ca.crt
|
||||
- --client-cert-auth
|
||||
- --peer-client-cert-auth
|
||||
volumes:
|
||||
- ./greptimedb-cluster-docker-compose/etcd-tls:/var/lib/etcd
|
||||
- ./greptimedb-cluster-docker-compose/certs:/certs:ro
|
||||
environment:
|
||||
- ETCDCTL_API=3
|
||||
- ETCDCTL_CACERT=/certs/ca.crt
|
||||
- ETCDCTL_CERT=/certs/server.crt
|
||||
- ETCDCTL_KEY=/certs/server-key.pem
|
||||
healthcheck:
|
||||
test: [ "CMD", "etcdctl", "--endpoints=https://etcd-tls:2378", "--cacert=/certs/ca.crt", "--cert=/certs/server.crt", "--key=/certs/server-key.pem", "endpoint", "health" ]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
networks:
|
||||
- greptimedb
|
||||
|
||||
metasrv:
|
||||
image: *greptimedb_image
|
||||
container_name: metasrv
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
Currently, our query engine is based on DataFusion, so all aggregate functions are executed by DataFusion through its UDAF interface. You can find DataFusion's UDAF example [here](https://github.com/apache/datafusion/tree/main/datafusion-examples/examples/simple_udaf.rs). Basically, we provide the same way as DataFusion to write aggregate functions: both are centered on a struct called "Accumulator" that accumulates state along the way during aggregation.
|
||||
|
||||
However, DataFusion's UDAF implementation has a significant restriction: it requires the user to provide a concrete "Accumulator". Take the `Median` aggregate function for example: to aggregate a `u32` column, you have to write a `MedianU32` and use `SELECT MEDIANU32(x)` in SQL, and `MedianU32` cannot be used to aggregate an `i32` column. Alternatively, you can use a special type that can hold all kinds of data (like our `Value` enum or Arrow's `ScalarValue`) and `match` your way through the aggregate calculations. That works, though it is rather tedious. (It also seems to be DataFusion's preferred way to write a UDAF.)
|
||||
|
||||
So is there a way to make an aggregate function that automatically matches the input data's type? For example, a `Median` aggregator that works on both `u32` and `i32` columns? The answer is yes, as long as we can bypass DataFusion's restriction: DataFusion simply doesn't pass the input data's type when creating an Accumulator.
|
||||
|
||||
> There's an example in `my_sum_udaf_example.rs`; take that as a quick start.
|
||||
|
||||
# 1. Impl `AggregateFunctionCreator` trait for your accumulator creator.
|
||||
|
||||
You must first define a struct that will be used to create your accumulator. For example,
|
||||
|
||||
```Rust
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, AggrFuncTypeStore)]
|
||||
struct MySumAccumulatorCreator {}
|
||||
```
|
||||
|
||||
Attribute macro `#[as_aggr_func_creator]` and derive macro `#[derive(Debug, AggrFuncTypeStore)]` must both be annotated on the struct. Together they provide storage for the aggregate function's input data types, which is needed to create a generic accumulator later.
|
||||
|
||||
> Note that the `as_aggr_func_creator` macro adds fields to the struct, so the struct cannot be defined as a fieldless struct like `struct Foo;`, nor as a newtype like `struct Foo(bar)`.
|
||||
|
||||
Then implement the `AggregateFunctionCreator` trait on it. The definition of the trait is:
|
||||
|
||||
```Rust
|
||||
pub trait AggregateFunctionCreator: Send + Sync + Debug {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction;
|
||||
fn output_type(&self) -> ConcreteDataType;
|
||||
fn state_types(&self) -> Vec<ConcreteDataType>;
|
||||
}
|
||||
```
|
||||
|
||||
You can use the input data's types in the methods that return the output type and state types (just invoke `input_types()`).
|
||||
|
||||
The output type is the data type of the aggregate function's result. For example, the `SUM` aggregate function's output type is `u64` for a `u32` column. The state types are the types of the accumulator's internal states. Take the `AVG` aggregate function on an `i32` column as an example: its state types are `i64` (for the sum) and `u64` (for the count).
|
||||
|
||||
The `creator` function is where you define how an accumulator (the one that will be used in DataFusion) is created. You define "how" to create the accumulator (instead of "what" to create), using the input data's types as arguments. With the input data types known, you can create the accumulator generically.
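To make that concrete, here is a small self-contained sketch of the "pick the accumulator from the input type" idea. It is plain Rust with illustrative stand-ins (`DataType`, `Acc`, `SumAcc`, and `make_accumulator` are not GreptimeDB or DataFusion types), not the actual API:

```Rust
use std::fmt::Debug;

// Stand-in for the input data type information the creator receives.
#[derive(Debug, Clone, Copy)]
enum DataType {
    UInt32,
    Int32,
}

// Stand-in for an accumulator: fold in values, then evaluate a result.
trait Acc: Debug {
    fn update(&mut self, v: i64);
    fn evaluate(&self) -> i64;
}

#[derive(Debug, Default)]
struct SumAcc {
    sum: i64,
}

impl Acc for SumAcc {
    fn update(&mut self, v: i64) {
        self.sum += v;
    }
    fn evaluate(&self) -> i64 {
        self.sum
    }
}

// The "creator" idea: given the input type known at planning time, build an
// accumulator that fits it, instead of hard-coding a `SumAccU32`, `SumAccI32`, ...
fn make_accumulator(input: DataType) -> Box<dyn Acc> {
    match input {
        DataType::UInt32 | DataType::Int32 => Box::new(SumAcc::default()),
    }
}

fn main() {
    let mut acc = make_accumulator(DataType::Int32);
    for v in [1, 2, 3] {
        acc.update(v);
    }
    assert_eq!(acc.evaluate(), 6);
}
```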
|
||||
|
||||
# 2. Impl `Accumulator` trait for your accumulator.
|
||||
|
||||
The accumulator is where you store the aggregate calculation states and evaluate a result. You must implement the `Accumulator` trait for it. The trait's definition is:
|
||||
|
||||
```Rust
|
||||
pub trait Accumulator: Send + Sync + Debug {
|
||||
fn state(&self) -> Result<Vec<Value>>;
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()>;
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()>;
|
||||
fn evaluate(&self) -> Result<Value>;
|
||||
}
|
||||
```
|
||||
|
||||
DataFusion basically executes an aggregation like this:
|
||||
|
||||
1. Partition all input data for the aggregation and create an accumulator for each partition.
|
||||
2. Call `update_batch` on each accumulator with its partition's data, letting you update your aggregate calculation.
|
||||
3. Call `state` to get each accumulator's internal state, the intermediate calculation result.
|
||||
4. Call `merge_batch` to merge all accumulators' internal states into one.
|
||||
5. Execute `evaluate` on the chosen one to get the final calculation result.
|
||||
|
||||
Once you know the meaning of each method, you can easily write your accumulator. You can refer to the `Median` accumulator or the `SUM` accumulator defined in `my_sum_udaf_example.rs` for more details.
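For a toy walk-through of those five steps, here is a self-contained sketch in plain Rust, with partial sums standing in for accumulator state (it is not the real `Accumulator` trait):

```Rust
fn main() {
    // 1. Partition the input and create one "accumulator" (a partial sum) per partition.
    let partitions: Vec<Vec<u64>> = vec![vec![1, 2, 3], vec![10, 20]];

    // 2. `update_batch`: each accumulator folds in its own partition's values.
    // 3. `state`: each accumulator exposes its intermediate state (here, the partial sum).
    let states: Vec<u64> = partitions.iter().map(|p| p.iter().sum()).collect();

    // 4. `merge_batch`: one accumulator merges all intermediate states.
    let merged: u64 = states.iter().sum();

    // 5. `evaluate`: the merged accumulator produces the final result.
    assert_eq!(merged, 36);
}
```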
|
||||
|
||||
# 3. Register your aggregate function to our query engine.
|
||||
|
||||
You can call the `register_aggregate_function` method of the query engine to register your aggregate function. To do that, you have to create an instance of the struct `AggregateFunctionMeta`. The struct has three fields. The first is your aggregate function's name. The function name is case-sensitive due to DataFusion's restriction, so we strongly recommend using a lowercase name. If you have to use an uppercase name, wrap your aggregate function in quotation marks. For example, if you define an aggregate function named "my_aggr", you can use "`SELECT MY_AGGR(x)`"; if you define "my_AGGR", you have to use "`SELECT "my_AGGR"(x)`".
|
||||
|
||||
The second field is `arg_counts`, the number of arguments. Take the `percentile` accumulator, which calculates the p-th percentile of a column: it needs both the column values and the value of p as input, so its argument count is two.
|
||||
|
||||
The third field is a function that creates the accumulator creator you defined in step 1 above. Creating a creator is a bit convoluted, but it is how we make DataFusion use a newly created aggregate function each time it executes a SQL query, preventing the stored input types from affecting each other. A good starting point for the details is the `get_aggregate_meta` method of our `DfContextProviderAdapter` struct.
|
||||
|
||||
# (Optional) 4. Make your aggregate function automatically registered.
|
||||
|
||||
If you've written a great aggregate function and want to let everyone use it, you can make it register automatically in our query engine at start time. It's quick and simple: just refer to the `AggregateFunctions::register` function in `common/function/src/scalars/aggregate/mod.rs`.
|
||||
112
docs/rfcs/2025-08-16-async-index-build.md
Normal file
@@ -0,0 +1,112 @@
|
||||
---
|
||||
Feature Name: Async Index Build
|
||||
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/6756
|
||||
Date: 2025-08-16
|
||||
Author: "SNC123 <sinhco@outlook.com>"
|
||||
---
|
||||
|
||||
# Summary
|
||||
This RFC proposes an asynchronous index build mechanism in the database, with a configuration option to choose between synchronous and asynchronous modes, aiming to improve flexibility and adapt to different workload requirements.
|
||||
|
||||
# Motivation
|
||||
Currently, index creation is performed synchronously, which may lead to prolonged write suspension and impact business continuity. As data volume grows, the time required for index building increases significantly. An asynchronous solution is urgently needed to enhance user experience and system throughput.
|
||||
|
||||
# Details
|
||||
|
||||
## Overview
|
||||
|
||||
The following table highlights the differences between the async and sync index approaches:
|
||||
|
||||
| Approach | Trigger | Data Source | Additional Index Metadata Installation | Fine-grained `FileMeta` Index |
|
||||
| :--- | :--- | :--- | :--- | :--- |
|
||||
| Sync Index | On `write_sst` | Memory (on flush) / Disk (on compact) | Not required (already installed synchronously) | Not required |
|
||||
| Async Index | 4 trigger types | Disk | Required | Required |
|
||||
|
||||
The index build mode (synchronous or asynchronous) can be selected via configuration file.
|
||||
|
||||
### Four Trigger Types
|
||||
|
||||
This RFC introduces four `IndexBuildType`s to trigger index building:
|
||||
|
||||
- **Manual Rebuild**: Triggered by the user via `ADMIN build_index("table_name")`, for scenarios like recovering from failed builds or migrating data. SST files whose `ColumnIndexMetadata` (see below) is already consistent with the `RegionMetadata` will be skipped.
|
||||
- **Schema Change**: Automatically triggered when the schema of an indexed column is altered.
|
||||
- **Flush**: Automatically builds indexes for new SST files created by a flush.
|
||||
- **Compact**: Automatically builds indexes for new SST files created by a compaction.
|
||||
|
||||
### Additional Index Metadata Installation
|
||||
|
||||
Previously, index information in the in-memory `FileMeta` was updated synchronously. The async approach requires an explicit installation step.
|
||||
|
||||
A race condition can occur when compaction and index building run concurrently, leading to:
|
||||
1. Building an index for a file that is about to be deleted by compaction.
|
||||
2. Creating an unnecessary index file and an incorrect manifest record.
|
||||
3. On restart, replaying the manifest could load metadata for a non-existent file.
|
||||
|
||||
To prevent this, the system checks if a file's `FileMeta` is in a `compacting` state before updating the manifest. If it is, the installation is aborted.
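A minimal sketch of that guard, using illustrative stand-in types rather than the actual GreptimeDB structs, could look like this:

```rust
use std::collections::HashMap;

// Stand-in for the per-file metadata tracked in the current region version.
struct FileMeta {
    compacting: bool,
}

/// Install the freshly built index only if the file is still part of the current
/// version and is not being rewritten by a concurrent compaction.
fn should_install(version_files: &HashMap<u64, FileMeta>, file_id: u64) -> bool {
    match version_files.get(&file_id) {
        Some(meta) => !meta.compacting, // present and safe to update the manifest
        None => false,                  // already removed by compaction; abort install
    }
}

fn main() {
    let mut files = HashMap::new();
    files.insert(1, FileMeta { compacting: false });
    files.insert(2, FileMeta { compacting: true });
    assert!(should_install(&files, 1));
    assert!(!should_install(&files, 2)); // being compacted: abort
    assert!(!should_install(&files, 3)); // already deleted: abort
}
```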
|
||||
|
||||
### Fine-grained `FileMeta` Index
|
||||
|
||||
The original `FileMeta` only stored file-level index information. However, manual rebuilds require column-level details to identify files inconsistent with the current DDL. Therefore, the `indexes` field in `FileMeta` is updated as follows:
|
||||
```rust
|
||||
struct FileMeta {
|
||||
...
|
||||
// From file-level:
|
||||
// available_indexes: SmallVec<[IndexType; 4]>
|
||||
// To column-level:
|
||||
indexes: Vec<ColumnIndexMetadata>,
|
||||
...
|
||||
}
|
||||
pub struct ColumnIndexMetadata {
|
||||
pub column_id: ColumnId,
|
||||
pub created_indexes: IndexTypes,
|
||||
}
|
||||
```
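For the manual-rebuild skip check mentioned earlier, a rough sketch of the comparison (again with illustrative stand-ins; `IndexTypes` is modeled here as a plain set of index names) might be:

```rust
use std::collections::{HashMap, HashSet};

type ColumnId = u32;
type IndexTypes = HashSet<&'static str>;

// Column-level index metadata recorded per file, as in the new `indexes` field.
struct ColumnIndexMetadata {
    column_id: ColumnId,
    created_indexes: IndexTypes,
}

/// A file can be skipped by `ADMIN build_index(...)` when every index expected by
/// the region metadata is already recorded for the corresponding column.
fn is_up_to_date(
    file_indexes: &[ColumnIndexMetadata],
    expected: &HashMap<ColumnId, IndexTypes>,
) -> bool {
    expected.iter().all(|(column_id, wanted)| {
        file_indexes
            .iter()
            .find(|m| m.column_id == *column_id)
            .map(|m| wanted.is_subset(&m.created_indexes))
            .unwrap_or(false)
    })
}

fn main() {
    let file = vec![ColumnIndexMetadata {
        column_id: 1,
        created_indexes: HashSet::from(["inverted"]),
    }];
    let expected = HashMap::from([(1, HashSet::from(["inverted"]))]);
    assert!(is_up_to_date(&file, &expected));
}
```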
|
||||
|
||||
## Process
|
||||
|
||||
The index building process is similar to a flush and is illustrated below:
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
Region0->>Region0: Triggered by one of 4 conditions, targets specific files
|
||||
loop For each target file
|
||||
Region0->>IndexBuildScheduler: Submits an index build task
|
||||
end
|
||||
IndexBuildScheduler->>IndexBuildTask: Executes the task
|
||||
IndexBuildTask->>Storage Interfaces: Reads SST data from disk
|
||||
IndexBuildTask->>IndexBuildTask: Builds the index file
|
||||
alt Index file size > 0
|
||||
IndexBuildTask->>Region0: Sends IndexBuildFinished notification
|
||||
end
|
||||
alt File exists in Version and is not compacting
|
||||
Region0->>Storage Interfaces: Updates manifest and Version
|
||||
end
|
||||
```
|
||||
|
||||
### Task Triggering and Scheduling
|
||||
|
||||
The process starts with one of the four `IndexBuildType` triggers. In `handle_rebuild_index`, the `RegionWorkerLoop` identifies target SSTs from the request or the current region version. It then creates an `IndexBuildTask` for each file and submits it to the `index_build_scheduler`.
|
||||
|
||||
Similar to Flush and Compact operations, index build tasks are ultimately dispatched to the LocalScheduler, and their resource usage can be adjusted via configuration files. Since asynchronous index tasks are both memory- and IO-intensive but have lower priority, it is recommended to allocate fewer resources to them than to compaction and flush tasks, for example limiting them to 1/8 of the CPU cores.
|
||||
|
||||
### Index Building and Notification
|
||||
|
||||
The scheduled `IndexBuildTask` executes its `index_build` method. It uses an `indexer_builder` to create an `Indexer` that reads SST data and builds the index. If a new index file is created (`IndexOutput.file_size > 0`), the task sends an `IndexBuildFinished` notification back to the `RegionWorkerLoop`.
|
||||
|
||||
### Index Metadata Installation
|
||||
|
||||
Upon receiving the `IndexBuildFinished` notification in `handle_index_build_finished`, the `RegionWorkerLoop` verifies that the file still exists in the current `version` and is not being compacted. If the check passes, it calls `manifest_ctx.update_manifest` to apply a `RegionEdit` with the new index information, completing the installation.
|
||||
|
||||
# Drawbacks
|
||||
|
||||
Asynchronous index building may consume extra system resources, potentially affecting overall performance during peak periods.
|
||||
|
||||
There may be a delay before the new index becomes available for queries, which could impact certain use cases.
|
||||
|
||||
# Unresolved Questions and Future Work
|
||||
|
||||
**Resource Management and Throttling**: The resource consumption (CPU, I/O) of background index building can be managed and limited to some extent by configuring a dedicated background thread pool. However, this approach cannot fully eliminate resource contention, especially under heavy workloads or when I/O is highly competitive. Additional throttling mechanisms or dynamic prioritization may still be necessary to avoid impacting foreground operations.
|
||||
|
||||
# Alternatives
|
||||
|
||||
Instead of being triggered by events like Flush or Compact, index building could be performed in batches during scheduled maintenance windows. This offers predictable resource usage but delays index availability.
|
||||
@@ -15,8 +15,6 @@
|
||||
let
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
buildInputs = with pkgs; [
|
||||
libgit2
|
||||
libz
|
||||
];
|
||||
lib = nixpkgs.lib;
|
||||
rustToolchain = fenix.packages.${system}.fromToolchainName {
|
||||
|
||||
File diff suppressed because it is too large
@@ -87,6 +87,13 @@
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Remote WAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
|
||||
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
|
||||
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
|
||||
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
|
||||
# Metasrv
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
@@ -103,6 +110,8 @@
|
||||
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
|
||||
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
|
||||
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
|
||||
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
|
||||
@@ -802,6 +802,48 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
|
||||
- title: Remote WAL
|
||||
panels:
|
||||
- title: Triggered region flush total
|
||||
type: timeseries
|
||||
description: Triggered region flush total
|
||||
unit: none
|
||||
queries:
|
||||
- expr: meta_triggered_region_flush_total
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Triggered region checkpoint total
|
||||
type: timeseries
|
||||
description: Triggered region checkpoint total
|
||||
unit: none
|
||||
queries:
|
||||
- expr: meta_triggered_region_checkpoint_total
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Topic estimated replay size
|
||||
type: timeseries
|
||||
description: Topic estimated max replay size
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: meta_topic_estimated_replay_size
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Kafka logstore's bytes traffic
|
||||
type: timeseries
|
||||
description: Kafka logstore's bytes traffic
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{logstore}}'
|
||||
- title: Metasrv
|
||||
panels:
|
||||
- title: Region migration datanode
|
||||
@@ -948,6 +990,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: AlterTable-{{step}} p90
|
||||
- title: Reconciliation stats
|
||||
type: timeseries
|
||||
description: Reconciliation stats
|
||||
unit: s
|
||||
queries:
|
||||
- expr: greptime_meta_reconciliation_stats
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
|
||||
- title: Reconciliation steps
|
||||
type: timeseries
|
||||
description: 'Elapsed of Reconciliation steps '
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{procedure_name}}-{{step}}-P90'
|
||||
- title: Flownode
|
||||
panels:
|
||||
- title: Flow Ingest / Output Rate
|
||||
|
||||
File diff suppressed because it is too large
@@ -87,6 +87,13 @@
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Remote WAL
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Triggered region flush total | `meta_triggered_region_flush_total` | `timeseries` | Triggered region flush total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
|
||||
| Triggered region checkpoint total | `meta_triggered_region_checkpoint_total` | `timeseries` | Triggered region checkpoint total | `prometheus` | `none` | `{{pod}}-{{topic_name}}` |
|
||||
| Topic estimated replay size | `meta_topic_estimated_replay_size` | `timeseries` | Topic estimated max replay size | `prometheus` | `bytes` | `{{pod}}-{{topic_name}}` |
|
||||
| Kafka logstore's bytes traffic | `rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])` | `timeseries` | Kafka logstore's bytes traffic | `prometheus` | `bytes` | `{{pod}}-{{logstore}}` |
|
||||
# Metasrv
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
@@ -103,6 +110,8 @@
|
||||
| Meta KV Ops Latency | `histogram_quantile(0.99, sum by(pod, le, op, target) (greptime_meta_kv_request_elapsed_bucket))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `{{pod}}-{{op}} p99` |
|
||||
| Rate of meta KV Ops | `rate(greptime_meta_kv_request_elapsed_count[$__rate_interval])` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `none` | `{{pod}}-{{op}} p99` |
|
||||
| DDL Latency | `histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_tables_bucket))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_view))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_create_flow))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_drop_table))`<br/>`histogram_quantile(0.9, sum by(le, pod, step) (greptime_meta_procedure_alter_table))` | `timeseries` | Gauge of load information of each datanode, collected via heartbeat between datanode and metasrv. This information is for metasrv to schedule workloads. | `prometheus` | `s` | `CreateLogicalTables-{{step}} p90` |
|
||||
| Reconciliation stats | `greptime_meta_reconciliation_stats` | `timeseries` | Reconciliation stats | `prometheus` | `s` | `{{pod}}-{{table_type}}-{{type}}` |
|
||||
| Reconciliation steps | `histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)` | `timeseries` | Elapsed of Reconciliation steps | `prometheus` | `s` | `{{procedure_name}}-{{step}}-P90` |
|
||||
# Flownode
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
|
||||
@@ -802,6 +802,48 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]'
|
||||
- title: Remote WAL
|
||||
panels:
|
||||
- title: Triggered region flush total
|
||||
type: timeseries
|
||||
description: Triggered region flush total
|
||||
unit: none
|
||||
queries:
|
||||
- expr: meta_triggered_region_flush_total
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Triggered region checkpoint total
|
||||
type: timeseries
|
||||
description: Triggered region checkpoint total
|
||||
unit: none
|
||||
queries:
|
||||
- expr: meta_triggered_region_checkpoint_total
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Topic estimated replay size
|
||||
type: timeseries
|
||||
description: Topic estimated max replay size
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: meta_topic_estimated_replay_size
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{topic_name}}'
|
||||
- title: Kafka logstore's bytes traffic
|
||||
type: timeseries
|
||||
description: Kafka logstore's bytes traffic
|
||||
unit: bytes
|
||||
queries:
|
||||
- expr: rate(greptime_logstore_kafka_client_bytes_total[$__rate_interval])
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{logstore}}'
|
||||
- title: Metasrv
|
||||
panels:
|
||||
- title: Region migration datanode
|
||||
@@ -948,6 +990,26 @@ groups:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: AlterTable-{{step}} p90
|
||||
- title: Reconciliation stats
|
||||
type: timeseries
|
||||
description: Reconciliation stats
|
||||
unit: s
|
||||
queries:
|
||||
- expr: greptime_meta_reconciliation_stats
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{pod}}-{{table_type}}-{{type}}'
|
||||
- title: Reconciliation steps
|
||||
type: timeseries
|
||||
description: 'Elapsed of Reconciliation steps '
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.9, greptime_meta_reconciliation_procedure_bucket)
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '{{procedure_name}}-{{step}}-P90'
|
||||
- title: Flownode
|
||||
panels:
|
||||
- title: Flow Ingest / Output Rate
|
||||
|
||||
265
scripts/fix-udeps.py
Executable file
@@ -0,0 +1,265 @@
|
||||
# Copyright 2023 Greptime Team
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
|
||||
def load_udeps_report(report_path):
|
||||
try:
|
||||
with open(report_path, "r") as f:
|
||||
return json.load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"Error: Report file '{report_path}' not found.")
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error: Invalid JSON in report file: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def extract_unused_dependencies(report):
|
||||
"""
|
||||
Extract and organize unused dependencies from the cargo-udeps JSON report.
|
||||
|
||||
The cargo-udeps report has this structure:
|
||||
{
|
||||
"unused_deps": {
|
||||
"package_name v0.1.0 (/path/to/package)": {
|
||||
"normal": ["dep1", "dep2"],
|
||||
"development": ["dev_dep1"],
|
||||
"build": ["build_dep1"],
|
||||
"manifest_path": "/path/to/Cargo.toml"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Args:
|
||||
report (dict): The parsed JSON report from cargo-udeps
|
||||
|
||||
Returns:
|
||||
dict: Organized unused dependencies by package name:
|
||||
{
|
||||
"package_name": {
|
||||
"dependencies": [("dep1", "normal"), ("dev_dep1", "dev")],
|
||||
"manifest_path": "/path/to/Cargo.toml"
|
||||
}
|
||||
}
|
||||
"""
|
||||
if not report or "unused_deps" not in report:
|
||||
return {}
|
||||
|
||||
unused_deps = {}
|
||||
for package_full_name, deps_info in report["unused_deps"].items():
|
||||
package_name = package_full_name.split(" ")[0]
|
||||
|
||||
all_unused = []
|
||||
if deps_info.get("normal"):
|
||||
all_unused.extend([(dep, "normal") for dep in deps_info["normal"]])
|
||||
if deps_info.get("development"):
|
||||
all_unused.extend([(dep, "dev") for dep in deps_info["development"]])
|
||||
if deps_info.get("build"):
|
||||
all_unused.extend([(dep, "build") for dep in deps_info["build"]])
|
||||
|
||||
if all_unused:
|
||||
unused_deps[package_name] = {
|
||||
"dependencies": all_unused,
|
||||
"manifest_path": deps_info.get("manifest_path", "unknown"),
|
||||
}
|
||||
|
||||
return unused_deps
|
||||
|
||||
|
||||
def get_section_pattern(dep_type):
|
||||
"""
|
||||
Get regex patterns to identify different dependency sections in Cargo.toml.
|
||||
|
||||
Args:
|
||||
dep_type (str): Type of dependency ("normal", "dev", or "build")
|
||||
|
||||
Returns:
|
||||
list: List of regex patterns to match the appropriate section headers
|
||||
|
||||
"""
|
||||
patterns = {
|
||||
"normal": [r"\[dependencies\]", r"\[dependencies\..*?\]"],
|
||||
"dev": [r"\[dev-dependencies\]", r"\[dev-dependencies\..*?\]"],
|
||||
"build": [r"\[build-dependencies\]", r"\[build-dependencies\..*?\]"],
|
||||
}
|
||||
return patterns.get(dep_type, [])
|
||||
|
||||
|
||||
def remove_dependency_line(content, dep_name, section_start, section_end):
|
||||
"""
|
||||
Remove a dependency line from a specific section of a Cargo.toml file.
|
||||
|
||||
Args:
|
||||
content (str): The entire content of the Cargo.toml file
|
||||
dep_name (str): Name of the dependency to remove (e.g., "serde", "tokio")
|
||||
section_start (int): Starting position of the section in the content
|
||||
section_end (int): Ending position of the section in the content
|
||||
|
||||
Returns:
|
||||
tuple: (new_content, removed) where:
|
||||
- new_content (str): The modified content with dependency removed
|
||||
- removed (bool): True if dependency was found and removed, False otherwise
|
||||
|
||||
Example input content format:
|
||||
content = '''
|
||||
[package]
|
||||
name = "my-crate"
|
||||
version = "0.1.0"
|
||||
|
||||
[dependencies]
|
||||
serde = "1.0"
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
serde_json.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.0"
|
||||
'''
|
||||
|
||||
# If dep_name = "serde", section_start = start of [dependencies],
|
||||
# section_end = start of [dev-dependencies], this function will:
|
||||
# 1. Extract the section: "serde = "1.0"\ntokio = { version = "1.0", features = ["full"] }\nserde_json.workspace = true\n"
|
||||
# 2. Find and remove the line: "serde = "1.0""
|
||||
# 3. Return the modified content with that line removed
|
||||
"""
|
||||
section_content = content[section_start:section_end]
|
||||
|
||||
dep_patterns = [
|
||||
rf"^{re.escape(dep_name)}\s*=.*$", # e.g., "serde = "1.0""
|
||||
rf"^{re.escape(dep_name)}\.workspace\s*=.*$", # e.g., "serde_json.workspace = true"
|
||||
]
|
||||
|
||||
for pattern in dep_patterns:
|
||||
match = re.search(pattern, section_content, re.MULTILINE)
|
||||
if match:
|
||||
line_start = section_start + match.start() # Start of the matched line
|
||||
line_end = section_start + match.end() # End of the matched line
|
||||
|
||||
if line_end < len(content) and content[line_end] == "\n":
|
||||
line_end += 1
|
||||
|
||||
return content[:line_start] + content[line_end:], True
|
||||
|
||||
return content, False
|
||||
|
||||
|
||||
def remove_dependency_from_toml(file_path, dep_name, dep_type):
|
||||
"""
|
||||
Remove a specific dependency from a Cargo.toml file.
|
||||
|
||||
Args:
|
||||
file_path (str): Path to the Cargo.toml file
|
||||
dep_name (str): Name of the dependency to remove
|
||||
dep_type (str): Type of dependency ("normal", "dev", or "build")
|
||||
|
||||
Returns:
|
||||
bool: True if dependency was successfully removed, False otherwise
|
||||
"""
|
||||
try:
|
||||
with open(file_path, "r") as f:
|
||||
content = f.read()
|
||||
|
||||
section_patterns = get_section_pattern(dep_type)
|
||||
if not section_patterns:
|
||||
return False
|
||||
|
||||
for pattern in section_patterns:
|
||||
section_match = re.search(pattern, content, re.IGNORECASE)
|
||||
if not section_match:
|
||||
continue
|
||||
|
||||
section_start = section_match.end()
|
||||
next_section = re.search(r"\n\s*\[", content[section_start:])
|
||||
section_end = (
|
||||
section_start + next_section.start() if next_section else len(content)
|
||||
)
|
||||
|
||||
new_content, removed = remove_dependency_line(
|
||||
content, dep_name, section_start, section_end
|
||||
)
|
||||
if removed:
|
||||
with open(file_path, "w") as f:
|
||||
f.write(new_content)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing {file_path}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def process_unused_dependencies(unused_deps):
|
||||
"""
|
||||
Process and remove all unused dependencies from their respective Cargo.toml files.
|
||||
|
||||
Args:
|
||||
unused_deps (dict): Dictionary of unused dependencies organized by package:
|
||||
{
|
||||
"package_name": {
|
||||
"dependencies": [("dep1", "normal"), ("dev_dep1", "dev")],
|
||||
"manifest_path": "/path/to/Cargo.toml"
|
||||
}
|
||||
}
|
||||
|
||||
"""
|
||||
if not unused_deps:
|
||||
print("No unused dependencies found.")
|
||||
return
|
||||
|
||||
total_removed = 0
|
||||
total_failed = 0
|
||||
|
||||
for package, info in unused_deps.items():
|
||||
deps = info["dependencies"]
|
||||
manifest_path = info["manifest_path"]
|
||||
|
||||
if not os.path.exists(manifest_path):
|
||||
print(f"Manifest file not found: {manifest_path}")
|
||||
total_failed += len(deps)
|
||||
continue
|
||||
|
||||
for dep, dep_type in deps:
|
||||
if remove_dependency_from_toml(manifest_path, dep, dep_type):
|
||||
print(f"Removed {dep} from {package}")
|
||||
total_removed += 1
|
||||
else:
|
||||
print(f"Failed to remove {dep} from {package}")
|
||||
total_failed += 1
|
||||
|
||||
print(f"Removed {total_removed} dependencies")
|
||||
if total_failed > 0:
|
||||
print(f"Failed to remove {total_failed} dependencies")
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) > 1:
|
||||
report_path = sys.argv[1]
|
||||
else:
|
||||
report_path = "udeps-report.json"
|
||||
|
||||
report = load_udeps_report(report_path)
|
||||
if report is None:
|
||||
sys.exit(1)
|
||||
|
||||
unused_deps = extract_unused_dependencies(report)
|
||||
process_unused_dependencies(unused_deps)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
71
scripts/generate-etcd-tls-certs.sh
Executable file
71
scripts/generate-etcd-tls-certs.sh
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generate TLS certificates for etcd testing
|
||||
# This script creates certificates for TLS-enabled etcd in testing environments
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
CERT_DIR="${1:-$(dirname "$0")/../tests-integration/fixtures/etcd-tls-certs}"
|
||||
DAYS="${2:-365}"
|
||||
|
||||
echo "Generating TLS certificates for etcd in ${CERT_DIR}..."
|
||||
|
||||
mkdir -p "${CERT_DIR}"
|
||||
cd "${CERT_DIR}"
|
||||
|
||||
echo "Generating CA private key..."
|
||||
openssl genrsa -out ca-key.pem 2048
|
||||
|
||||
echo "Generating CA certificate..."
|
||||
openssl req -new -x509 -key ca-key.pem -out ca.crt -days "${DAYS}" \
|
||||
-subj "/C=US/ST=CA/L=SF/O=Greptime/CN=etcd-ca"
|
||||
|
||||
# Create server certificate config with Subject Alternative Names
|
||||
echo "Creating server certificate configuration..."
|
||||
cat > server.conf << 'EOF'
|
||||
[req]
|
||||
distinguished_name = req
|
||||
[v3_req]
|
||||
basicConstraints = CA:FALSE
|
||||
keyUsage = keyEncipherment, dataEncipherment
|
||||
subjectAltName = @alt_names
|
||||
[alt_names]
|
||||
DNS.1 = localhost
|
||||
DNS.2 = etcd-tls
|
||||
DNS.3 = 127.0.0.1
|
||||
IP.1 = 127.0.0.1
|
||||
IP.2 = ::1
|
||||
EOF
|
||||
|
||||
echo "Generating server private key..."
|
||||
openssl genrsa -out server-key.pem 2048
|
||||
|
||||
echo "Generating server certificate signing request..."
|
||||
openssl req -new -key server-key.pem -out server.csr \
|
||||
-subj "/CN=etcd-tls"
|
||||
|
||||
echo "Generating server certificate..."
|
||||
openssl x509 -req -in server.csr -CA ca.crt \
|
||||
-CAkey ca-key.pem -CAcreateserial -out server.crt \
|
||||
-days "${DAYS}" -extensions v3_req -extfile server.conf
|
||||
|
||||
echo "Generating client private key..."
|
||||
openssl genrsa -out client-key.pem 2048
|
||||
|
||||
echo "Generating client certificate signing request..."
|
||||
openssl req -new -key client-key.pem -out client.csr \
|
||||
-subj "/CN=etcd-client"
|
||||
|
||||
echo "Generating client certificate..."
|
||||
openssl x509 -req -in client.csr -CA ca.crt \
|
||||
-CAkey ca-key.pem -CAcreateserial -out client.crt \
|
||||
-days "${DAYS}"
|
||||
|
||||
echo "Setting proper file permissions..."
|
||||
chmod 644 ca.crt server.crt client.crt
|
||||
chmod 600 ca-key.pem server-key.pem client-key.pem
|
||||
|
||||
# Clean up intermediate files
|
||||
rm -f server.csr client.csr server.conf
|
||||
|
||||
echo "TLS certificates generated successfully in ${CERT_DIR}"
|
||||
@@ -19,6 +19,3 @@ paste.workspace = true
|
||||
prost.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
[build-dependencies]
|
||||
tonic-build = "0.11"
|
||||
|
||||
@@ -32,6 +32,7 @@ pub enum PermissionReq<'a> {
|
||||
PromStoreRead,
|
||||
Otlp,
|
||||
LogWrite,
|
||||
BulkInsert,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -26,12 +26,11 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::timestamp::TimestampMicrosecond;
|
||||
use datatypes::timestamp::TimestampSecond;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
|
||||
StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
|
||||
UInt64VectorBuilder,
|
||||
StringVectorBuilder, TimestampSecondVector, TimestampSecondVectorBuilder, UInt64VectorBuilder,
|
||||
};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use partition::manager::PartitionInfo;
|
||||
@@ -129,17 +128,17 @@ impl InformationSchemaPartitions {
|
||||
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"create_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"update_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"check_time",
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
|
||||
@@ -212,7 +211,7 @@ struct InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder,
|
||||
partition_ordinal_positions: Int64VectorBuilder,
|
||||
partition_expressions: StringVectorBuilder,
|
||||
create_times: TimestampMicrosecondVectorBuilder,
|
||||
create_times: TimestampSecondVectorBuilder,
|
||||
partition_ids: UInt64VectorBuilder,
|
||||
}
|
||||
|
||||
@@ -232,7 +231,7 @@ impl InformationSchemaPartitionsBuilder {
|
||||
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_times: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
@@ -331,8 +330,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
.push(Some((index + 1) as i64));
|
||||
let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
|
||||
self.partition_expressions.push(expression.as_deref());
|
||||
self.create_times.push(Some(TimestampMicrosecond::from(
|
||||
table_info.meta.created_on.timestamp_millis(),
|
||||
self.create_times.push(Some(TimestampSecond::from(
|
||||
table_info.meta.created_on.timestamp(),
|
||||
)));
|
||||
self.partition_ids.push(Some(partition.id.as_u64()));
|
||||
}
|
||||
@@ -349,8 +348,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
Arc::new(Int64Vector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(TimestampMicrosecondVector::from(vec![None])),
|
||||
let null_timestamp_second_vector = Arc::new(ConstantVector::new(
|
||||
Arc::new(TimestampSecondVector::from(vec![None])),
|
||||
rows_num,
|
||||
));
|
||||
let partition_methods = Arc::new(ConstantVector::new(
|
||||
@@ -380,8 +379,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
null_i64_vector.clone(),
|
||||
Arc::new(self.create_times.finish()),
|
||||
// TODO(dennis): supports update_time
|
||||
null_timestampmicrosecond_vector.clone(),
|
||||
null_timestampmicrosecond_vector,
|
||||
null_timestamp_second_vector.clone(),
|
||||
null_timestamp_second_vector,
|
||||
null_i64_vector,
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
|
||||
@@ -30,8 +30,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
|
||||
UInt64VectorBuilder,
|
||||
StringVectorBuilder, TimestampSecondVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -107,17 +106,17 @@ impl InformationSchemaTables {
|
||||
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
CREATE_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
UPDATE_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
CHECK_TIME,
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
|
||||
@@ -194,9 +193,9 @@ struct InformationSchemaTablesBuilder {
|
||||
max_index_length: UInt64VectorBuilder,
|
||||
data_free: UInt64VectorBuilder,
|
||||
auto_increment: UInt64VectorBuilder,
|
||||
create_time: TimestampMicrosecondVectorBuilder,
|
||||
update_time: TimestampMicrosecondVectorBuilder,
|
||||
check_time: TimestampMicrosecondVectorBuilder,
|
||||
create_time: TimestampSecondVectorBuilder,
|
||||
update_time: TimestampSecondVectorBuilder,
|
||||
check_time: TimestampSecondVectorBuilder,
|
||||
table_collation: StringVectorBuilder,
|
||||
checksum: UInt64VectorBuilder,
|
||||
create_options: StringVectorBuilder,
|
||||
@@ -231,9 +230,9 @@ impl InformationSchemaTablesBuilder {
|
||||
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
update_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
check_time: TimestampSecondVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
@@ -380,7 +379,7 @@ impl InformationSchemaTablesBuilder {
|
||||
self.create_options
|
||||
.push(Some(table_info.meta.options.to_string().as_ref()));
|
||||
self.create_time
|
||||
.push(Some(table_info.meta.created_on.timestamp_millis().into()));
|
||||
.push(Some(table_info.meta.created_on.timestamp().into()));
|
||||
|
||||
self.temporary
|
||||
.push(if matches!(table_type, TableType::Temporary) {
|
||||
|
||||
@@ -133,7 +133,7 @@ impl Predicate {
|
||||
let Expr::Column(c) = *expr else {
|
||||
unreachable!();
|
||||
};
|
||||
let Expr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else {
|
||||
let Expr::Literal(ScalarValue::Utf8(Some(pattern)), _) = *pattern else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
@@ -148,8 +148,8 @@ impl Predicate {
|
||||
// left OP right
|
||||
Expr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) {
|
||||
// left == right
|
||||
(Expr::Literal(scalar), Operator::Eq, Expr::Column(c))
|
||||
| (Expr::Column(c), Operator::Eq, Expr::Literal(scalar)) => {
|
||||
(Expr::Literal(scalar, _), Operator::Eq, Expr::Column(c))
|
||||
| (Expr::Column(c), Operator::Eq, Expr::Literal(scalar, _)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
@@ -157,8 +157,8 @@ impl Predicate {
|
||||
Some(Predicate::Eq(c.name, v))
|
||||
}
|
||||
// left != right
|
||||
(Expr::Literal(scalar), Operator::NotEq, Expr::Column(c))
|
||||
| (Expr::Column(c), Operator::NotEq, Expr::Literal(scalar)) => {
|
||||
(Expr::Literal(scalar, _), Operator::NotEq, Expr::Column(c))
|
||||
| (Expr::Column(c), Operator::NotEq, Expr::Literal(scalar, _)) => {
|
||||
let Ok(v) = Value::try_from(scalar) else {
|
||||
return None;
|
||||
};
|
||||
@@ -189,7 +189,7 @@ impl Predicate {
|
||||
let mut values = Vec::with_capacity(list.len());
|
||||
for scalar in list {
|
||||
// Safety: checked by `is_all_scalars`
|
||||
let Expr::Literal(scalar) = scalar else {
|
||||
let Expr::Literal(scalar, _) = scalar else {
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
@@ -237,7 +237,7 @@ fn like_utf8(s: &str, pattern: &str, case_insensitive: &bool) -> Option<bool> {
|
||||
}
|
||||
|
||||
fn is_string_literal(expr: &Expr) -> bool {
|
||||
matches!(expr, Expr::Literal(ScalarValue::Utf8(Some(_))))
|
||||
matches!(expr, Expr::Literal(ScalarValue::Utf8(Some(_)), _))
|
||||
}
|
||||
|
||||
fn is_column(expr: &Expr) -> bool {
|
||||
@@ -286,14 +286,14 @@ impl Predicates {
|
||||
|
||||
/// Returns true when the values are all [`DfExpr::Literal`].
|
||||
fn is_all_scalars(list: &[Expr]) -> bool {
|
||||
list.iter().all(|v| matches!(v, Expr::Literal(_)))
|
||||
list.iter().all(|v| matches!(v, Expr::Literal(_, _)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::common::{Column, ScalarValue};
|
||||
use datafusion::common::Column;
|
||||
use datafusion::logical_expr::expr::InList;
|
||||
use datafusion::logical_expr::BinaryExpr;
|
||||
use datafusion::logical_expr::{BinaryExpr, Literal};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -378,7 +378,7 @@ mod tests {
|
||||
let expr = Expr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
pattern: Box::new("%abc".lit()),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
@@ -405,7 +405,7 @@ mod tests {
|
||||
let expr = Expr::Like(Like {
|
||||
negated: false,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
pattern: Box::new("%abc".lit()),
|
||||
case_insensitive: false,
|
||||
escape_char: None,
|
||||
});
|
||||
@@ -425,7 +425,7 @@ mod tests {
|
||||
let expr = Expr::Like(Like {
|
||||
negated: true,
|
||||
expr: Box::new(column("a")),
|
||||
pattern: Box::new(string_literal("%abc")),
|
||||
pattern: Box::new("%abc".lit()),
|
||||
case_insensitive: true,
|
||||
escape_char: None,
|
||||
});
|
||||
@@ -440,10 +440,6 @@ mod tests {
|
||||
Expr::Column(Column::from_name(name))
|
||||
}
|
||||
|
||||
fn string_literal(v: &str) -> Expr {
|
||||
Expr::Literal(ScalarValue::Utf8(Some(v.to_string())))
|
||||
}
|
||||
|
||||
fn match_string_value(v: &Value, expected: &str) -> bool {
|
||||
matches!(v, Value::String(bs) if bs.as_utf8() == expected)
|
||||
}
|
||||
@@ -463,13 +459,13 @@ mod tests {
|
||||
let expr1 = Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("a")),
|
||||
op: Operator::Eq,
|
||||
right: Box::new(string_literal("a_value")),
|
||||
right: Box::new("a_value".lit()),
|
||||
});
|
||||
|
||||
let expr2 = Expr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(column("b")),
|
||||
op: Operator::NotEq,
|
||||
right: Box::new(string_literal("b_value")),
|
||||
right: Box::new("b_value".lit()),
|
||||
});
|
||||
|
||||
(expr1, expr2)
|
||||
@@ -508,7 +504,7 @@ mod tests {
|
||||
|
||||
let inlist_expr = Expr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
list: vec!["a1".lit(), "a2".lit()],
|
||||
negated: false,
|
||||
});
|
||||
|
||||
@@ -518,7 +514,7 @@ mod tests {
|
||||
|
||||
let inlist_expr = Expr::InList(InList {
|
||||
expr: Box::new(column("a")),
|
||||
list: vec![string_literal("a1"), string_literal("a2")],
|
||||
list: vec!["a1".lit(), "a2".lit()],
|
||||
negated: true,
|
||||
});
|
||||
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
|
||||
|
||||
@@ -32,7 +32,7 @@ use dummy_catalog::DummyCatalogList;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
CastManagerSnafu, DatafusionSnafu, DecodePlanSnafu, GetViewCacheSnafu, ProjectViewColumnsSnafu,
|
||||
CastManagerSnafu, DecodePlanSnafu, GetViewCacheSnafu, ProjectViewColumnsSnafu,
|
||||
QueryAccessDeniedSnafu, Result, TableNotExistSnafu, ViewInfoNotFoundSnafu,
|
||||
ViewPlanColumnsChangedSnafu,
|
||||
};
|
||||
@@ -199,10 +199,10 @@ impl DfTableSourceProvider {
|
||||
logical_plan
|
||||
};
|
||||
|
||||
Ok(Arc::new(
|
||||
ViewTable::try_new(logical_plan, Some(view_info.definition.to_string()))
|
||||
.context(DatafusionSnafu)?,
|
||||
))
|
||||
Ok(Arc::new(ViewTable::new(
|
||||
logical_plan,
|
||||
Some(view_info.definition.to_string()),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,9 @@ pub struct BenchTableMetadataCommand {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[clap(long)]
|
||||
postgres_schema: Option<String>,
|
||||
#[cfg(feature = "mysql_kvbackend")]
|
||||
#[clap(long)]
|
||||
mysql_addr: Option<String>,
|
||||
|
||||
@@ -19,8 +19,9 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::kv_backend::chroot::ChrootKvBackend;
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use meta_srv::bootstrap::create_etcd_client;
|
||||
use meta_srv::bootstrap::create_etcd_client_with_tls;
|
||||
use meta_srv::metasrv::BackendImpl;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
|
||||
use crate::error::{EmptyStoreAddrsSnafu, UnsupportedMemoryBackendSnafu};
|
||||
|
||||
@@ -55,9 +56,47 @@ pub(crate) struct StoreConfig {
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
#[clap(long, default_value = common_meta::kv_backend::DEFAULT_META_TABLE_NAME)]
|
||||
meta_table_name: String,
|
||||
|
||||
/// Optional PostgreSQL schema for metadata table (defaults to current search_path if unset).
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
#[clap(long)]
|
||||
meta_schema_name: Option<String>,
|
||||
/// TLS mode for backend store connections (etcd, PostgreSQL, MySQL)
|
||||
#[clap(long = "backend-tls-mode", value_enum, default_value = "disable")]
|
||||
backend_tls_mode: TlsMode,
|
||||
|
||||
/// Path to TLS certificate file for backend store connections
|
||||
#[clap(long = "backend-tls-cert-path", default_value = "")]
|
||||
backend_tls_cert_path: String,
|
||||
|
||||
/// Path to TLS private key file for backend store connections
|
||||
#[clap(long = "backend-tls-key-path", default_value = "")]
|
||||
backend_tls_key_path: String,
|
||||
|
||||
/// Path to TLS CA certificate file for backend store connections
|
||||
#[clap(long = "backend-tls-ca-cert-path", default_value = "")]
|
||||
backend_tls_ca_cert_path: String,
|
||||
|
||||
/// Enable watching TLS certificate files for changes
|
||||
#[clap(long = "backend-tls-watch")]
|
||||
backend_tls_watch: bool,
|
||||
}
|
||||
|
||||
impl StoreConfig {
|
||||
pub fn tls_config(&self) -> Option<TlsOption> {
|
||||
if self.backend_tls_mode != TlsMode::Disable {
|
||||
Some(TlsOption {
|
||||
mode: self.backend_tls_mode.clone(),
|
||||
cert_path: self.backend_tls_cert_path.clone(),
|
||||
key_path: self.backend_tls_key_path.clone(),
|
||||
ca_cert_path: self.backend_tls_ca_cert_path.clone(),
|
||||
watch: self.backend_tls_watch,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a [`KvBackendRef`] from the store configuration.
|
||||
pub async fn build(&self) -> Result<KvBackendRef, BoxedError> {
|
||||
let max_txn_ops = self.max_txn_ops;
|
||||
@@ -67,7 +106,8 @@ impl StoreConfig {
|
||||
} else {
|
||||
let kvbackend = match self.backend {
|
||||
BackendImpl::EtcdStore => {
|
||||
let etcd_client = create_etcd_client(store_addrs)
|
||||
let tls_config = self.tls_config();
|
||||
let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
|
||||
@@ -75,11 +115,14 @@ impl StoreConfig {
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
BackendImpl::PostgresStore => {
|
||||
let table_name = &self.meta_table_name;
|
||||
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, None)
|
||||
let tls_config = self.tls_config();
|
||||
let pool = meta_srv::bootstrap::create_postgres_pool(store_addrs, tls_config)
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
let schema_name = self.meta_schema_name.as_deref();
|
||||
Ok(common_meta::kv_backend::rds::PgStore::with_pg_pool(
|
||||
pool,
|
||||
schema_name,
|
||||
table_name,
|
||||
max_txn_ops,
|
||||
)
|
||||
|
||||
@@ -74,11 +74,19 @@ pub fn make_create_region_request_for_peer(
|
||||
let catalog = &create_table_expr.catalog_name;
|
||||
let schema = &create_table_expr.schema_name;
|
||||
let storage_path = region_storage_path(catalog, schema);
|
||||
let partition_exprs = region_routes
|
||||
.iter()
|
||||
.map(|r| (r.region.id.region_number(), r.region.partition_expr()))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for region_number in ®ions_on_this_peer {
|
||||
let region_id = RegionId::new(logical_table_id, *region_number);
|
||||
let region_request =
|
||||
request_builder.build_one(region_id, storage_path.clone(), &HashMap::new());
|
||||
let region_request = request_builder.build_one(
|
||||
region_id,
|
||||
storage_path.clone(),
|
||||
&HashMap::new(),
|
||||
&partition_exprs,
|
||||
);
|
||||
requests.push(region_request);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ datatypes.workspace = true
|
||||
enum_dispatch = "0.3"
|
||||
futures.workspace = true
|
||||
futures-util.workspace = true
|
||||
humantime.workspace = true
|
||||
lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
parking_lot.workspace = true
|
||||
@@ -38,6 +39,7 @@ query.workspace = true
|
||||
rand.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
store-api.workspace = true
|
||||
substrait.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-stream = { workspace = true, features = ["net"] }
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
|
||||
use common_meta::node_manager::{DatanodeRef, FlownodeRef, NodeManager};
|
||||
use common_meta::node_manager::{DatanodeManager, DatanodeRef, FlownodeManager, FlownodeRef};
|
||||
use common_meta::peer::Peer;
|
||||
use moka::future::{Cache, CacheBuilder};
|
||||
|
||||
@@ -45,7 +45,7 @@ impl Debug for NodeClients {
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl NodeManager for NodeClients {
|
||||
impl DatanodeManager for NodeClients {
|
||||
async fn datanode(&self, datanode: &Peer) -> DatanodeRef {
|
||||
let client = self.get_client(datanode).await;
|
||||
|
||||
@@ -60,7 +60,10 @@ impl NodeManager for NodeClients {
|
||||
*accept_compression,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl FlownodeManager for NodeClients {
|
||||
async fn flownode(&self, flownode: &Peer) -> FlownodeRef {
|
||||
let client = self.get_client(flownode).await;
|
||||
|
||||
|
||||
@@ -75,12 +75,24 @@ pub struct Database {
|
||||
}
|
||||
|
||||
pub struct DatabaseClient {
|
||||
pub addr: String,
|
||||
pub inner: GreptimeDatabaseClient<Channel>,
|
||||
}
|
||||
|
||||
impl DatabaseClient {
|
||||
/// Returns a closure that logs the error when the request fails.
|
||||
pub fn inspect_err<'a>(&'a self, context: &'a str) -> impl Fn(&tonic::Status) + 'a {
|
||||
let addr = &self.addr;
|
||||
move |status| {
|
||||
error!("Failed to {context} request, peer: {addr}, status: {status:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn make_database_client(client: &Client) -> Result<DatabaseClient> {
|
||||
let (_, channel) = client.find_channel()?;
|
||||
let (addr, channel) = client.find_channel()?;
|
||||
Ok(DatabaseClient {
|
||||
addr,
|
||||
inner: GreptimeDatabaseClient::new(channel)
|
||||
.max_decoding_message_size(client.max_grpc_recv_message_size())
|
||||
.max_encoding_message_size(client.max_grpc_send_message_size()),
|
||||
@@ -167,14 +179,19 @@ impl Database {
|
||||
requests: InsertRequests,
|
||||
hints: &[(&str, &str)],
|
||||
) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let mut client = make_database_client(&self.client)?;
|
||||
let request = self.to_rpc_request(Request::Inserts(requests));
|
||||
|
||||
let mut request = tonic::Request::new(request);
|
||||
let metadata = request.metadata_mut();
|
||||
Self::put_hints(metadata, hints)?;
|
||||
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
let response = client
|
||||
.inner
|
||||
.handle(request)
|
||||
.await
|
||||
.inspect_err(client.inspect_err("insert_with_hints"))?
|
||||
.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
@@ -189,14 +206,19 @@ impl Database {
|
||||
requests: RowInsertRequests,
|
||||
hints: &[(&str, &str)],
|
||||
) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let mut client = make_database_client(&self.client)?;
|
||||
let request = self.to_rpc_request(Request::RowInserts(requests));
|
||||
|
||||
let mut request = tonic::Request::new(request);
|
||||
let metadata = request.metadata_mut();
|
||||
Self::put_hints(metadata, hints)?;
|
||||
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
let response = client
|
||||
.inner
|
||||
.handle(request)
|
||||
.await
|
||||
.inspect_err(client.inspect_err("row_inserts_with_hints"))?
|
||||
.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
@@ -217,9 +239,14 @@ impl Database {
|
||||
|
||||
/// Make a request to the database.
|
||||
pub async fn handle(&self, request: Request) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let mut client = make_database_client(&self.client)?;
|
||||
let request = self.to_rpc_request(request);
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
let response = client
|
||||
.inner
|
||||
.handle(request)
|
||||
.await
|
||||
.inspect_err(client.inspect_err("handle"))?
|
||||
.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
@@ -231,7 +258,7 @@ impl Database {
|
||||
max_retries: u32,
|
||||
hints: &[(&str, &str)],
|
||||
) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let mut client = make_database_client(&self.client)?;
|
||||
let mut retries = 0;
|
||||
|
||||
let request = self.to_rpc_request(request);
|
||||
@@ -240,7 +267,11 @@ impl Database {
|
||||
let mut tonic_request = tonic::Request::new(request.clone());
|
||||
let metadata = tonic_request.metadata_mut();
|
||||
Self::put_hints(metadata, hints)?;
|
||||
let raw_response = client.handle(tonic_request).await;
|
||||
let raw_response = client
|
||||
.inner
|
||||
.handle(tonic_request)
|
||||
.await
|
||||
.inspect_err(client.inspect_err("handle"));
|
||||
match (raw_response, retries < max_retries) {
|
||||
(Ok(resp), _) => return from_grpc_response(resp.into_inner()),
|
||||
(Err(err), true) => {
|
||||
@@ -442,8 +473,8 @@ impl Database {
|
||||
}) = &self.ctx.auth_header
|
||||
{
|
||||
let encoded = BASE64_STANDARD.encode(format!("{username}:{password}"));
|
||||
let value =
|
||||
MetadataValue::from_str(&encoded).context(InvalidTonicMetadataValueSnafu)?;
|
||||
let value = MetadataValue::from_str(&format!("Basic {encoded}"))
|
||||
.context(InvalidTonicMetadataValueSnafu)?;
|
||||
request.metadata_mut().insert("x-greptime-auth", value);
|
||||
}
|
||||
|
||||
|
||||
@@ -133,6 +133,13 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("External error"))]
|
||||
External {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -154,6 +161,7 @@ impl ErrorExt for Error {
|
||||
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
|
||||
Error::InvalidTonicMetadataValue { .. } => StatusCode::InvalidArguments,
|
||||
Error::ConvertSchema { source, .. } => source.status_code(),
|
||||
Error::External { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
68
src/client/src/inserter.rs
Normal file
68
src/client/src/inserter.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::RowInsertRequests;
|
||||
use humantime::format_duration;
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY, TWCS_TIME_WINDOW};
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
/// Context holds the catalog and schema information.
|
||||
pub struct Context<'a> {
|
||||
/// The catalog name.
|
||||
pub catalog: &'a str,
|
||||
/// The schema name.
|
||||
pub schema: &'a str,
|
||||
}
|
||||
|
||||
/// Options for insert operations.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct InsertOptions {
|
||||
/// Time-to-live for the inserted data.
|
||||
pub ttl: Duration,
|
||||
/// Whether to use append mode for the insert.
|
||||
pub append_mode: bool,
|
||||
/// Time window for twcs compaction.
|
||||
pub twcs_compaction_time_window: Option<Duration>,
|
||||
}
|
||||
|
||||
impl InsertOptions {
|
||||
/// Converts the insert options to a list of key-value string hints.
|
||||
pub fn to_hints(&self) -> Vec<(&'static str, String)> {
|
||||
let mut hints = vec![
|
||||
(TTL_KEY, format_duration(self.ttl).to_string()),
|
||||
(APPEND_MODE_KEY, self.append_mode.to_string()),
|
||||
];
|
||||
|
||||
if let Some(time_window) = self.twcs_compaction_time_window {
|
||||
hints.push((TWCS_TIME_WINDOW, format_duration(time_window).to_string()));
|
||||
}
|
||||
|
||||
hints
|
||||
}
|
||||
}
|
||||
|
||||
/// [`Inserter`] allows different components to share a unified mechanism for inserting data.
|
||||
///
|
||||
/// An implementation may perform the insert locally (e.g., via a direct procedure call) or
|
||||
/// delegate/forward it to another node for processing (e.g., MetaSrv forwarding to an
|
||||
/// available Frontend).
|
||||
#[async_trait::async_trait]
|
||||
pub trait Inserter: Send + Sync {
|
||||
async fn insert_rows(&self, context: &Context<'_>, requests: RowInsertRequests) -> Result<()>;
|
||||
|
||||
fn set_options(&mut self, options: &InsertOptions);
|
||||
}
|
||||
@@ -19,6 +19,7 @@ pub mod client_manager;
|
||||
pub mod database;
|
||||
pub mod error;
|
||||
pub mod flow;
|
||||
pub mod inserter;
|
||||
pub mod load_balance;
|
||||
mod metrics;
|
||||
pub mod region;
|
||||
|
||||
@@ -103,3 +103,6 @@ tempfile.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dev-dependencies]
|
||||
rexpect = "0.5"
|
||||
|
||||
[package.metadata.cargo-udeps.ignore]
|
||||
development = ["rexpect"]
|
||||
|
||||
@@ -376,7 +376,8 @@ impl StartCommand {
|
||||
flow_auth_header,
|
||||
opts.query.clone(),
|
||||
opts.flow.batching_mode.clone(),
|
||||
);
|
||||
)
|
||||
.context(StartFlownodeSnafu)?;
|
||||
let frontend_client = Arc::new(frontend_client);
|
||||
let flownode_builder = FlownodeBuilder::new(
|
||||
opts.clone(),
|
||||
|
||||
@@ -41,6 +41,7 @@ use frontend::server::Services;
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use servers::addrs;
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
use servers::grpc::GrpcOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
@@ -144,6 +145,14 @@ pub struct StartCommand {
|
||||
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
|
||||
#[clap(long, alias = "rpc-hostname")]
|
||||
rpc_server_addr: Option<String>,
|
||||
/// The address to bind the internal gRPC server.
|
||||
#[clap(long, alias = "internal-rpc-addr")]
|
||||
internal_rpc_bind_addr: Option<String>,
|
||||
/// The address advertised to the metasrv, and used for connections from outside the host.
|
||||
/// If left empty or unset, the server will automatically use the IP address of the first network interface
|
||||
/// on the host, with the same port number as the one specified in `internal_rpc_bind_addr`.
|
||||
#[clap(long, alias = "internal-rpc-hostname")]
|
||||
internal_rpc_server_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
@@ -241,6 +250,31 @@ impl StartCommand {
|
||||
opts.grpc.server_addr.clone_from(addr);
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.internal_rpc_bind_addr {
|
||||
if let Some(internal_grpc) = &mut opts.internal_grpc {
|
||||
internal_grpc.bind_addr = addr.to_string();
|
||||
} else {
|
||||
let grpc_options = GrpcOptions {
|
||||
bind_addr: addr.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
opts.internal_grpc = Some(grpc_options);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.internal_rpc_server_addr {
|
||||
if let Some(internal_grpc) = &mut opts.internal_grpc {
|
||||
internal_grpc.server_addr = addr.to_string();
|
||||
} else {
|
||||
let grpc_options = GrpcOptions {
|
||||
server_addr: addr.to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
opts.internal_grpc = Some(grpc_options);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(addr) = &self.mysql_addr {
|
||||
opts.mysql.enable = true;
|
||||
opts.mysql.addr.clone_from(addr);
|
||||
@@ -448,6 +482,8 @@ mod tests {
|
||||
http_addr: Some("127.0.0.1:1234".to_string()),
|
||||
mysql_addr: Some("127.0.0.1:5678".to_string()),
|
||||
postgres_addr: Some("127.0.0.1:5432".to_string()),
|
||||
internal_rpc_bind_addr: Some("127.0.0.1:4010".to_string()),
|
||||
internal_rpc_server_addr: Some("10.0.0.24:4010".to_string()),
|
||||
influxdb_enable: Some(false),
|
||||
disable_dashboard: Some(false),
|
||||
..Default::default()
|
||||
@@ -460,6 +496,10 @@ mod tests {
|
||||
assert_eq!(opts.mysql.addr, "127.0.0.1:5678");
|
||||
assert_eq!(opts.postgres.addr, "127.0.0.1:5432");
|
||||
|
||||
let internal_grpc = opts.internal_grpc.as_ref().unwrap();
|
||||
assert_eq!(internal_grpc.bind_addr, "127.0.0.1:4010");
|
||||
assert_eq!(internal_grpc.server_addr, "10.0.0.24:4010");
|
||||
|
||||
let default_opts = FrontendOptions::default().component;
|
||||
|
||||
assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);
|
||||
|
||||
@@ -834,6 +834,7 @@ impl InformationExtension for StandaloneInformationExtension {
|
||||
region_manifest: region_stat.manifest.into(),
|
||||
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
|
||||
metadata_topic_latest_entry_id: region_stat.metadata_topic_latest_entry_id,
|
||||
write_bytes: 0,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
@@ -146,6 +146,7 @@ fn test_load_frontend_example_config() {
|
||||
grpc: GrpcOptions::default()
|
||||
.with_bind_addr("127.0.0.1:4001")
|
||||
.with_server_addr("127.0.0.1:4001"),
|
||||
internal_grpc: Some(GrpcOptions::internal_default()),
|
||||
http: HttpOptions {
|
||||
cors_allowed_origins: vec!["https://example.com".to_string()],
|
||||
..Default::default()
|
||||
@@ -198,6 +199,7 @@ fn test_load_metasrv_example_config() {
|
||||
ca_cert_path: String::new(),
|
||||
watch: false,
|
||||
}),
|
||||
meta_schema_name: Some("greptime_schema".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
|
||||
@@ -20,6 +20,7 @@ pub mod range_read;
|
||||
#[allow(clippy::all)]
|
||||
pub mod readable_size;
|
||||
pub mod secrets;
|
||||
pub mod serde;
|
||||
|
||||
pub type AffectedRows = usize;
|
||||
|
||||
|
||||
31
src/common/base/src/serde.rs
Normal file
31
src/common/base/src/serde.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
||||
/// Deserialize an empty string as the default value.
|
||||
pub fn empty_string_as_default<'de, D, T>(deserializer: D) -> Result<T, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
T: Default + Deserialize<'de>,
|
||||
{
|
||||
let s = String::deserialize(deserializer)?;
|
||||
|
||||
if s.is_empty() {
|
||||
Ok(T::default())
|
||||
} else {
|
||||
T::deserialize(serde::de::value::StringDeserializer::<D::Error>::new(s))
|
||||
.map_err(serde::de::Error::custom)
|
||||
}
|
||||
}
|
||||
@@ -25,19 +25,17 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
datafusion.workspace = true
|
||||
datafusion-orc.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_builder.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
object-store.workspace = true
|
||||
object_store_opendal.workspace = true
|
||||
orc-rust = { git = "https://github.com/datafusion-contrib/orc-rust", rev = "3134cab581a8e91b942d6a23aca2916ea965f6bb", default-features = false, features = [
|
||||
"async",
|
||||
] }
|
||||
orc-rust = { version = "0.6.3", default-features = false, features = ["async"] }
|
||||
parquet.workspace = true
|
||||
paste.workspace = true
|
||||
rand.workspace = true
|
||||
regex = "1.7"
|
||||
serde.workspace = true
|
||||
snafu.workspace = true
|
||||
@@ -47,6 +45,4 @@ tokio-util.workspace = true
|
||||
url = "2.3"
|
||||
|
||||
[dev-dependencies]
|
||||
common-telemetry.workspace = true
|
||||
common-test-util.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
@@ -12,16 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::{ArrowError, Schema, SchemaRef};
|
||||
use arrow_schema::Schema;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_recordbatch::adapter::RecordBatchStreamTypeAdapter;
|
||||
use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
|
||||
use datafusion::error::{DataFusionError, Result as DfResult};
|
||||
use futures::future::BoxFuture;
|
||||
use futures::{FutureExt, StreamExt, TryStreamExt};
|
||||
use futures::FutureExt;
|
||||
use object_store::ObjectStore;
|
||||
use orc_rust::arrow_reader::ArrowReaderBuilder;
|
||||
use orc_rust::async_arrow_reader::ArrowStreamReader;
|
||||
@@ -97,67 +92,6 @@ impl FileFormat for OrcFormat {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OrcOpener {
|
||||
object_store: Arc<ObjectStore>,
|
||||
output_schema: SchemaRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
}
|
||||
|
||||
impl OrcOpener {
|
||||
pub fn new(
|
||||
object_store: ObjectStore,
|
||||
output_schema: SchemaRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
object_store: Arc::from(object_store),
|
||||
output_schema,
|
||||
projection,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FileOpener for OrcOpener {
|
||||
fn open(&self, meta: FileMeta) -> DfResult<FileOpenFuture> {
|
||||
let object_store = self.object_store.clone();
|
||||
let projected_schema = if let Some(projection) = &self.projection {
|
||||
let projected_schema = self
|
||||
.output_schema
|
||||
.project(projection)
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
Arc::new(projected_schema)
|
||||
} else {
|
||||
self.output_schema.clone()
|
||||
};
|
||||
let projection = self.projection.clone();
|
||||
Ok(Box::pin(async move {
|
||||
let path = meta.location().to_string();
|
||||
|
||||
let meta = object_store
|
||||
.stat(&path)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let reader = object_store
|
||||
.reader(&path)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let stream_reader =
|
||||
new_orc_stream_reader(ReaderAdapter::new(reader, meta.content_length()))
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let stream =
|
||||
RecordBatchStreamTypeAdapter::new(projected_schema, stream_reader, projection);
|
||||
|
||||
let adopted = stream.map_err(|e| ArrowError::ExternalError(Box::new(e)));
|
||||
Ok(adopted.boxed())
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_test_util::find_workspace_path;
|
||||
|
||||
@@ -31,6 +31,7 @@ use datatypes::schema::SchemaRef;
|
||||
use futures::future::BoxFuture;
|
||||
use futures::StreamExt;
|
||||
use object_store::{FuturesAsyncReader, ObjectStore};
|
||||
use parquet::arrow::arrow_reader::ArrowReaderOptions;
|
||||
use parquet::arrow::AsyncArrowWriter;
|
||||
use parquet::basic::{Compression, Encoding, ZstdLevel};
|
||||
use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
|
||||
@@ -65,7 +66,7 @@ impl FileFormat for ParquetFormat {
|
||||
.compat();
|
||||
|
||||
let metadata = reader
|
||||
.get_metadata()
|
||||
.get_metadata(None)
|
||||
.await
|
||||
.context(error::ReadParquetSnafuSnafu)?;
|
||||
|
||||
@@ -146,7 +147,7 @@ impl LazyParquetFileReader {
|
||||
impl AsyncFileReader for LazyParquetFileReader {
|
||||
fn get_bytes(
|
||||
&mut self,
|
||||
range: std::ops::Range<usize>,
|
||||
range: std::ops::Range<u64>,
|
||||
) -> BoxFuture<'_, ParquetResult<bytes::Bytes>> {
|
||||
Box::pin(async move {
|
||||
self.maybe_initialize()
|
||||
@@ -157,13 +158,16 @@ impl AsyncFileReader for LazyParquetFileReader {
|
||||
})
|
||||
}
|
||||
|
||||
fn get_metadata(&mut self) -> BoxFuture<'_, ParquetResult<Arc<ParquetMetaData>>> {
|
||||
fn get_metadata<'a>(
|
||||
&'a mut self,
|
||||
options: Option<&'a ArrowReaderOptions>,
|
||||
) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
|
||||
Box::pin(async move {
|
||||
self.maybe_initialize()
|
||||
.await
|
||||
.map_err(|e| ParquetError::External(Box::new(e)))?;
|
||||
// Safety: Must initialized
|
||||
self.reader.as_mut().unwrap().get_metadata().await
|
||||
self.reader.as_mut().unwrap().get_metadata(options).await
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -192,7 +196,10 @@ pub async fn stream_to_parquet(
|
||||
concurrency: usize,
|
||||
) -> Result<usize> {
|
||||
let write_props = column_wise_config(
|
||||
WriterProperties::builder().set_compression(Compression::ZSTD(ZstdLevel::default())),
|
||||
WriterProperties::builder()
|
||||
.set_compression(Compression::ZSTD(ZstdLevel::default()))
|
||||
.set_statistics_truncate_length(None)
|
||||
.set_column_index_truncate_length(None),
|
||||
schema,
|
||||
)
|
||||
.build();
|
||||
|
||||
@@ -19,35 +19,39 @@ use std::vec;
|
||||
|
||||
use common_test_util::find_workspace_path;
|
||||
use datafusion::assert_batches_eq;
|
||||
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
|
||||
use datafusion::datasource::physical_plan::{
|
||||
CsvConfig, CsvOpener, FileOpener, FileScanConfig, FileStream, JsonOpener, ParquetExec,
|
||||
CsvSource, FileScanConfig, FileSource, FileStream, JsonSource, ParquetSource,
|
||||
};
|
||||
use datafusion::datasource::source::DataSourceExec;
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datafusion::physical_plan::ExecutionPlan;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_orc::OrcSource;
|
||||
use futures::StreamExt;
|
||||
use object_store::ObjectStore;
|
||||
|
||||
use super::FORMAT_TYPE;
|
||||
use crate::file_format::orc::{OrcFormat, OrcOpener};
|
||||
use crate::file_format::parquet::DefaultParquetFileReaderFactory;
|
||||
use crate::file_format::{FileFormat, Format};
|
||||
use crate::file_format::{FileFormat, Format, OrcFormat};
|
||||
use crate::test_util::{scan_config, test_basic_schema, test_store};
|
||||
use crate::{error, test_util};
|
||||
|
||||
struct Test<'a, T: FileOpener> {
|
||||
struct Test<'a> {
|
||||
config: FileScanConfig,
|
||||
opener: T,
|
||||
file_source: Arc<dyn FileSource>,
|
||||
expected: Vec<&'a str>,
|
||||
}
|
||||
|
||||
impl<T: FileOpener> Test<'_, T> {
|
||||
pub async fn run(self) {
|
||||
impl Test<'_> {
|
||||
async fn run(self, store: &ObjectStore) {
|
||||
let store = Arc::new(object_store_opendal::OpendalStore::new(store.clone()));
|
||||
let file_opener = self.file_source.create_file_opener(store, &self.config, 0);
|
||||
|
||||
let result = FileStream::new(
|
||||
&self.config,
|
||||
0,
|
||||
self.opener,
|
||||
file_opener,
|
||||
&ExecutionPlanMetricsSet::new(),
|
||||
)
|
||||
.unwrap()
|
||||
@@ -62,26 +66,16 @@ impl<T: FileOpener> Test<'_, T> {
|
||||
#[tokio::test]
|
||||
async fn test_json_opener() {
|
||||
let store = test_store("/");
|
||||
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
|
||||
|
||||
let schema = test_basic_schema();
|
||||
|
||||
let json_opener = || {
|
||||
JsonOpener::new(
|
||||
test_util::TEST_BATCH_SIZE,
|
||||
schema.clone(),
|
||||
FileCompressionType::UNCOMPRESSED,
|
||||
store.clone(),
|
||||
)
|
||||
};
|
||||
let file_source = Arc::new(JsonSource::new()).with_batch_size(test_util::TEST_BATCH_SIZE);
|
||||
|
||||
let path = &find_workspace_path("/src/common/datasource/tests/json/basic.json")
|
||||
.display()
|
||||
.to_string();
|
||||
let tests = [
|
||||
Test {
|
||||
config: scan_config(schema.clone(), None, path),
|
||||
opener: json_opener(),
|
||||
config: scan_config(schema.clone(), None, path, file_source.clone()),
|
||||
file_source: file_source.clone(),
|
||||
expected: vec![
|
||||
"+-----+-------+",
|
||||
"| num | str |",
|
||||
@@ -93,8 +87,8 @@ async fn test_json_opener() {
|
||||
],
|
||||
},
|
||||
Test {
|
||||
config: scan_config(schema.clone(), Some(1), path),
|
||||
opener: json_opener(),
|
||||
config: scan_config(schema, Some(1), path, file_source.clone()),
|
||||
file_source,
|
||||
expected: vec![
|
||||
"+-----+------+",
|
||||
"| num | str |",
|
||||
@@ -106,37 +100,26 @@ async fn test_json_opener() {
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
test.run().await;
|
||||
test.run(&store).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_csv_opener() {
|
||||
let store = test_store("/");
|
||||
let store = Arc::new(object_store_opendal::OpendalStore::new(store));
|
||||
|
||||
let schema = test_basic_schema();
|
||||
let path = &find_workspace_path("/src/common/datasource/tests/csv/basic.csv")
|
||||
.display()
|
||||
.to_string();
|
||||
let csv_config = Arc::new(CsvConfig::new(
|
||||
test_util::TEST_BATCH_SIZE,
|
||||
schema.clone(),
|
||||
None,
|
||||
true,
|
||||
b',',
|
||||
b'"',
|
||||
None,
|
||||
store,
|
||||
None,
|
||||
));
|
||||
|
||||
let csv_opener = || CsvOpener::new(csv_config.clone(), FileCompressionType::UNCOMPRESSED);
|
||||
let file_source = CsvSource::new(true, b',', b'"')
|
||||
.with_batch_size(test_util::TEST_BATCH_SIZE)
|
||||
.with_schema(schema.clone());
|
||||
|
||||
let tests = [
|
||||
Test {
|
||||
config: scan_config(schema.clone(), None, path),
|
||||
opener: csv_opener(),
|
||||
config: scan_config(schema.clone(), None, path, file_source.clone()),
|
||||
file_source: file_source.clone(),
|
||||
expected: vec![
|
||||
"+-----+-------+",
|
||||
"| num | str |",
|
||||
@@ -148,8 +131,8 @@ async fn test_csv_opener() {
|
||||
],
|
||||
},
|
||||
Test {
|
||||
config: scan_config(schema.clone(), Some(1), path),
|
||||
opener: csv_opener(),
|
||||
config: scan_config(schema, Some(1), path, file_source.clone()),
|
||||
file_source,
|
||||
expected: vec![
|
||||
"+-----+------+",
|
||||
"| num | str |",
|
||||
@@ -161,7 +144,7 @@ async fn test_csv_opener() {
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
test.run().await;
|
||||
test.run(&store).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,12 +157,12 @@ async fn test_parquet_exec() {
|
||||
let path = &find_workspace_path("/src/common/datasource/tests/parquet/basic.parquet")
|
||||
.display()
|
||||
.to_string();
|
||||
let base_config = scan_config(schema.clone(), None, path);
|
||||
|
||||
let exec = ParquetExec::builder(base_config)
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)))
|
||||
.build();
|
||||
let parquet_source = ParquetSource::default()
|
||||
.with_parquet_file_reader_factory(Arc::new(DefaultParquetFileReaderFactory::new(store)));
|
||||
|
||||
let config = scan_config(schema, None, path, Arc::new(parquet_source));
|
||||
let exec = DataSourceExec::from_data_source(config);
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
let context = Arc::new(TaskContext::from(&ctx));
|
||||
@@ -208,20 +191,18 @@ async fn test_parquet_exec() {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_orc_opener() {
|
||||
let root = find_workspace_path("/src/common/datasource/tests/orc")
|
||||
let path = &find_workspace_path("/src/common/datasource/tests/orc/test.orc")
|
||||
.display()
|
||||
.to_string();
|
||||
let store = test_store(&root);
|
||||
let schema = OrcFormat.infer_schema(&store, "test.orc").await.unwrap();
|
||||
let schema = Arc::new(schema);
|
||||
|
||||
let orc_opener = OrcOpener::new(store.clone(), schema.clone(), None);
|
||||
let path = "test.orc";
|
||||
let store = test_store("/");
|
||||
let schema = Arc::new(OrcFormat.infer_schema(&store, path).await.unwrap());
|
||||
let file_source = Arc::new(OrcSource::default());
|
||||
|
||||
let tests = [
|
||||
Test {
|
||||
config: scan_config(schema.clone(), None, path),
|
||||
opener: orc_opener.clone(),
|
||||
config: scan_config(schema.clone(), None, path, file_source.clone()),
|
||||
file_source: file_source.clone(),
|
||||
expected: vec![
|
||||
"+----------+-----+-------+------------+-----+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+----------------------------+-------------+",
|
||||
"| double_a | a | b | str_direct | d | e | f | int_short_repeated | int_neg_short_repeated | int_delta | int_neg_delta | int_direct | int_neg_direct | bigint_direct | bigint_neg_direct | bigint_other | utf8_increase | utf8_decrease | timestamp_simple | date_simple |",
|
||||
@@ -235,8 +216,8 @@ async fn test_orc_opener() {
|
||||
],
|
||||
},
|
||||
Test {
|
||||
config: scan_config(schema.clone(), Some(1), path),
|
||||
opener: orc_opener.clone(),
|
||||
config: scan_config(schema.clone(), Some(1), path, file_source.clone()),
|
||||
file_source,
|
||||
expected: vec![
|
||||
"+----------+-----+------+------------+---+-----+-------+--------------------+------------------------+-----------+---------------+------------+----------------+---------------+-------------------+--------------+---------------+---------------+-------------------------+-------------+",
|
||||
"| double_a | a | b | str_direct | d | e | f | int_short_repeated | int_neg_short_repeated | int_delta | int_neg_delta | int_direct | int_neg_direct | bigint_direct | bigint_neg_direct | bigint_other | utf8_increase | utf8_decrease | timestamp_simple | date_simple |",
|
||||
@@ -248,7 +229,7 @@ async fn test_orc_opener() {
|
||||
];
|
||||
|
||||
for test in tests {
|
||||
test.run().await;
|
||||
test.run(&store).await;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,12 +16,12 @@ use std::sync::Arc;

use arrow_schema::{DataType, Field, Schema, SchemaRef};
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datafusion::common::{Constraints, Statistics};
use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{
CsvConfig, CsvOpener, FileScanConfig, FileStream, JsonOpener,
CsvSource, FileGroup, FileScanConfig, FileScanConfigBuilder, FileSource, FileStream,
JsonOpener, JsonSource,
};
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
use object_store::services::Fs;
@@ -68,21 +68,20 @@ pub fn test_basic_schema() -> SchemaRef {
Arc::new(schema)
}

pub fn scan_config(file_schema: SchemaRef, limit: Option<usize>, filename: &str) -> FileScanConfig {
pub(crate) fn scan_config(
file_schema: SchemaRef,
limit: Option<usize>,
filename: &str,
file_source: Arc<dyn FileSource>,
) -> FileScanConfig {
// object_store only recognize the Unix style path, so make it happy.
let filename = &filename.replace('\\', "/");
let statistics = Statistics::new_unknown(file_schema.as_ref());
FileScanConfig {
object_store_url: ObjectStoreUrl::parse("empty://").unwrap(), // won't be used
file_schema,
file_groups: vec![vec![PartitionedFile::new(filename.to_string(), 10)]],
constraints: Constraints::empty(),
statistics,
projection: None,
limit,
table_partition_cols: vec![],
output_ordering: vec![],
}
let file_group = FileGroup::new(vec![PartitionedFile::new(filename.to_string(), 4096)]);

FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_schema, file_source)
.with_file_group(file_group)
.with_limit(limit)
.build()
}

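For orientation, a minimal sketch of how the rebuilt helper is meant to be called, reusing only calls that already appear in this diff (`scan_config`, `JsonSource`, `FileStream`); the file name and the `json_opener` parameter are illustrative placeholders, not part of the patch:

use std::sync::Arc;

use datafusion::datasource::physical_plan::{FileStream, JsonOpener, JsonSource};
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;

// Sketch: the FileSource now travels with the scan config instead of a
// format-specific *Config struct; "input.json" is a placeholder path.
async fn sketch_json_scan(schema: arrow_schema::SchemaRef, json_opener: JsonOpener) {
    let config = scan_config(schema, None, "input.json", Arc::new(JsonSource::new()));
    let _stream =
        FileStream::new(&config, 0, Arc::new(json_opener), &ExecutionPlanMetricsSet::new())
            .unwrap();
}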
pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usize) -> usize) {
@@ -99,9 +98,14 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi

let size = store.read(origin_path).await.unwrap().len();

let config = scan_config(schema.clone(), None, origin_path);

let stream = FileStream::new(&config, 0, json_opener, &ExecutionPlanMetricsSet::new()).unwrap();
let config = scan_config(schema, None, origin_path, Arc::new(JsonSource::new()));
let stream = FileStream::new(
&config,
0,
Arc::new(json_opener),
&ExecutionPlanMetricsSet::new(),
)
.unwrap();

let (tmp_store, dir) = test_tmp_store("test_stream_to_json");

@@ -127,24 +131,17 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz

let schema = test_basic_schema();

let csv_config = Arc::new(CsvConfig::new(
TEST_BATCH_SIZE,
schema.clone(),
None,
true,
b',',
b'"',
None,
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
None,
));

let csv_opener = CsvOpener::new(csv_config, FileCompressionType::UNCOMPRESSED);

let csv_source = CsvSource::new(true, b',', b'"')
.with_schema(schema.clone())
.with_batch_size(TEST_BATCH_SIZE);
let config = scan_config(schema, None, origin_path, csv_source.clone());
let size = store.read(origin_path).await.unwrap().len();

let config = scan_config(schema.clone(), None, origin_path);

let csv_opener = csv_source.create_file_opener(
Arc::new(object_store_opendal::OpendalStore::new(store.clone())),
&config,
0,
);
let stream = FileStream::new(&config, 0, csv_opener, &ExecutionPlanMetricsSet::new()).unwrap();

let (tmp_store, dir) = test_tmp_store("test_stream_to_csv");

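A short sketch of how such a stream is typically drained afterwards; this assumes `FileStream` yields `Result<RecordBatch>` items (the standard DataFusion behaviour) and is not code from this patch:

use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::physical_plan::FileStream;
use futures::TryStreamExt;

// Hypothetical helper: collect a FileStream into record batches so a test can
// compare what was read against what it writes back out.
async fn collect_batches(stream: FileStream) -> Vec<RecordBatch> {
    futures::pin_mut!(stream);
    let mut batches = Vec::new();
    while let Some(batch) = stream.try_next().await.expect("stream should not error") {
        batches.push(batch);
    }
    batches
}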
@@ -251,7 +251,6 @@ macro_rules! define_from_tonic_status {
.get(key)
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
}

let code = metadata_value(&e, $crate::GREPTIME_DB_HEADER_ERROR_CODE)
.and_then(|s| {
if let Ok(code) = s.parse::<u32>() {
@@ -290,6 +289,8 @@ macro_rules! define_into_tonic_status {
use tonic::metadata::MetadataMap;
use $crate::GREPTIME_DB_HEADER_ERROR_CODE;

common_telemetry::error!(err; "Failed to handle request");

let mut headers = HeaderMap::<HeaderValue>::with_capacity(2);

// If either of the status_code or error msg cannot convert to valid HTTP header value

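For readers unfamiliar with the macro, this is roughly what the metadata lookup above amounts to, written as a plain function; it is a sketch, not the macro's expansion, and the header name is the crate's own GREPTIME_DB_HEADER_ERROR_CODE constant:

use tonic::Status;

// Read a GreptimeDB error-code header from a gRPC status and parse it as u32.
fn error_code_from_status(status: &Status, header: &str) -> Option<u32> {
    status
        .metadata()
        .get(header)
        .and_then(|v| v.to_str().ok())
        .and_then(|s| s.parse::<u32>().ok())
}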
@@ -22,12 +22,6 @@ use snafu::{Location, Snafu};
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("No available frontend"))]
NoAvailableFrontend {
#[snafu(implicit)]
location: Location,
},

#[snafu(display("Mismatched schema, expected: {:?}, actual: {:?}", expected, actual))]
MismatchedSchema {
#[snafu(implicit)]
@@ -69,9 +63,7 @@ impl ErrorExt for Error {
Error::MismatchedSchema { .. } | Error::SerializeEvent { .. } => {
StatusCode::InvalidArguments
}
Error::NoAvailableFrontend { .. }
| Error::InsertEvents { .. }
| Error::KvBackend { .. } => StatusCode::Internal,
Error::InsertEvents { .. } | Error::KvBackend { .. } => StatusCode::Internal,
}
}


@@ -56,6 +56,8 @@ pub type EventRecorderRef = Arc<dyn EventRecorder>;
pub const DEFAULT_FLUSH_INTERVAL_SECONDS: Duration = Duration::from_secs(5);
/// The default TTL(90 days) for the events table.
const DEFAULT_EVENTS_TABLE_TTL: Duration = Duration::from_days(90);
/// The default compaction time window for the events table.
pub const DEFAULT_COMPACTION_TIME_WINDOW: Duration = Duration::from_days(1);
// The capacity of the tokio channel for transmitting events to background processor.
const DEFAULT_CHANNEL_SIZE: usize = 2048;
// The size of the buffer for batching events before flushing to event handler.
@@ -236,11 +238,6 @@ pub trait EventHandler: Send + Sync + 'static {
/// Processes and handles incoming events. The [DefaultEventHandlerImpl] implementation forwards events to frontend instances for persistence.
/// We use `&[Box<dyn Event>]` to avoid consuming the events, so the caller can buffer the events and retry if the handler fails.
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()>;

/// Returns the handler options for the event type. We can use different options for different event types.
fn options(&self, _event_type: &str) -> EventHandlerOptions {
EventHandlerOptions::default()
}
}

/// Configuration options for the event recorder.

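A minimal sketch of implementing this trait, assuming the crate exposes it via async_trait and that `Event`, `EventHandlerOptions`, and `Result` are the types shown above; a real handler would forward the events to a frontend for persistence rather than log them:

use async_trait::async_trait;
use common_telemetry::info;

// Hypothetical handler used only to illustrate the trait contract.
struct LoggingEventHandler;

#[async_trait]
impl EventHandler for LoggingEventHandler {
    async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
        // The slice is borrowed, so the caller can keep its buffer and retry on error.
        info!("handling {} buffered events", events.len());
        Ok(())
    }
    // `options` keeps the default implementation provided by the trait.
}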
@@ -57,7 +57,6 @@ serde_json.workspace = true
session.workspace = true
snafu.workspace = true
sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }

@@ -21,8 +21,6 @@ mod reconcile_database;
mod reconcile_table;
mod remove_region_follower;

use std::sync::Arc;

use add_region_follower::AddRegionFollowerFunction;
use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
use flush_compact_table::{CompactTableFunction, FlushTableFunction};
@@ -35,22 +33,22 @@ use remove_region_follower::RemoveRegionFollowerFunction;
use crate::flush_flow::FlushFlowFunction;
use crate::function_registry::FunctionRegistry;

/// Table functions
/// Administration functions
pub(crate) struct AdminFunction;

impl AdminFunction {
/// Register all table functions to [`FunctionRegistry`].
/// Register all admin functions to [`FunctionRegistry`].
pub fn register(registry: &FunctionRegistry) {
registry.register_async(Arc::new(MigrateRegionFunction));
registry.register_async(Arc::new(AddRegionFollowerFunction));
registry.register_async(Arc::new(RemoveRegionFollowerFunction));
registry.register_async(Arc::new(FlushRegionFunction));
registry.register_async(Arc::new(CompactRegionFunction));
registry.register_async(Arc::new(FlushTableFunction));
registry.register_async(Arc::new(CompactTableFunction));
registry.register_async(Arc::new(FlushFlowFunction));
registry.register_async(Arc::new(ReconcileCatalogFunction));
registry.register_async(Arc::new(ReconcileDatabaseFunction));
registry.register_async(Arc::new(ReconcileTableFunction));
registry.register(MigrateRegionFunction::factory());
registry.register(AddRegionFollowerFunction::factory());
registry.register(RemoveRegionFollowerFunction::factory());
registry.register(FlushRegionFunction::factory());
registry.register(CompactRegionFunction::factory());
registry.register(FlushTableFunction::factory());
registry.register(CompactTableFunction::factory());
registry.register(FlushFlowFunction::factory());
registry.register(ReconcileCatalogFunction::factory());
registry.register(ReconcileDatabaseFunction::factory());
registry.register(ReconcileTableFunction::factory());
}
}

@@ -18,7 +18,8 @@ use common_query::error::{
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, TypeSignature, Volatility};
use datafusion_expr::{Signature, TypeSignature, Volatility};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
@@ -82,7 +83,13 @@ fn signature() -> Signature {
Signature::one_of(
vec![
// add_region_follower(region_id, peer)
TypeSignature::Uniform(2, ConcreteDataType::numerics()),
TypeSignature::Uniform(
2,
ConcreteDataType::numerics()
.into_iter()
.map(|dt| dt.as_arrow_type())
.collect(),
),
],
Volatility::Immutable,
)
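The ConcreteDataType-to-arrow mapping above recurs in every converted signature in this patch. A hypothetical helper (not added by the patch) that captures the pattern, using only the calls already present in the diff:

use arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;

// The arrow-typed equivalent of ConcreteDataType::numerics(), matching the
// inline .map(|dt| dt.as_arrow_type()).collect() used in the signatures above.
fn numeric_arrow_types() -> Vec<ArrowDataType> {
    ConcreteDataType::numerics()
        .into_iter()
        .map(|dt| dt.as_arrow_type())
        .collect()
}

With such a helper, the signature above would read `TypeSignature::Uniform(2, numeric_arrow_types())`.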
@@ -92,38 +99,57 @@ fn signature() -> Signature {
mod tests {
use std::sync::Arc;

use common_query::prelude::TypeSignature;
use datatypes::vectors::{UInt64Vector, VectorRef};
use arrow::array::UInt64Array;
use arrow::datatypes::{DataType, Field};
use datafusion_expr::ColumnarValue;

use super::*;
use crate::function::{AsyncFunction, FunctionContext};
use crate::function::FunctionContext;
use crate::function_factory::ScalarFunctionFactory;

#[test]
fn test_add_region_follower_misc() {
let f = AddRegionFollowerFunction;
let factory: ScalarFunctionFactory = AddRegionFollowerFunction::factory().into();
let f = factory.provide(FunctionContext::mock());
assert_eq!("add_region_follower", f.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
f.return_type(&[]).unwrap()
);
assert_eq!(DataType::UInt64, f.return_type(&[]).unwrap());
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
datafusion_expr::Signature {
type_signature: datafusion_expr::TypeSignature::OneOf(sigs),
volatility: datafusion_expr::Volatility::Immutable
} if sigs.len() == 1));
}

#[tokio::test]
async fn test_add_region_follower() {
let f = AddRegionFollowerFunction;
let args = vec![1, 1];
let args = args
.into_iter()
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let factory: ScalarFunctionFactory = AddRegionFollowerFunction::factory().into();
let provider = factory.provide(FunctionContext::mock());
let f = provider.as_async().unwrap();

let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([0u64]));
assert_eq!(result, expect);
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![2]))),
],
arg_fields: vec![
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
],
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
number_rows: 1,
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
};

let result = f.invoke_async_with_args(func_args).await.unwrap();

match result {
ColumnarValue::Array(array) => {
let result_array = array.as_any().downcast_ref::<UInt64Array>().unwrap();
assert_eq!(result_array.value(0), 0u64);
}
ColumnarValue::Scalar(scalar) => {
assert_eq!(scalar, datafusion_common::ScalarValue::UInt64(Some(0)));
}
}
}
}

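The `ScalarFunctionArgs` construction repeats almost verbatim across the converted tests below. A hypothetical test helper along these lines (not part of the patch, and assuming all arguments arrive as single-row arrays) would shrink them considerably:

use std::sync::Arc;

use arrow::array::ArrayRef;
use arrow::datatypes::{DataType, Field};
use datafusion::logical_expr::ScalarFunctionArgs;
use datafusion_common::config::ConfigOptions;
use datafusion_expr::ColumnarValue;

// Wrap plain Arrow arrays into ScalarFunctionArgs, generating one field per
// argument and using the given return type; number_rows is fixed at 1 because
// these tests always pass single-row inputs.
fn scalar_args(arrays: Vec<ArrayRef>, return_type: DataType) -> ScalarFunctionArgs {
    let arg_fields = arrays
        .iter()
        .enumerate()
        .map(|(i, a)| Arc::new(Field::new(format!("arg_{i}"), a.data_type().clone(), false)))
        .collect();
    ScalarFunctionArgs {
        args: arrays.into_iter().map(ColumnarValue::Array).collect(),
        arg_fields,
        return_field: Arc::new(Field::new("result", return_type, true)),
        number_rows: 1,
        config_options: Arc::new(ConfigOptions::default()),
    }
}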
@@ -16,7 +16,8 @@ use common_macro::admin_fn;
use common_query::error::{
InvalidFuncArgsSnafu, MissingTableMutationHandlerSnafu, Result, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, Volatility};
use datafusion_expr::{Signature, Volatility};
use datatypes::data_type::DataType;
use datatypes::prelude::*;
use session::context::QueryContextRef;
use snafu::ensure;
@@ -66,71 +67,99 @@ define_region_function!(FlushRegionFunction, flush_region, flush_region);
define_region_function!(CompactRegionFunction, compact_region, compact_region);

fn signature() -> Signature {
Signature::uniform(1, ConcreteDataType::numerics(), Volatility::Immutable)
Signature::uniform(
1,
ConcreteDataType::numerics()
.into_iter()
.map(|dt| dt.as_arrow_type())
.collect(),
Volatility::Immutable,
)
}

#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::UInt64Vector;
|
||||
use arrow::array::UInt64Array;
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
macro_rules! define_region_function_test {
|
||||
($name: ident, $func: ident) => {
|
||||
paste::paste! {
|
||||
#[test]
|
||||
fn [<test_ $name _misc>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!(stringify!($name), f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
DataType::UInt64,
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(1, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == ConcreteDataType::numerics()));
|
||||
datafusion_expr::Signature {
|
||||
type_signature: datafusion_expr::TypeSignature::Uniform(1, valid_types),
|
||||
volatility: datafusion_expr::Volatility::Immutable
|
||||
} if valid_types == &ConcreteDataType::numerics().into_iter().map(|dt| { use datatypes::data_type::DataType; dt.as_arrow_type() }).collect::<Vec<_>>()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn [<test_ $name _missing_table_mutation>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let provider = factory.provide(FunctionContext::default());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let args = vec![99];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![99]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
assert_eq!(
|
||||
"Missing TableMutationHandler, not expected",
|
||||
"Execution error: Handler error: Missing TableMutationHandler, not expected",
|
||||
result.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn [<test_ $name>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![99]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
|
||||
let args = vec![99];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
|
||||
assert_eq!(expect, result);
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
assert_eq!(result_array.value(0), 42u64);
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(scalar, datafusion_common::ScalarValue::UInt64(Some(42)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -15,14 +15,15 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use api::v1::region::{compact_request, StrictWindow};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_macro::admin_fn;
|
||||
use common_query::error::{
|
||||
InvalidFuncArgsSnafu, MissingTableMutationHandlerSnafu, Result, TableMutationSnafu,
|
||||
UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use common_telemetry::info;
|
||||
use datafusion_expr::{Signature, Volatility};
|
||||
use datatypes::prelude::*;
|
||||
use session::context::QueryContextRef;
|
||||
use session::table_name::table_name_to_full_name;
|
||||
@@ -105,18 +106,11 @@ pub(crate) async fn compact_table(
|
||||
}
|
||||
|
||||
fn flush_signature() -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
Signature::uniform(1, vec![ArrowDataType::Utf8], Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn compact_signature() -> Signature {
|
||||
Signature::variadic(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
Signature::variadic(vec![ArrowDataType::Utf8], Volatility::Immutable)
|
||||
}
|
||||
|
||||
/// Parses `compact_table` UDF parameters. This function accepts following combinations:
|
||||
@@ -204,66 +198,87 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::region::compact_request::Options;
|
||||
use arrow::array::StringArray;
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::{StringVector, UInt64Vector};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
macro_rules! define_table_function_test {
|
||||
($name: ident, $func: ident) => {
|
||||
paste::paste!{
|
||||
#[test]
|
||||
fn [<test_ $name _misc>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!(stringify!($name), f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
DataType::UInt64,
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(1, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::string_datatype()]));
|
||||
datafusion_expr::Signature {
|
||||
type_signature: datafusion_expr::TypeSignature::Uniform(1, valid_types),
|
||||
volatility: datafusion_expr::Volatility::Immutable
|
||||
} if valid_types == &vec![ArrowDataType::Utf8]));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn [<test_ $name _missing_table_mutation>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let provider = factory.provide(FunctionContext::default());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let args = vec!["test"];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
assert_eq!(
|
||||
"Missing TableMutationHandler, not expected",
|
||||
"Execution error: Handler error: Missing TableMutationHandler, not expected",
|
||||
result.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn [<test_ $name>]() {
|
||||
let f = $func;
|
||||
let factory: ScalarFunctionFactory = $func::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
|
||||
let args = vec!["test"];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
|
||||
assert_eq!(expect, result);
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<arrow::array::UInt64Array>().unwrap();
|
||||
assert_eq!(result_array.value(0), 42u64);
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(scalar, datafusion_common::ScalarValue::UInt64(Some(42)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@ use std::time::Duration;
|
||||
use common_macro::admin_fn;
|
||||
use common_meta::rpc::procedure::MigrateRegionRequest;
|
||||
use common_query::error::{InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature, Volatility};
|
||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use session::context::QueryContextRef;
|
||||
@@ -103,9 +104,21 @@ fn signature() -> Signature {
|
||||
Signature::one_of(
|
||||
vec![
|
||||
// migrate_region(region_id, from_peer, to_peer)
|
||||
TypeSignature::Uniform(3, ConcreteDataType::numerics()),
|
||||
TypeSignature::Uniform(
|
||||
3,
|
||||
ConcreteDataType::numerics()
|
||||
.into_iter()
|
||||
.map(|dt| dt.as_arrow_type())
|
||||
.collect(),
|
||||
),
|
||||
// migrate_region(region_id, from_peer, to_peer, timeout(secs))
|
||||
TypeSignature::Uniform(4, ConcreteDataType::numerics()),
|
||||
TypeSignature::Uniform(
|
||||
4,
|
||||
ConcreteDataType::numerics()
|
||||
.into_iter()
|
||||
.map(|dt| dt.as_arrow_type())
|
||||
.collect(),
|
||||
),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -115,59 +128,89 @@ fn signature() -> Signature {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
|
||||
use arrow::array::{StringArray, UInt64Array};
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[test]
|
||||
fn test_migrate_region_misc() {
|
||||
let f = MigrateRegionFunction;
|
||||
let factory: ScalarFunctionFactory = MigrateRegionFunction::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!("migrate_region", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert_eq!(DataType::Utf8, f.return_type(&[]).unwrap());
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(sigs),
|
||||
volatility: Volatility::Immutable
|
||||
datafusion_expr::Signature {
|
||||
type_signature: datafusion_expr::TypeSignature::OneOf(sigs),
|
||||
volatility: datafusion_expr::Volatility::Immutable
|
||||
} if sigs.len() == 2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_missing_procedure_service() {
|
||||
let f = MigrateRegionFunction;
|
||||
let factory: ScalarFunctionFactory = MigrateRegionFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::default());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let args = vec![1, 1, 1];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
assert_eq!(
|
||||
"Missing ProcedureServiceHandler, not expected",
|
||||
"Execution error: Handler error: Missing ProcedureServiceHandler, not expected",
|
||||
result.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_migrate_region() {
|
||||
let f = MigrateRegionFunction;
|
||||
let factory: ScalarFunctionFactory = MigrateRegionFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let args = vec![1, 1, 1];
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,13 +14,15 @@
|
||||
|
||||
use api::v1::meta::reconcile_request::Target;
|
||||
use api::v1::meta::{ReconcileCatalog, ReconcileRequest};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_macro::admin_fn;
|
||||
use common_query::error::{
|
||||
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
|
||||
UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, TypeSignature, Volatility};
|
||||
use common_telemetry::info;
|
||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::*;
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
@@ -104,15 +106,15 @@ fn signature() -> Signature {
|
||||
let mut signs = Vec::with_capacity(2 + nums.len());
|
||||
signs.extend([
|
||||
// reconcile_catalog()
|
||||
TypeSignature::NullAry,
|
||||
TypeSignature::Nullary,
|
||||
// reconcile_catalog(resolve_strategy)
|
||||
TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
|
||||
TypeSignature::Exact(vec![ArrowDataType::Utf8]),
|
||||
]);
|
||||
for sign in nums {
|
||||
// reconcile_catalog(resolve_strategy, parallelism)
|
||||
signs.push(TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
sign,
|
||||
ArrowDataType::Utf8,
|
||||
sign.as_arrow_type(),
|
||||
]));
|
||||
}
|
||||
Signature::one_of(signs, Volatility::Immutable)
|
||||
@@ -120,60 +122,149 @@ fn signature() -> Signature {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Error;
|
||||
use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
|
||||
use arrow::array::{StringArray, UInt64Array};
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use crate::admin::reconcile_catalog::ReconcileCatalogFunction;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reconcile_catalog() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
// reconcile_catalog()
|
||||
let f = ReconcileCatalogFunction;
|
||||
let args = vec![];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileCatalogFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![],
|
||||
arg_fields: vec![],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// reconcile_catalog(resolve_strategy)
|
||||
let f = ReconcileCatalogFunction;
|
||||
let args = vec![Arc::new(StringVector::from(vec!["UseMetasrv"])) as _];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileCatalogFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
|
||||
"UseMetasrv",
|
||||
])))],
|
||||
arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// reconcile_catalog(resolve_strategy, parallelism)
|
||||
let f = ReconcileCatalogFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(UInt64Vector::from_slice([10])) as _,
|
||||
];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileCatalogFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![10]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// unsupported input data type
|
||||
let f = ReconcileCatalogFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(StringVector::from(vec!["test"])) as _,
|
||||
];
|
||||
let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
|
||||
assert_matches!(err, Error::UnsupportedInputDataType { .. });
|
||||
let factory: ScalarFunctionFactory = ReconcileCatalogFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let _err = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
// Note: Error type is DataFusionError at this level, not common_query::Error
|
||||
|
||||
// invalid function args
|
||||
let f = ReconcileCatalogFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(UInt64Vector::from_slice([10])) as _,
|
||||
Arc::new(StringVector::from(vec!["10"])) as _,
|
||||
];
|
||||
let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
|
||||
assert_matches!(err, Error::InvalidFuncArgs { .. });
|
||||
let factory: ScalarFunctionFactory = ReconcileCatalogFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![10]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["10"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let _err = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
// Note: Error type is DataFusionError at this level, not common_query::Error
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,13 +14,15 @@
|
||||
|
||||
use api::v1::meta::reconcile_request::Target;
|
||||
use api::v1::meta::{ReconcileDatabase, ReconcileRequest};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_macro::admin_fn;
|
||||
use common_query::error::{
|
||||
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
|
||||
UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, TypeSignature, Volatility};
|
||||
use common_telemetry::info;
|
||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::*;
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
@@ -113,19 +115,16 @@ fn signature() -> Signature {
|
||||
let mut signs = Vec::with_capacity(2 + nums.len());
|
||||
signs.extend([
|
||||
// reconcile_database(datanode_name)
|
||||
TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
|
||||
TypeSignature::Exact(vec![ArrowDataType::Utf8]),
|
||||
// reconcile_database(database_name, resolve_strategy)
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![ArrowDataType::Utf8, ArrowDataType::Utf8]),
|
||||
]);
|
||||
for sign in nums {
|
||||
// reconcile_database(database_name, resolve_strategy, parallelism)
|
||||
signs.push(TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
sign,
|
||||
ArrowDataType::Utf8,
|
||||
ArrowDataType::Utf8,
|
||||
sign.as_arrow_type(),
|
||||
]));
|
||||
}
|
||||
Signature::one_of(signs, Volatility::Immutable)
|
||||
@@ -133,66 +132,160 @@ fn signature() -> Signature {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Error;
|
||||
use datatypes::vectors::{StringVector, UInt32Vector, VectorRef};
|
||||
use arrow::array::{StringArray, UInt32Array};
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use crate::admin::reconcile_database::ReconcileDatabaseFunction;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reconcile_catalog() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
// reconcile_database(database_name)
|
||||
let f = ReconcileDatabaseFunction;
|
||||
let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileDatabaseFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
|
||||
"test",
|
||||
])))],
|
||||
arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// reconcile_database(database_name, resolve_strategy)
|
||||
let f = ReconcileDatabaseFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["test"])) as _,
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileDatabaseFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// reconcile_database(database_name, resolve_strategy, parallelism)
|
||||
let f = ReconcileDatabaseFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["test"])) as _,
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(UInt32Vector::from_slice([10])) as _,
|
||||
];
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
|
||||
assert_eq!(expect, result);
|
||||
let factory: ScalarFunctionFactory = ReconcileDatabaseFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(UInt32Array::from(vec![10]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::UInt32, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(result_array.value(0), "test_pid");
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// invalid function args
|
||||
let f = ReconcileDatabaseFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(UInt32Vector::from_slice([10])) as _,
|
||||
Arc::new(StringVector::from(vec!["v1"])) as _,
|
||||
Arc::new(StringVector::from(vec!["v2"])) as _,
|
||||
];
|
||||
let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
|
||||
assert_matches!(err, Error::InvalidFuncArgs { .. });
|
||||
let factory: ScalarFunctionFactory = ReconcileDatabaseFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(UInt32Array::from(vec![10]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["v1"]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["v2"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt32, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_3", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let _err = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
// Note: Error type is DataFusionError at this level, not common_query::Error
|
||||
|
||||
// unsupported input data type
|
||||
let f = ReconcileDatabaseFunction;
|
||||
let args = vec![
|
||||
Arc::new(StringVector::from(vec!["UseLatest"])) as _,
|
||||
Arc::new(UInt32Vector::from_slice([10])) as _,
|
||||
Arc::new(StringVector::from(vec!["v1"])) as _,
|
||||
];
|
||||
let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
|
||||
assert_matches!(err, Error::UnsupportedInputDataType { .. });
|
||||
let factory: ScalarFunctionFactory = ReconcileDatabaseFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseLatest"]))),
|
||||
ColumnarValue::Array(Arc::new(UInt32Array::from(vec![10]))),
|
||||
ColumnarValue::Array(Arc::new(StringArray::from(vec!["v1"]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::Utf8, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt32, false)),
|
||||
Arc::new(Field::new("arg_2", DataType::Utf8, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let _err = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
// Note: Error type is DataFusionError at this level, not common_query::Error
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,14 +14,15 @@
|
||||
|
||||
use api::v1::meta::reconcile_request::Target;
|
||||
use api::v1::meta::{ReconcileRequest, ReconcileTable, ResolveStrategy};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_macro::admin_fn;
|
||||
use common_query::error::{
|
||||
MissingProcedureServiceHandlerSnafu, Result, TableMutationSnafu, UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, TypeSignature, Volatility};
|
||||
use common_telemetry::info;
|
||||
use datafusion_expr::{Signature, TypeSignature, Volatility};
|
||||
use datatypes::prelude::*;
|
||||
use session::context::QueryContextRef;
|
||||
use session::table_name::table_name_to_full_name;
|
||||
@@ -93,12 +94,9 @@ fn signature() -> Signature {
|
||||
Signature::one_of(
|
||||
vec![
|
||||
// reconcile_table(table_name)
|
||||
TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
|
||||
TypeSignature::Exact(vec![ArrowDataType::Utf8]),
|
||||
// reconcile_table(table_name, resolve_strategy)
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![ArrowDataType::Utf8, ArrowDataType::Utf8]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -106,44 +104,101 @@ fn signature() -> Signature {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Error;
|
||||
use datatypes::vectors::{StringVector, VectorRef};
|
||||
use arrow::array::StringArray;
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use crate::admin::reconcile_table::ReconcileTableFunction;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reconcile_table() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
        // reconcile_table(table_name)
        let f = ReconcileTableFunction;
        let args = vec![Arc::new(StringVector::from(vec!["test"])) as _];
        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
        assert_eq!(expect, result);
        let factory: ScalarFunctionFactory = ReconcileTableFunction::factory().into();
        let provider = factory.provide(FunctionContext::mock());
        let f = provider.as_async().unwrap();

        let func_args = datafusion::logical_expr::ScalarFunctionArgs {
            args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                "test",
            ])))],
            arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))],
            return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
            number_rows: 1,
            config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
        };
        let result = f.invoke_async_with_args(func_args).await.unwrap();
        match result {
            ColumnarValue::Array(array) => {
                let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
                assert_eq!(result_array.value(0), "test_pid");
            }
            ColumnarValue::Scalar(scalar) => {
                assert_eq!(
                    scalar,
                    datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
                );
            }
        }

        // reconcile_table(table_name, resolve_strategy)
        let f = ReconcileTableFunction;
        let args = vec![
            Arc::new(StringVector::from(vec!["test"])) as _,
            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
        ];
        let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
        let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
        assert_eq!(expect, result);
        let factory: ScalarFunctionFactory = ReconcileTableFunction::factory().into();
        let provider = factory.provide(FunctionContext::mock());
        let f = provider.as_async().unwrap();

        let func_args = datafusion::logical_expr::ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
                ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseMetasrv"]))),
            ],
            arg_fields: vec![
                Arc::new(Field::new("arg_0", DataType::Utf8, false)),
                Arc::new(Field::new("arg_1", DataType::Utf8, false)),
            ],
            return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
            number_rows: 1,
            config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
        };
        let result = f.invoke_async_with_args(func_args).await.unwrap();
        match result {
            ColumnarValue::Array(array) => {
                let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
                assert_eq!(result_array.value(0), "test_pid");
            }
            ColumnarValue::Scalar(scalar) => {
                assert_eq!(
                    scalar,
                    datafusion_common::ScalarValue::Utf8(Some("test_pid".to_string()))
                );
            }
        }

        // unsupported input data type
        let f = ReconcileTableFunction;
        let args = vec![
            Arc::new(StringVector::from(vec!["test"])) as _,
            Arc::new(StringVector::from(vec!["UseMetasrv"])) as _,
            Arc::new(StringVector::from(vec!["10"])) as _,
        ];
        let err = f.eval(FunctionContext::mock(), &args).await.unwrap_err();
        assert_matches!(err, Error::UnsupportedInputDataType { .. });
        let factory: ScalarFunctionFactory = ReconcileTableFunction::factory().into();
        let provider = factory.provide(FunctionContext::mock());
        let f = provider.as_async().unwrap();

        let func_args = datafusion::logical_expr::ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Array(Arc::new(StringArray::from(vec!["test"]))),
                ColumnarValue::Array(Arc::new(StringArray::from(vec!["UseMetasrv"]))),
                ColumnarValue::Array(Arc::new(StringArray::from(vec!["10"]))),
            ],
            arg_fields: vec![
                Arc::new(Field::new("arg_0", DataType::Utf8, false)),
                Arc::new(Field::new("arg_1", DataType::Utf8, false)),
                Arc::new(Field::new("arg_2", DataType::Utf8, false)),
            ],
            return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
            number_rows: 1,
            config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
        };
        let _err = f.invoke_async_with_args(func_args).await.unwrap_err();
        // Note: Error type is DataFusionError at this level, not common_query::Error
    }
}

@@ -18,7 +18,8 @@ use common_query::error::{
    InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
    UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, TypeSignature, Volatility};
use datafusion_expr::{Signature, TypeSignature, Volatility};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
@@ -82,7 +83,13 @@ fn signature() -> Signature {
    Signature::one_of(
        vec![
            // remove_region_follower(region_id, peer_id)
            TypeSignature::Uniform(2, ConcreteDataType::numerics()),
            TypeSignature::Uniform(
                2,
                ConcreteDataType::numerics()
                    .into_iter()
                    .map(|dt| dt.as_arrow_type())
                    .collect(),
            ),
        ],
        Volatility::Immutable,
    )
@@ -92,38 +99,57 @@ fn signature() -> Signature {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::{UInt64Vector, VectorRef};
|
||||
use arrow::array::UInt64Array;
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[test]
|
||||
fn test_remove_region_follower_misc() {
|
||||
let f = RemoveRegionFollowerFunction;
|
||||
let factory: ScalarFunctionFactory = RemoveRegionFollowerFunction::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!("remove_region_follower", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert_eq!(DataType::UInt64, f.return_type(&[]).unwrap());
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(sigs),
|
||||
volatility: Volatility::Immutable
|
||||
datafusion_expr::Signature {
|
||||
type_signature: datafusion_expr::TypeSignature::OneOf(sigs),
|
||||
volatility: datafusion_expr::Volatility::Immutable
|
||||
} if sigs.len() == 1));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_remove_region_follower() {
|
||||
let f = RemoveRegionFollowerFunction;
|
||||
let args = vec![1, 1];
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
let factory: ScalarFunctionFactory = RemoveRegionFollowerFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([0u64]));
|
||||
assert_eq!(result, expect);
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
ColumnarValue::Array(Arc::new(UInt64Array::from(vec![1]))),
|
||||
],
|
||||
arg_fields: vec![
|
||||
Arc::new(Field::new("arg_0", DataType::UInt64, false)),
|
||||
Arc::new(Field::new("arg_1", DataType::UInt64, false)),
|
||||
],
|
||||
return_field: Arc::new(Field::new("result", DataType::UInt64, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
assert_eq!(result_array.value(0), 0u64);
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(scalar, datafusion_common::ScalarValue::UInt64(Some(0)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,14 +25,14 @@
use std::sync::Arc;

use arrow::array::StructArray;
use arrow_schema::Fields;
use arrow_schema::{FieldRef, Fields};
use common_telemetry::debug;
use datafusion::functions_aggregate::all_default_aggregate_functions;
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
use datafusion::optimizer::AnalyzerRule;
use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
use datafusion_common::{Column, ScalarValue};
use datafusion_expr::expr::AggregateFunction;
use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams};
use datafusion_expr::function::StateFieldsArgs;
use datafusion_expr::{
    Accumulator, Aggregate, AggregateUDF, AggregateUDFImpl, Expr, ExprSchemable, LogicalPlan,
@@ -41,7 +41,12 @@ use datafusion_expr::{
use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
use datatypes::arrow::datatypes::{DataType, Field};

use crate::function_registry::FunctionRegistry;
use crate::aggrs::aggr_wrapper::fix_order::FixStateUdafOrderingAnalyzer;
use crate::function_registry::{FunctionRegistry, FUNCTION_REGISTRY};

pub mod fix_order;
#[cfg(test)]
mod tests;

/// Returns the name of the state function for the given aggregate function name.
/// The state function is used to compute the state of the aggregate function.
@@ -57,6 +62,39 @@ pub fn aggr_merge_func_name(aggr_name: &str) -> String {
    format!("__{}_merge", aggr_name)
}

/// Checks if the given aggregate expression is steppable, i.e. whether it can be split into
/// multiple steps: on the datanode, first call `state(input)`; then, on the frontend, call
/// `calc(merge(state))` to get the final result.
pub fn is_all_aggr_exprs_steppable(aggr_exprs: &[Expr]) -> bool {
    aggr_exprs.iter().all(|expr| {
        if let Some(aggr_func) = get_aggr_func(expr) {
            if aggr_func.params.distinct {
                // Distinct aggregate functions are not steppable (yet).
                // TODO(discord9): support distinct aggregate functions.
                return false;
            }

            // Check whether the corresponding state function exists in the registry.
            FUNCTION_REGISTRY.is_aggr_func_exist(&aggr_state_func_name(aggr_func.func.name()))
        } else {
            false
        }
    })
}

|
||||
let mut expr_ref = expr;
|
||||
while let Expr::Alias(alias) = expr_ref {
|
||||
expr_ref = &alias.expr;
|
||||
}
|
||||
if let Expr::AggregateFunction(aggr_func) = expr_ref {
|
||||
Some(aggr_func)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// A wrapper to make an aggregate function out of the state and merge functions of the original aggregate function.
|
||||
/// It contains the original aggregate function, the state functions, and the merge function.
|
||||
///
|
||||
@@ -74,18 +112,6 @@ pub struct StepAggrPlan {
|
||||
pub lower_state: LogicalPlan,
|
||||
}
|
||||
|
||||
pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
|
||||
let mut expr_ref = expr;
|
||||
while let Expr::Alias(alias) = expr_ref {
|
||||
expr_ref = &alias.expr;
|
||||
}
|
||||
if let Expr::AggregateFunction(aggr_func) = expr_ref {
|
||||
Some(aggr_func)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl StateMergeHelper {
|
||||
/// Registers the `state` function of every supported aggregate function.
/// Note that the `merge` function can't be registered here, as it has to be created from the original aggregate function with the given input types.
@@ -118,6 +144,7 @@ impl StateMergeHelper {
|
||||
}
|
||||
|
||||
/// Split an aggregate plan into two aggregate plans, one for the state function and one for the merge function.
|
||||
///
|
||||
pub fn split_aggr_node(aggr_plan: Aggregate) -> datafusion_common::Result<StepAggrPlan> {
|
||||
let aggr = {
|
||||
// certain aggr func need type coercion to work correctly, so we need to analyze the plan first.
|
||||
@@ -137,6 +164,15 @@ impl StateMergeHelper {
|
||||
let mut lower_aggr_exprs = vec![];
|
||||
let mut upper_aggr_exprs = vec![];
|
||||
|
||||
// Group exprs for the upper plan should refer to the lower plan's output group exprs as columns,
// to avoid recomputing the group exprs.
let upper_group_exprs = aggr
|
||||
.group_expr
|
||||
.iter()
|
||||
.map(|c| c.qualified_name())
|
||||
.map(|(r, c)| Expr::Column(Column::new(r, c)))
|
||||
.collect();
|
||||
|
||||
for aggr_expr in aggr.aggr_expr.iter() {
|
||||
let Some(aggr_func) = get_aggr_func(aggr_expr) else {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
@@ -146,6 +182,7 @@ impl StateMergeHelper {
|
||||
};
|
||||
|
||||
let original_input_types = aggr_func
|
||||
.params
|
||||
.args
|
||||
.iter()
|
||||
.map(|e| e.get_type(&aggr.input.schema()))
|
||||
@@ -156,17 +193,14 @@ impl StateMergeHelper {
|
||||
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(state_func.into()),
|
||||
args: aggr_func.args.clone(),
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
params: aggr_func.params.clone(),
|
||||
};
|
||||
let expr = Expr::AggregateFunction(expr);
|
||||
let lower_state_output_col_name = expr.schema_name().to_string();
|
||||
|
||||
lower_aggr_exprs.push(expr);
|
||||
|
||||
// then create the merge function using the physical expression of the original aggregate function
|
||||
let (original_phy_expr, _filter, _ordering) = create_aggregate_expr_and_maybe_filter(
|
||||
aggr_expr,
|
||||
aggr.input.schema(),
|
||||
@@ -182,11 +216,16 @@ impl StateMergeHelper {
|
||||
let arg = Expr::Column(Column::new_unqualified(lower_state_output_col_name));
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(merge_func.into()),
|
||||
args: vec![arg],
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
// notice filter/order_by is not supported in the merge function, as it's not meaningful to have them in the merge phase.
|
||||
// do notice this order by is only removed in the outer logical plan, the physical plan still have order by and hence
|
||||
// can create correct accumulator with order by.
|
||||
params: AggregateFunctionParams {
|
||||
args: vec![arg],
|
||||
distinct: aggr_func.params.distinct,
|
||||
filter: None,
|
||||
order_by: vec![],
|
||||
null_treatment: aggr_func.params.null_treatment,
|
||||
},
|
||||
};
|
||||
|
||||
// alias to the original aggregate expr's schema name, so parent plan can refer to it
|
||||
@@ -202,10 +241,18 @@ impl StateMergeHelper {
|
||||
// update aggregate's output schema
|
||||
let lower_plan = lower_plan.recompute_schema()?;
|
||||
|
||||
let mut upper = aggr.clone();
|
||||
// This should only affect the two UDAFs `first_value`/`last_value`,
// which are the only ones with a meaningful ORDER BY field.
let fixed_lower_plan =
|
||||
FixStateUdafOrderingAnalyzer.analyze(lower_plan, &Default::default())?;
|
||||
|
||||
let upper = Aggregate::try_new(
|
||||
Arc::new(fixed_lower_plan.clone()),
|
||||
upper_group_exprs,
|
||||
upper_aggr_exprs.clone(),
|
||||
)?;
|
||||
let aggr_plan = LogicalPlan::Aggregate(aggr);
|
||||
upper.aggr_expr = upper_aggr_exprs;
|
||||
upper.input = Arc::new(lower_plan.clone());
|
||||
|
||||
// upper schema's output schema should be the same as the original aggregate plan's output schema
|
||||
let upper_check = upper;
|
||||
let upper_plan = LogicalPlan::Aggregate(upper_check).recompute_schema()?;
|
||||
@@ -217,7 +264,7 @@ impl StateMergeHelper {
|
||||
}
|
||||
|
||||
Ok(StepAggrPlan {
|
||||
lower_state: lower_plan,
|
||||
lower_state: fixed_lower_plan,
|
||||
upper_merge: upper_plan,
|
||||
})
|
||||
}
|
||||
@@ -228,13 +275,22 @@ impl StateMergeHelper {
|
||||
pub struct StateWrapper {
|
||||
inner: AggregateUDF,
|
||||
name: String,
|
||||
/// Defaults to empty; may be fixed up by the analyzer later.
ordering: Vec<FieldRef>,
/// Defaults to false; may be fixed up by the analyzer later.
distinct: bool,
|
||||
}
|
||||
|
||||
impl StateWrapper {
|
||||
/// `state_index`: The index of the state in the output of the state function.
|
||||
pub fn new(inner: AggregateUDF) -> datafusion_common::Result<Self> {
|
||||
let name = aggr_state_func_name(inner.name());
|
||||
Ok(Self { inner, name })
|
||||
Ok(Self {
|
||||
inner,
|
||||
name,
|
||||
ordering: vec![],
|
||||
distinct: false,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &AggregateUDF {
|
||||
@@ -247,15 +303,20 @@ impl StateWrapper {
|
||||
pub fn deduce_aggr_return_type(
|
||||
&self,
|
||||
acc_args: &datafusion_expr::function::AccumulatorArgs,
|
||||
) -> datafusion_common::Result<DataType> {
|
||||
let input_exprs = acc_args.exprs;
|
||||
let input_schema = acc_args.schema;
|
||||
let input_types = input_exprs
|
||||
) -> datafusion_common::Result<FieldRef> {
|
||||
let input_fields = acc_args
|
||||
.exprs
|
||||
.iter()
|
||||
.map(|e| e.data_type(input_schema))
|
||||
.map(|e| e.return_field(acc_args.schema))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let return_type = self.inner.return_type(&input_types)?;
|
||||
Ok(return_type)
|
||||
self.inner.return_field(&input_fields).inspect_err(|e| {
|
||||
common_telemetry::error!(
|
||||
"StateWrapper: {:#?}\nacc_args:{:?}\nerror:{:?}",
|
||||
&self,
|
||||
&acc_args,
|
||||
e
|
||||
);
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -265,14 +326,13 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
|
||||
) -> datafusion_common::Result<Box<dyn Accumulator>> {
|
||||
// fix and recover proper acc args for the original aggregate function.
|
||||
let state_type = acc_args.return_type.clone();
|
||||
let state_type = acc_args.return_type().clone();
|
||||
let inner = {
|
||||
let old_return_type = self.deduce_aggr_return_type(&acc_args)?;
|
||||
let acc_args = datafusion_expr::function::AccumulatorArgs {
|
||||
return_type: &old_return_type,
|
||||
return_field: self.deduce_aggr_return_type(&acc_args)?,
|
||||
schema: acc_args.schema,
|
||||
ignore_nulls: acc_args.ignore_nulls,
|
||||
ordering_req: acc_args.ordering_req,
|
||||
order_bys: acc_args.order_bys,
|
||||
is_reversed: acc_args.is_reversed,
|
||||
name: acc_args.name,
|
||||
is_distinct: acc_args.is_distinct,
|
||||
@@ -280,6 +340,7 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
};
|
||||
self.inner.accumulator(acc_args)?
|
||||
};
|
||||
|
||||
Ok(Box::new(StateAccum::new(inner, state_type)?))
|
||||
}
|
||||
|
||||
@@ -297,16 +358,31 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
/// Return state_fields as the output struct type.
|
||||
///
|
||||
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
let old_return_type = self.inner.return_type(arg_types)?;
|
||||
let input_fields = &arg_types
|
||||
.iter()
|
||||
.map(|x| Arc::new(Field::new("x", x.clone(), false)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: self.inner().name(),
|
||||
input_types: arg_types,
|
||||
return_type: &old_return_type,
|
||||
// TODO(discord9): how to get this?, probably ok?
|
||||
ordering_fields: &[],
|
||||
is_distinct: false,
|
||||
input_fields,
|
||||
return_field: self.inner.return_field(input_fields)?,
|
||||
// those args are also needed as they are vital to construct the state fields correctly.
|
||||
ordering_fields: &self.ordering,
|
||||
is_distinct: self.distinct,
|
||||
};
|
||||
let state_fields = self.inner.state_fields(state_fields_args)?;
|
||||
|
||||
let state_fields = state_fields
|
||||
.into_iter()
|
||||
.map(|f| {
|
||||
let mut f = f.as_ref().clone();
|
||||
// Since the state can be null when there are no input rows, make all fields nullable.
f.set_nullable(true);
|
||||
Arc::new(f)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let struct_field = DataType::Struct(state_fields.into());
|
||||
Ok(struct_field)
|
||||
}
|
||||
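Concretely, for the `last_value(number) ORDER BY number` case exercised in the tests later in this diff, the wrapped state function reports a struct return type along these lines (taken from the expected lower-plan schema asserted in those tests; all fields are forced nullable):

    // Illustrative only: the exact layout is whatever the inner UDAF's state_fields() returns.
    let _last_value_state_type = DataType::Struct(
        vec![
            Field::new("last_value[last_value]", DataType::Int64, true),
            Field::new("number", DataType::Int64, true), // the ordering field joins the state fields too
            Field::new("is_set", DataType::Boolean, true),
        ]
        .into(),
    );
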
@@ -315,12 +391,11 @@ impl AggregateUDFImpl for StateWrapper {
|
||||
fn state_fields(
|
||||
&self,
|
||||
args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
let old_return_type = self.inner.return_type(args.input_types)?;
|
||||
) -> datafusion_common::Result<Vec<FieldRef>> {
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: args.name,
|
||||
input_types: args.input_types,
|
||||
return_type: &old_return_type,
|
||||
input_fields: args.input_fields,
|
||||
return_field: self.inner.return_field(args.input_fields)?,
|
||||
ordering_fields: args.ordering_fields,
|
||||
is_distinct: args.is_distinct,
|
||||
};
|
||||
@@ -372,6 +447,39 @@ impl Accumulator for StateAccum {
|
||||
.iter()
|
||||
.map(|s| s.to_array())
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let array_type = array
|
||||
.iter()
|
||||
.map(|a| a.data_type().clone())
|
||||
.collect::<Vec<_>>();
|
||||
let expected_type: Vec<_> = self
|
||||
.state_fields
|
||||
.iter()
|
||||
.map(|f| f.data_type().clone())
|
||||
.collect();
|
||||
if array_type != expected_type {
|
||||
debug!(
|
||||
"State mismatch, expected: {}, got: {} for expected fields: {:?} and given array types: {:?}",
|
||||
self.state_fields.len(),
|
||||
array.len(),
|
||||
self.state_fields,
|
||||
array_type,
|
||||
);
|
||||
let guess_schema = array
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, array)| {
|
||||
Field::new(
|
||||
format!("col_{index}[mismatch_state]").as_str(),
|
||||
array.data_type().clone(),
|
||||
true,
|
||||
)
|
||||
})
|
||||
.collect::<Fields>();
|
||||
let arr = StructArray::try_new(guess_schema, array, None)?;
|
||||
|
||||
return Ok(ScalarValue::Struct(Arc::new(arr)));
|
||||
}
|
||||
|
||||
let struct_array = StructArray::try_new(self.state_fields.clone(), array, None)?;
|
||||
Ok(ScalarValue::Struct(Arc::new(struct_array)))
|
||||
}
|
||||
@@ -410,7 +518,7 @@ pub struct MergeWrapper {
|
||||
merge_signature: Signature,
|
||||
/// The original physical expression of the aggregate function, can't store the original aggregate function directly, as PhysicalExpr didn't implement Any
|
||||
original_phy_expr: Arc<AggregateFunctionExpr>,
|
||||
original_input_types: Vec<DataType>,
|
||||
return_type: DataType,
|
||||
}
|
||||
impl MergeWrapper {
|
||||
pub fn new(
|
||||
@@ -421,13 +529,14 @@ impl MergeWrapper {
|
||||
let name = aggr_merge_func_name(inner.name());
|
||||
// the input type is actually struct type, which is the state fields of the original aggregate function.
|
||||
let merge_signature = Signature::user_defined(datafusion_expr::Volatility::Immutable);
|
||||
let return_type = inner.return_type(&original_input_types)?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
name,
|
||||
merge_signature,
|
||||
original_phy_expr,
|
||||
original_input_types,
|
||||
return_type,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -479,8 +588,7 @@ impl AggregateUDFImpl for MergeWrapper {
|
||||
/// so return fixed return type instead of using `arg_types` to determine the return type.
|
||||
fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
// The return type is the same as the original aggregate function's return type.
|
||||
let ret_type = self.inner.return_type(&self.original_input_types)?;
|
||||
Ok(ret_type)
|
||||
Ok(self.return_type.clone())
|
||||
}
|
||||
fn signature(&self) -> &Signature {
|
||||
&self.merge_signature
|
||||
@@ -502,7 +610,7 @@ impl AggregateUDFImpl for MergeWrapper {
|
||||
fn state_fields(
|
||||
&self,
|
||||
_args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
) -> datafusion_common::Result<Vec<FieldRef>> {
|
||||
self.original_phy_expr.state_fields()
|
||||
}
|
||||
}
|
||||
@@ -550,10 +658,11 @@ impl Accumulator for MergeAccum {
|
||||
})?;
|
||||
let fields = struct_arr.fields();
|
||||
if fields != &self.state_fields {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected state fields: {:?}, got: {:?}",
|
||||
debug!(
|
||||
"State fields mismatch, expected: {:?}, got: {:?}",
|
||||
self.state_fields, fields
|
||||
)));
|
||||
);
|
||||
// state fields mismatch might be acceptable by datafusion, continue
|
||||
}
|
||||
|
||||
// now fields should be the same, so we can merge the batch
|
||||
@@ -570,6 +679,3 @@ impl Accumulator for MergeAccum {
|
||||
self.inner.state()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
src/common/function/src/aggrs/aggr_wrapper/fix_order.rs (new file, 189 lines)
@@ -0,0 +1,189 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::debug;
|
||||
use datafusion::config::ConfigOptions;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
|
||||
use datafusion_expr::{AggregateUDF, Expr, ExprSchemable, LogicalPlan};
|
||||
|
||||
use crate::aggrs::aggr_wrapper::StateWrapper;
|
||||
|
||||
/// Traverses the plan, finds all `__<aggr_name>_state` aggregates, and fixes their ordering fields
/// when the wrapped aggregate has an ORDER BY. This is currently only useful for the `first_value` and `last_value` UDAFs.
///
/// Should be applied to the datanode's query engine.
/// TODO(discord9): find a proper way to extend substrait's serde ability so a custom udaf can carry more info.
#[derive(Debug, Default)]
|
||||
pub struct FixStateUdafOrderingAnalyzer;
|
||||
|
||||
impl AnalyzerRule for FixStateUdafOrderingAnalyzer {
|
||||
fn name(&self) -> &str {
|
||||
"FixStateUdafOrderingAnalyzer"
|
||||
}
|
||||
|
||||
fn analyze(
|
||||
&self,
|
||||
plan: LogicalPlan,
|
||||
_config: &ConfigOptions,
|
||||
) -> datafusion_common::Result<LogicalPlan> {
|
||||
plan.rewrite_with_subqueries(&mut FixOrderingRewriter::new(true))
|
||||
.map(|t| t.data)
|
||||
}
|
||||
}
|
||||
|
||||
/// Traverses the plan, finds all `__<aggr_name>_state` aggregates, and removes their ordering fields.
/// This is currently only needed for the `first_value` and `last_value` UDAFs when the plan has to be encoded to substrait.
///
#[derive(Debug, Default)]
|
||||
pub struct UnFixStateUdafOrderingAnalyzer;
|
||||
|
||||
impl AnalyzerRule for UnFixStateUdafOrderingAnalyzer {
|
||||
fn name(&self) -> &str {
|
||||
"UnFixStateUdafOrderingAnalyzer"
|
||||
}
|
||||
|
||||
fn analyze(
|
||||
&self,
|
||||
plan: LogicalPlan,
|
||||
_config: &ConfigOptions,
|
||||
) -> datafusion_common::Result<LogicalPlan> {
|
||||
plan.rewrite_with_subqueries(&mut FixOrderingRewriter::new(false))
|
||||
.map(|t| t.data)
|
||||
}
|
||||
}
|
||||
|
||||
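As a rough sketch of how the two analyzers are intended to be paired around plan serialization (illustrative only, not part of the diff; the wrapper function names are placeholders, and the actual substrait encode/decode plumbing is elsewhere):

    // On the frontend, before encoding to substrait: drop the ordering/distinct info that the
    // wire format cannot carry yet (see the TODO above).
    fn prepare_for_substrait(plan: LogicalPlan) -> datafusion_common::Result<LogicalPlan> {
        UnFixStateUdafOrderingAnalyzer.analyze(plan, &ConfigOptions::default())
    }

    // On the datanode, after decoding: restore the ordering/distinct info from the surrounding
    // aggregate expressions so that `first_value`/`last_value` state fields line up again.
    fn restore_after_substrait(plan: LogicalPlan) -> datafusion_common::Result<LogicalPlan> {
        FixStateUdafOrderingAnalyzer.analyze(plan, &ConfigOptions::default())
    }
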
struct FixOrderingRewriter {
|
||||
/// once fixed, mark dirty, and always recompute schema from bottom up
|
||||
is_dirty: bool,
|
||||
/// if true, will add the ordering field from outer aggr expr
|
||||
/// if false, will remove the ordering field
|
||||
is_fix: bool,
|
||||
}
|
||||
|
||||
impl FixOrderingRewriter {
|
||||
pub fn new(is_fix: bool) -> Self {
|
||||
Self {
|
||||
is_dirty: false,
|
||||
is_fix,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TreeNodeRewriter for FixOrderingRewriter {
|
||||
type Node = LogicalPlan;
|
||||
|
||||
/// found all `__<aggr_name>_state` and fix their ordering fields
|
||||
/// if their input aggr is with order by
|
||||
fn f_up(
|
||||
&mut self,
|
||||
node: Self::Node,
|
||||
) -> datafusion_common::Result<datafusion_common::tree_node::Transformed<Self::Node>> {
|
||||
let LogicalPlan::Aggregate(mut aggregate) = node else {
|
||||
return if self.is_dirty {
|
||||
let node = node.recompute_schema()?;
|
||||
Ok(Transformed::yes(node))
|
||||
} else {
|
||||
Ok(Transformed::no(node))
|
||||
};
|
||||
};
|
||||
|
||||
// regex to match state udaf name
|
||||
for aggr_expr in &mut aggregate.aggr_expr {
|
||||
let new_aggr_expr = aggr_expr
|
||||
.clone()
|
||||
.transform_up(|expr| rewrite_expr(expr, &aggregate.input, self.is_fix))?;
|
||||
|
||||
if new_aggr_expr.transformed {
|
||||
*aggr_expr = new_aggr_expr.data;
|
||||
self.is_dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
if self.is_dirty {
|
||||
let node = LogicalPlan::Aggregate(aggregate).recompute_schema()?;
|
||||
debug!(
|
||||
"FixStateUdafOrderingAnalyzer: plan schema's field changed to {:?}",
|
||||
node.schema().fields()
|
||||
);
|
||||
|
||||
Ok(Transformed::yes(node))
|
||||
} else {
|
||||
Ok(Transformed::no(LogicalPlan::Aggregate(aggregate)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// First locate the aggregate node inside the expr, since it can be nested, e.g. alias(aggr(sort)).
/// If the contained aggregate expr has an ORDER BY and the aggregate's name matches the state-UDAF pattern,
/// fix the ordering field of the state UDAF to match the aggregate expr.
fn rewrite_expr(
|
||||
expr: Expr,
|
||||
aggregate_input: &Arc<LogicalPlan>,
|
||||
is_fix: bool,
|
||||
) -> Result<Transformed<Expr>, datafusion_common::DataFusionError> {
|
||||
let Expr::AggregateFunction(aggregate_function) = expr else {
|
||||
return Ok(Transformed::no(expr));
|
||||
};
|
||||
|
||||
let Some(old_state_wrapper) = aggregate_function
|
||||
.func
|
||||
.inner()
|
||||
.as_any()
|
||||
.downcast_ref::<StateWrapper>()
|
||||
else {
|
||||
return Ok(Transformed::no(Expr::AggregateFunction(aggregate_function)));
|
||||
};
|
||||
|
||||
let mut state_wrapper = old_state_wrapper.clone();
|
||||
if is_fix {
|
||||
// then always fix the ordering field&distinct flag and more
|
||||
let order_by = aggregate_function.params.order_by.clone();
|
||||
let ordering_fields: Vec<_> = order_by
|
||||
.iter()
|
||||
.map(|sort_expr| {
|
||||
sort_expr
|
||||
.expr
|
||||
.to_field(&aggregate_input.schema())
|
||||
.map(|(_, f)| f)
|
||||
})
|
||||
.collect::<datafusion_common::Result<Vec<_>>>()?;
|
||||
let distinct = aggregate_function.params.distinct;
|
||||
|
||||
// fixing up
|
||||
state_wrapper.ordering = ordering_fields;
|
||||
state_wrapper.distinct = distinct;
|
||||
} else {
|
||||
// remove the ordering field & distinct flag
|
||||
state_wrapper.ordering = vec![];
|
||||
state_wrapper.distinct = false;
|
||||
}
|
||||
|
||||
debug!(
|
||||
"FixStateUdafOrderingAnalyzer: fix state udaf from {old_state_wrapper:?} to {:?}",
|
||||
state_wrapper
|
||||
);
|
||||
|
||||
let mut aggregate_function = aggregate_function;
|
||||
|
||||
aggregate_function.func = Arc::new(AggregateUDF::new_from_impl(state_wrapper));
|
||||
|
||||
Ok(Transformed::yes(Expr::AggregateFunction(
|
||||
aggregate_function,
|
||||
)))
|
||||
}
|
||||
@@ -17,13 +17,15 @@ use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::array::{ArrayRef, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_schema::SchemaRef;
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use datafusion::catalog::{Session, TableProvider};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
use datafusion::functions_aggregate::count::count_udaf;
|
||||
use datafusion::functions_aggregate::sum::sum_udaf;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
@@ -35,7 +37,7 @@ use datafusion::prelude::SessionContext;
|
||||
use datafusion_common::{Column, TableReference};
|
||||
use datafusion_expr::expr::AggregateFunction;
|
||||
use datafusion_expr::sqlparser::ast::NullTreatment;
|
||||
use datafusion_expr::{Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
|
||||
use datafusion_expr::{lit, Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
|
||||
use datafusion_physical_expr::aggregate::AggregateExprBuilder;
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
@@ -234,7 +236,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -250,7 +252,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -290,7 +292,7 @@ async fn test_sum_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("__sum_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("sum(number)")],
|
||||
@@ -378,7 +380,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -395,7 +397,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
@@ -449,7 +451,7 @@ async fn test_avg_udaf() {
|
||||
vec![Expr::Column(Column::new_unqualified("__avg_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("avg(number)")],
|
||||
@@ -537,6 +539,208 @@ async fn test_avg_udaf() {
|
||||
assert_eq!(merge_eval_res, ScalarValue::Float64(Some(132. / 45_f64)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_last_value_order_by_udaf() {
|
||||
init_default_ut_logging();
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
let last_value = datafusion::functions_aggregate::first_last::last_value_udaf();
|
||||
let last_value = (*last_value).clone();
|
||||
|
||||
let original_aggr = Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(last_value.clone()),
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
vec![datafusion_expr::expr::Sort::new(
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
true,
|
||||
true,
|
||||
)],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap();
|
||||
let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap();
|
||||
|
||||
let state_func: Arc<AggregateUDF> =
|
||||
Arc::new(StateWrapper::new(last_value.clone()).unwrap().into());
|
||||
|
||||
let expected_aggr_state_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
state_func,
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
vec![datafusion_expr::expr::Sort::new(
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
true,
|
||||
true,
|
||||
)],
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
// fix the ordering & distinct info of the state udaf, as they are not set in the wrapper.
|
||||
let fixed_aggr_state_plan = FixStateUdafOrderingAnalyzer {}
|
||||
.analyze(expected_aggr_state_plan.clone(), &Default::default())
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(&res.lower_state, &fixed_aggr_state_plan);
|
||||
|
||||
// schema is the state fields of the last_value udaf
|
||||
assert_eq!(
|
||||
res.lower_state.schema().as_arrow(),
|
||||
&arrow_schema::Schema::new(vec![Field::new(
|
||||
"__last_value_state(number) ORDER BY [number ASC NULLS FIRST]",
|
||||
DataType::Struct(
|
||||
vec![
|
||||
Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
Field::new("number", DataType::Int64, true), // ordering field is added to state fields too
|
||||
Field::new("is_set", DataType::Boolean, true)
|
||||
]
|
||||
.into()
|
||||
),
|
||||
true,
|
||||
)])
|
||||
);
|
||||
|
||||
let expected_merge_fn = MergeWrapper::new(
|
||||
last_value.clone(),
|
||||
Arc::new(
|
||||
AggregateExprBuilder::new(
|
||||
Arc::new(last_value.clone()),
|
||||
vec![Arc::new(
|
||||
datafusion::physical_expr::expressions::Column::new("number", 0),
|
||||
)],
|
||||
)
|
||||
.schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
|
||||
.alias("last_value(number) ORDER BY [number ASC NULLS FIRST]")
|
||||
.build()
|
||||
.unwrap(),
|
||||
),
|
||||
vec![DataType::Int64],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let expected_merge_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(fixed_aggr_state_plan.clone()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(expected_merge_fn.into()),
|
||||
vec![Expr::Column(Column::new_unqualified(
|
||||
"__last_value_state(number) ORDER BY [number ASC NULLS FIRST]",
|
||||
))],
|
||||
false,
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
))
|
||||
.alias("last_value(number) ORDER BY [number ASC NULLS FIRST]")],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(&res.upper_merge, &expected_merge_plan);
|
||||
|
||||
let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&fixed_aggr_state_plan, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_state_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
|
||||
let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
// evaluate the state function
|
||||
let input = Int64Array::from(vec![Some(1), Some(2), None, Some(3)]);
|
||||
let values = vec![Arc::new(input) as arrow::array::ArrayRef];
|
||||
|
||||
state_accum.update_batch(&values).unwrap();
|
||||
|
||||
let state = state_accum.state().unwrap();
|
||||
|
||||
// FIXME(discord9): once datafusion fixes the issue that last_value's state fields are reported incorrectly
// (the ordering field is missing when the `last` field is itself part of the ordering), change this back to 3 fields.
assert_eq!(state.len(), 2); // last_value's odd optimization (or maybe a bug?) leaves it with only 2 state fields now
|
||||
assert_eq!(state[0], ScalarValue::Int64(Some(3)));
|
||||
assert_eq!(state[1], ScalarValue::Boolean(Some(true)));
|
||||
|
||||
let eval_res = state_accum.evaluate().unwrap();
|
||||
let expected = Arc::new(
|
||||
StructArray::try_new(
|
||||
vec![
|
||||
Field::new("col_0[mismatch_state]", DataType::Int64, true),
|
||||
Field::new("col_1[mismatch_state]", DataType::Boolean, true),
|
||||
// Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
// Field::new("number", DataType::Int64, true),
|
||||
// Field::new("is_set", DataType::Boolean, true),
|
||||
]
|
||||
.into(),
|
||||
vec![
|
||||
Arc::new(Int64Array::from(vec![Some(3)])),
|
||||
// Arc::new(Int64Array::from(vec![Some(3)])),
|
||||
Arc::new(BooleanArray::from(vec![Some(true)])),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(eval_res, ScalarValue::Struct(expected));
|
||||
|
||||
let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&res.upper_merge, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_merge_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
|
||||
let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
let merge_input = vec![
|
||||
Arc::new(Int64Array::from(vec![Some(3), Some(4)])) as arrow::array::ArrayRef,
|
||||
Arc::new(Int64Array::from(vec![Some(3), Some(4)])),
|
||||
Arc::new(BooleanArray::from(vec![Some(true), Some(true)])),
|
||||
];
|
||||
let merge_input_struct_arr = StructArray::try_new(
|
||||
vec![
|
||||
Field::new("last_value[last_value]", DataType::Int64, true),
|
||||
Field::new("number", DataType::Int64, true),
|
||||
Field::new("is_set", DataType::Boolean, true),
|
||||
]
|
||||
.into(),
|
||||
merge_input,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
merge_accum
|
||||
.update_batch(&[Arc::new(merge_input_struct_arr)])
|
||||
.unwrap();
|
||||
let merge_state = merge_accum.state().unwrap();
|
||||
assert_eq!(merge_state.len(), 3);
|
||||
assert_eq!(merge_state[0], ScalarValue::Int64(Some(4)));
|
||||
assert_eq!(merge_state[1], ScalarValue::Int64(Some(4)));
|
||||
assert_eq!(merge_state[2], ScalarValue::Boolean(Some(true)));
|
||||
|
||||
let merge_eval_res = merge_accum.evaluate().unwrap();
|
||||
// the merge function returns the last value, which is 4
|
||||
assert_eq!(merge_eval_res, ScalarValue::Int64(Some(4)));
|
||||
}
|
||||
|
||||
/// For testing whether the UDAF state fields are correctly implemented,
/// esp. for our own custom UDAFs' state fields,
/// by comparing eval results before and after the split into state/merge functions.
@@ -548,10 +752,11 @@ async fn test_udaf_correct_eval_result() {
|
||||
input_schema: SchemaRef,
|
||||
input: Vec<ArrayRef>,
|
||||
expected_output: Option<ScalarValue>,
|
||||
// extra check function on the final array result
|
||||
expected_fn: Option<ExpectedFn>,
|
||||
distinct: bool,
|
||||
filter: Option<Box<Expr>>,
|
||||
order_by: Option<Vec<SortExpr>>,
|
||||
order_by: Vec<SortExpr>,
|
||||
null_treatment: Option<NullTreatment>,
|
||||
}
|
||||
type ExpectedFn = fn(ArrayRef) -> bool;
|
||||
@@ -575,7 +780,28 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: count_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"str_val",
|
||||
DataType::Utf8,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("str_val"))],
|
||||
input: vec![Arc::new(StringArray::from(vec![
|
||||
Some("hello"),
|
||||
Some("world"),
|
||||
None,
|
||||
Some("what"),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Int64(Some(3))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -596,7 +822,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -619,7 +845,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -630,8 +856,8 @@ async fn test_udaf_correct_eval_result() {
|
||||
true,
|
||||
)])),
|
||||
args: vec![
|
||||
Expr::Literal(ScalarValue::Int64(Some(128))),
|
||||
Expr::Literal(ScalarValue::Float64(Some(0.05))),
|
||||
lit(128i64),
|
||||
lit(0.05f64),
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
],
|
||||
input: vec![Arc::new(Float64Array::from(vec![
|
||||
@@ -659,7 +885,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
@@ -690,7 +916,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
// TODO(discord9): udd_merge/hll_merge/geo_path/quantile_aggr tests
|
||||
|
||||
@@ -41,7 +41,7 @@ use datatypes::arrow::array::{
|
||||
Array, ArrayRef, AsArray, BooleanArray, Int64Array, ListArray, UInt64Array,
|
||||
};
|
||||
use datatypes::arrow::buffer::{OffsetBuffer, ScalarBuffer};
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, FieldRef};
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -94,14 +94,14 @@ impl AggregateUDFImpl for CountHash {
|
||||
false
|
||||
}
|
||||
|
||||
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
|
||||
Ok(vec![Field::new_list(
|
||||
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<FieldRef>> {
|
||||
Ok(vec![Arc::new(Field::new_list(
|
||||
format_state_name(args.name, "count_hash"),
|
||||
Field::new_list_field(DataType::UInt64, true),
|
||||
// For count_hash accumulator, null list item stands for an
|
||||
// empty value set (i.e., all NULL value so far for that group).
|
||||
true,
|
||||
)])
|
||||
))])
|
||||
}
|
||||
|
||||
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
|
||||
@@ -21,7 +21,7 @@ pub(crate) struct GeoFunction;
|
||||
|
||||
impl GeoFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register_aggr(encoding::JsonEncodePathAccumulator::uadf_impl());
|
||||
registry.register_aggr(geo_path::GeoPathAccumulator::uadf_impl());
|
||||
registry.register_aggr(encoding::JsonPathAccumulator::uadf_impl());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,223 +14,332 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{self, InvalidInputStateSnafu, Result};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{
|
||||
create_aggregate_function, Accumulator, AggregateFunctionCreator,
|
||||
use arrow::array::AsArray;
|
||||
use datafusion::arrow::array::{Array, ArrayRef};
|
||||
use datafusion::common::cast::as_primitive_array;
|
||||
use datafusion::error::{DataFusionError, Result as DfResult};
|
||||
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF, Volatility};
|
||||
use datafusion::prelude::create_udaf;
|
||||
use datafusion_common::cast::{as_list_array, as_struct_array};
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::arrow::array::{Float64Array, Int64Array, ListArray, StructArray};
|
||||
use datatypes::arrow::datatypes::{
|
||||
DataType, Field, Float64Type, Int64Type, TimeUnit, TimestampNanosecondType,
|
||||
};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
use common_time::Timestamp;
|
||||
use datafusion_expr::AggregateUDF;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use datatypes::compute::{self, sort_to_indices};
|
||||
|
||||
use crate::scalars::geo::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
pub const JSON_ENCODE_PATH_NAME: &str = "json_encode_path";
|
||||
|
||||
/// Accumulator of lat, lng, timestamp tuples
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathAccumulator {
|
||||
timestamp_type: ConcreteDataType,
|
||||
const LATITUDE_FIELD: &str = "lat";
|
||||
const LONGITUDE_FIELD: &str = "lng";
|
||||
const TIMESTAMP_FIELD: &str = "timestamp";
|
||||
const DEFAULT_LIST_FIELD_NAME: &str = "item";
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct JsonEncodePathAccumulator {
|
||||
lat: Vec<Option<f64>>,
|
||||
lng: Vec<Option<f64>>,
|
||||
timestamp: Vec<Option<Timestamp>>,
|
||||
timestamp: Vec<Option<i64>>,
|
||||
}
|
||||
|
||||
impl JsonPathAccumulator {
|
||||
fn new(timestamp_type: ConcreteDataType) -> Self {
|
||||
Self {
|
||||
lat: Vec::default(),
|
||||
lng: Vec::default(),
|
||||
timestamp: Vec::default(),
|
||||
timestamp_type,
|
||||
}
|
||||
impl JsonEncodePathAccumulator {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Create a new `AggregateUDF` for the `json_encode_path` aggregate function.
|
||||
pub fn uadf_impl() -> AggregateUDF {
|
||||
create_aggregate_function(
|
||||
"json_encode_path".to_string(),
|
||||
3,
|
||||
Arc::new(JsonPathEncodeFunctionCreator::default()),
|
||||
create_udaf(
|
||||
JSON_ENCODE_PATH_NAME,
|
||||
// Input types: lat, lng, timestamp
|
||||
vec![
|
||||
DataType::Float64,
|
||||
DataType::Float64,
|
||||
DataType::Timestamp(TimeUnit::Nanosecond, None),
|
||||
],
|
||||
// Output type: geojson compatible linestring
|
||||
Arc::new(DataType::Utf8),
|
||||
Volatility::Immutable,
|
||||
// Create the accumulator
|
||||
Arc::new(|_| Ok(Box::new(Self::new()))),
|
||||
// Intermediate state types
|
||||
Arc::new(vec![DataType::Struct(
|
||||
vec![
|
||||
Field::new(
|
||||
LATITUDE_FIELD,
|
||||
DataType::List(Arc::new(Field::new(
|
||||
DEFAULT_LIST_FIELD_NAME,
|
||||
DataType::Float64,
|
||||
true,
|
||||
))),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
LONGITUDE_FIELD,
|
||||
DataType::List(Arc::new(Field::new(
|
||||
DEFAULT_LIST_FIELD_NAME,
|
||||
DataType::Float64,
|
||||
true,
|
||||
))),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
TIMESTAMP_FIELD,
|
||||
DataType::List(Arc::new(Field::new(
|
||||
DEFAULT_LIST_FIELD_NAME,
|
||||
DataType::Int64,
|
||||
true,
|
||||
))),
|
||||
false,
|
||||
),
|
||||
]
|
||||
.into(),
|
||||
)]),
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
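Since the accumulator is now built on `create_udaf`, it can be exercised directly through a stock DataFusion context; a minimal sketch (illustrative only: the `trips` table and its `lat`/`lng`/`ts` columns are placeholders, and in GreptimeDB the registration actually goes through `FunctionRegistry::register_aggr` as shown elsewhere in this diff):

    use datafusion::prelude::SessionContext;

    async fn demo_json_encode_path() -> datafusion::error::Result<()> {
        let ctx = SessionContext::new();
        // `uadf_impl()` returns a ready-to-register AggregateUDF for json_encode_path.
        ctx.register_udaf(JsonEncodePathAccumulator::uadf_impl());

        // Assumes a table `trips(lat Float64, lng Float64, ts Timestamp)` has been registered.
        let df = ctx
            .sql("SELECT json_encode_path(lat, lng, ts) FROM trips")
            .await?;
        df.show().await?;
        Ok(())
    }
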
impl Accumulator for JsonPathAccumulator {
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
self.lat.iter().map(|i| Value::from(*i)).collect(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
)),
|
||||
Value::List(ListValue::new(
|
||||
self.lng.iter().map(|i| Value::from(*i)).collect(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
)),
|
||||
Value::List(ListValue::new(
|
||||
self.timestamp.iter().map(|i| Value::from(*i)).collect(),
|
||||
self.timestamp_type.clone(),
|
||||
)),
|
||||
])
|
||||
}
|
||||
impl DfAccumulator for JsonEncodePathAccumulator {
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion::error::Result<()> {
|
||||
if values.len() != 3 {
|
||||
return Err(DataFusionError::Internal(format!(
|
||||
"Expected 3 columns for json_encode_path, got {}",
|
||||
values.len()
|
||||
)));
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, columns: &[VectorRef]) -> Result<()> {
|
||||
// update batch as in datafusion just provides the accumulator original
|
||||
// input.
|
||||
//
|
||||
// columns is vec of [`lat`, `lng`, `timestamp`]
|
||||
// where
|
||||
// - `lat` is a vector of `Value::Float64` or similar type. Each item in
|
||||
// the vector is a row in given dataset.
|
||||
// - so on so forth for `lng` and `timestamp`
|
||||
ensure_columns_n!(columns, 3);
|
||||
let lat_array = as_primitive_array::<Float64Type>(&values[0])?;
|
||||
let lng_array = as_primitive_array::<Float64Type>(&values[1])?;
|
||||
let ts_array = as_primitive_array::<TimestampNanosecondType>(&values[2])?;
|
||||
|
||||
let lat = &columns[0];
|
||||
let lng = &columns[1];
|
||||
let ts = &columns[2];
|
||||
|
||||
let size = lat.len();
|
||||
let size = lat_array.len();
|
||||
self.lat.reserve(size);
|
||||
self.lng.reserve(size);
|
||||
|
||||
for idx in 0..size {
|
||||
self.lat.push(lat.get(idx).as_f64_lossy());
|
||||
self.lng.push(lng.get(idx).as_f64_lossy());
|
||||
self.timestamp.push(ts.get(idx).as_timestamp());
|
||||
self.lat.push(if lat_array.is_null(idx) {
|
||||
None
|
||||
} else {
|
||||
Some(lat_array.value(idx))
|
||||
});
|
||||
|
||||
self.lng.push(if lng_array.is_null(idx) {
|
||||
None
|
||||
} else {
|
||||
Some(lng_array.value(idx))
|
||||
});
|
||||
|
||||
self.timestamp.push(if ts_array.is_null(idx) {
|
||||
None
|
||||
} else {
|
||||
Some(ts_array.value(idx))
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
// merge batch as in datafusion gives state accumulated from the data
|
||||
// returned from child accumulators' state() call
|
||||
// In our particular implementation, the data structure is like
|
||||
//
|
||||
// states is vec of [`lat`, `lng`, `timestamp`]
|
||||
// where
|
||||
// - `lat` is a vector of `Value::List`. Each item in the list is all
|
||||
// coordinates from a child accumulator.
|
||||
// - so on so forth for `lng` and `timestamp`
|
||||
fn evaluate(&mut self) -> DfResult<ScalarValue> {
|
||||
let unordered_lng_array = Float64Array::from(self.lng.clone());
|
||||
let unordered_lat_array = Float64Array::from(self.lat.clone());
|
||||
let ts_array = Int64Array::from(self.timestamp.clone());
|
||||
|
||||
ensure_columns_n!(states, 3);
|
||||
let ordered_indices = sort_to_indices(&ts_array, None, None)?;
|
||||
let lat_array = compute::take(&unordered_lat_array, &ordered_indices, None)?;
|
||||
let lng_array = compute::take(&unordered_lng_array, &ordered_indices, None)?;
|
||||
|
||||
let lat_lists = &states[0];
|
||||
let lng_lists = &states[1];
|
||||
let ts_lists = &states[2];
|
||||
let len = ts_array.len();
|
||||
let lat_array = lat_array.as_primitive::<Float64Type>();
|
||||
let lng_array = lng_array.as_primitive::<Float64Type>();
|
||||
|
||||
let len = lat_lists.len();
|
||||
let mut coords = Vec::with_capacity(len);
|
||||
for i in 0..len {
|
||||
let lng = lng_array.value(i);
|
||||
let lat = lat_array.value(i);
|
||||
coords.push(vec![lng, lat]);
|
||||
}
|
||||
|
||||
for idx in 0..len {
|
||||
if let Some(lat_list) = lat_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in lat_list.items() {
|
||||
self.lat.push(v.as_f64_lossy());
|
||||
}
|
||||
}
|
||||
let result = serde_json::to_string(&coords)
|
||||
.map_err(|e| DataFusionError::Execution(format!("Failed to encode json, {}", e)))?;
|
||||
|
||||
if let Some(lng_list) = lng_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in lng_list.items() {
|
||||
self.lng.push(v.as_f64_lossy());
|
||||
}
|
||||
}
|
||||
Ok(ScalarValue::Utf8(Some(result)))
|
||||
}
|
||||
|
||||
if let Some(ts_list) = ts_lists
|
||||
.get(idx)
|
||||
.as_list()
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteSnafu)?
|
||||
{
|
||||
for v in ts_list.items() {
|
||||
self.timestamp.push(v.as_timestamp());
|
||||
}
|
||||
}
|
||||
fn size(&self) -> usize {
|
||||
// Base size of JsonEncodePathAccumulator struct fields
|
||||
let mut total_size = std::mem::size_of::<Self>();
|
||||
|
||||
// Size of vectors (approximation)
|
||||
total_size += self.lat.capacity() * std::mem::size_of::<Option<f64>>();
|
||||
total_size += self.lng.capacity() * std::mem::size_of::<Option<f64>>();
|
||||
total_size += self.timestamp.capacity() * std::mem::size_of::<Option<i64>>();
|
||||
|
||||
total_size
|
||||
}
|
||||
|
||||
fn state(&mut self) -> datafusion::error::Result<Vec<ScalarValue>> {
|
||||
let lat_array = Arc::new(ListArray::from_iter_primitive::<Float64Type, _, _>(vec![
|
||||
Some(self.lat.clone()),
|
||||
]));
|
||||
let lng_array = Arc::new(ListArray::from_iter_primitive::<Float64Type, _, _>(vec![
|
||||
Some(self.lng.clone()),
|
||||
]));
|
||||
let ts_array = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
|
||||
Some(self.timestamp.clone()),
|
||||
]));
|
||||
|
||||
let state_struct = StructArray::new(
|
||||
vec![
|
||||
Field::new(
|
||||
LATITUDE_FIELD,
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
LONGITUDE_FIELD,
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
|
||||
false,
|
||||
),
|
||||
Field::new(
|
||||
TIMESTAMP_FIELD,
|
||||
DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
|
||||
false,
|
||||
),
|
||||
]
|
||||
.into(),
|
||||
vec![lat_array, lng_array, ts_array],
|
||||
None,
|
||||
);
|
||||
|
||||
Ok(vec![ScalarValue::Struct(Arc::new(state_struct))])
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> datafusion::error::Result<()> {
|
||||
if states.len() != 1 {
|
||||
return Err(DataFusionError::Internal(format!(
|
||||
"Expected 1 states for json_encode_path, got {}",
|
||||
states.len()
|
||||
)));
|
||||
}
|
||||
|
||||
for state in states {
|
||||
let state = as_struct_array(state)?;
|
||||
let lat_list = as_list_array(state.column(0))?.value(0);
|
||||
let lat_array = as_primitive_array::<Float64Type>(&lat_list)?;
|
||||
let lng_list = as_list_array(state.column(1))?.value(0);
|
||||
let lng_array = as_primitive_array::<Float64Type>(&lng_list)?;
|
||||
let ts_list = as_list_array(state.column(2))?.value(0);
|
||||
let ts_array = as_primitive_array::<Int64Type>(&ts_list)?;
|
||||
|
||||
self.lat.extend(lat_array);
|
||||
self.lng.extend(lng_array);
|
||||
self.timestamp.extend(ts_array);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let mut work_vec: Vec<(&Option<f64>, &Option<f64>, &Option<Timestamp>)> = self
|
||||
.lat
|
||||
.iter()
|
||||
.zip(self.lng.iter())
|
||||
.zip(self.timestamp.iter())
|
||||
.map(|((a, b), c)| (a, b, c))
|
||||
.collect();
|
||||
|
||||
// sort by timestamp, we treat null timestamp as 0
|
||||
work_vec.sort_unstable_by_key(|tuple| tuple.2.unwrap_or_else(|| Timestamp::new_second(0)));
|
||||
|
||||
let result = serde_json::to_string(
|
||||
&work_vec
|
||||
.into_iter()
|
||||
// note that we transform to lng,lat for geojson compatibility
|
||||
.map(|(lat, lng, _)| vec![lng, lat])
|
||||
.collect::<Vec<Vec<&Option<f64>>>>(),
|
||||
)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("Serialization failure: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)?;
|
||||
|
||||
Ok(Value::String(result.into()))
|
||||
}
|
||||
}
|
||||
|
||||
/// This function accepts rows of lat, lng and timestamp, sorts them by timestamp, and
/// encodes them into a geojson-like path.
///
|
||||
/// Example:
|
||||
///
|
||||
/// ```sql
|
||||
/// SELECT json_encode_path(lat, lon, timestamp) FROM table [group by ...];
|
||||
/// ```
|
||||
///
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct JsonPathEncodeFunctionCreator {}
|
||||
impl AggregateFunctionCreator for JsonPathEncodeFunctionCreator {
    fn creator(&self) -> AccumulatorCreatorFunction {
        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
            let ts_type = types[2].clone();
            Ok(Box::new(JsonPathAccumulator::new(ts_type)))
        });
        creator
    }

    fn output_type(&self) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 3, InvalidInputStateSnafu);
        let timestamp_type = input_types[2].clone();
        Ok(vec![
            ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
            ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
            ConcreteDataType::list_datatype(timestamp_type),
        ])
    }
}

#[cfg(test)]
mod tests {
    use datafusion::arrow::array::{Float64Array, TimestampNanosecondArray};
    use datafusion::scalar::ScalarValue;

    use super::*;

    #[test]
    fn test_json_encode_path_basic() {
        let mut accumulator = JsonEncodePathAccumulator::new();

        // Create test data
        let lat_array = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
        let lng_array = Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0]));
        let ts_array = Arc::new(TimestampNanosecondArray::from(vec![100, 200, 300]));

        // Update batch
        accumulator
            .update_batch(&[lat_array, lng_array, ts_array])
            .unwrap();

        // Evaluate
        let result = accumulator.evaluate().unwrap();
        assert_eq!(
            result,
            ScalarValue::Utf8(Some("[[4.0,1.0],[5.0,2.0],[6.0,3.0]]".to_string()))
        );
    }

    #[test]
    fn test_json_encode_path_sort_by_timestamp() {
        let mut accumulator = JsonEncodePathAccumulator::new();

        // Create test data with unordered timestamps
        let lat_array = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
        let lng_array = Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0]));
        let ts_array = Arc::new(TimestampNanosecondArray::from(vec![300, 100, 200]));

        // Update batch
        accumulator
            .update_batch(&[lat_array, lng_array, ts_array])
            .unwrap();

        // Evaluate
        let result = accumulator.evaluate().unwrap();
        assert_eq!(
            result,
            ScalarValue::Utf8(Some("[[5.0,2.0],[6.0,3.0],[4.0,1.0]]".to_string()))
        );
    }

    #[test]
    fn test_json_encode_path_merge() {
        let mut accumulator1 = JsonEncodePathAccumulator::new();
        let mut accumulator2 = JsonEncodePathAccumulator::new();

        // Create test data for first accumulator
        let lat_array1 = Arc::new(Float64Array::from(vec![1.0]));
        let lng_array1 = Arc::new(Float64Array::from(vec![4.0]));
        let ts_array1 = Arc::new(TimestampNanosecondArray::from(vec![100]));

        // Create test data for second accumulator
        let lat_array2 = Arc::new(Float64Array::from(vec![2.0]));
        let lng_array2 = Arc::new(Float64Array::from(vec![5.0]));
        let ts_array2 = Arc::new(TimestampNanosecondArray::from(vec![200]));

        // Update batches
        accumulator1
            .update_batch(&[lat_array1, lng_array1, ts_array1])
            .unwrap();
        accumulator2
            .update_batch(&[lat_array2, lng_array2, ts_array2])
            .unwrap();

        // Get states
        let state1 = accumulator1.state().unwrap();
        let state2 = accumulator2.state().unwrap();

        // Create a merged accumulator
        let mut merged = JsonEncodePathAccumulator::new();

        // Extract the struct arrays from the states
        let state_array1 = match &state1[0] {
            ScalarValue::Struct(array) => array.clone(),
            _ => panic!("Expected Struct scalar value"),
        };

        let state_array2 = match &state2[0] {
            ScalarValue::Struct(array) => array.clone(),
            _ => panic!("Expected Struct scalar value"),
        };

        // Merge state arrays
        merged.merge_batch(&[state_array1]).unwrap();
        merged.merge_batch(&[state_array2]).unwrap();

        // Evaluate merged result
        let result = merged.evaluate().unwrap();
        assert_eq!(
            result,
            ScalarValue::Utf8(Some("[[4.0,1.0],[5.0,2.0]]".to_string()))
        );
    }
}
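
// For context: creators like JsonPathEncodeFunctionCreator are turned into DataFusion
// AggregateUDFs through the create_aggregate_function helper, the same pattern the vector
// aggregates below migrate away from. Hedged sketch; the wrapper name and argument count
// are illustrative, not taken from this change.
fn json_encode_path_udaf() -> datafusion_expr::AggregateUDF {
    common_query::logical_plan::create_aggregate_function(
        "json_encode_path".to_string(),
        3, // lat, lng, timestamp
        std::sync::Arc::new(JsonPathEncodeFunctionCreator::default()),
    )
    .into()
}
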
@@ -12,21 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{CreateAccumulatorSnafu, Error, InvalidFuncArgsSnafu};
|
||||
use common_query::logical_plan::{
|
||||
create_aggregate_function, Accumulator, AggregateFunctionCreator,
|
||||
};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
use datafusion_expr::AggregateUDF;
|
||||
use datatypes::prelude::{ConcreteDataType, Value, *};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
|
||||
use arrow_schema::{DataType, Field};
|
||||
use datafusion::logical_expr::{Signature, TypeSignature, Volatility};
|
||||
use datafusion_common::{Result, ScalarValue};
|
||||
use datafusion_expr::{Accumulator, AggregateUDF, SimpleAggregateUDF};
|
||||
use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs;
|
||||
use nalgebra::{Const, DVectorView, Dyn, OVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
|
||||
use crate::scalars::vector::impl_conv::{
|
||||
binlit_as_veclit, parse_veclit_from_strlit, veclit_to_binlit,
|
||||
};
|
||||
|
||||
/// Aggregates by multiplying elements across the same dimension, returns a vector.
|
||||
#[derive(Debug, Default)]
|
||||
@@ -35,57 +34,42 @@ pub struct VectorProduct {
|
||||
has_null: bool,
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct VectorProductCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for VectorProductCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
ensure!(
|
||||
types.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
types.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
let input_type = &types[0];
|
||||
match input_type {
|
||||
ConcreteDataType::String(_) | ConcreteDataType::Binary(_) => {
|
||||
Ok(Box::new(VectorProduct::default()))
|
||||
}
|
||||
_ => {
|
||||
let err_msg = format!(
|
||||
"\"VEC_PRODUCT\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
}
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> common_query::error::Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> common_query::error::Result<Vec<ConcreteDataType>> {
|
||||
Ok(vec![self.output_type()?])
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorProduct {
|
||||
/// Create a new `AggregateUDF` for the `vec_product` aggregate function.
|
||||
pub fn uadf_impl() -> AggregateUDF {
|
||||
create_aggregate_function(
|
||||
"vec_product".to_string(),
|
||||
1,
|
||||
Arc::new(VectorProductCreator::default()),
|
||||
)
|
||||
.into()
|
||||
let signature = Signature::one_of(
|
||||
vec![
|
||||
TypeSignature::Exact(vec![DataType::Utf8]),
|
||||
TypeSignature::Exact(vec![DataType::Binary]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
);
|
||||
let udaf = SimpleAggregateUDF::new_with_signature(
|
||||
"vec_product",
|
||||
signature,
|
||||
DataType::Binary,
|
||||
Arc::new(Self::accumulator),
|
||||
vec![Arc::new(Field::new("x", DataType::Binary, true))],
|
||||
);
|
||||
AggregateUDF::from(udaf)
|
||||
}
|
||||
|
||||
fn accumulator(args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
if args.schema.fields().len() != 1 {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"expect creating `VEC_PRODUCT` with only one input field, actual {}",
|
||||
args.schema.fields().len()
|
||||
)));
|
||||
}
|
||||
|
||||
let t = args.schema.field(0).data_type();
|
||||
if !matches!(t, DataType::Utf8 | DataType::Binary) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"unexpected input datatype {t} when creating `VEC_PRODUCT`"
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Box::new(VectorProduct::default()))
|
||||
}
|
||||
|
||||
fn inner(&mut self, len: usize) -> &mut OVector<f32, Dyn> {
|
||||
@@ -94,67 +78,82 @@ impl VectorProduct {
|
||||
})
|
||||
}
|
||||
|
||||
fn update(&mut self, values: &[VectorRef], is_update: bool) -> Result<(), Error> {
|
||||
fn update(&mut self, values: &[ArrayRef], is_update: bool) -> Result<()> {
|
||||
if values.is_empty() || self.has_null {
|
||||
return Ok(());
|
||||
};
|
||||
let column = &values[0];
|
||||
let len = column.len();
|
||||
|
||||
match as_veclit_if_const(column)? {
|
||||
Some(column) => {
|
||||
let vec_column = DVectorView::from_slice(&column, column.len()).scale(len as f32);
|
||||
*self.inner(vec_column.len()) =
|
||||
(*self.inner(vec_column.len())).component_mul(&vec_column);
|
||||
let vectors = match values[0].data_type() {
|
||||
DataType::Utf8 => {
|
||||
let arr: &StringArray = values[0].as_string();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into)))
|
||||
.map(|x| x.map(Cow::Owned))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
None => {
|
||||
for i in 0..len {
|
||||
let Some(arg0) = as_veclit(column.get_ref(i))? else {
|
||||
if is_update {
|
||||
self.has_null = true;
|
||||
self.product = None;
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
let vec_column = DVectorView::from_slice(&arg0, arg0.len());
|
||||
*self.inner(vec_column.len()) =
|
||||
(*self.inner(vec_column.len())).component_mul(&vec_column);
|
||||
}
|
||||
DataType::Binary => {
|
||||
let arr: &BinaryArray = values[0].as_binary();
|
||||
arr.iter()
|
||||
.filter_map(|x| x.map(|b| binlit_as_veclit(b).map_err(Into::into)))
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
}
|
||||
_ => {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
"unsupported data type {} for `VEC_PRODUCT`",
|
||||
values[0].data_type()
|
||||
)))
|
||||
}
|
||||
};
|
||||
if vectors.len() != values[0].len() {
|
||||
if is_update {
|
||||
self.has_null = true;
|
||||
self.product = None;
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
vectors.iter().for_each(|v| {
|
||||
let v = DVectorView::from_slice(v, v.len());
|
||||
let inner = self.inner(v.len());
|
||||
*inner = inner.component_mul(&v);
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for VectorProduct {
|
||||
fn state(&self) -> common_query::error::Result<Vec<Value>> {
|
||||
fn state(&mut self) -> Result<Vec<ScalarValue>> {
|
||||
self.evaluate().map(|v| vec![v])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> common_query::error::Result<()> {
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
self.update(values, true)
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> common_query::error::Result<()> {
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
self.update(states, false)
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> common_query::error::Result<Value> {
|
||||
fn evaluate(&mut self) -> Result<ScalarValue> {
|
||||
match &self.product {
|
||||
None => Ok(Value::Null),
|
||||
Some(vector) => {
|
||||
let v = vector.as_slice();
|
||||
Ok(Value::from(veclit_to_binlit(v)))
|
||||
}
|
||||
None => Ok(ScalarValue::Binary(None)),
|
||||
Some(vector) => Ok(ScalarValue::Binary(Some(veclit_to_binlit(
|
||||
vector.as_slice(),
|
||||
)))),
|
||||
}
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::{ConstantVector, StringVector};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, Vector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -165,59 +164,60 @@ mod tests {
|
||||
vec_product.update_batch(&[]).unwrap();
|
||||
assert!(vec_product.product.is_none());
|
||||
assert!(!vec_product.has_null);
|
||||
assert_eq!(Value::Null, vec_product.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_product.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut vec_product = VectorProduct::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![Some(
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Some(
|
||||
"[1.0,2.0,3.0]".to_string(),
|
||||
)]))];
|
||||
vec_product.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[1.0, 2.0, 3.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[1.0, 2.0, 3.0]))),
|
||||
vec_product.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update one null value
|
||||
let mut vec_product = VectorProduct::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![Option::<String>::None]))];
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Option::<String>::None]))];
|
||||
vec_product.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, vec_product.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_product.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut vec_product = VectorProduct::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_product.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[28.0, 80.0, 162.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[28.0, 80.0, 162.0]))),
|
||||
vec_product.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update null-value batch
|
||||
let mut vec_product = VectorProduct::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
None,
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_product.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, vec_product.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_product.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut vec_product = VectorProduct::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from_vec(vec!["[1.0,2.0,3.0]".to_string()])),
|
||||
4,
|
||||
))];
|
||||
))
|
||||
.to_arrow_array()];
|
||||
|
||||
vec_product.update_batch(&v).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[4.0, 8.0, 12.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[1.0, 16.0, 81.0]))),
|
||||
vec_product.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -14,19 +14,18 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{CreateAccumulatorSnafu, Error, InvalidFuncArgsSnafu};
|
||||
use common_query::logical_plan::{
|
||||
create_aggregate_function, Accumulator, AggregateFunctionCreator,
|
||||
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
|
||||
use arrow_schema::{DataType, Field};
|
||||
use datafusion_common::{Result, ScalarValue};
|
||||
use datafusion_expr::{
|
||||
Accumulator, AggregateUDF, Signature, SimpleAggregateUDF, TypeSignature, Volatility,
|
||||
};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
use datafusion_expr::AggregateUDF;
|
||||
use datatypes::prelude::{ConcreteDataType, Value, *};
|
||||
use datatypes::vectors::VectorRef;
|
||||
use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs;
|
||||
use nalgebra::{Const, DVectorView, Dyn, OVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
|
||||
use crate::scalars::vector::impl_conv::{
|
||||
binlit_as_veclit, parse_veclit_from_strlit, veclit_to_binlit,
|
||||
};
|
||||
|
||||
/// The accumulator for the `vec_sum` aggregate function.
|
||||
#[derive(Debug, Default)]
|
||||
@@ -35,57 +34,42 @@ pub struct VectorSum {
|
||||
has_null: bool,
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct VectorSumCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for VectorSumCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
ensure!(
|
||||
types.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
types.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
let input_type = &types[0];
|
||||
match input_type {
|
||||
ConcreteDataType::String(_) | ConcreteDataType::Binary(_) => {
|
||||
Ok(Box::new(VectorSum::default()))
|
||||
}
|
||||
_ => {
|
||||
let err_msg = format!(
|
||||
"\"VEC_SUM\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
}
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> common_query::error::Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> common_query::error::Result<Vec<ConcreteDataType>> {
|
||||
Ok(vec![self.output_type()?])
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorSum {
|
||||
/// Create a new `AggregateUDF` for the `vec_sum` aggregate function.
|
||||
pub fn uadf_impl() -> AggregateUDF {
|
||||
create_aggregate_function(
|
||||
"vec_sum".to_string(),
|
||||
1,
|
||||
Arc::new(VectorSumCreator::default()),
|
||||
)
|
||||
.into()
|
||||
let signature = Signature::one_of(
|
||||
vec![
|
||||
TypeSignature::Exact(vec![DataType::Utf8]),
|
||||
TypeSignature::Exact(vec![DataType::Binary]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
);
|
||||
let udaf = SimpleAggregateUDF::new_with_signature(
|
||||
"vec_sum",
|
||||
signature,
|
||||
DataType::Binary,
|
||||
Arc::new(Self::accumulator),
|
||||
vec![Arc::new(Field::new("x", DataType::Binary, true))],
|
||||
);
|
||||
AggregateUDF::from(udaf)
|
||||
}
|
||||
|
||||
fn accumulator(args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
if args.schema.fields().len() != 1 {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"expect creating `VEC_SUM` with only one input field, actual {}",
|
||||
args.schema.fields().len()
|
||||
)));
|
||||
}
|
||||
|
||||
let t = args.schema.field(0).data_type();
|
||||
if !matches!(t, DataType::Utf8 | DataType::Binary) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"unexpected input datatype {t} when creating `VEC_SUM`"
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(Box::new(VectorSum::default()))
|
||||
}
|
||||
|
||||
fn inner(&mut self, len: usize) -> &mut OVector<f32, Dyn> {
|
||||
@@ -93,62 +77,87 @@ impl VectorSum {
|
||||
.get_or_insert_with(|| OVector::zeros_generic(Dyn(len), Const::<1>))
|
||||
}
|
||||
|
||||
fn update(&mut self, values: &[VectorRef], is_update: bool) -> Result<(), Error> {
|
||||
fn update(&mut self, values: &[ArrayRef], is_update: bool) -> Result<()> {
|
||||
if values.is_empty() || self.has_null {
|
||||
return Ok(());
|
||||
};
|
||||
let column = &values[0];
|
||||
let len = column.len();
|
||||
|
||||
match as_veclit_if_const(column)? {
|
||||
Some(column) => {
|
||||
let vec_column = DVectorView::from_slice(&column, column.len()).scale(len as f32);
|
||||
*self.inner(vec_column.len()) += vec_column;
|
||||
}
|
||||
None => {
|
||||
for i in 0..len {
|
||||
let Some(arg0) = as_veclit(column.get_ref(i))? else {
|
||||
match values[0].data_type() {
|
||||
DataType::Utf8 => {
|
||||
let arr: &StringArray = values[0].as_string();
|
||||
for s in arr.iter() {
|
||||
let Some(s) = s else {
|
||||
if is_update {
|
||||
self.has_null = true;
|
||||
self.sum = None;
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
let vec_column = DVectorView::from_slice(&arg0, arg0.len());
|
||||
let values = parse_veclit_from_strlit(s)?;
|
||||
let vec_column = DVectorView::from_slice(&values, values.len());
|
||||
*self.inner(vec_column.len()) += vec_column;
|
||||
}
|
||||
}
|
||||
DataType::Binary => {
|
||||
let arr: &BinaryArray = values[0].as_binary();
|
||||
for b in arr.iter() {
|
||||
let Some(b) = b else {
|
||||
if is_update {
|
||||
self.has_null = true;
|
||||
self.sum = None;
|
||||
}
|
||||
return Ok(());
|
||||
};
|
||||
let values = binlit_as_veclit(b)?;
|
||||
let vec_column = DVectorView::from_slice(&values, values.len());
|
||||
*self.inner(vec_column.len()) += vec_column;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
"unsupported data type {} for `VEC_SUM`",
|
||||
values[0].data_type()
|
||||
)))
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for VectorSum {
|
||||
fn state(&self) -> common_query::error::Result<Vec<Value>> {
|
||||
fn state(&mut self) -> Result<Vec<ScalarValue>> {
|
||||
self.evaluate().map(|v| vec![v])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> common_query::error::Result<()> {
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
self.update(values, true)
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> common_query::error::Result<()> {
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
self.update(states, false)
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> common_query::error::Result<Value> {
|
||||
fn evaluate(&mut self) -> Result<ScalarValue> {
|
||||
match &self.sum {
|
||||
None => Ok(Value::Null),
|
||||
Some(vector) => Ok(Value::from(veclit_to_binlit(vector.as_slice()))),
|
||||
None => Ok(ScalarValue::Binary(None)),
|
||||
Some(vector) => Ok(ScalarValue::Binary(Some(veclit_to_binlit(
|
||||
vector.as_slice(),
|
||||
)))),
|
||||
}
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
size_of_val(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::{ConstantVector, StringVector};
|
||||
use arrow::array::StringArray;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, Vector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -159,57 +168,58 @@ mod tests {
|
||||
vec_sum.update_batch(&[]).unwrap();
|
||||
assert!(vec_sum.sum.is_none());
|
||||
assert!(!vec_sum.has_null);
|
||||
assert_eq!(Value::Null, vec_sum.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_sum.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut vec_sum = VectorSum::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![Some(
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Some(
|
||||
"[1.0,2.0,3.0]".to_string(),
|
||||
)]))];
|
||||
vec_sum.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[1.0, 2.0, 3.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[1.0, 2.0, 3.0]))),
|
||||
vec_sum.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update one null value
|
||||
let mut vec_sum = VectorSum::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![Option::<String>::None]))];
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![Option::<String>::None]))];
|
||||
vec_sum.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, vec_sum.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_sum.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut vec_sum = VectorSum::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
Some("[4.0,5.0,6.0]".to_string()),
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_sum.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[12.0, 15.0, 18.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[12.0, 15.0, 18.0]))),
|
||||
vec_sum.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update null-value batch
|
||||
let mut vec_sum = VectorSum::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(StringVector::from(vec![
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(StringArray::from(vec![
|
||||
Some("[1.0,2.0,3.0]".to_string()),
|
||||
None,
|
||||
Some("[7.0,8.0,9.0]".to_string()),
|
||||
]))];
|
||||
vec_sum.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, vec_sum.evaluate().unwrap());
|
||||
assert_eq!(ScalarValue::Binary(None), vec_sum.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut vec_sum = VectorSum::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
let v: Vec<ArrayRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(StringVector::from_vec(vec!["[1.0,2.0,3.0]".to_string()])),
|
||||
4,
|
||||
))];
|
||||
))
|
||||
.to_arrow_array()];
|
||||
vec_sum.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(veclit_to_binlit(&[4.0, 8.0, 12.0])),
|
||||
ScalarValue::Binary(Some(veclit_to_binlit(&[4.0, 8.0, 12.0]))),
|
||||
vec_sum.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -12,28 +12,24 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_macro::admin_fn;
|
||||
use common_query::error::{
|
||||
ExecuteSnafu, InvalidFuncArgsSnafu, MissingFlowServiceHandlerSnafu, Result,
|
||||
UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datafusion_expr::{Signature, Volatility};
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use sql::ast::ObjectNamePartExt;
|
||||
use sql::parser::ParserContext;
|
||||
use store_api::storage::ConcreteDataType;
|
||||
|
||||
use crate::handlers::FlowServiceHandlerRef;
|
||||
|
||||
fn flush_signature() -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
Signature::uniform(1, vec![ArrowDataType::Utf8], Volatility::Immutable)
|
||||
}
|
||||
|
||||
#[admin_fn(
|
||||
@@ -85,9 +81,9 @@ fn parse_flush_flow(
|
||||
let (catalog_name, flow_name) = match &obj_name.0[..] {
|
||||
[flow_name] => (
|
||||
query_ctx.current_catalog().to_string(),
|
||||
flow_name.value.clone(),
|
||||
flow_name.to_string_unquoted(),
|
||||
),
|
||||
[catalog, flow_name] => (catalog.value.clone(), flow_name.value.clone()),
|
||||
[catalog, flow_name] => (catalog.to_string_unquoted(), flow_name.to_string_unquoted()),
|
||||
_ => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
@@ -105,44 +101,55 @@ fn parse_flush_flow(
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::StringVector;
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[test]
|
||||
fn test_flush_flow_metadata() {
|
||||
let f = FlushFlowFunction;
|
||||
let factory: ScalarFunctionFactory = FlushFlowFunction::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!("flush_flow", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
f.signature(),
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
assert_eq!(ArrowDataType::UInt64, f.return_type(&[]).unwrap());
|
||||
let expected_signature = datafusion_expr::Signature::uniform(
|
||||
1,
|
||||
vec![ArrowDataType::Utf8],
|
||||
datafusion_expr::Volatility::Immutable,
|
||||
);
|
||||
assert_eq!(*f.signature(), expected_signature);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_missing_flow_service() {
|
||||
let f = FlushFlowFunction;
|
||||
let factory: ScalarFunctionFactory = FlushFlowFunction::factory().into();
|
||||
let binding = factory.provide(FunctionContext::default());
|
||||
let f = binding.as_async().unwrap();
|
||||
|
||||
let args = vec!["flow_name"];
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
let flow_name_array = Arc::new(arrow::array::StringArray::from(vec!["flow_name"]));
|
||||
|
||||
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
|
||||
let columnar_args = vec![datafusion_expr::ColumnarValue::Array(flow_name_array as _)];
|
||||
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: columnar_args,
|
||||
arg_fields: vec![Arc::new(arrow::datatypes::Field::new(
|
||||
"arg_0",
|
||||
ArrowDataType::Utf8,
|
||||
false,
|
||||
))],
|
||||
return_field: Arc::new(arrow::datatypes::Field::new(
|
||||
"result",
|
||||
ArrowDataType::UInt64,
|
||||
true,
|
||||
)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap_err();
|
||||
assert_eq!(
|
||||
"Missing FlowServiceHandler, not expected",
|
||||
"Execution error: Handler error: Missing FlowServiceHandler, not expected",
|
||||
result.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -41,6 +41,12 @@ impl FunctionContext {
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for FunctionContext {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "FunctionContext {{ query_ctx: {} }}", self.query_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for FunctionContext {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -67,22 +73,3 @@ pub trait Function: fmt::Display + Sync + Send {
|
||||
}
|
||||
|
||||
pub type FunctionRef = Arc<dyn Function>;
|
||||
|
||||
/// Async Scalar function trait
|
||||
#[async_trait::async_trait]
|
||||
pub trait AsyncFunction: fmt::Display + Sync + Send {
|
||||
/// Returns the name of the function, should be unique.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// The returned data type of function execution.
|
||||
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType>;
|
||||
|
||||
/// The signature of function.
|
||||
fn signature(&self) -> Signature;
|
||||
|
||||
/// Evaluate the function, e.g. run/execute the function.
|
||||
/// TODO(dennis): simplify the signature and refactor all the admin functions.
|
||||
async fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef>;
|
||||
}
|
||||
|
||||
pub type AsyncFunctionRef = Arc<dyn AsyncFunction>;
|
||||
|
||||
@@ -22,8 +22,8 @@ use crate::scalars::udf::create_udf;
|
||||
/// A factory for creating `ScalarUDF` that require a function context.
|
||||
#[derive(Clone)]
|
||||
pub struct ScalarFunctionFactory {
|
||||
name: String,
|
||||
factory: Arc<dyn Fn(FunctionContext) -> ScalarUDF + Send + Sync>,
|
||||
pub name: String,
|
||||
pub factory: Arc<dyn Fn(FunctionContext) -> ScalarUDF + Send + Sync>,
|
||||
}
|
||||
|
||||
impl ScalarFunctionFactory {
|
||||
|
||||
@@ -24,7 +24,7 @@ use crate::aggrs::aggr_wrapper::StateMergeHelper;
|
||||
use crate::aggrs::approximate::ApproximateFunction;
|
||||
use crate::aggrs::count_hash::CountHash;
|
||||
use crate::aggrs::vector::VectorFunction as VectorAggrFunction;
|
||||
use crate::function::{AsyncFunctionRef, Function, FunctionRef};
|
||||
use crate::function::{Function, FunctionRef};
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
use crate::scalars::date::DateFunction;
|
||||
use crate::scalars::expression::ExpressionFunction;
|
||||
@@ -42,11 +42,18 @@ use crate::system::SystemFunction;
|
||||
#[derive(Default)]
|
||||
pub struct FunctionRegistry {
|
||||
functions: RwLock<HashMap<String, ScalarFunctionFactory>>,
|
||||
async_functions: RwLock<HashMap<String, AsyncFunctionRef>>,
|
||||
aggregate_functions: RwLock<HashMap<String, AggregateUDF>>,
|
||||
}
|
||||
|
||||
impl FunctionRegistry {
|
||||
/// Register a function in the registry by converting it into a `ScalarFunctionFactory`.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `func` - An object that can be converted into a `ScalarFunctionFactory`.
|
||||
///
|
||||
/// The function is inserted into the internal function map, keyed by its name.
|
||||
/// If a function with the same name already exists, it will be replaced.
|
||||
pub fn register(&self, func: impl Into<ScalarFunctionFactory>) {
|
||||
let func = func.into();
|
||||
let _ = self
|
||||
@@ -56,18 +63,12 @@ impl FunctionRegistry {
|
||||
.insert(func.name().to_string(), func);
|
||||
}
|
||||
|
||||
/// Register a scalar function in the registry.
|
||||
pub fn register_scalar(&self, func: impl Function + 'static) {
|
||||
self.register(Arc::new(func) as FunctionRef);
|
||||
}
|
||||
|
||||
pub fn register_async(&self, func: AsyncFunctionRef) {
|
||||
let _ = self
|
||||
.async_functions
|
||||
.write()
|
||||
.unwrap()
|
||||
.insert(func.name().to_string(), func);
|
||||
}
|
||||
|
||||
/// Register an aggregate function in the registry.
|
||||
pub fn register_aggr(&self, func: AggregateUDF) {
|
||||
let _ = self
|
||||
.aggregate_functions
|
||||
@@ -76,28 +77,16 @@ impl FunctionRegistry {
|
||||
.insert(func.name().to_string(), func);
|
||||
}
|
||||
|
||||
pub fn get_async_function(&self, name: &str) -> Option<AsyncFunctionRef> {
|
||||
self.async_functions.read().unwrap().get(name).cloned()
|
||||
}
|
||||
|
||||
pub fn async_functions(&self) -> Vec<AsyncFunctionRef> {
|
||||
self.async_functions
|
||||
.read()
|
||||
.unwrap()
|
||||
.values()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn get_function(&self, name: &str) -> Option<ScalarFunctionFactory> {
|
||||
self.functions.read().unwrap().get(name).cloned()
|
||||
}
|
||||
|
||||
/// Returns a list of all scalar functions registered in the registry.
|
||||
pub fn scalar_functions(&self) -> Vec<ScalarFunctionFactory> {
|
||||
self.functions.read().unwrap().values().cloned().collect()
|
||||
}
|
||||
|
||||
/// Returns a list of all aggregate functions registered in the registry.
|
||||
pub fn aggregate_functions(&self) -> Vec<AggregateUDF> {
|
||||
self.aggregate_functions
|
||||
.read()
|
||||
@@ -107,6 +96,7 @@ impl FunctionRegistry {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns true if an aggregate function with the given name exists in the registry.
|
||||
pub fn is_aggr_func_exist(&self, name: &str) -> bool {
|
||||
self.aggregate_functions.read().unwrap().contains_key(name)
|
||||
}
|
||||
|
||||
@@ -34,6 +34,33 @@ pub struct ClampFunction;
|
||||
|
||||
const CLAMP_NAME: &str = "clamp";
|
||||
|
||||
/// Ensure the vector is constant and not empty (i.e., all values are identical)
|
||||
fn ensure_constant_vector(vector: &VectorRef) -> Result<()> {
|
||||
ensure!(
|
||||
!vector.is_empty(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: "Expect at least one value",
|
||||
}
|
||||
);
|
||||
|
||||
if vector.is_const() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let first = vector.get_ref(0);
|
||||
for i in 1..vector.len() {
|
||||
let v = vector.get_ref(i);
|
||||
if first != v {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: "All values in min/max argument must be identical",
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl Function for ClampFunction {
|
||||
fn name(&self) -> &str {
|
||||
CLAMP_NAME
|
||||
@@ -80,16 +107,9 @@ impl Function for ClampFunction {
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
(columns[1].len() == 1 || columns[1].is_const())
|
||||
&& (columns[2].len() == 1 || columns[2].is_const()),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The second and third args should be scalar, have: {:?}, {:?}",
|
||||
columns[1], columns[2]
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
ensure_constant_vector(&columns[1])?;
|
||||
ensure_constant_vector(&columns[2])?;
|
||||
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
|
||||
let input_array = columns[0].to_arrow_array();
|
||||
@@ -204,15 +224,8 @@ impl Function for ClampMinFunction {
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
columns[1].len() == 1 || columns[1].is_const(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The second arg (min) should be scalar, have: {:?}",
|
||||
columns[1]
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
ensure_constant_vector(&columns[1])?;
|
||||
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
|
||||
let input_array = columns[0].to_arrow_array();
|
||||
@@ -292,15 +305,8 @@ impl Function for ClampMaxFunction {
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
columns[1].len() == 1 || columns[1].is_const(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The second arg (max) should be scalar, have: {:?}",
|
||||
columns[1]
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
ensure_constant_vector(&columns[1])?;
|
||||
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
|
||||
let input_array = columns[0].to_arrow_array();
|
||||
@@ -537,8 +543,8 @@ mod test {
|
||||
let func = ClampFunction;
|
||||
let args = [
|
||||
Arc::new(Float64Vector::from(input)) as _,
|
||||
Arc::new(Float64Vector::from_vec(vec![min, min])) as _,
|
||||
Arc::new(Float64Vector::from_vec(vec![max])) as _,
|
||||
Arc::new(Float64Vector::from_vec(vec![min, max])) as _,
|
||||
Arc::new(Float64Vector::from_vec(vec![max, min])) as _,
|
||||
];
|
||||
let result = func.eval(&FunctionContext::default(), args.as_slice());
|
||||
assert!(result.is_err());
|
||||
|
||||
@@ -16,15 +16,12 @@ use std::any::Any;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::FromScalarValueSnafu;
|
||||
use common_query::prelude::ColumnarValue;
|
||||
use datafusion::logical_expr::{ScalarFunctionArgs, ScalarUDFImpl};
|
||||
use datafusion_expr::ScalarUDF;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::Helper;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::function::{FunctionContext, FunctionRef};
|
||||
use crate::state::FunctionState;
|
||||
@@ -76,13 +73,7 @@ impl ScalarUDFImpl for ScalarUdf {
|
||||
let columns = args
|
||||
.args
|
||||
.iter()
|
||||
.map(|x| {
|
||||
ColumnarValue::try_from(x).and_then(|y| match y {
|
||||
ColumnarValue::Vector(z) => Ok(z),
|
||||
ColumnarValue::Scalar(z) => Helper::try_from_scalar_value(z, args.number_rows)
|
||||
.context(FromScalarValueSnafu),
|
||||
})
|
||||
})
|
||||
.map(|x| ColumnarValue::try_from(x).and_then(|y| y.try_into_vector(args.number_rows)))
|
||||
.collect::<common_query::error::Result<Vec<_>>>()?;
|
||||
let v = self
|
||||
.function
|
||||
@@ -113,6 +104,8 @@ mod tests {
|
||||
|
||||
use common_query::prelude::ScalarValue;
|
||||
use datafusion::arrow::array::BooleanArray;
|
||||
use datafusion_common::config::ConfigOptions;
|
||||
use datatypes::arrow::datatypes::Field;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::vectors::{BooleanVector, ConstantVector};
|
||||
@@ -162,10 +155,21 @@ mod tests {
|
||||
]))),
|
||||
];
|
||||
|
||||
let arg_fields = vec![
|
||||
Arc::new(Field::new("a", args[0].data_type(), false)),
|
||||
Arc::new(Field::new("b", args[1].data_type(), false)),
|
||||
];
|
||||
let return_field = Arc::new(Field::new(
|
||||
"x",
|
||||
ConcreteDataType::boolean_datatype().as_arrow_type(),
|
||||
false,
|
||||
));
|
||||
let args = ScalarFunctionArgs {
|
||||
args,
|
||||
arg_fields,
|
||||
number_rows: 4,
|
||||
return_type: &ConcreteDataType::boolean_datatype().as_arrow_type(),
|
||||
return_field,
|
||||
config_options: Arc::new(ConfigOptions::default()),
|
||||
};
|
||||
match udf.invoke_with_args(args).unwrap() {
|
||||
datafusion_expr::ColumnarValue::Array(x) => {
|
||||
|
||||
@@ -19,8 +19,6 @@ mod procedure_state;
|
||||
mod timezone;
|
||||
mod version;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use build::BuildFunction;
|
||||
use database::{
|
||||
ConnectionIdFunction, CurrentSchemaFunction, DatabaseFunction, PgBackendPidFunction,
|
||||
@@ -46,7 +44,7 @@ impl SystemFunction {
|
||||
registry.register_scalar(PgBackendPidFunction);
|
||||
registry.register_scalar(ConnectionIdFunction);
|
||||
registry.register_scalar(TimezoneFunction);
|
||||
registry.register_async(Arc::new(ProcedureStateFunction));
|
||||
registry.register(ProcedureStateFunction::factory());
|
||||
PGCatalogFunction::register(registry);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,13 +13,14 @@
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::ProcedureStatus;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_macro::admin_fn;
|
||||
use common_meta::rpc::procedure::ProcedureStateResponse;
|
||||
use common_query::error::{
|
||||
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
|
||||
UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datafusion_expr::{Signature, Volatility};
|
||||
use datatypes::prelude::*;
|
||||
use serde::Serialize;
|
||||
use session::context::QueryContextRef;
|
||||
@@ -81,73 +82,86 @@ pub(crate) async fn procedure_state(
|
||||
}
|
||||
|
||||
fn signature() -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
Signature::uniform(1, vec![ArrowDataType::Utf8], Volatility::Immutable)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::StringVector;
|
||||
use arrow::array::StringArray;
|
||||
use arrow::datatypes::{DataType, Field};
|
||||
use datafusion_expr::ColumnarValue;
|
||||
|
||||
use super::*;
|
||||
use crate::function::{AsyncFunction, FunctionContext};
|
||||
use crate::function::FunctionContext;
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
|
||||
#[test]
|
||||
fn test_procedure_state_misc() {
|
||||
let f = ProcedureStateFunction;
|
||||
let factory: ScalarFunctionFactory = ProcedureStateFunction::factory().into();
|
||||
let f = factory.provide(FunctionContext::mock());
|
||||
assert_eq!("procedure_state", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
assert_eq!(DataType::Utf8, f.return_type(&[]).unwrap());
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(1, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::string_datatype()]
|
||||
));
|
||||
datafusion_expr::Signature {
|
||||
type_signature: datafusion_expr::TypeSignature::Uniform(1, valid_types),
|
||||
volatility: datafusion_expr::Volatility::Immutable
|
||||
} if valid_types == &vec![ArrowDataType::Utf8]));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_missing_procedure_service() {
|
||||
let f = ProcedureStateFunction;
|
||||
let factory: ScalarFunctionFactory = ProcedureStateFunction::factory().into();
|
||||
let binding = factory.provide(FunctionContext::default());
|
||||
let f = binding.as_async().unwrap();
|
||||
|
||||
let args = vec!["pid"];
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
|
||||
assert_eq!(
|
||||
"Missing ProcedureServiceHandler, not expected",
|
||||
result.to_string()
|
||||
);
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
|
||||
"pid",
|
||||
])))],
|
||||
arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_procedure_state() {
|
||||
let f = ProcedureStateFunction;
|
||||
let factory: ScalarFunctionFactory = ProcedureStateFunction::factory().into();
|
||||
let provider = factory.provide(FunctionContext::mock());
|
||||
let f = provider.as_async().unwrap();
|
||||
|
||||
let args = vec!["pid"];
|
||||
let func_args = datafusion::logical_expr::ScalarFunctionArgs {
|
||||
args: vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
|
||||
"pid",
|
||||
])))],
|
||||
arg_fields: vec![Arc::new(Field::new("arg_0", DataType::Utf8, false))],
|
||||
return_field: Arc::new(Field::new("result", DataType::Utf8, true)),
|
||||
number_rows: 1,
|
||||
config_options: Arc::new(datafusion_common::config::ConfigOptions::default()),
|
||||
};
|
||||
let result = f.invoke_async_with_args(func_args).await.unwrap();
|
||||
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
|
||||
|
||||
let expect: VectorRef = Arc::new(StringVector::from(vec![
|
||||
"{\"status\":\"Done\",\"error\":\"OK\"}",
|
||||
]));
|
||||
assert_eq!(expect, result);
|
||||
match result {
|
||||
ColumnarValue::Array(array) => {
|
||||
let result_array = array.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
assert_eq!(
|
||||
result_array.value(0),
|
||||
"{\"status\":\"Done\",\"error\":\"OK\"}"
|
||||
);
|
||||
}
|
||||
ColumnarValue::Scalar(scalar) => {
|
||||
assert_eq!(
|
||||
scalar,
|
||||
datafusion_common::ScalarValue::Utf8(Some(
|
||||
"{\"status\":\"Done\",\"error\":\"OK\"}".to_string()
|
||||
))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ common-telemetry.workspace = true
common-time.workspace = true
dashmap.workspace = true
datatypes.workspace = true
flatbuffers = "24"
flatbuffers = "25.2"
hyper.workspace = true
lazy_static.workspace = true
prost.workspace = true

@@ -21,6 +21,7 @@ use common_telemetry::info;
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use tokio_util::sync::CancellationToken;
use tonic::transport::{

@@ -97,6 +98,7 @@ impl ChannelManager {
        }
    }

    /// Read tls cert and key files and create a ChannelManager with TLS config.
    pub fn with_tls_config(config: ChannelConfig) -> Result<Self> {
        let mut inner = Inner::with_config(config.clone());

@@ -105,20 +107,35 @@ impl ChannelManager {
            msg: "no config input",
        })?;

        let server_root_ca_cert = std::fs::read_to_string(path_config.server_ca_cert_path)
            .context(InvalidConfigFilePathSnafu)?;
        let server_root_ca_cert = Certificate::from_pem(server_root_ca_cert);
        let client_cert = std::fs::read_to_string(path_config.client_cert_path)
            .context(InvalidConfigFilePathSnafu)?;
        let client_key = std::fs::read_to_string(path_config.client_key_path)
            .context(InvalidConfigFilePathSnafu)?;
        let client_identity = Identity::from_pem(client_cert, client_key);
        if !path_config.enabled {
            // if TLS not enabled, just ignore other tls config
            // and not set `client_tls_config` hence not use TLS
            return Ok(Self {
                inner: Arc::new(inner),
            });
        }

        inner.client_tls_config = Some(
            ClientTlsConfig::new()
                .ca_certificate(server_root_ca_cert)
                .identity(client_identity),
        );
        let mut tls_config = ClientTlsConfig::new();

        if let Some(server_ca) = path_config.server_ca_cert_path {
            let server_root_ca_cert =
                std::fs::read_to_string(server_ca).context(InvalidConfigFilePathSnafu)?;
            let server_root_ca_cert = Certificate::from_pem(server_root_ca_cert);
            tls_config = tls_config.ca_certificate(server_root_ca_cert);
        }

        if let (Some(client_cert_path), Some(client_key_path)) =
            (&path_config.client_cert_path, &path_config.client_key_path)
        {
            let client_cert =
                std::fs::read_to_string(client_cert_path).context(InvalidConfigFilePathSnafu)?;
            let client_key =
                std::fs::read_to_string(client_key_path).context(InvalidConfigFilePathSnafu)?;
            let client_identity = Identity::from_pem(client_cert, client_key);
            tls_config = tls_config.identity(client_identity);
        }

        inner.client_tls_config = Some(tls_config);

        Ok(Self {
            inner: Arc::new(inner),
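
// With the ClientTlsOption fields now optional, TLS can be enabled with only a server CA
// certificate and no client identity, so plain TLS and mTLS share one code path. Hedged
// usage sketch; the path is a placeholder and Result is this crate's own alias.
fn tls_with_ca_only() -> Result<ChannelManager> {
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        enabled: true,
        server_ca_cert_path: Some("/path/to/ca.pem".to_string()),
        // No client identity configured: the channel uses TLS without mTLS.
        client_cert_path: None,
        client_key_path: None,
    });
    ChannelManager::with_tls_config(config)
}
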
@@ -270,11 +287,13 @@
        }
    }
}

#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ClientTlsOption {
    pub server_ca_cert_path: String,
    pub client_cert_path: String,
    pub client_key_path: String,
    /// Whether to enable TLS for client.
    pub enabled: bool,
    pub server_ca_cert_path: Option<String>,
    pub client_cert_path: Option<String>,
    pub client_key_path: Option<String>,
}

#[derive(Clone, Debug, PartialEq, Eq)]

@@ -590,9 +609,10 @@ mod tests {
            .tcp_keepalive(Duration::from_secs(2))
            .tcp_nodelay(false)
            .client_tls_config(ClientTlsOption {
                server_ca_cert_path: "some_server_path".to_string(),
                client_cert_path: "some_cert_path".to_string(),
                client_key_path: "some_key_path".to_string(),
                enabled: true,
                server_ca_cert_path: Some("some_server_path".to_string()),
                client_cert_path: Some("some_cert_path".to_string()),
                client_key_path: Some("some_key_path".to_string()),
            });

        assert_eq!(

@@ -610,9 +630,10 @@ mod tests {
            tcp_keepalive: Some(Duration::from_secs(2)),
            tcp_nodelay: false,
            client_tls: Some(ClientTlsOption {
                server_ca_cert_path: "some_server_path".to_string(),
                client_cert_path: "some_cert_path".to_string(),
                client_key_path: "some_key_path".to_string(),
                enabled: true,
                server_ca_cert_path: Some("some_server_path".to_string()),
                client_cert_path: Some("some_cert_path".to_string()),
                client_key_path: Some("some_key_path".to_string()),
            }),
            max_recv_message_size: DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE,
            max_send_message_size: DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
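
// Because ClientTlsOption now derives Serialize/Deserialize, it can be embedded in
// configuration structs and round-tripped through any serde format. Hypothetical sanity
// check (not part of this change); serde_json is used purely for illustration.
#[test]
fn client_tls_option_serde_roundtrip() {
    let opt = ClientTlsOption {
        enabled: true,
        server_ca_cert_path: Some("tests/tls/ca.pem".to_string()),
        client_cert_path: Some("tests/tls/client.pem".to_string()),
        client_key_path: Some("tests/tls/client.key".to_string()),
    };
    let encoded = serde_json::to_string(&opt).unwrap();
    let decoded: ClientTlsOption = serde_json::from_str(&encoded).unwrap();
    assert_eq!(opt, decoded);
}
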
@@ -25,7 +25,7 @@ use common_recordbatch::DfRecordBatch;
use datatypes::arrow;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::buffer::Buffer;
use datatypes::arrow::datatypes::{Schema as ArrowSchema, SchemaRef};
use datatypes::arrow::datatypes::{DataType, Schema as ArrowSchema, SchemaRef};
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::ipc::{convert, reader, root_as_message, writer, MessageHeader};
use flatbuffers::FlatBufferBuilder;

@@ -91,7 +91,15 @@ impl FlightEncoder {
    /// be encoded to exactly one [FlightData].
    pub fn encode(&mut self, flight_message: FlightMessage) -> Vec1<FlightData> {
        match flight_message {
            FlightMessage::Schema(schema) => vec1![self.encode_schema(schema.as_ref())],
            FlightMessage::Schema(schema) => {
                schema.fields().iter().for_each(|x| {
                    if matches!(x.data_type(), DataType::Dictionary(_, _)) {
                        self.dictionary_tracker.next_dict_id();
                    }
                });

                vec1![self.encode_schema(schema.as_ref())]
            }
            FlightMessage::RecordBatch(record_batch) => {
                let (encoded_dictionaries, encoded_batch) = self
                    .data_gen

@@ -23,9 +23,10 @@ async fn test_mtls_config() {

    // test wrong file
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        server_ca_cert_path: "tests/tls/wrong_ca.pem".to_string(),
        client_cert_path: "tests/tls/wrong_client.pem".to_string(),
        client_key_path: "tests/tls/wrong_client.key".to_string(),
        enabled: true,
        server_ca_cert_path: Some("tests/tls/wrong_ca.pem".to_string()),
        client_cert_path: Some("tests/tls/wrong_client.pem".to_string()),
        client_key_path: Some("tests/tls/wrong_client.key".to_string()),
    });

    let re = ChannelManager::with_tls_config(config);

@@ -33,9 +34,10 @@ async fn test_mtls_config() {

    // test corrupted file content
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        server_ca_cert_path: "tests/tls/ca.pem".to_string(),
        client_cert_path: "tests/tls/client.pem".to_string(),
        client_key_path: "tests/tls/corrupted".to_string(),
        enabled: true,
        server_ca_cert_path: Some("tests/tls/ca.pem".to_string()),
        client_cert_path: Some("tests/tls/client.pem".to_string()),
        client_key_path: Some("tests/tls/corrupted".to_string()),
    });

    let re = ChannelManager::with_tls_config(config).unwrap();

@@ -44,9 +46,10 @@ async fn test_mtls_config() {

    // success
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        server_ca_cert_path: "tests/tls/ca.pem".to_string(),
        client_cert_path: "tests/tls/client.pem".to_string(),
        client_key_path: "tests/tls/client.key".to_string(),
        enabled: true,
        server_ca_cert_path: Some("tests/tls/ca.pem".to_string()),
        client_cert_path: Some("tests/tls/client.pem".to_string()),
        client_key_path: Some("tests/tls/client.key".to_string()),
    });

    let re = ChannelManager::with_tls_config(config).unwrap();
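
// Hypothetical companion test (not present in this change) for the new `enabled: false`
// branch: certificate paths are ignored and no files are read, so construction does not
// fail on missing certificates.
#[tokio::test]
async fn test_tls_disabled_ignores_certificates() {
    let config = ChannelConfig::new().client_tls_config(ClientTlsOption {
        enabled: false,
        server_ca_cert_path: None,
        client_cert_path: None,
        client_key_path: None,
    });
    assert!(ChannelManager::with_tls_config(config).is_ok());
}
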
@@ -11,6 +11,8 @@ proc-macro = true
workspace = true

[dependencies]
greptime-proto.workspace = true
once_cell.workspace = true
proc-macro2 = "1.0.66"
quote = "1.0"
syn = { version = "2.0", features = [
@@ -19,8 +21,3 @@ syn = { version = "2.0", features = [
] }

[dev-dependencies]
arc-swap = "1.0"
common-query.workspace = true
datatypes.workspace = true
snafu.workspace = true
static_assertions = "1.1.0"

@@ -187,8 +187,28 @@ fn build_struct(

    quote! {
        #(#attrs)*
        #[derive(Debug)]
        #vis struct #name;
        #vis struct #name {
            signature: datafusion_expr::Signature,
            func_ctx: #user_path::function::FunctionContext,
        }

        impl #name {
            /// Creates a new instance of the function with function context.
            fn create(signature: datafusion_expr::Signature, func_ctx: #user_path::function::FunctionContext) -> Self {
                Self {
                    signature,
                    func_ctx,
                }
            }

            /// Returns the [`ScalarFunctionFactory`] of the function.
            pub fn factory() -> impl Into< #user_path::function_factory::ScalarFunctionFactory> {
                Self {
                    signature: #sig_fn().into(),
                    func_ctx: #user_path::function::FunctionContext::default(),
                }
            }
        }
|
||||
|
||||
impl std::fmt::Display for #name {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
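Based on the generated impl block above, callers are expected to obtain the function through the generated factory() constructor rather than instantiating the struct directly. A hedged illustration (the type name MyAdminFn is invented, and the concrete import path of ScalarFunctionFactory, resolved from #user_path, is elided; only factory() and its Into<ScalarFunctionFactory> bound come from the macro output shown here):

// Illustrative only: `MyAdminFn` stands in for a struct expanded by this macro.
let factory: ScalarFunctionFactory = MyAdminFn::factory().into();
// The factory can later produce a DataFusion UDF for a concrete FunctionContext,
// as the From impl further down in this diff shows.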
@@ -196,24 +216,89 @@ fn build_struct(
            }
        }

        impl std::fmt::Debug for #name {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                write!(f, "{}({})", #uppcase_display_name, self.func_ctx)
            }
        }

        #[async_trait::async_trait]
        impl #user_path::function::AsyncFunction for #name {
            fn name(&self) -> &'static str {
        // Implement DataFusion's ScalarUDFImpl trait
        impl datafusion::logical_expr::ScalarUDFImpl for #name {
            fn as_any(&self) -> &dyn std::any::Any {
                self
            }

            fn name(&self) -> &str {
                #display_name
            }

            fn return_type(&self, _input_types: &[store_api::storage::ConcreteDataType]) -> common_query::error::Result<store_api::storage::ConcreteDataType> {
                Ok(store_api::storage::ConcreteDataType::#ret())
            fn signature(&self) -> &datafusion_expr::Signature {
                &self.signature
            }

            fn signature(&self) -> Signature {
                #sig_fn()
            fn return_type(&self, _arg_types: &[datafusion::arrow::datatypes::DataType]) -> datafusion_common::Result<datafusion::arrow::datatypes::DataType> {
                use datatypes::data_type::DataType;
                Ok(store_api::storage::ConcreteDataType::#ret().as_arrow_type())
            }

            async fn eval(&self, func_ctx: #user_path::function::FunctionContext, columns: &[datatypes::vectors::VectorRef]) -> common_query::error::Result<datatypes::vectors::VectorRef> {
                // Ensure under the `greptime` catalog for security
                #user_path::ensure_greptime!(func_ctx);
            fn invoke_with_args(
                &self,
                _args: datafusion::logical_expr::ScalarFunctionArgs,
            ) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
                Err(datafusion_common::DataFusionError::NotImplemented(
                    format!("{} can only be called from async contexts", #display_name)
                ))
            }
        }

        /// Implement From trait for ScalarFunctionFactory
        impl From<#name> for #user_path::function_factory::ScalarFunctionFactory {
            fn from(func: #name) -> Self {
                use std::sync::Arc;
                use datafusion_expr::ScalarUDFImpl;
                use datafusion_expr::async_udf::AsyncScalarUDF;

                let name = func.name().to_string();

                let func = Arc::new(move |ctx: #user_path::function::FunctionContext| {
                    // create the UDF dynamically with function context
                    let udf_impl = #name::create(func.signature.clone(), ctx);
                    let async_udf = AsyncScalarUDF::new(Arc::new(udf_impl));
                    async_udf.into_scalar_udf()
                });
                Self {
                    name,
                    factory: func,
                }
            }
        }

        // Implement DataFusion's AsyncScalarUDFImpl trait
        #[async_trait::async_trait]
        impl datafusion_expr::async_udf::AsyncScalarUDFImpl for #name {
            async fn invoke_async_with_args(
                &self,
                args: datafusion::logical_expr::ScalarFunctionArgs,
            ) -> datafusion_common::Result<datafusion_expr::ColumnarValue> {
                use common_error::ext::ErrorExt;

                let columns = args.args
                    .iter()
                    .map(|arg| {
                        common_query::prelude::ColumnarValue::try_from(arg)
                            .and_then(|cv| match cv {
                                common_query::prelude::ColumnarValue::Vector(v) => Ok(v),
                                common_query::prelude::ColumnarValue::Scalar(s) => {
                                    datatypes::vectors::Helper::try_from_scalar_value(s, args.number_rows)
                                        .context(common_query::error::FromScalarValueSnafu)
                                }
                            })
                    })
                    .collect::<common_query::error::Result<Vec<_>>>()
                    .map_err(|e| datafusion_common::DataFusionError::Execution(format!("Column conversion error: {}", e.output_msg())))?;

                // Safety check: Ensure under the `greptime` catalog for security
                #user_path::ensure_greptime!(self.func_ctx);

                let columns_num = columns.len();
                let rows_num = if columns.is_empty() {
@@ -221,23 +306,24 @@ fn build_struct(
                } else {
                    columns[0].len()
                };
                let columns = Vec::from(columns);

                use snafu::OptionExt;
                use snafu::{OptionExt, ResultExt};
                use datatypes::data_type::DataType;

                let query_ctx = &func_ctx.query_ctx;
                let handler = func_ctx
                let query_ctx = &self.func_ctx.query_ctx;
                let handler = self.func_ctx
                    .state
                    .#handler
                    .as_ref()
                    .context(#snafu_type)?;
                    .context(#snafu_type)
                    .map_err(|e| datafusion_common::DataFusionError::Execution(format!("Handler error: {}", e.output_msg())))?;

                let mut builder = store_api::storage::ConcreteDataType::#ret()
                    .create_mutable_vector(rows_num);

                if columns_num == 0 {
                    let result = #fn_name(handler, query_ctx, &[]).await?;
                    let result = #fn_name(handler, query_ctx, &[]).await
                        .map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e.output_msg())))?;

                    builder.push_value_ref(result.as_value_ref());
                } else {
@@ -246,15 +332,18 @@ fn build_struct(
                            .map(|vector| vector.get_ref(i))
                            .collect();

                        let result = #fn_name(handler, query_ctx, &args).await?;
                        let result = #fn_name(handler, query_ctx, &args).await
                            .map_err(|e| datafusion_common::DataFusionError::Execution(format!("Function execution error: {}", e.output_msg())))?;

                        builder.push_value_ref(result.as_value_ref());
                    }
                }

                Ok(builder.to_vector())
            }
                let result_vector = builder.to_vector();

                // Convert result back to DataFusion ColumnarValue
                Ok(datafusion_expr::ColumnarValue::Array(result_vector.to_arrow_array()))
            }
        }
    }
    .into()
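The hunks above repeatedly wrap GreptimeDB-side errors into DataFusionError::Execution via ErrorExt::output_msg. A minimal sketch (not part of the patch) of that mapping factored into a helper; the helper name and signature are invented here, while ErrorExt/output_msg come from common_error and Execution from DataFusion:

// Hypothetical helper mirroring the generated `.map_err(...)` calls above.
fn to_df_execution_error<E: common_error::ext::ErrorExt>(
    context: &str,
    err: E,
) -> datafusion_common::DataFusionError {
    datafusion_common::DataFusionError::Execution(format!("{}: {}", context, err.output_msg()))
}

Usage would mirror the generated code, e.g. `.map_err(|e| to_df_execution_error("Handler error", e))?`.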
@@ -16,6 +16,7 @@ mod admin_fn;
mod aggr_func;
mod print_caller;
mod range_fn;
mod row;
mod stack_trace_debug;
mod utils;
@@ -27,6 +28,9 @@ use range_fn::process_range_fn;
use syn::{parse_macro_input, Data, DeriveInput, Fields};

use crate::admin_fn::process_admin_fn;
use crate::row::into_row::derive_into_row_impl;
use crate::row::schema::derive_schema_impl;
use crate::row::to_row::derive_to_row_impl;

/// Make struct implemented trait [AggrFuncTypeStore], which is necessary when writing UDAF.
/// This derive macro is expect to be used along with attribute macro [macro@as_aggr_func_creator].
@@ -186,3 +190,117 @@ pub fn derive_meta_builder(input: TokenStream) -> TokenStream {

    gen.into()
}

/// Derive macro to convert a struct to a row.
///
/// # Example
/// ```rust, ignore
/// use api::v1::Row;
/// use api::v1::value::ValueData;
/// use api::v1::Value;
///
/// #[derive(ToRow)]
/// struct ToRowTest {
///     my_value: i32,
///     #[col(name = "string_value", datatype = "string", semantic = "tag")]
///     my_string: String,
///     my_bool: bool,
///     my_float: f32,
///     #[col(
///         name = "timestamp_value",
///         semantic = "Timestamp",
///         datatype = "TimestampMillisecond"
///     )]
///     my_timestamp: i64,
///     #[col(skip)]
///     my_skip: i32,
/// }
///
/// let row = ToRowTest {
///     my_value: 1,
///     my_string: "test".to_string(),
///     my_bool: true,
///     my_float: 1.0,
///     my_timestamp: 1718563200000,
///     my_skip: 1,
/// }.to_row();
/// ```
#[proc_macro_derive(ToRow, attributes(col))]
pub fn derive_to_row(input: TokenStream) -> TokenStream {
    let input = parse_macro_input!(input as DeriveInput);
    let output = derive_to_row_impl(input);
    output.unwrap_or_else(|e| e.to_compile_error()).into()
}

/// Derive macro to convert a struct to a row with move semantics.
///
/// # Example
/// ```rust, ignore
/// use api::v1::Row;
/// use api::v1::value::ValueData;
/// use api::v1::Value;
///
/// #[derive(IntoRow)]
/// struct IntoRowTest {
///     my_value: i32,
///     #[col(name = "string_value", datatype = "string", semantic = "tag")]
///     my_string: String,
///     my_bool: bool,
///     my_float: f32,
///     #[col(
///         name = "timestamp_value",
///         semantic = "Timestamp",
///         datatype = "TimestampMillisecond"
///     )]
///     my_timestamp: i64,
///     #[col(skip)]
///     my_skip: i32,
/// }
///
/// let row = IntoRowTest {
///     my_value: 1,
///     my_string: "test".to_string(),
///     my_bool: true,
///     my_float: 1.0,
///     my_timestamp: 1718563200000,
///     my_skip: 1,
/// }.into_row();
/// ```
#[proc_macro_derive(IntoRow, attributes(col))]
pub fn derive_into_row(input: TokenStream) -> TokenStream {
    let input = parse_macro_input!(input as DeriveInput);
    let output = derive_into_row_impl(input);
    output.unwrap_or_else(|e| e.to_compile_error()).into()
}

/// Derive macro to convert a struct to a schema.
///
/// # Example
/// ```rust, ignore
/// use api::v1::ColumnSchema;
///
/// #[derive(Schema)]
/// struct SchemaTest {
///     my_value: i32,
///     #[col(name = "string_value", datatype = "string", semantic = "tag")]
///     my_string: String,
///     my_bool: bool,
///     my_float: f32,
///     #[col(
///         name = "timestamp_value",
///         semantic = "Timestamp",
///         datatype = "TimestampMillisecond"
///     )]
///     my_timestamp: i64,
///     #[col(skip)]
///     my_skip: i32,
/// }
///
/// let schema = SchemaTest::schema();
/// ```
#[proc_macro_derive(Schema, attributes(col))]
pub fn derive_schema(input: TokenStream) -> TokenStream {
    let input = parse_macro_input!(input as DeriveInput);
    let output = derive_schema_impl(input);
    output.unwrap_or_else(|e| e.to_compile_error()).into()
}

Some files were not shown because too many files have changed in this diff.