Compare commits

...

59 Commits

Author SHA1 Message Date
Lei, HUANG
975b8c69e5 fix(sqlness): redact all volatile text (#4583)
Add SQLNESS replacements for RoundRobinBatch and region patterns
2024-08-19 08:04:54 +00:00
Weny Xu
8036b44347 chore: setup kafka before downloading binary step (#4582) 2024-08-19 06:44:33 +00:00
Zhenchi
4c72b3f3fe chore: bump version to v0.9.2 (#4581)
chore: bump version to 0.9.2

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-08-19 06:11:36 +00:00
Weny Xu
76dc906574 feat(log_store): introduce the CollectionTask (#4530)
* feat: introduce the `CollectionTask`

* feat: add config of index collector

* chore: remove unused code

* feat: truncate indexes

* chore: apply suggestions from CR

* chore: update config examples

* refactor: retrieve latest offset while dumping indexes

* chore: print warn
2024-08-19 03:48:35 +00:00
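For reference, the index-collection settings introduced here show up in the datanode config examples further down in this compare. A minimal sketch of the relevant `[wal]` section (values copied from those examples; the `provider` and `broker_endpoints` lines are assumed context, not part of this commit's diff):

```toml
[wal]
provider = "kafka"
broker_endpoints = ["127.0.0.1:9092"]
# Whether to enable WAL index creation. Only used when the provider is `kafka`.
create_index = true
# The interval for dumping WAL indexes. Only used when the provider is `kafka`.
dump_index_interval = "60s"
```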
Ran Joe
2a73e0937f fix(common_version): short_version with empty branch (#4572) 2024-08-19 03:14:49 +00:00
Zhenchi
c8de8b80f4 fix(fulltext-index): single segment is not sufficient for >50M rows SST (#4552)
* fix(fulltext-index): single segment is not sufficient for a >50M rows SST

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: update doc comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-08-16 09:14:33 +00:00
LFC
ec59ce5c9a feat: able to handle concurrent region edit requests (#4569)
* feat: able to handle concurrent region edit requests

* resolve PR comments
2024-08-16 03:29:03 +00:00
liyang
f578155602 feat: add GcsConfig credential field (#4568) 2024-08-16 03:11:20 +00:00
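The new `credential` field sits alongside the existing GCS options; a sketch of a `[storage]` section using it, based on the commented example added to the config files in this compare (the `bucket` key and its value are illustrative placeholders):

```toml
[storage]
type = "Gcs"
bucket = "my-bucket"          # placeholder, not from this diff
root = "data"
scope = "test"
credential_path = "123456"
# New in this change: a base64-encoded credential passed directly instead of a path.
credential = "base64-credential"
endpoint = "https://storage.googleapis.com"
```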
Weny Xu
d1472782d0 chore(log_store): remove redundant metrics (#4570)
chore(log_store): remove unused metrics
2024-08-16 02:23:21 +00:00
Lanqing Yang
93be81c041 feat: implement postgres kvbackend (#4421) 2024-08-14 22:49:32 +00:00
discord9
2c3fccb516 feat(flow): add eval_batch for ScalarExpr (#4551)
* refactor: better perf flow

* feat(WIP): batching proc

* feat: UnaryFunc::eval_batch untested

* feat: BinaryFunc::eval_batch untested

* feat: VariadicFunc::eval_batch un tested

* feat: literal eval_batch

* refactor: move DfScalarFunc to separate file

* chore: remove unused imports

* feat: eval_batch df func&ifthen

* chore: remove unused file

* refactor: use Batch type

* chore: remove unused

* chore: remove a done TODO

* refactor: per review

* chore: import

* refactor: eval_batch if then

* chore: typo
2024-08-14 11:29:30 +00:00
Lei, HUANG
c1b1be47ba fix: append table stats (#4561)
* fix: append table stats

* fix: clippy
2024-08-14 09:01:42 +00:00
Weny Xu
0f85037024 chore: remove unused code (#4559) 2024-08-14 06:55:54 +00:00
discord9
f88705080b chore: set topic to 3 for sqlness test (#4560) 2024-08-14 06:32:26 +00:00
discord9
cbb06cd0c6 feat(flow): add some metrics (#4539)
* feat: add some metrics

* fix: tmp rate limiter

* feat: add task count metrics

* refactor: use bounded channel anyway

* refactor: better metrics
2024-08-14 03:23:49 +00:00
discord9
b59a93dfbc chore: Helper function to convert Vec<Value> to VectorRef (#4546)
* chore: `try_from_row_into_vector` helper

* test: try_from_row

* refactor: simplify with builder

* fix: deicmal set prec&scale

* refactor: more simplify

* refactor: use ref
2024-08-14 03:11:44 +00:00
localhost
202c730363 perf: Optimizing pipeline performance (#4390)
* chore: improve pipeline performance

* chore: use arc to improve time type

* chore: improve pipeline coerce

* chore: add vec refactor

* chore: add vec pp

* chore: improve pipeline

* inprocess

* chore: set log ingester use new pipeline

* chore: fix some error by pr comment

* chore: fix typo

* chore: use enum_dispatch to simplify code

* chore: some minor fix

* chore: format code

* chore: update by pr comment

* chore: fix typo

* chore: make clippy happy

* chore: fix by pr comment

* chore: remove epoch and date process add new timestamp process

* chore: add more test for pipeline

* chore: restore epoch and date processor

* chore: compatibility issue

* chore: fix by pr comment

* chore: move the evaluation out of the loop

* chore: fix by pr comment

* chore: fix dissect output key filter

* chore: fix transform output greptime value has order error

* chore: keep pipeline transform output order

* chore: revert tests

* chore: simplify pipeline prepare implementation

* chore: add test for timestamp pipelin processor

* chore: make clippy happy

* chore: replace is_some check to match

---------

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
2024-08-13 11:32:04 +00:00
zyy17
63e1892dc1 refactor(plugin): add SetupPlugin and StartPlugin error (#4554) 2024-08-13 11:22:48 +00:00
Lei, HUANG
216bce6973 perf: count(*) for append-only tables (#4545)
* feat: support fast count(*) for append-only tables

* fix: total_rows stats in time series memtable

* fix: sqlness result changes for SinglePartitionScanner -> StreamScanAdapter

* fix: some cr comments
2024-08-13 09:27:50 +00:00
Yingwen
4466fee580 docs: update grafana readme (#4550)
* docs: update grafana readme

* docs: simplify example
2024-08-13 08:45:06 +00:00
shuiyisong
5aa4c70057 chore: update validator signature (#4548) 2024-08-13 08:06:12 +00:00
Yingwen
72a1732fb4 docs: Adds more panels to grafana dashboards (#4540)
* docs: update standalone grafana

* docs: add more panels to grafana dashboards

* docs: replace source name

* docs: bump dashboard version

* docs: update hit rate expr

* docs: greptime_pod to instance, add panels for cache
2024-08-13 06:29:28 +00:00
Weny Xu
c821d21111 feat(log_store): introduce the IndexCollector (#4461)
* feat: introduce the IndexCollector

* refactor: separate BackgroundProducerWorker code into files

* feat: introduce index related operations

* feat: introduce the `GlobalIndexCollector`

* refactor: move collector to index mod

* refactor: refactor `GlobalIndexCollector`

* chore: remove unused collector.rs

* chore: add comments

* chore: add comments

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-08-13 06:15:24 +00:00
Weny Xu
2e2eacf3b2 feat: add SASL and TLS config for Kafka client (#4536)
* feat: add SASL and TLS config

* feat: add SASL/PLAIN and TLS config for Kafka client

* chore: use `ring`

* feat: support SASL SCRAM-SHA-256 and SCRAM-SHA-512

* fix: correct unit test

* test: add integration test

* chore: apply suggestions from CR

* refactor: introduce `KafkaConnectionConfig`

* chore: refine toml examples

* docs: add missing fields

* chore: refine examples

* feat: allow no server ca cert

* chore: refine examples

* chore: fix clippy

* feat: load system ca certs

* chore: fmt toml

* chore: unpin version

* Update src/common/wal/src/error.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-08-12 12:27:11 +00:00
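The shape of the new Kafka connection options can be seen in the config examples later in this compare. A minimal sketch of enabling SASL and TLS for the Kafka WAL provider (values copied from those examples; per the commit notes, the server CA certificate may be omitted to fall back to system CA certs):

```toml
[wal]
provider = "kafka"
broker_endpoints = ["127.0.0.1:9092"]

# Available SASL mechanisms: PLAIN, SCRAM-SHA-256, SCRAM-SHA-512.
[wal.sasl]
type = "SCRAM-SHA-512"
username = "user_kafka"
password = "secret"

[wal.tls]
server_ca_cert_path = "/path/to/server_cert"
client_cert_path = "/path/to/client_cert"
client_key_path = "/path/to/key"
```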
Ruihang Xia
9bcaeaaa0e refactor: reuse aligned ts array in range manipulate exec (#4535)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-12 06:26:11 +00:00
Weny Xu
90cfe276b4 chore: upload kind logs (#4544) 2024-08-12 05:01:13 +00:00
JohnsonLee
6694d2a930 fix: change the type of oid in pg_namespace to u32 (#4541)
* fix:  change the type of oid in pg_namespace to u32

* fix: header and correct logic of update oid
2024-08-10 15:06:14 +00:00
Ning Sun
9532ffb954 fix: configuration example for selector (#4532)
* fix: configuration example for selector

* docs: update config docs

* test: update unit tests for configuration in meta
2024-08-09 09:51:05 +00:00
Weny Xu
665b7e5c6e perf: merge small byte ranges for optimized fetching (#4520) 2024-08-09 08:17:54 +00:00
Weny Xu
27d9aa0f3b fix: rollback only if dropping the metric physical table fails (#4525)
* fix: rollback only if dropping the metric physical table fails

* chore: apply suggestions from CR
2024-08-09 08:01:11 +00:00
discord9
8f3293d4fb fix: larger stack size in debug mode (#4521)
* fix: larger stack size in debug mode

* chore: typo

* chore: clippy

* chore: per review

* chore: rename thread

* chore: per review

* refactor: better looking cfg

* chore: async main entry
2024-08-09 07:01:20 +00:00
LFC
7dd20b0348 chore: make mysql server version changable (#4531) 2024-08-09 03:43:43 +00:00
zyy17
4c1a3f29c0 ci: download the latest stable released version by default and do some small refactoring (#4529)
refactor: download the latest stable released version by default and do some small refactoring
2024-08-08 07:46:09 +00:00
Jeremyhi
0d70961448 feat: change the default selector to RoundRobin (#4528)
* feat: change the default selector to rr

* Update src/meta-srv/src/selector.rs

* fix: unit test
2024-08-08 04:58:20 +00:00
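The corresponding metasrv setting, as updated in the example config later in this compare (a sketch; other metasrv options omitted):

```toml
## Datanode selector type.
## - `round_robin` (default value)
## - `lease_based`
## - `load_based`
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
selector = "round_robin"
```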
LFC
a75cfaa516 chore: update snafu to make clippy happy (#4507)
* chore: update snafu to make clippy happy

* fix ci
2024-08-07 16:12:00 +00:00
Lei, HUANG
aa3f53f08a fix: install script (#4527)
fix: install script always install v0.9.0-nightly-20240709 instead of latest nightly
2024-08-07 14:07:32 +00:00
Ruihang Xia
8f0959fa9f fix: fix incorrect result of topk with cte (#4523)
* fix: fix incorrect result of topk with cte

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up cargo toml

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-07 09:13:38 +00:00
Weny Xu
4a3982ca60 chore: use configData (#4522)
* chore: use `configData`

* chore: add an empty line
2024-08-07 07:43:04 +00:00
Yingwen
559219496d ci: fix windows temp path (#4518) 2024-08-06 13:53:12 +00:00
LFC
685aa7dd8f ci: squeeze some disk space for complex fuzz tests (#4519)
* ci: squeeze some disk space for complex fuzz tests

* Update .github/workflows/develop.yml

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-08-06 11:52:34 +00:00
Lei, HUANG
be5364a056 chore: support swcs as the short name for strict window compaction (#4517) 2024-08-06 07:38:07 +00:00
Weny Xu
a25d9f736f chore: set default otlp_endpoint (#4508)
* chore: set default `otlp_endpoint`

* fix: fix ci
2024-08-06 06:48:14 +00:00
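With this change the logging section ships a concrete default endpoint instead of an empty string. A sketch of the resulting `[logging]` block, taken from the updated examples below (directory and level shown with their documented defaults):

```toml
[logging]
dir = "/tmp/greptimedb/logs"
level = "info"
enable_otlp_tracing = false
# Default OTLP tracing endpoint after this change (previously "").
otlp_endpoint = "http://localhost:4317"
append_stdout = true
```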
dependabot[bot]
2cd4a78f17 build(deps): bump zerovec from 0.10.2 to 0.10.4 (#4335)
Bumps [zerovec](https://github.com/unicode-org/icu4x) from 0.10.2 to 0.10.4.
- [Release notes](https://github.com/unicode-org/icu4x/releases)
- [Changelog](https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md)
- [Commits](https://github.com/unicode-org/icu4x/commits/ind/zerovec@0.10.4)

---
updated-dependencies:
- dependency-name: zerovec
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-06 00:40:03 +00:00
dependabot[bot]
188e182d75 build(deps): bump zerovec-derive from 0.10.2 to 0.10.3 (#4346)
Bumps [zerovec-derive](https://github.com/unicode-org/icu4x) from 0.10.2 to 0.10.3.
- [Release notes](https://github.com/unicode-org/icu4x/releases)
- [Changelog](https://github.com/unicode-org/icu4x/blob/main/CHANGELOG.md)
- [Commits](https://github.com/unicode-org/icu4x/commits/ind/zerovec-derive@0.10.3)

---
updated-dependencies:
- dependency-name: zerovec-derive
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-05 23:58:30 +00:00
Yingwen
d64cc79ab4 docs: add v0.9.1 bench result (#4511) 2024-08-05 16:53:32 +00:00
discord9
e6cc4df8c8 feat: flow recreate on reboot (#4509)
* feat: flow reboot clean

* refactor: per review

* refactor: per review

* test: sqlness flow reboot
2024-08-05 13:57:48 +00:00
LFC
803780030d fix: too large shadow-rs consts (#4506) 2024-08-05 07:05:14 +00:00
Weny Xu
79f10d0415 chore: reduce fuzz tests in CI (#4505) 2024-08-05 06:56:41 +00:00
Weny Xu
3937e67694 feat: introduce new kafka topic consumer respecting WAL index (#4424)
* feat: introduce new kafka topic consumer respecting WAL index

* chore: fmt

* chore: fmt toml

* chore: add comments

* feat: merge close ranges

* fix: fix unit tests

* chore: fix typos

* chore: use loop

* chore: use unstable sort

* chore: use gt instead of gte

* chore: add comments

* chore: rename to `current_entry_id`

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* refactor: minor refactor

* chore: apply suggestions from CR
2024-08-05 06:56:25 +00:00
Weny Xu
4c93fe6c2d chore: bump rust-postgres to 0.7.11 (#4504) 2024-08-05 04:26:46 +00:00
LFC
c4717abb68 chore: bump shadow-rs version to set the path to find the correct git repo (#4494) 2024-08-05 02:24:12 +00:00
shuiyisong
3b701d8f5e test: more on processors (#4493)
* test: add date test

* test: add epoch test

* test: add letter test and complete some others

* test: add urlencoding test

* chore: typo
2024-08-04 08:29:31 +00:00
Weny Xu
cb4cffe636 chore: bump opendal version to 0.48 (#4499) 2024-08-04 00:46:04 +00:00
Ruihang Xia
cc7f33c90c fix(tql): avoid unwrap on parsing tql query (#4502)
* fix(tql): avoid unwrap on parsing tql query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add unit test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-03 20:58:53 +00:00
Ruihang Xia
fe1cfbf2b3 fix: partition column with mixed quoted and unquoted idents (#4491)
* fix: partition column with mixed quoted and unquoted idents

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update error message

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-02 09:06:31 +00:00
Yingwen
ded874da04 feat: enlarge default page cache size (#4490) 2024-08-02 07:24:20 +00:00
Lei, HUANG
fe2d29a2a0 chore: bump version v0.9.1 (#4486)
Update package versions to 0.9.1

 - Bump version for multiple packages from 0.9.0 to 0.9.1 in Cargo.lock
2024-08-02 07:10:05 +00:00
Yingwen
b388829a96 fix: avoid total size overflow (#4487)
feat: avoid total size overflow
2024-08-02 06:16:37 +00:00
zyy17
8e7c027bf5 ci: make docker image args configurable from env vars (#4484)
refactor: make docker image args configurable from env vars
2024-08-02 03:17:09 +00:00
254 changed files with 17340 additions and 7340 deletions


@@ -14,10 +14,11 @@ GT_AZBLOB_CONTAINER=AZBLOB container
GT_AZBLOB_ACCOUNT_NAME=AZBLOB account name
GT_AZBLOB_ACCOUNT_KEY=AZBLOB account key
GT_AZBLOB_ENDPOINT=AZBLOB endpoint
# Settings for gcs test
GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_CREDENTIAL = GCS credential
GT_GCS_ENDPOINT = GCS end point
# Settings for kafka wal test
GT_KAFKA_ENDPOINTS = localhost:9092


@@ -69,7 +69,7 @@ runs:
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
-path: C:\tmp\greptime-*.log
+path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness*
retention-days: 3
- name: Build greptime binary


@@ -1,13 +1,13 @@
meta:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
datanode:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
compact_rt_size = 2
frontend:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4


@@ -1,5 +1,5 @@
meta:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
@@ -7,7 +7,7 @@ meta:
[datanode.client]
timeout = "60s"
datanode:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
compact_rt_size = 2
@@ -16,7 +16,7 @@ datanode:
cache_path = "/data/greptimedb/s3cache"
cache_capacity = "256MB"
frontend:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4


@@ -1,5 +1,5 @@
meta:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
@@ -7,12 +7,12 @@ meta:
[datanode.client]
timeout = "60s"
datanode:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
compact_rt_size = 2
frontend:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4


@@ -1,5 +1,5 @@
meta:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
@@ -13,7 +13,7 @@ meta:
[datanode.client]
timeout = "60s"
datanode:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4
compact_rt_size = 2
@@ -23,7 +23,7 @@ datanode:
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
linger = "2ms"
frontend:
-config: |-
+configData: |-
[runtime]
global_rt_size = 4


@@ -0,0 +1,30 @@
name: Setup PostgreSQL
description: Deploy PostgreSQL on Kubernetes
inputs:
postgres-replicas:
default: 1
description: "Number of PostgreSQL replicas"
namespace:
default: "postgres-namespace"
postgres-version:
default: "14.2"
description: "PostgreSQL version"
storage-size:
default: "1Gi"
description: "Storage size for PostgreSQL"
runs:
using: composite
steps:
- name: Install PostgreSQL
shell: bash
run: |
helm upgrade \
--install postgresql oci://registry-1.docker.io/bitnamicharts/postgresql \
--set replicaCount=${{ inputs.postgres-replicas }} \
--set image.tag=${{ inputs.postgres-version }} \
--set persistence.size=${{ inputs.storage-size }} \
--set postgresql.username=greptimedb \
--set postgresql.password=admin \
--create-namespace \
-n ${{ inputs.namespace }}


@@ -145,6 +145,18 @@ jobs:
matrix:
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ]
steps:
- name: Remove unused software
run: |
echo "Disk space before:"
df -h
[[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
[[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
[[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
[[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
sudo docker builder prune -a
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
@@ -193,6 +205,18 @@ jobs:
matrix:
target: [ "unstable_fuzz_create_table_standalone" ]
steps:
- name: Remove unused software
run: |
echo "Disk space before:"
df -h
[[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
[[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
[[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
[[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
sudo docker builder prune -a
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
@@ -285,24 +309,24 @@ jobs:
strategy:
matrix:
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ]
mode:
- name: "Disk"
minio: false
kafka: false
values: "with-disk.yaml"
- name: "Minio"
minio: true
kafka: false
values: "with-minio.yaml"
- name: "Minio with Cache"
minio: true
kafka: false
values: "with-minio-and-cache.yaml"
mode:
- name: "Remote WAL"
minio: true
kafka: true
values: "with-remote-wal.yaml"
steps:
- name: Remove unused software
run: |
echo "Disk space before:"
df -h
[[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
[[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
[[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
[[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
sudo docker builder prune -a
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
- name: Setup Kind
uses: ./.github/actions/setup-kind
@@ -314,6 +338,8 @@ jobs:
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
uses: ./.github/actions/setup-etcd-cluster
- name: Setup Postgres cluser
uses: ./.github/actions/setup-postgres-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3
with:
@@ -427,6 +453,18 @@ jobs:
kafka: true
values: "with-remote-wal.yaml"
steps:
- name: Remove unused software
run: |
echo "Disk space before:"
df -h
[[ -d /usr/share/dotnet ]] && sudo rm -rf /usr/share/dotnet
[[ -d /usr/local/lib/android ]] && sudo rm -rf /usr/local/lib/android
[[ -d /opt/ghc ]] && sudo rm -rf /opt/ghc
[[ -d /opt/hostedtoolcache/CodeQL ]] && sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
sudo docker builder prune -a
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
- name: Setup Kind
uses: ./.github/actions/setup-kind
@@ -440,6 +478,8 @@ jobs:
uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser
uses: ./.github/actions/setup-etcd-cluster
- name: Setup Postgres cluser
uses: ./.github/actions/setup-postgres-cluster
# Prepares for fuzz tests
- uses: arduino/setup-protoc@v3
with:
@@ -557,6 +597,10 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -564,10 +608,6 @@ jobs:
path: .
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner ${{ matrix.mode.opts }} -c ./tests/cases --bins-dir ./bins --preserve-state
- name: Upload sqlness logs
@@ -666,6 +706,9 @@ jobs:
- name: Setup minio
working-directory: tests-integration/fixtures/minio
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Setup postgres server
working-directory: tests-integration/fixtures/postgres
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
env:
@@ -682,7 +725,9 @@ jobs:
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Codecov upload
uses: codecov/codecov-action@v4


@@ -33,6 +33,13 @@ jobs:
aws-region: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
aws-access-key-id: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
- name: Upload sqlness logs
if: failure()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs-kind
path: /tmp/kind/
retention-days: 3
sqlness-windows:
name: Sqlness tests on Windows
@@ -55,11 +62,11 @@ jobs:
env:
SQLNESS_OPTS: "--preserve-state"
- name: Upload sqlness logs
-if: always()
+if: failure()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
-path: C:\tmp\greptime-*.log
+path: C:\Users\RUNNER~1\AppData\Local\Temp\sqlness*
retention-days: 3
test-on-windows:

Cargo.lock (generated, 442 changed lines): file diff suppressed because it is too large.


@@ -64,7 +64,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.9.0"
version = "0.9.2"
edition = "2021"
license = "Apache-2.0"
@@ -104,15 +104,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "5.4"
-datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
-datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "d7bda5c9b762426e81f144296deadc87e5f4a0b8" }
+datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
+datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
derive_builder = "0.12"
dotenv = "0.15"
etcd-client = { version = "0.13" }
@@ -124,7 +124,7 @@ humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
-meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "049171eb16cb4249d8099751a0c46750d1fe88e7" }
+meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
mockall = "0.11.4"
moka = "0.12"
notify = "6.1"
@@ -151,14 +151,19 @@ reqwest = { version = "0.12", default-features = false, features = [
"stream",
"multipart",
] }
rskafka = "0.5"
# SCRAM-SHA-512 requires https://github.com/dequbed/rsasl/pull/48, https://github.com/influxdata/rskafka/pull/247
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "940c6030012c5b746fad819fb72e3325b26e39de", features = [
"transport-tls",
] }
rstest = "0.21"
rstest_reuse = "0.7"
rust_decimal = "1.33"
rustc-hash = "2.0"
schemars = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
shadow-rs = "0.31"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
@@ -169,6 +174,7 @@ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "5
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.36", features = ["full"] }
tokio-postgres = "0.7"
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
@@ -238,7 +244,7 @@ table = { path = "src/table" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "049171eb16cb4249d8099751a0c46750d1fe88e7"
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
[profile.release]
debug = 1


@@ -67,6 +67,11 @@
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start.<br/>**It's only used when the provider is `kafka`**. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>**It's only used when the provider is `kafka`**. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_init` | String | `500ms` | The initial backoff delay.<br/>**It's only used when the provider is `kafka`**. |
@@ -93,6 +98,7 @@
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
@@ -145,7 +151,7 @@
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -230,7 +236,7 @@
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -254,7 +260,7 @@
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
-| `selector` | String | `lease_based` | Datanode selector type.<br/>- `lease_based` (default value).<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
+| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
@@ -292,7 +298,7 @@
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -368,6 +374,8 @@
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -382,6 +390,7 @@
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
@@ -432,7 +441,7 @@
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
@@ -477,7 +486,7 @@
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
-| `logging.otlp_endpoint` | String | `None` | The OTLP tracing endpoint. |
+| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |


@@ -187,6 +187,32 @@ backoff_base = 2
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"
## Whether to enable WAL index creation.
## **It's only used when the provider is `kafka`**.
create_index = true
## The interval for dumping WAL indexes.
## **It's only used when the provider is `kafka`**.
dump_index_interval = "60s"
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
# - `PLAIN`
# - `SCRAM-SHA-256`
# - `SCRAM-SHA-512`
# [wal.sasl]
# type = "SCRAM-SHA-512"
# username = "user_kafka"
# password = "secret"
# The Kafka TLS configuration.
# **It's only used when the provider is `kafka`**.
# [wal.tls]
# server_ca_cert_path = "/path/to/server_cert"
# client_cert_path = "/path/to/client_cert"
# client_key_path = "/path/to/key"
# Example of using S3 as the storage.
# [storage]
# type = "S3"
@@ -223,6 +249,7 @@ backoff_deadline = "5mins"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"
## The data storage options.
@@ -294,6 +321,11 @@ scope = "test"
## +toml2docs:none-default
credential_path = "test"
## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
credential= "base64-credential"
## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
@@ -493,8 +525,7 @@ level = "info"
enable_otlp_tracing = false
## The OTLP tracing endpoint.
## +toml2docs:none-default
otlp_endpoint = ""
otlp_endpoint = "http://localhost:4317"
## Whether to append logs to stdout.
append_stdout = true


@@ -70,8 +70,7 @@ level = "info"
enable_otlp_tracing = false
## The OTLP tracing endpoint.
## +toml2docs:none-default
otlp_endpoint = ""
otlp_endpoint = "http://localhost:4317"
## Whether to append logs to stdout.
append_stdout = true


@@ -177,8 +177,7 @@ level = "info"
enable_otlp_tracing = false
## The OTLP tracing endpoint.
## +toml2docs:none-default
otlp_endpoint = ""
otlp_endpoint = "http://localhost:4317"
## Whether to append logs to stdout.
append_stdout = true


@@ -11,10 +11,11 @@ server_addr = "127.0.0.1:3002"
store_addr = "127.0.0.1:2379"
## Datanode selector type.
-## - `lease_based` (default value).
+## - `round_robin` (default value)
+## - `lease_based`
## - `load_based`
## For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector".
selector = "lease_based"
selector = "round_robin"
## Store data in memory.
use_memory_store = false
@@ -123,6 +124,24 @@ backoff_base = 2
## Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
backoff_deadline = "5mins"
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
# - `PLAIN`
# - `SCRAM-SHA-256`
# - `SCRAM-SHA-512`
# [wal.sasl]
# type = "SCRAM-SHA-512"
# username = "user_kafka"
# password = "secret"
# The Kafka TLS configuration.
# **It's only used when the provider is `kafka`**.
# [wal.tls]
# server_ca_cert_path = "/path/to/server_cert"
# client_cert_path = "/path/to/client_cert"
# client_key_path = "/path/to/key"
## The logging options.
[logging]
## The directory to store the log files.
@@ -136,8 +155,7 @@ level = "info"
enable_otlp_tracing = false
## The OTLP tracing endpoint.
## +toml2docs:none-default
otlp_endpoint = ""
otlp_endpoint = "http://localhost:4317"
## Whether to append logs to stdout.
append_stdout = true


@@ -171,6 +171,28 @@ sync_period = "10s"
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
## Number of topics to be created upon start.
## **It's only used when the provider is `kafka`**.
num_topics = 64
## Topic selector type.
## Available selector types:
## - `round_robin` (default)
## **It's only used when the provider is `kafka`**.
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## **It's only used when the provider is `kafka`**.
topic_name_prefix = "greptimedb_wal_topic"
## Expected number of replicas of each partition.
## **It's only used when the provider is `kafka`**.
replication_factor = 1
## Above which a topic creation operation will be cancelled.
## **It's only used when the provider is `kafka`**.
create_topic_timeout = "30s"
## The max size of a single producer batch.
## Warning: Kafka has a default limit of 1MB per message in a topic.
## **It's only used when the provider is `kafka`**.
@@ -196,6 +218,24 @@ backoff_base = 2
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
# - `PLAIN`
# - `SCRAM-SHA-256`
# - `SCRAM-SHA-512`
# [wal.sasl]
# type = "SCRAM-SHA-512"
# username = "user_kafka"
# password = "secret"
# The Kafka TLS configuration.
# **It's only used when the provider is `kafka`**.
# [wal.tls]
# server_ca_cert_path = "/path/to/server_cert"
# client_cert_path = "/path/to/client_cert"
# client_key_path = "/path/to/key"
## Metadata storage options.
[metadata_store]
## Kv file size in bytes.
@@ -246,6 +286,7 @@ retry_delay = "500ms"
# root = "data"
# scope = "test"
# credential_path = "123456"
# credential = "base64-credential"
# endpoint = "https://storage.googleapis.com"
## The data storage options.
@@ -317,6 +358,11 @@ scope = "test"
## +toml2docs:none-default
credential_path = "test"
## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
credential = "base64-credential"
## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
@@ -522,8 +568,7 @@ level = "info"
enable_otlp_tracing = false
## The OTLP tracing endpoint.
## +toml2docs:none-default
otlp_endpoint = ""
otlp_endpoint = "http://localhost:4317"
## Whether to append logs to stdout.
append_stdout = true


@@ -1,9 +1,9 @@
x-custom:
etcd_initial_cluster_token: &etcd_initial_cluster_token "--initial-cluster-token=etcd-cluster"
etcd_common_settings: &etcd_common_settings
-image: quay.io/coreos/etcd:v3.5.10
+image: "${ETCD_REGISTRY:-quay.io}/${ETCD_NAMESPACE:-coreos}/etcd:${ETCD_VERSION:-v3.5.10}"
entrypoint: /usr/local/bin/etcd
-greptimedb_image: &greptimedb_image docker.io/greptimedb/greptimedb:latest
+greptimedb_image: &greptimedb_image "${GREPTIMEDB_REGISTRY:-docker.io}/${GREPTIMEDB_NAMESPACE:-greptime}/greptimedb:${GREPTIMEDB_VERSION:-latest}"
services:
etcd0:


@@ -25,7 +25,7 @@ Please ensure the following configuration before importing the dashboard into Gr
__1. Prometheus scrape config__
-Assign `greptime_pod` label to each host target. We use this label to identify each node instance.
+Configure Prometheus to scrape the cluster.
```yml
# example config
@@ -34,27 +34,15 @@ Assign `greptime_pod` label to each host target. We use this label to identify e
scrape_configs:
- job_name: metasrv
static_configs:
- targets: ['<ip>:<port>']
labels:
greptime_pod: metasrv
- targets: ['<metasrv-ip>:<port>']
- job_name: datanode
static_configs:
- targets: ['<ip>:<port>']
labels:
greptime_pod: datanode1
- targets: ['<ip>:<port>']
labels:
greptime_pod: datanode2
- targets: ['<ip>:<port>']
labels:
greptime_pod: datanode3
- targets: ['<datanode0-ip>:<port>', '<datanode1-ip>:<port>', '<datanode2-ip>:<port>']
- job_name: frontend
static_configs:
- targets: ['<ip>:<port>']
labels:
greptime_pod: frontend
- targets: ['<frontend-ip>:<port>']
```
__2. Grafana config__
@@ -63,4 +51,4 @@ Create a Prometheus data source in Grafana before using this dashboard. We use `
### Usage
-Use `datasource` or `greptime_pod` on the upper-left corner to filter data from certain node.
+Use `datasource` or `instance` on the upper-left corner to filter data from certain node.

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -1,62 +1,72 @@
-#!/bin/sh
+#!/usr/bin/env bash
set -ue
OS_TYPE=
ARCH_TYPE=
# Set the GitHub token to avoid GitHub API rate limit.
# You can run with `GITHUB_TOKEN`:
# GITHUB_TOKEN=<your_token> ./scripts/install.sh
GITHUB_TOKEN=${GITHUB_TOKEN:-}
VERSION=${1:-latest}
GITHUB_ORG=GreptimeTeam
GITHUB_REPO=greptimedb
BIN=greptime
-get_os_type() {
+function get_os_type() {
os_type="$(uname -s)"
case "$os_type" in
Darwin)
OS_TYPE=darwin
;;
Linux)
OS_TYPE=linux
;;
*)
echo "Error: Unknown OS type: $os_type"
exit 1
esac
}
-get_arch_type() {
+function get_arch_type() {
arch_type="$(uname -m)"
case "$arch_type" in
arm64)
ARCH_TYPE=arm64
;;
aarch64)
ARCH_TYPE=arm64
;;
x86_64)
ARCH_TYPE=amd64
;;
amd64)
ARCH_TYPE=amd64
;;
*)
echo "Error: Unknown CPU type: $arch_type"
exit 1
esac
}
-get_os_type
-get_arch_type
-if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
-# Use the latest nightly version.
+function download_artifact() {
+if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
+# Use the latest stable released version.
+# GitHub API reference: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release.
if [ "${VERSION}" = "latest" ]; then
-VERSION=$(curl -s -XGET "https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases" | grep tag_name | grep nightly | cut -d: -f 2 | sed 's/.*"\(.*\)".*/\1/' | uniq | sort -r | head -n 1)
-if [ -z "${VERSION}" ]; then
-echo "Failed to get the latest version."
-exit 1
+# To avoid other tools dependency, we choose to use `curl` to get the version metadata and parsed by `sed`.
+VERSION=$(curl -sL \
+-H "Accept: application/vnd.github+json" \
+-H "X-GitHub-Api-Version: 2022-11-28" \
+${GITHUB_TOKEN:+-H "Authorization: Bearer $GITHUB_TOKEN"} \
+"https://api.github.com/repos/${GITHUB_ORG}/${GITHUB_REPO}/releases/latest" | sed -n 's/.*"tag_name": "\([^"]*\)".*/\1/p')
+if [ -z "${VERSION}" ]; then
+echo "Failed to get the latest stable released version."
+exit 1
fi
fi
@@ -73,4 +83,9 @@ if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
rm -r "${PACKAGE_NAME%.tar.gz}" && \
echo "Run './${BIN} --help' to get started"
fi
fi
fi
}
get_os_type
get_arch_type
download_artifact


@@ -40,6 +40,7 @@ moka = { workspace = true, features = ["future", "sync"] }
partition.workspace = true
paste = "1.0"
prometheus.workspace = true
rustc-hash.workspace = true
serde_json.workspace = true
session.workspace = true
snafu.workspace = true


@@ -30,6 +30,7 @@ use pg_namespace::PGNamespace;
use table::TableRef;
pub use table_names::*;
use self::pg_namespace::oid_map::{PGNamespaceOidMap, PGNamespaceOidMapRef};
use super::memory_table::MemoryTable;
use super::utils::tables::u32_column;
use super::{SystemSchemaProvider, SystemSchemaProviderInner, SystemTableRef};
@@ -52,6 +53,9 @@ pub struct PGCatalogProvider {
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
tables: HashMap<String, TableRef>,
// Workaround to store mapping of schema_name to a numeric id
namespace_oid_map: PGNamespaceOidMapRef,
}
impl SystemSchemaProvider for PGCatalogProvider {
@@ -85,6 +89,7 @@ impl PGCatalogProvider {
catalog_name,
catalog_manager,
tables: HashMap::new(),
namespace_oid_map: Arc::new(PGNamespaceOidMap::new()),
};
provider.build_tables();
provider
@@ -122,10 +127,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider {
table_names::PG_NAMESPACE => Some(Arc::new(PGNamespace::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
self.namespace_oid_map.clone(),
))),
table_names::PG_CLASS => Some(Arc::new(PGClass::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
self.namespace_oid_map.clone(),
))),
_ => None,
}


@@ -31,6 +31,7 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest;
use table::metadata::TableType;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{OID_COLUMN_NAME, PG_CLASS};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
@@ -60,14 +61,22 @@ pub(super) struct PGClass {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
// Workaround to convert schema_name to a numeric id
namespace_oid_map: PGNamespaceOidMapRef,
}
impl PGClass {
-pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+pub(super) fn new(
+catalog_name: String,
+catalog_manager: Weak<dyn CatalogManager>,
+namespace_oid_map: PGNamespaceOidMapRef,
+) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
namespace_oid_map,
}
}
@@ -75,7 +84,7 @@ impl PGClass {
Arc::new(Schema::new(vec![
u32_column(OID_COLUMN_NAME),
string_column(RELNAME),
-string_column(RELNAMESPACE),
+u32_column(RELNAMESPACE),
string_column(RELKIND),
u32_column(RELOWNER),
]))
@@ -86,6 +95,7 @@ impl PGClass {
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
self.namespace_oid_map.clone(),
)
}
}
@@ -155,10 +165,11 @@ struct PGClassBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
namespace_oid_map: PGNamespaceOidMapRef,
oid: UInt32VectorBuilder,
relname: StringVectorBuilder,
-relnamespace: StringVectorBuilder,
+relnamespace: UInt32VectorBuilder,
relkind: StringVectorBuilder,
relowner: UInt32VectorBuilder,
}
@@ -168,15 +179,17 @@ impl PGClassBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
namespace_oid_map: PGNamespaceOidMapRef,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
namespace_oid_map,
oid: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
relname: StringVectorBuilder::with_capacity(INIT_CAPACITY),
-relnamespace: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+relnamespace: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
relkind: StringVectorBuilder::with_capacity(INIT_CAPACITY),
relowner: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
}
@@ -217,6 +230,7 @@ impl PGClassBuilder {
table: &str,
kind: &str,
) {
let namespace_oid = self.namespace_oid_map.get_oid(schema);
let row = [
(OID_COLUMN_NAME, &Value::from(oid)),
(RELNAMESPACE, &Value::from(schema)),
@@ -230,7 +244,7 @@ impl PGClassBuilder {
}
self.oid.push(Some(oid));
-self.relnamespace.push(Some(schema));
+self.relnamespace.push(Some(namespace_oid));
self.relname.push(Some(table));
self.relkind.push(Some(kind));
self.relowner.push(Some(DUMMY_OWNER_ID));


@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub(super) mod oid_map;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
@@ -25,16 +27,16 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{Schema, SchemaRef};
use datatypes::value::Value;
-use datatypes::vectors::{StringVectorBuilder, VectorRef};
+use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest;
-use super::{OID_COLUMN_NAME, PG_NAMESPACE};
+use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::Predicates;
-use crate::system_schema::utils::tables::string_column;
+use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable;
use crate::CatalogManager;
@@ -48,21 +50,29 @@ pub(super) struct PGNamespace {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
// Workaround to convert schema_name to a numeric id
oid_map: PGNamespaceOidMapRef,
}
impl PGNamespace {
-pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
+pub(super) fn new(
+catalog_name: String,
+catalog_manager: Weak<dyn CatalogManager>,
+oid_map: PGNamespaceOidMapRef,
+) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
oid_map,
}
}
fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
// TODO(J0HN50N133): we do not have a numeric schema id, use schema name as a workaround. Use a proper schema id once we have it.
-string_column(OID_COLUMN_NAME),
+u32_column(OID_COLUMN_NAME),
string_column(NSPNAME),
]))
}
@@ -72,6 +82,7 @@ impl PGNamespace {
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
self.oid_map.clone(),
)
}
}
@@ -138,8 +149,9 @@ struct PGNamespaceBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
namespace_oid_map: PGNamespaceOidMapRef,
-oid: StringVectorBuilder,
+oid: UInt32VectorBuilder,
nspname: StringVectorBuilder,
}
@@ -148,12 +160,14 @@ impl PGNamespaceBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
namespace_oid_map: PGNamespaceOidMapRef,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
-oid: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+namespace_oid_map,
+oid: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
nspname: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -178,14 +192,15 @@ impl PGNamespaceBuilder {
}
fn add_namespace(&mut self, predicates: &Predicates, schema_name: &str) {
let oid = self.namespace_oid_map.get_oid(schema_name);
let row = [
-(OID_COLUMN_NAME, &Value::from(schema_name)),
+(OID_COLUMN_NAME, &Value::from(oid)),
(NSPNAME, &Value::from(schema_name)),
];
if !predicates.eval(&row) {
return;
}
-self.oid.push(Some(schema_name));
+self.oid.push(Some(oid));
self.nspname.push(Some(schema_name));
}
}


@@ -0,0 +1,100 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::BuildHasher;
use std::sync::Arc;
use dashmap::DashMap;
use rustc_hash::FxSeededState;
pub type PGNamespaceOidMapRef = Arc<PGNamespaceOidMap>;
// Workaround to convert a schema name to a numeric id;
// remove this once GreptimeDB has a numeric schema id.
pub struct PGNamespaceOidMap {
oid_map: DashMap<String, u32>,
// Rust uses SipHash by default, which provides resistance against DoS attacks,
// but it produces different hash values on each GreptimeDB instance, which makes
// the sqlness tests fail. We need a deterministic hash here to provide the same
// oid for the same schema name on a best-effort basis; DoS attacks are not a concern here.
hasher: FxSeededState,
}
impl PGNamespaceOidMap {
pub fn new() -> Self {
Self {
oid_map: DashMap::new(),
hasher: FxSeededState::with_seed(0), // PLEASE DO NOT MODIFY THIS SEED VALUE!!!
}
}
fn oid_is_used(&self, oid: u32) -> bool {
self.oid_map.iter().any(|e| *e.value() == oid)
}
pub fn get_oid(&self, schema_name: &str) -> u32 {
if let Some(oid) = self.oid_map.get(schema_name) {
*oid
} else {
let mut oid = self.hasher.hash_one(schema_name) as u32;
while self.oid_is_used(oid) {
oid = self.hasher.hash_one(oid) as u32;
}
self.oid_map.insert(schema_name.to_string(), oid);
oid
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn oid_is_stable() {
let oid_map_1 = PGNamespaceOidMap::new();
let oid_map_2 = PGNamespaceOidMap::new();
let schema = "schema";
let oid = oid_map_1.get_oid(schema);
// the oid stays stable within the same instance
assert_eq!(oid, oid_map_1.get_oid(schema));
// the oid stays stable across different instances
assert_eq!(oid, oid_map_2.get_oid(schema));
}
#[test]
fn oid_collision() {
let oid_map = PGNamespaceOidMap::new();
let key1 = "3178510";
let key2 = "4215648";
// these two keys collide on the raw hash
assert_eq!(
oid_map.hasher.hash_one(key1) as u32,
oid_map.hasher.hash_one(key2) as u32
);
// insert them into oid_map
let oid1 = oid_map.get_oid(key1);
let oid2 = oid_map.get_oid(key2);
// they should still get different oids
assert_ne!(oid1, oid2);
}
}
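For context, a minimal standalone sketch of why the seeded hasher above gives stable oids across processes, assuming the `rustc_hash::FxSeededState` API imported by this module (`with_seed` plus the standard `BuildHasher::hash_one`):
use std::hash::BuildHasher;
use rustc_hash::FxSeededState;
fn main() {
    // Two independently constructed states with the same seed hash the same
    // key to the same value, so the derived oid is identical on every
    // GreptimeDB instance and across restarts.
    let a = FxSeededState::with_seed(0);
    let b = FxSeededState::with_seed(0);
    assert_eq!(a.hash_one("public") as u32, b.hash_one("public") as u32);
}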

View File

@@ -16,7 +16,7 @@ use api::v1::flow::{FlowRequest, FlowResponse};
use api::v1::region::InsertRequests;
use common_error::ext::BoxedError;
use common_meta::node_manager::Flownode;
use snafu::{location, Location, ResultExt};
use snafu::{location, ResultExt};
use crate::error::Result;
use crate::Client;

View File

@@ -33,7 +33,7 @@ use common_telemetry::error;
use common_telemetry::tracing_context::TracingContext;
use prost::Message;
use query::query_engine::DefaultSerializer;
use snafu::{location, Location, OptionExt, ResultExt};
use snafu::{location, OptionExt, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use tokio_stream::StreamExt;

View File

@@ -62,8 +62,37 @@ enum SubCommand {
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[cfg(debug_assertions)]
fn main() -> Result<()> {
use snafu::ResultExt;
// Set the stack size to 8MB for the thread so it won't overflow under heavy stack usage in debug mode;
// see https://github.com/GreptimeTeam/greptimedb/pull/4317
// and https://github.com/rust-lang/rust/issues/34283
std::thread::Builder::new()
.name("main_spawn".to_string())
.stack_size(8 * 1024 * 1024)
.spawn(|| {
{
tokio::runtime::Builder::new_multi_thread()
.thread_stack_size(8 * 1024 * 1024)
.enable_all()
.build()
.expect("Failed building the Runtime")
.block_on(main_body())
}
})
.context(cmd::error::SpawnThreadSnafu)?
.join()
.expect("Couldn't join on the associated thread")
}
#[cfg(not(debug_assertions))]
#[tokio::main]
async fn main() -> Result<()> {
main_body().await
}
async fn main_body() -> Result<()> {
setup_human_panic();
start(Command::parse()).await
}
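As a minimal sketch of the pattern introduced above (stripped of the cmd crate's error plumbing, so the function name here is illustrative only): both the spawning thread and the tokio workers get an 8 MB stack, which keeps deep debug-mode call chains from overflowing.
fn run_with_big_stack() -> std::io::Result<()> {
    std::thread::Builder::new()
        .name("main_spawn".to_string())
        .stack_size(8 * 1024 * 1024) // the thread that drives the runtime
        .spawn(|| {
            tokio::runtime::Builder::new_multi_thread()
                .thread_stack_size(8 * 1024 * 1024) // the worker threads
                .enable_all()
                .build()
                .expect("Failed building the Runtime")
                .block_on(async { /* application body goes here */ })
        })?
        .join()
        .expect("Couldn't join on the associated thread");
    Ok(())
}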

View File

@@ -305,6 +305,12 @@ pub enum Error {
error: std::io::Error,
},
#[snafu(display("Failed to spawn thread"))]
SpawnThread {
#[snafu(source)]
error: std::io::Error,
},
#[snafu(display("Other error"))]
Other {
source: BoxedError,
@@ -395,7 +401,9 @@ impl ErrorExt for Error {
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::StartCatalogManager { source, .. } => source.status_code(),
Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected,
Error::SerdeJson { .. } | Error::FileIo { .. } | Error::SpawnThread { .. } => {
StatusCode::Unexpected
}
Error::Other { source, .. } => source.status_code(),

View File

@@ -24,6 +24,7 @@ use common_grpc::channel_manager::ChannelConfig;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
@@ -296,11 +297,13 @@ impl StartCommand {
Arc::new(executor),
);
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,
Plugins::new(),
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
)
.with_heartbeat_task(heartbeat_task);

View File

@@ -30,7 +30,7 @@ pub mod standalone;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["version", "short_version"]).unwrap();
}
#[async_trait]
@@ -74,16 +74,16 @@ pub trait App: Send {
}
/// Log the versions of the application, and the arguments passed to the cli.
/// `version_string` should be the same as the output of cli "--version";
/// and the `app_version` is the short version of the codes, often consist of git branch and commit.
pub fn log_versions(version_string: &str, app_version: &str) {
/// `version` should be the same as the output of the cli "--version";
/// and `short_version` is the short version of the code, usually consisting of the git branch and commit.
pub fn log_versions(version: &str, short_version: &str) {
// Report app version as gauge.
APP_VERSION
.with_label_values(&[env!("CARGO_PKG_VERSION"), app_version])
.with_label_values(&[env!("CARGO_PKG_VERSION"), short_version])
.inc();
// Log version and argument flags.
info!("GreptimeDB version: {}", version_string);
info!("GreptimeDB version: {}", version);
log_env_flags();
}

View File

@@ -22,6 +22,7 @@ use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use meta_srv::bootstrap::MetasrvInstance;
use meta_srv::metasrv::BackendImpl;
use snafu::ResultExt;
use tracing_appender::non_blocking::WorkerGuard;
@@ -137,6 +138,9 @@ struct StartCommand {
/// The max operations per txn
#[clap(long)]
max_txn_ops: Option<usize>,
/// The database backend.
#[clap(long, value_enum)]
backend: Option<BackendImpl>,
}
impl StartCommand {
@@ -219,6 +223,12 @@ impl StartCommand {
opts.max_txn_ops = max_txn_ops;
}
if let Some(backend) = &self.backend {
opts.backend.clone_from(backend);
} else {
opts.backend = BackendImpl::default()
}
// Disable dashboard in metasrv.
opts.http.disable_dashboard = true;

View File

@@ -476,11 +476,13 @@ impl StartCommand {
.await
.context(StartDatanodeSnafu)?;
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let flow_builder = FlownodeBuilder::new(
Default::default(),
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
);
let flownode = Arc::new(
flow_builder
@@ -511,7 +513,6 @@ impl StartCommand {
opts.wal.into(),
kv_backend.clone(),
));
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),

View File

@@ -22,7 +22,7 @@ use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_runtime::global::RuntimeOptions;
use common_telemetry::logging::LoggingOptions;
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
@@ -88,7 +88,7 @@ fn test_load_datanode_example_config() {
],
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
@@ -136,7 +136,7 @@ fn test_load_frontend_example_config() {
}),
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
@@ -169,12 +169,12 @@ fn test_load_metasrv_example_config() {
compact_rt_size: 4,
},
component: MetasrvOptions {
selector: SelectorType::LeaseBased,
selector: SelectorType::default(),
data_home: "/tmp/metasrv/".to_string(),
logging: LoggingOptions {
dir: "/tmp/greptimedb/logs".to_string(),
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},
@@ -228,7 +228,7 @@ fn test_load_standalone_example_config() {
},
logging: LoggingOptions {
level: Some("info".to_string()),
otlp_endpoint: Some("".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
..Default::default()
},

View File

@@ -19,9 +19,8 @@ use snafu::ResultExt;
use crate::error::{BuildBackendSnafu, Result};
pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
let mut builder = Fs::default();
let _ = builder.root(root);
let object_store = ObjectStore::new(builder)
let builder = Fs::default();
let object_store = ObjectStore::new(builder.root(root))
.context(BuildBackendSnafu)?
.layer(
object_store::layers::LoggingLayer::default()

View File

@@ -44,28 +44,26 @@ pub fn build_s3_backend(
path: &str,
connection: &HashMap<String, String>,
) -> Result<ObjectStore> {
let mut builder = S3::default();
let _ = builder.root(path).bucket(host);
let mut builder = S3::default().root(path).bucket(host);
if let Some(endpoint) = connection.get(ENDPOINT) {
let _ = builder.endpoint(endpoint);
builder = builder.endpoint(endpoint);
}
if let Some(region) = connection.get(REGION) {
let _ = builder.region(region);
builder = builder.region(region);
}
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
let _ = builder.access_key_id(key_id);
builder = builder.access_key_id(key_id);
}
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
let _ = builder.secret_access_key(key);
builder = builder.secret_access_key(key);
}
if let Some(session_token) = connection.get(SESSION_TOKEN) {
let _ = builder.security_token(session_token);
builder = builder.session_token(session_token);
}
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
@@ -79,7 +77,7 @@ pub fn build_s3_backend(
.build()
})?;
if enable {
let _ = builder.enable_virtual_host_style();
builder = builder.enable_virtual_host_style();
}
}
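The object-store hunks above (and the test-util ones below) all make the same mechanical change. Restated side by side, using the same `Fs`/`ObjectStore` names that appear in these hunks: the old OpenDAL builders were mutated in place, while the new ones are consumed and returned, so calls chain.
// Before: mutate the builder and discard the returned reference.
let mut builder = Fs::default();
let _ = builder.root(root);
let store = ObjectStore::new(builder).unwrap().finish();
// After: builder methods take `self` and return `Self`, so they chain.
let store = ObjectStore::new(Fs::default().root(root)).unwrap().finish();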

View File

@@ -47,19 +47,15 @@ pub fn format_schema(schema: Schema) -> Vec<String> {
}
pub fn test_store(root: &str) -> ObjectStore {
let mut builder = Fs::default();
let _ = builder.root(root);
ObjectStore::new(builder).unwrap().finish()
let builder = Fs::default();
ObjectStore::new(builder.root(root)).unwrap().finish()
}
pub fn test_tmp_store(root: &str) -> (ObjectStore, TempDir) {
let dir = create_temp_dir(root);
let mut builder = Fs::default();
let _ = builder.root("/");
(ObjectStore::new(builder).unwrap().finish(), dir)
let builder = Fs::default();
(ObjectStore::new(builder.root("/")).unwrap().finish(), dir)
}
pub fn test_basic_schema() -> SchemaRef {

View File

@@ -33,6 +33,8 @@ use crate::handlers::TableMutationHandlerRef;
/// Compact type: strict window.
const COMPACT_TYPE_STRICT_WINDOW: &str = "strict_window";
/// Compact type: strict window (short name).
const COMPACT_TYPE_STRICT_WINDOW_SHORT: &str = "swcs";
#[admin_fn(
name = FlushTableFunction,
@@ -168,8 +170,12 @@ fn parse_compact_params(
})
}
/// Parses the compaction strategy type. For `strict_window` or `swcs`, strict window compaction is chosen;
/// otherwise, regular (TWCS) compaction is used.
fn parse_compact_type(type_str: &str, option: Option<&str>) -> Result<compact_request::Options> {
if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW) {
if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW)
| type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW_SHORT)
{
let window_seconds = option
.map(|v| {
i64::from_str(v).map_err(|_| {
@@ -350,6 +356,17 @@ mod tests {
compact_options: Options::Regular(Default::default()),
},
),
(
&["table", "swcs", "120"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 120,
}),
},
),
]);
assert!(parse_compact_params(

View File

@@ -6,6 +6,7 @@ license.workspace = true
[features]
testing = []
pg_kvbackend = ["dep:tokio-postgres"]
[lints]
workspace = true
@@ -56,6 +57,7 @@ store-api.workspace = true
strum.workspace = true
table.workspace = true
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true
typetag = "0.2"

View File

@@ -227,7 +227,7 @@ impl Procedure for DropTableProcedure {
}
fn rollback_supported(&self) -> bool {
!matches!(self.data.state, DropTableState::Prepare)
!matches!(self.data.state, DropTableState::Prepare) && self.data.allow_rollback
}
async fn rollback(&mut self, _: &ProcedureContext) -> ProcedureResult<()> {
@@ -256,6 +256,8 @@ pub struct DropTableData {
pub task: DropTableTask,
pub physical_region_routes: Vec<RegionRoute>,
pub physical_table_id: Option<TableId>,
#[serde(default)]
pub allow_rollback: bool,
}
impl DropTableData {
@@ -266,6 +268,7 @@ impl DropTableData {
task,
physical_region_routes: vec![],
physical_table_id: None,
allow_rollback: false,
}
}

View File

@@ -12,8 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_catalog::format_full_table_name;
use snafu::OptionExt;
use store_api::metric_engine_consts::METRIC_ENGINE_NAME;
use crate::ddl::drop_table::DropTableProcedure;
use crate::error::Result;
use crate::error::{self, Result};
impl DropTableProcedure {
/// Fetches the table info and physical table route.
@@ -29,6 +33,23 @@ impl DropTableProcedure {
self.data.physical_region_routes = physical_table_route_value.region_routes;
self.data.physical_table_id = Some(physical_table_id);
if physical_table_id == self.data.table_id() {
let table_info_value = self
.context
.table_metadata_manager
.table_info_manager()
.get(task.table_id)
.await?
.with_context(|| error::TableInfoNotFoundSnafu {
table: format_full_table_name(&task.catalog, &task.schema, &task.table),
})?
.into_inner();
let engine = table_info_value.table_info.meta.engine;
// Only allow rollback when dropping a metric-engine physical table, so a failed drop can be rolled back.
self.data.allow_rollback = engine.as_str() == METRIC_ENGINE_NAME
}
Ok(())
}
}

View File

@@ -23,6 +23,7 @@ use std::collections::HashMap;
use api::v1::meta::Partition;
use api::v1::{ColumnDataType, SemanticType};
use common_procedure::Status;
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
use table::metadata::{RawTableInfo, TableId};
use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure;
@@ -130,6 +131,11 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
.time_index("ts")
.primary_keys(["host".into()])
.table_name(name)
.engine(METRIC_ENGINE_NAME)
.table_options(HashMap::from([(
LOGICAL_TABLE_METADATA_KEY.to_string(),
"phy".to_string(),
)]))
.build()
.unwrap()
.into();
@@ -166,6 +172,7 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
.time_index("ts")
.primary_keys(["value".into()])
.table_name(name)
.engine(METRIC_ENGINE_NAME)
.build()
.unwrap()
.into();

View File

@@ -127,7 +127,7 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
engine: expr.engine.to_string(),
next_column_id: expr.column_defs.len() as u32,
region_numbers: vec![],
options: TableOptions::default(),
options: TableOptions::try_from_iter(&expr.table_options).unwrap(),
created_on: DateTime::default(),
partition_key_indices: vec![],
},

View File

@@ -91,6 +91,7 @@ async fn test_on_prepare_table() {
// Drop if exists
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported());
let task = new_drop_table_task(table_name, table_id, false);
// Drop table
@@ -224,9 +225,12 @@ async fn test_on_rollback() {
let task = new_drop_table_task("phy_table", physical_table_id, false);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(procedure.rollback_supported());
procedure.on_delete_metadata().await.unwrap();
assert!(procedure.rollback_supported());
procedure.rollback(&ctx).await.unwrap();
// Rollback again
assert!(procedure.rollback_supported());
procedure.rollback(&ctx).await.unwrap();
let kvs = kv_backend.dump();
assert_eq!(kvs, expected_kvs);
@@ -236,12 +240,7 @@ async fn test_on_rollback() {
let task = new_drop_table_task("foo", table_ids[0], false);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
procedure.on_delete_metadata().await.unwrap();
procedure.rollback(&ctx).await.unwrap();
// Rollback again
procedure.rollback(&ctx).await.unwrap();
let kvs = kv_backend.dump();
assert_eq!(kvs, expected_kvs);
assert!(!procedure.rollback_supported());
}
fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool) -> DropTableTask {

View File

@@ -15,7 +15,7 @@
use common_catalog::consts::METRIC_ENGINE;
use common_error::ext::BoxedError;
use common_procedure::error::Error as ProcedureError;
use snafu::{ensure, location, Location, OptionExt};
use snafu::{ensure, location, OptionExt};
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
use table::metadata::TableId;

View File

@@ -499,6 +499,13 @@ pub enum Error {
error: rskafka::client::error::Error,
},
#[snafu(display("Failed to create TLS Config"))]
TlsConfig {
#[snafu(implicit)]
location: Location,
source: common_wal::error::Error,
},
#[snafu(display("Failed to resolve Kafka broker endpoint."))]
ResolveKafkaEndpoint { source: common_wal::error::Error },
@@ -636,6 +643,15 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to parse {} from str to utf8", name))]
StrFromUtf8 {
name: String,
#[snafu(source)]
error: std::str::Utf8Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Value not exists"))]
ValueNotExist {
#[snafu(implicit)]
@@ -644,6 +660,24 @@ pub enum Error {
#[snafu(display("Failed to get cache"))]
GetCache { source: Arc<Error> },
#[cfg(feature = "pg_kvbackend")]
#[snafu(display("Failed to execute via Postgres"))]
PostgresExecution {
#[snafu(source)]
error: tokio_postgres::Error,
#[snafu(implicit)]
location: Location,
},
#[cfg(feature = "pg_kvbackend")]
#[snafu(display("Failed to connect to Postgres"))]
ConnectPostgres {
#[snafu(source)]
error: tokio_postgres::Error,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -697,7 +731,8 @@ impl ErrorExt for Error {
| UnexpectedLogicalRouteTable { .. }
| ProcedureOutput { .. }
| FromUtf8 { .. }
| MetadataCorruption { .. } => StatusCode::Unexpected,
| MetadataCorruption { .. }
| StrFromUtf8 { .. } => StatusCode::Unexpected,
SendMessage { .. } | GetKvCache { .. } | CacheNotGet { .. } | RenameTable { .. } => {
StatusCode::Internal
@@ -714,7 +749,8 @@ impl ErrorExt for Error {
| AlterLogicalTablesInvalidArguments { .. }
| CreateLogicalTablesInvalidArguments { .. }
| MismatchPrefix { .. }
| DelimiterNotFound { .. } => StatusCode::InvalidArguments,
| DelimiterNotFound { .. }
| TlsConfig { .. } => StatusCode::InvalidArguments,
FlowNotFound { .. } => StatusCode::FlowNotFound,
FlowRouteNotFound { .. } => StatusCode::Unexpected,
@@ -741,6 +777,11 @@ impl ErrorExt for Error {
| ParseNum { .. }
| InvalidRole { .. }
| EmptyDdlTasks { .. } => StatusCode::InvalidArguments,
#[cfg(feature = "pg_kvbackend")]
PostgresExecution { .. } => StatusCode::Internal,
#[cfg(feature = "pg_kvbackend")]
ConnectPostgres { .. } => StatusCode::Internal,
}
}

View File

@@ -31,6 +31,8 @@ use crate::rpc::KeyValue;
pub mod chroot;
pub mod etcd;
pub mod memory;
#[cfg(feature = "pg_kvbackend")]
pub mod postgres;
pub mod test;
pub mod txn;

View File

@@ -0,0 +1,626 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::borrow::Cow;
use std::sync::Arc;
use snafu::ResultExt;
use tokio_postgres::types::ToSql;
use tokio_postgres::{Client, NoTls};
use super::{KvBackend, TxnService};
use crate::error::{ConnectPostgresSnafu, Error, PostgresExecutionSnafu, Result, StrFromUtf8Snafu};
use crate::kv_backend::txn::{Txn as KvTxn, TxnResponse as KvTxnResponse};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
/// Postgres backend store for metasrv.
pub struct PgStore {
// TODO: Consider using sqlx crate.
client: Client,
}
const EMPTY: &[u8] = &[0];
// TODO: allow users to configure metadata table name.
const METADKV_CREATION: &str =
"CREATE TABLE IF NOT EXISTS greptime_metakv(k varchar PRIMARY KEY, v varchar)";
const FULL_TABLE_SCAN: &str = "SELECT k, v FROM greptime_metakv $1 ORDER BY K";
const POINT_GET: &str = "SELECT k, v FROM greptime_metakv WHERE k = $1";
const PREFIX_SCAN: &str = "SELECT k, v FROM greptime_metakv WHERE k LIKE $1 ORDER BY K";
const RANGE_SCAN_LEFT_BOUNDED: &str = "SELECT k, v FROM greptime_metakv WHERE k >= $1 ORDER BY K";
const RANGE_SCAN_FULL_RANGE: &str =
"SELECT k, v FROM greptime_metakv WHERE k >= $1 AND K < $2 ORDER BY K";
const FULL_TABLE_DELETE: &str = "DELETE FROM greptime_metakv RETURNING k,v";
const POINT_DELETE: &str = "DELETE FROM greptime_metakv WHERE K = $1 RETURNING k,v;";
const PREFIX_DELETE: &str = "DELETE FROM greptime_metakv WHERE k LIKE $1 RETURNING k,v;";
const RANGE_DELETE_LEFT_BOUNDED: &str = "DELETE FROM greptime_metakv WHERE k >= $1 RETURNING k,v;";
const RANGE_DELETE_FULL_RANGE: &str =
"DELETE FROM greptime_metakv WHERE k >= $1 AND K < $2 RETURNING k,v;";
const CAS: &str = r#"
WITH prev AS (
SELECT k,v FROM greptime_metakv WHERE k = $1 AND v = $2
), update AS (
UPDATE greptime_metakv
SET k=$1,
v=$2
WHERE
k=$1 AND v=$3
)
SELECT k, v FROM prev;
"#;
const PUT_IF_NOT_EXISTS: &str = r#"
WITH prev AS (
select k,v from greptime_metakv where k = $1
), insert AS (
INSERT INTO greptime_metakv
VALUES ($1, $2)
ON CONFLICT (k) DO NOTHING
)
SELECT k, v FROM prev;"#;
impl PgStore {
/// Creates a `PgStore` as a `KvBackendRef` from a connection url.
pub async fn with_url(url: &str) -> Result<KvBackendRef> {
// TODO: support tls.
let (client, conn) = tokio_postgres::connect(url, NoTls)
.await
.context(ConnectPostgresSnafu)?;
tokio::spawn(async move { conn.await.context(ConnectPostgresSnafu) });
Self::with_pg_client(client).await
}
/// Creates a `PgStore` as a `KvBackendRef` from a tokio-postgres client.
pub async fn with_pg_client(client: Client) -> Result<KvBackendRef> {
// This step ensures the postgres metadata backend is ready to use:
// we check whether the greptime_metakv table exists and create it if it
// does not.
client
.execute(METADKV_CREATION, &[])
.await
.context(PostgresExecutionSnafu)?;
Ok(Arc::new(Self { client }))
}
async fn put_if_not_exists(&self, key: &str, value: &str) -> Result<bool> {
let res = self
.client
.query(PUT_IF_NOT_EXISTS, &[&key, &value])
.await
.context(PostgresExecutionSnafu)?;
Ok(res.is_empty())
}
}
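A hedged usage sketch (not part of this change set): build the store from a URL, write one key, and prefix-scan it back. It assumes a reachable Postgres and that the request types used in this file implement `Default`.
async fn demo() -> Result<()> {
    let kv = PgStore::with_url("postgres://user:pass@127.0.0.1:5432/postgres").await?;
    kv.put(PutRequest {
        key: b"greptime/demo".to_vec(),
        value: b"v1".to_vec(),
        ..Default::default()
    })
    .await?;
    // "greptime0" is "greptime/" with its last byte incremented, so this request
    // is classified as a prefix scan (see `is_prefix_range` below).
    let resp = kv
        .range(RangeRequest {
            key: b"greptime/".to_vec(),
            range_end: b"greptime0".to_vec(),
            ..Default::default()
        })
        .await?;
    assert!(resp.kvs.iter().any(|pair| pair.key == b"greptime/demo".to_vec()));
    Ok(())
}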
fn select_range_template(req: &RangeRequest) -> &str {
if req.range_end.is_empty() {
return POINT_GET;
}
if req.key == EMPTY && req.range_end == EMPTY {
FULL_TABLE_SCAN
} else if req.range_end == EMPTY {
RANGE_SCAN_LEFT_BOUNDED
} else if is_prefix_range(&req.key, &req.range_end) {
PREFIX_SCAN
} else {
RANGE_SCAN_FULL_RANGE
}
}
fn select_range_delete_template(req: &DeleteRangeRequest) -> &str {
if req.range_end.is_empty() {
return POINT_DELETE;
}
if req.key == EMPTY && req.range_end == EMPTY {
FULL_TABLE_DELETE
} else if req.range_end == EMPTY {
RANGE_DELETE_LEFT_BOUNDED
} else if is_prefix_range(&req.key, &req.range_end) {
PREFIX_DELETE
} else {
RANGE_DELETE_FULL_RANGE
}
}
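To make the dispatch above concrete, a small hedged sketch (assuming `RangeRequest` implements `Default`) of which template each shape of request selects:
fn template_examples() {
    // No range_end at all: a point get.
    let point = RangeRequest {
        key: b"k".to_vec(),
        range_end: vec![],
        ..Default::default()
    };
    assert_eq!(select_range_template(&point), POINT_GET);
    // range_end is the key with its last byte incremented: a prefix scan.
    let prefix = RangeRequest {
        key: b"greptime/".to_vec(),
        range_end: b"greptime0".to_vec(),
        ..Default::default()
    };
    assert_eq!(select_range_template(&prefix), PREFIX_SCAN);
    // Both bounds are the single-zero-byte EMPTY sentinel: a full table scan.
    let full = RangeRequest {
        key: vec![0],
        range_end: vec![0],
        ..Default::default()
    };
    assert_eq!(select_range_template(&full), FULL_TABLE_SCAN);
}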
// Generate dynamic parameterized sql for batch get.
fn generate_batch_get_query(key_len: usize) -> String {
let in_placeholders: Vec<String> = (1..=key_len).map(|i| format!("${}", i)).collect();
let in_clause = in_placeholders.join(", ");
format!(
"SELECT k, v FROM greptime_metakv WHERE k in ({});",
in_clause
)
}
// Generate dynamic parameterized sql for batch delete.
fn generate_batch_delete_query(key_len: usize) -> String {
let in_placeholders: Vec<String> = (1..=key_len).map(|i| format!("${}", i)).collect();
let in_clause = in_placeholders.join(", ");
format!(
"DELETE FROM greptime_metakv WHERE k in ({}) RETURNING k, v;",
in_clause
)
}
// Generate dynamic parameterized sql for batch upsert.
fn generate_batch_upsert_query(kv_len: usize) -> String {
let in_placeholders: Vec<String> = (1..=kv_len).map(|i| format!("${}", i)).collect();
let in_clause = in_placeholders.join(", ");
let mut param_index = kv_len + 1;
let mut values_placeholders = Vec::new();
for _ in 0..kv_len {
values_placeholders.push(format!("(${0}, ${1})", param_index, param_index + 1));
param_index += 2;
}
let values_clause = values_placeholders.join(", ");
format!(
r#"
WITH prev AS (
SELECT k,v FROM greptime_metakv WHERE k IN ({in_clause})
), update AS (
INSERT INTO greptime_metakv (k, v) VALUES
{values_clause}
ON CONFLICT (
k
) DO UPDATE SET
v = excluded.v
)
SELECT k, v FROM prev;
"#
)
}
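For reference, `generate_batch_upsert_query(2)` produces the following statement (reformatted), which is why `batch_put` below pushes all keys first and only then the interleaved key/value pairs:
WITH prev AS (
    SELECT k,v FROM greptime_metakv WHERE k IN ($1, $2)
), update AS (
    INSERT INTO greptime_metakv (k, v) VALUES ($3, $4), ($5, $6)
    ON CONFLICT (k) DO UPDATE SET v = excluded.v
)
SELECT k, v FROM prev;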
// Trim trailing null bytes and convert the bytes to a string.
fn process_bytes<'a>(data: &'a [u8], name: &str) -> Result<&'a str> {
let mut len = data.len();
// Remove trailing null bytes to avoid errors in postgres encoding.
while len > 0 && data[len - 1] == 0 {
len -= 1;
}
let res = std::str::from_utf8(&data[0..len]).context(StrFromUtf8Snafu { name })?;
Ok(res)
}
#[async_trait::async_trait]
impl KvBackend for PgStore {
fn name(&self) -> &str {
"Postgres"
}
fn as_any(&self) -> &dyn Any {
self
}
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
let mut params = vec![];
let template = select_range_template(&req);
if req.key != EMPTY {
let key = process_bytes(&req.key, "rangeKey")?;
if template == PREFIX_SCAN {
let prefix = format!("{key}%");
params.push(Cow::Owned(prefix))
} else {
params.push(Cow::Borrowed(key))
}
}
if template == RANGE_SCAN_FULL_RANGE && req.range_end != EMPTY {
let range_end = process_bytes(&req.range_end, "rangeEnd")?;
params.push(Cow::Borrowed(range_end));
}
let limit = req.limit as usize;
let limit_cause = match limit > 0 {
true => format!(" LIMIT {};", limit + 1),
false => ";".to_string(),
};
let template = format!("{}{}", template, limit_cause);
let params: Vec<&(dyn ToSql + Sync)> = params
.iter()
.map(|x| match x {
Cow::Borrowed(borrowed) => borrowed as &(dyn ToSql + Sync),
Cow::Owned(owned) => owned as &(dyn ToSql + Sync),
})
.collect();
let res = self
.client
.query(&template, &params)
.await
.context(PostgresExecutionSnafu)?;
let kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
if req.keys_only {
return KeyValue {
key: key.into_bytes(),
value: vec![],
};
}
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
if limit == 0 || limit > kvs.len() {
return Ok(RangeResponse { kvs, more: false });
}
let (filtered_kvs, _) = kvs.split_at(limit);
Ok(RangeResponse {
kvs: filtered_kvs.to_vec(),
more: kvs.len() > limit,
})
}
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
let kv = KeyValue {
key: req.key,
value: req.value,
};
let mut res = self
.batch_put(BatchPutRequest {
kvs: vec![kv],
prev_kv: req.prev_kv,
})
.await?;
if !res.prev_kvs.is_empty() {
return Ok(PutResponse {
prev_kv: Some(res.prev_kvs.remove(0)),
});
}
Ok(PutResponse { prev_kv: None })
}
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
let mut in_params = Vec::with_capacity(req.kvs.len());
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
for kv in &req.kvs {
let processed_key = process_bytes(&kv.key, "BatchPutRequestKey")?;
in_params.push(processed_key);
let processed_value = process_bytes(&kv.value, "BatchPutRequestValue")?;
values_params.push(processed_key);
values_params.push(processed_value);
}
in_params.extend(values_params);
let params: Vec<&(dyn ToSql + Sync)> =
in_params.iter().map(|x| x as &(dyn ToSql + Sync)).collect();
let query = generate_batch_upsert_query(req.kvs.len());
let res = self
.client
.query(&query, &params)
.await
.context(PostgresExecutionSnafu)?;
if req.prev_kv {
let kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
if !kvs.is_empty() {
return Ok(BatchPutResponse { prev_kvs: kvs });
}
}
Ok(BatchPutResponse { prev_kvs: vec![] })
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
if req.keys.is_empty() {
return Ok(BatchGetResponse { kvs: vec![] });
}
let query = generate_batch_get_query(req.keys.len());
let value_params = req
.keys
.iter()
.map(|k| process_bytes(k, "BatchGetRequestKey"))
.collect::<Result<Vec<&str>>>()?;
let params: Vec<&(dyn ToSql + Sync)> = value_params
.iter()
.map(|x| x as &(dyn ToSql + Sync))
.collect();
let res = self
.client
.query(&query, &params)
.await
.context(PostgresExecutionSnafu)?;
let kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
Ok(BatchGetResponse { kvs })
}
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
let mut params = vec![];
let template = select_range_delete_template(&req);
if req.key != EMPTY {
let key = process_bytes(&req.key, "deleteRangeKey")?;
if template == PREFIX_DELETE {
let prefix = format!("{key}%");
params.push(Cow::Owned(prefix));
} else {
params.push(Cow::Borrowed(key));
}
}
if template == RANGE_DELETE_FULL_RANGE && req.range_end != EMPTY {
let range_end = process_bytes(&req.range_end, "deleteRangeEnd")?;
params.push(Cow::Borrowed(range_end));
}
let params: Vec<&(dyn ToSql + Sync)> = params
.iter()
.map(|x| match x {
Cow::Borrowed(borrowed) => borrowed as &(dyn ToSql + Sync),
Cow::Owned(owned) => owned as &(dyn ToSql + Sync),
})
.collect();
let res = self
.client
.query(template, &params)
.await
.context(PostgresExecutionSnafu)?;
let deleted = res.len() as i64;
if !req.prev_kv {
return Ok({
DeleteRangeResponse {
deleted,
prev_kvs: vec![],
}
});
}
let kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
Ok(DeleteRangeResponse {
deleted,
prev_kvs: kvs,
})
}
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
if req.keys.is_empty() {
return Ok(BatchDeleteResponse { prev_kvs: vec![] });
}
let query = generate_batch_delete_query(req.keys.len());
let value_params = req
.keys
.iter()
.map(|k| process_bytes(k, "BatchDeleteRequestKey"))
.collect::<Result<Vec<&str>>>()?;
let params: Vec<&(dyn ToSql + Sync)> = value_params
.iter()
.map(|x| x as &(dyn ToSql + Sync))
.collect();
let res = self
.client
.query(&query, &params)
.await
.context(PostgresExecutionSnafu)?;
if !req.prev_kv {
return Ok(BatchDeleteResponse { prev_kvs: vec![] });
}
let kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
Ok(BatchDeleteResponse { prev_kvs: kvs })
}
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
let key = process_bytes(&req.key, "CASKey")?;
let value = process_bytes(&req.value, "CASValue")?;
if req.expect.is_empty() {
let put_res = self.put_if_not_exists(key, value).await?;
return Ok(CompareAndPutResponse {
success: put_res,
prev_kv: None,
});
}
let expect = process_bytes(&req.expect, "CASExpect")?;
let res = self
.client
.query(CAS, &[&key, &value, &expect])
.await
.context(PostgresExecutionSnafu)?;
match res.is_empty() {
true => Ok(CompareAndPutResponse {
success: false,
prev_kv: None,
}),
false => {
let mut kvs: Vec<KeyValue> = res
.into_iter()
.map(|r| {
let key: String = r.get(0);
let value: String = r.get(1);
KeyValue {
key: key.into_bytes(),
value: value.into_bytes(),
}
})
.collect();
Ok(CompareAndPutResponse {
success: true,
prev_kv: Some(kvs.remove(0)),
})
}
}
}
}
#[async_trait::async_trait]
impl TxnService for PgStore {
type Error = Error;
async fn txn(&self, _txn: KvTxn) -> Result<KvTxnResponse> {
// TODO: implement txn for pg kv backend.
unimplemented!()
}
fn max_txn_ops(&self) -> usize {
unreachable!("postgres backend does not support max_txn_ops!")
}
}
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
if start.len() != end.len() {
return false;
}
let l = start.len();
let same_prefix = start[0..l - 1] == end[0..l - 1];
if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
return same_prefix && (*rhs + 1) == *lhs;
}
false
}
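A few concrete cases for the check above: a range is a prefix range exactly when `end` equals `start` with its final byte incremented, so the half-open interval [start, end) covers every key sharing `start` as a prefix.
fn prefix_range_examples() {
    assert!(is_prefix_range(b"greptime/", b"greptime0")); // '/' + 1 == '0'
    assert!(!is_prefix_range(b"abc", b"abe")); // last byte jumps by two
    assert!(!is_prefix_range(b"abc", b"abcd")); // different lengths never qualify
}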
#[cfg(test)]
mod tests {
use super::*;
use crate::kv_backend::test::{
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
unprepare_kv,
};
async fn build_pg_kv_backend() -> Option<PgStore> {
let endpoints = std::env::var("GT_POSTGRES_ENDPOINTS").unwrap_or_default();
if endpoints.is_empty() {
return None;
}
let (client, connection) = tokio_postgres::connect(&endpoints, NoTls).await.unwrap();
tokio::spawn(connection);
let _ = client.execute(METADKV_CREATION, &[]).await;
Some(PgStore { client })
}
#[tokio::test]
async fn test_put() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let prefix = b"put/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
}
#[tokio::test]
async fn test_range() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let prefix = b"range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
}
#[tokio::test]
async fn test_range_2() {
if let Some(kv_backend) = build_pg_kv_backend().await {
test_kv_range_2_with_prefix(kv_backend, b"range2/".to_vec()).await;
}
}
#[tokio::test]
async fn test_batch_get() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let prefix = b"batchGet/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
}
#[tokio::test(flavor = "multi_thread")]
async fn test_compare_and_put() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let kv_backend = Arc::new(kv_backend);
test_kv_compare_and_put_with_prefix(kv_backend, b"compareAndPut/".to_vec()).await;
}
}
#[tokio::test]
async fn test_delete_range() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let prefix = b"deleteRange/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(kv_backend, prefix.to_vec()).await;
}
}
#[tokio::test]
async fn test_batch_delete() {
if let Some(kv_backend) = build_pg_kv_backend().await {
let prefix = b"batchDelete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_delete_with_prefix(kv_backend, prefix.to_vec()).await;
}
}
}

View File

@@ -123,7 +123,7 @@ pub fn prepare_wal_options(
#[cfg(test)]
mod tests {
use common_wal::config::kafka::common::KafkaTopicConfig;
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
use common_wal::config::kafka::MetasrvKafkaConfig;
use common_wal::test_util::run_test_with_kafka_wal;
@@ -166,7 +166,10 @@ mod tests {
..Default::default()
};
let config = MetasrvKafkaConfig {
broker_endpoints,
connection: KafkaConnectionConfig {
broker_endpoints,
..Default::default()
},
kafka_topic,
..Default::default()
};

View File

@@ -30,7 +30,7 @@ use snafu::{ensure, ResultExt};
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result,
ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result, TlsConfigSnafu,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
@@ -117,15 +117,22 @@ impl TopicManager {
base: self.config.backoff.base as f64,
deadline: self.config.backoff.deadline,
};
let broker_endpoints = common_wal::resolve_to_ipv4(&self.config.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let client = ClientBuilder::new(broker_endpoints)
.backoff_config(backoff_config)
let broker_endpoints =
common_wal::resolve_to_ipv4(&self.config.connection.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(backoff_config);
if let Some(sasl) = &self.config.connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};
if let Some(tls) = &self.config.connection.tls {
builder = builder.tls_config(tls.to_tls_config().await.context(TlsConfigSnafu)?)
};
let client = builder
.build()
.await
.with_context(|_| BuildKafkaClientSnafu {
broker_endpoints: self.config.broker_endpoints.clone(),
broker_endpoints: self.config.connection.broker_endpoints.clone(),
})?;
let control_client = client
@@ -242,7 +249,7 @@ impl TopicManager {
#[cfg(test)]
mod tests {
use common_wal::config::kafka::common::KafkaTopicConfig;
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
use common_wal::test_util::run_test_with_kafka_wal;
use super::*;
@@ -289,7 +296,10 @@ mod tests {
..Default::default()
};
let config = MetasrvKafkaConfig {
broker_endpoints,
connection: KafkaConnectionConfig {
broker_endpoints,
..Default::default()
},
kafka_topic,
..Default::default()
};

View File

@@ -680,9 +680,8 @@ pub(crate) mod test_util {
pub(crate) fn new_object_store(dir: &TempDir) -> ObjectStore {
let store_dir = dir.path().to_str().unwrap();
let mut builder = Builder::default();
let _ = builder.root(store_dir);
ObjectStore::new(builder).unwrap().finish()
let builder = Builder::default();
ObjectStore::new(builder.root(store_dir)).unwrap().finish()
}
}

View File

@@ -361,8 +361,7 @@ mod tests {
fn procedure_store_for_test(dir: &TempDir) -> ProcedureStore {
let store_dir = dir.path().to_str().unwrap();
let mut builder = Builder::default();
let _ = builder.root(store_dir);
let builder = Builder::default().root(store_dir);
let object_store = ObjectStore::new(builder).unwrap().finish();
ProcedureStore::from_object_store(object_store)

View File

@@ -220,8 +220,7 @@ mod tests {
async fn test_object_state_store() {
let dir = create_temp_dir("state_store");
let store_dir = dir.path().to_str().unwrap();
let mut builder = Builder::default();
let _ = builder.root(store_dir);
let builder = Builder::default().root(store_dir);
let object_store = ObjectStore::new(builder).unwrap().finish();
let state_store = ObjectStateStore::new(object_store);
@@ -291,8 +290,7 @@ mod tests {
async fn test_object_state_store_delete() {
let dir = create_temp_dir("state_store_list");
let store_dir = dir.path().to_str().unwrap();
let mut builder = Builder::default();
let _ = builder.root(store_dir);
let builder = Builder::default().root(store_dir);
let object_store = ObjectStore::new(builder).unwrap().finish();
let state_store = ObjectStateStore::new(object_store);

View File

@@ -31,4 +31,5 @@ store-api.workspace = true
[dev-dependencies]
common-base.workspace = true
futures-util.workspace = true
tokio.workspace = true

View File

@@ -155,13 +155,6 @@ pub enum Error {
source: DataTypeError,
},
#[snafu(display("Failed to execute physical plan"))]
ExecutePhysicalPlan {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to cast array to {:?}", typ))]
TypeCast {
#[snafu(source)]
@@ -308,7 +301,6 @@ impl ErrorExt for Error {
Error::DecodePlan { source, .. }
| Error::Execute { source, .. }
| Error::ExecutePhysicalPlan { source, .. }
| Error::ProcedureService { source, .. }
| Error::TableMutation { source, .. } => source.status_code(),

View File

@@ -19,8 +19,10 @@ pub mod logical_plan;
pub mod prelude;
pub mod request;
mod signature;
pub mod stream;
#[cfg(any(test, feature = "testing"))]
pub mod test_util;
use std::fmt::{Debug, Display, Formatter};
use std::sync::Arc;

View File

@@ -0,0 +1,175 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::{Arc, Mutex};
use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
use common_recordbatch::SendableRecordBatchStream;
use datafusion::execution::context::TaskContext;
use datafusion::execution::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datafusion::physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr};
use datafusion::physical_plan::{
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
};
use datafusion_common::DataFusionError;
use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datatypes::schema::SchemaRef;
/// Adapts greptime's [SendableRecordBatchStream] to DataFusion's [ExecutionPlan].
pub struct StreamScanAdapter {
stream: Mutex<Option<SendableRecordBatchStream>>,
schema: SchemaRef,
arrow_schema: ArrowSchemaRef,
properties: PlanProperties,
output_ordering: Option<Vec<PhysicalSortExpr>>,
}
impl Debug for StreamScanAdapter {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("StreamScanAdapter")
.field("stream", &"<SendableRecordBatchStream>")
.field("schema", &self.schema)
.finish()
}
}
impl StreamScanAdapter {
pub fn new(stream: SendableRecordBatchStream) -> Self {
let schema = stream.schema();
let arrow_schema = schema.arrow_schema().clone();
let properties = PlanProperties::new(
EquivalenceProperties::new(arrow_schema.clone()),
Partitioning::UnknownPartitioning(1),
ExecutionMode::Bounded,
);
Self {
stream: Mutex::new(Some(stream)),
schema,
arrow_schema,
properties,
output_ordering: None,
}
}
pub fn with_output_ordering(mut self, output_ordering: Option<Vec<PhysicalSortExpr>>) -> Self {
self.output_ordering = output_ordering;
self
}
}
impl DisplayAs for StreamScanAdapter {
fn fmt_as(&self, _t: DisplayFormatType, f: &mut Formatter) -> std::fmt::Result {
write!(
f,
"StreamScanAdapter: [<SendableRecordBatchStream>], schema: ["
)?;
write!(f, "{:?}", &self.arrow_schema)?;
write!(f, "]")
}
}
impl ExecutionPlan for StreamScanAdapter {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> ArrowSchemaRef {
self.arrow_schema.clone()
}
fn properties(&self) -> &PlanProperties {
&self.properties
}
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
vec![]
}
// DataFusion will swap children unconditionally.
// But since this node is a leaf node, it's safe to just return self.
fn with_new_children(
self: Arc<Self>,
_children: Vec<Arc<dyn ExecutionPlan>>,
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
Ok(self.clone())
}
fn execute(
&self,
_partition: usize,
_context: Arc<TaskContext>,
) -> datafusion_common::Result<DfSendableRecordBatchStream> {
let mut stream = self.stream.lock().unwrap();
let stream = stream
.take()
.ok_or_else(|| DataFusionError::Execution("Stream already exhausted".to_string()))?;
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
}
}
#[cfg(test)]
mod test {
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::prelude::SessionContext;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::Int32Vector;
use futures_util::TryStreamExt;
use super::*;
#[tokio::test]
async fn test_simple_table_scan() {
let ctx = SessionContext::new();
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"a",
ConcreteDataType::int32_datatype(),
false,
)]));
let batch1 = RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice([1, 2])) as _],
)
.unwrap();
let batch2 = RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice([3, 4, 5])) as _],
)
.unwrap();
let recordbatches =
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
let stream = recordbatches.as_stream();
let scan = StreamScanAdapter::new(stream);
assert_eq!(scan.schema(), schema.arrow_schema().clone());
let stream = scan.execute(0, ctx.task_ctx()).unwrap();
let recordbatches = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(recordbatches[0], batch1.into_df_record_batch());
assert_eq!(recordbatches[1], batch2.into_df_record_batch());
let result = scan.execute(0, ctx.task_ctx());
assert!(result.is_err());
match result {
Err(e) => assert!(e.to_string().contains("Stream already exhausted")),
_ => unreachable!(),
}
}
}

View File

@@ -33,7 +33,7 @@ use tracing_subscriber::{filter, EnvFilter, Registry};
use crate::tracing_sampler::{create_sampler, TracingSampleOptions};
const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(default)]

View File

@@ -14,8 +14,8 @@ codec = ["dep:serde", "dep:schemars"]
const_format = "0.2"
schemars = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
shadow-rs = "0.29"
shadow-rs.workspace = true
[build-dependencies]
build-data = "0.2"
shadow-rs = "0.29"
shadow-rs.workspace = true

View File

@@ -12,7 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeSet;
use std::env;
use build_data::{format_timestamp, get_source_time};
use shadow_rs::{CARGO_METADATA, CARGO_TREE};
fn main() -> shadow_rs::SdResult<()> {
println!("cargo:rerun-if-changed=.git/refs/heads");
@@ -25,5 +29,16 @@ fn main() -> shadow_rs::SdResult<()> {
}
);
build_data::set_BUILD_TIMESTAMP();
shadow_rs::new()
// The "CARGO_WORKSPACE_DIR" is set manually (not by Rust itself) in Cargo config file, to
// solve the problem where the "CARGO_MANIFEST_DIR" is not what we want when this repo is
// made as a submodule in another repo.
let src_path = env::var("CARGO_WORKSPACE_DIR").or_else(|_| env::var("CARGO_MANIFEST_DIR"))?;
let out_path = env::var("OUT_DIR")?;
let _ = shadow_rs::Shadow::build_with(
src_path,
out_path,
BTreeSet::from([CARGO_METADATA, CARGO_TREE]),
)?;
Ok(())
}

View File

@@ -125,5 +125,14 @@ pub const fn version() -> &'static str {
}
pub const fn short_version() -> &'static str {
const_format::formatcp!("{}-{}", BUILD_INFO.branch, BUILD_INFO.commit_short,)
const BRANCH: &str = BUILD_INFO.branch;
const COMMIT_ID: &str = BUILD_INFO.commit_short;
// When git checks out a specific commit (detached HEAD), the branch is empty.
#[allow(clippy::const_is_empty)]
if !BRANCH.is_empty() {
const_format::formatcp!("{}-{}", BRANCH, COMMIT_ID)
} else {
COMMIT_ID
}
}
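A hypothetical illustration of the two branches above (values are made up):
// With BUILD_INFO.branch == "main" and BUILD_INFO.commit_short == "abc1234",
// short_version() evaluates to "main-abc1234"; on a detached checkout the
// branch is empty and it falls back to just "abc1234".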

View File

@@ -18,6 +18,9 @@ common-telemetry.workspace = true
futures-util.workspace = true
humantime-serde.workspace = true
rskafka.workspace = true
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
rustls-native-certs = "0.7"
rustls-pemfile = "2.1"
serde.workspace = true
serde_with.workspace = true
snafu.workspace = true

View File

@@ -23,12 +23,14 @@ use crate::config::raft_engine::RaftEngineConfig;
/// Wal configurations for metasrv.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[serde(tag = "provider", rename_all = "snake_case")]
#[allow(clippy::large_enum_variant)]
pub enum MetasrvWalConfig {
#[default]
RaftEngine,
Kafka(MetasrvKafkaConfig),
}
#[allow(clippy::large_enum_variant)]
/// Wal configurations for datanode.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "provider", rename_all = "snake_case")]
@@ -48,7 +50,7 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
match config {
DatanodeWalConfig::RaftEngine(_) => Self::RaftEngine,
DatanodeWalConfig::Kafka(config) => Self::Kafka(MetasrvKafkaConfig {
broker_endpoints: config.broker_endpoints,
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
}),
@@ -61,7 +63,7 @@ impl From<MetasrvWalConfig> for DatanodeWalConfig {
match config {
MetasrvWalConfig::RaftEngine => Self::RaftEngine(RaftEngineConfig::default()),
MetasrvWalConfig::Kafka(config) => Self::Kafka(DatanodeKafkaConfig {
broker_endpoints: config.broker_endpoints,
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
..Default::default()
@@ -75,6 +77,9 @@ mod tests {
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use kafka::common::{
KafkaClientSasl, KafkaClientSaslConfig, KafkaClientTls, KafkaConnectionConfig,
};
use tests::kafka::common::KafkaTopicConfig;
use super::*;
@@ -144,12 +149,31 @@ mod tests {
replication_factor = 1
create_topic_timeout = "30s"
topic_name_prefix = "greptimedb_wal_topic"
[tls]
server_ca_cert_path = "/path/to/server.pem"
[sasl]
type = "SCRAM-SHA-512"
username = "hi"
password = "test"
"#;
// Deserialized to MetasrvWalConfig.
let metasrv_wal_config: MetasrvWalConfig = toml::from_str(toml_str).unwrap();
let expected = MetasrvKafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
connection: KafkaConnectionConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
sasl: Some(KafkaClientSasl {
config: KafkaClientSaslConfig::ScramSha512 {
username: "hi".to_string(),
password: "test".to_string(),
},
}),
tls: Some(KafkaClientTls {
server_ca_cert_path: Some("/path/to/server.pem".to_string()),
client_cert_path: None,
client_key_path: None,
}),
},
backoff: BackoffConfig {
init: Duration::from_millis(500),
max: Duration::from_secs(10),
@@ -170,7 +194,20 @@ mod tests {
// Deserialized to DatanodeWalConfig.
let datanode_wal_config: DatanodeWalConfig = toml::from_str(toml_str).unwrap();
let expected = DatanodeKafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
connection: KafkaConnectionConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
sasl: Some(KafkaClientSasl {
config: KafkaClientSaslConfig::ScramSha512 {
username: "hi".to_string(),
password: "test".to_string(),
},
}),
tls: Some(KafkaClientTls {
server_ca_cert_path: Some("/path/to/server.pem".to_string()),
client_cert_path: None,
client_key_path: None,
}),
},
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig {
@@ -187,6 +224,7 @@ mod tests {
replication_factor: 1,
create_topic_timeout: Duration::from_secs(30),
},
..Default::default()
};
assert_eq!(datanode_wal_config, DatanodeWalConfig::Kafka(expected));
}

View File

@@ -12,16 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::Cursor;
use std::sync::Arc;
use std::time::Duration;
use rskafka::client::{Credentials, SaslConfig};
use rustls::{ClientConfig, RootCertStore};
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
use snafu::{OptionExt, ResultExt};
use crate::{TopicSelectorType, TOPIC_NAME_PREFIX};
use crate::error::{self, Result};
use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};
with_prefix!(pub backoff_prefix "backoff_");
/// Backoff configurations for kafka clients.
/// Backoff configurations for the kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct BackoffConfig {
@@ -49,6 +55,134 @@ impl Default for BackoffConfig {
}
}
/// The SASL configurations for kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct KafkaClientSasl {
#[serde(flatten)]
pub config: KafkaClientSaslConfig,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", rename_all = "SCREAMING-KEBAB-CASE")]
pub enum KafkaClientSaslConfig {
Plain {
username: String,
password: String,
},
#[serde(rename = "SCRAM-SHA-256")]
ScramSha256 {
username: String,
password: String,
},
#[serde(rename = "SCRAM-SHA-512")]
ScramSha512 {
username: String,
password: String,
},
}
impl KafkaClientSaslConfig {
/// Converts to [`SaslConfig`].
pub fn into_sasl_config(self) -> SaslConfig {
match self {
KafkaClientSaslConfig::Plain { username, password } => {
SaslConfig::Plain(Credentials::new(username, password))
}
KafkaClientSaslConfig::ScramSha256 { username, password } => {
SaslConfig::ScramSha256(Credentials::new(username, password))
}
KafkaClientSaslConfig::ScramSha512 { username, password } => {
SaslConfig::ScramSha512(Credentials::new(username, password))
}
}
}
}
/// The TLS configurations for kafka client.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct KafkaClientTls {
pub server_ca_cert_path: Option<String>,
pub client_cert_path: Option<String>,
pub client_key_path: Option<String>,
}
impl KafkaClientTls {
/// Builds the [`ClientConfig`].
pub async fn to_tls_config(&self) -> Result<Arc<ClientConfig>> {
let builder = ClientConfig::builder();
let mut roots = RootCertStore::empty();
if let Some(server_ca_cert_path) = &self.server_ca_cert_path {
let root_cert_bytes =
tokio::fs::read(&server_ca_cert_path)
.await
.context(error::ReadFileSnafu {
path: server_ca_cert_path,
})?;
let mut cursor = Cursor::new(root_cert_bytes);
for cert in rustls_pemfile::certs(&mut cursor)
.collect::<std::result::Result<Vec<_>, _>>()
.context(error::ReadCertsSnafu {
path: server_ca_cert_path,
})?
{
roots.add(cert).context(error::AddCertSnafu)?;
}
};
roots.add_parsable_certificates(
rustls_native_certs::load_native_certs().context(error::LoadSystemCertsSnafu)?,
);
let builder = builder.with_root_certificates(roots);
let config = if let (Some(cert_path), Some(key_path)) =
(&self.client_cert_path, &self.client_key_path)
{
let cert_bytes = tokio::fs::read(cert_path)
.await
.context(error::ReadFileSnafu { path: cert_path })?;
let client_certs = rustls_pemfile::certs(&mut Cursor::new(cert_bytes))
.collect::<std::result::Result<Vec<_>, _>>()
.context(error::ReadCertsSnafu { path: cert_path })?;
let key_bytes = tokio::fs::read(key_path)
.await
.context(error::ReadFileSnafu { path: key_path })?;
let client_key = rustls_pemfile::private_key(&mut Cursor::new(key_bytes))
.context(error::ReadKeySnafu { path: key_path })?
.context(error::KeyNotFoundSnafu { path: key_path })?;
builder
.with_client_auth_cert(client_certs, client_key)
.context(error::SetClientAuthCertSnafu)?
} else {
builder.with_no_client_auth()
};
Ok(Arc::new(config))
}
}
/// The connection configurations for kafka clients.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KafkaConnectionConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// Client SASL.
pub sasl: Option<KafkaClientSasl>,
/// Client TLS config
pub tls: Option<KafkaClientTls>,
}
impl Default for KafkaConnectionConfig {
fn default() -> Self {
Self {
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
sasl: None,
tls: None,
}
}
}
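// Illustrative sketch (not part of this diff): a `KafkaConnectionConfig` carrying both
// SASL credentials and TLS settings. The endpoint, user, password, and certificate path
// are placeholders; a config file section with matching keys would deserialize similarly.
fn example_connection_config() -> KafkaConnectionConfig {
    KafkaConnectionConfig {
        broker_endpoints: vec!["10.0.0.1:9093".to_string()],
        sasl: Some(KafkaClientSasl {
            config: KafkaClientSaslConfig::ScramSha512 {
                username: "user_kafka".to_string(),
                password: "secret".to_string(),
            },
        }),
        tls: Some(KafkaClientTls {
            server_ca_cert_path: Some("/etc/kafka/ca.crt".to_string()),
            client_cert_path: None,
            client_key_path: None,
        }),
    }
}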
/// Topic configurations for kafka clients.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]


@@ -17,15 +17,16 @@ use std::time::Duration;
use common_base::readable_size::ReadableSize;
use serde::{Deserialize, Serialize};
use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
use crate::BROKER_ENDPOINT;
/// Kafka wal configurations for datanode.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct DatanodeKafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// The kafka connection config.
#[serde(flatten)]
pub connection: KafkaConnectionConfig,
/// TODO(weny): Remove the alias once we release v0.9.
/// The max size of a single producer batch.
#[serde(alias = "max_batch_size")]
@@ -39,17 +40,22 @@ pub struct DatanodeKafkaConfig {
/// The kafka topic config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
pub create_index: bool,
#[serde(with = "humantime_serde")]
pub dump_index_interval: Duration,
}
impl Default for DatanodeKafkaConfig {
fn default() -> Self {
Self {
broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
connection: KafkaConnectionConfig::default(),
// Warning: Kafka has a default limit of 1MB per message in a topic.
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
create_index: true,
dump_index_interval: Duration::from_secs(60),
}
}
}
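// Illustrative sketch (not part of this diff): overriding only the connection endpoints
// and the producer batch size while keeping the remaining defaults. The endpoint is a
// placeholder, and the batch size stays under Kafka's default 1MB per-message limit
// noted above.
fn example_datanode_kafka_config() -> DatanodeKafkaConfig {
    DatanodeKafkaConfig {
        connection: KafkaConnectionConfig {
            broker_endpoints: vec!["10.0.0.1:9092".to_string()],
            ..Default::default()
        },
        max_batch_bytes: ReadableSize::kb(512),
        ..Default::default()
    }
}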


@@ -14,15 +14,16 @@
use serde::{Deserialize, Serialize};
use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
use crate::BROKER_ENDPOINT;
/// Kafka wal configurations for metasrv.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct MetasrvKafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// The kafka connection config.
#[serde(flatten)]
pub connection: KafkaConnectionConfig,
/// The backoff config.
#[serde(flatten, with = "backoff_prefix")]
pub backoff: BackoffConfig,
@@ -30,14 +31,3 @@ pub struct MetasrvKafkaConfig {
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
}
impl Default for MetasrvKafkaConfig {
fn default() -> Self {
let broker_endpoints = vec![BROKER_ENDPOINT.to_string()];
Self {
broker_endpoints,
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
}
}
}


@@ -1,72 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use serde::{Deserialize, Serialize};
use crate::config::kafka::common::{backoff_prefix, BackoffConfig};
use crate::{TopicSelectorType, BROKER_ENDPOINT, TOPIC_NAME_PREFIX};
/// Kafka wal configurations for standalone.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct StandaloneKafkaConfig {
/// The broker endpoints of the Kafka cluster.
pub broker_endpoints: Vec<String>,
/// Number of topics to be created upon start.
pub num_topics: usize,
/// The type of the topic selector with which to select a topic for a region.
pub selector_type: TopicSelectorType,
/// Topic name prefix.
pub topic_name_prefix: String,
/// Number of partitions per topic.
pub num_partitions: i32,
/// The replication factor of each topic.
pub replication_factor: i16,
/// The timeout of topic creation.
#[serde(with = "humantime_serde")]
pub create_topic_timeout: Duration,
/// TODO(weny): Remove the alias once we release v0.9.
/// The max size of a single producer batch.
#[serde(alias = "max_batch_size")]
pub max_batch_bytes: ReadableSize,
/// The consumer wait timeout.
#[serde(with = "humantime_serde")]
pub consumer_wait_timeout: Duration,
/// The backoff config.
#[serde(flatten, with = "backoff_prefix")]
pub backoff: BackoffConfig,
}
impl Default for StandaloneKafkaConfig {
fn default() -> Self {
let broker_endpoints = vec![BROKER_ENDPOINT.to_string()];
let replication_factor = broker_endpoints.len() as i16;
Self {
broker_endpoints,
num_topics: 64,
selector_type: TopicSelectorType::RoundRobin,
topic_name_prefix: TOPIC_NAME_PREFIX.to_string(),
num_partitions: 1,
replication_factor,
create_topic_timeout: Duration::from_secs(30),
// Warning: Kafka has a default limit of 1MB per message in a topic.
max_batch_bytes: ReadableSize::mb(1),
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
}
}
}


@@ -13,7 +13,7 @@
// limitations under the License.
use common_macro::stack_trace_debug;
use snafu::Snafu;
use snafu::{Location, Snafu};
#[derive(Snafu)]
#[snafu(visibility(pub))]
@@ -24,10 +24,74 @@ pub enum Error {
broker_endpoint: String,
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to find ipv4 endpoint: {:?}", broker_endpoint))]
EndpointIPV4NotFound { broker_endpoint: String },
EndpointIPV4NotFound {
broker_endpoint: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read file, path: {}", path))]
ReadFile {
path: String,
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to add root cert"))]
AddCert {
#[snafu(source)]
error: rustls::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read cert, path: {}", path))]
ReadCerts {
path: String,
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to read key, path: {}", path))]
ReadKey {
path: String,
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse key, path: {}", path))]
KeyNotFound {
path: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to set client auth cert"))]
SetClientAuthCert {
#[snafu(source)]
error: rustls::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to load ca certs from system"))]
LoadSystemCerts {
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;


@@ -61,6 +61,9 @@ async fn resolve_to_ipv4_one<T: AsRef<str>>(endpoint: T) -> Result<String> {
mod tests {
use std::assert_matches::assert_matches;
use common_telemetry::warn;
use rskafka::client::{Credentials, SaslConfig};
use super::*;
use crate::error::Error;
@@ -86,4 +89,44 @@ mod tests {
let got = resolve_to_ipv4_one(host).await;
assert_matches!(got.unwrap_err(), Error::ResolveEndpoint { .. });
}
#[tokio::test]
async fn test_sasl() {
common_telemetry::init_default_ut_logging();
let Ok(broker_endpoints) = std::env::var("GT_KAFKA_SASL_ENDPOINTS") else {
warn!("The endpoints is empty, skipping the test 'test_sasl'");
return;
};
let broker_endpoints = broker_endpoints
.split(',')
.map(|s| s.trim().to_string())
.collect::<Vec<_>>();
let username = "user_kafka";
let password = "secret";
let _ = rskafka::client::ClientBuilder::new(broker_endpoints.clone())
.sasl_config(SaslConfig::Plain(Credentials::new(
username.to_string(),
password.to_string(),
)))
.build()
.await
.unwrap();
let _ = rskafka::client::ClientBuilder::new(broker_endpoints.clone())
.sasl_config(SaslConfig::ScramSha256(Credentials::new(
username.to_string(),
password.to_string(),
)))
.build()
.await
.unwrap();
let _ = rskafka::client::ClientBuilder::new(broker_endpoints)
.sasl_config(SaslConfig::ScramSha512(Credentials::new(
username.to_string(),
password.to_string(),
)))
.build()
.await
.unwrap();
}
}


@@ -179,6 +179,8 @@ pub struct GcsConfig {
pub scope: String,
#[serde(skip_serializing)]
pub credential_path: SecretString,
#[serde(skip_serializing)]
pub credential: SecretString,
pub endpoint: String,
#[serde(flatten)]
pub cache: ObjectStorageCacheConfig,
@@ -190,6 +192,7 @@ impl PartialEq for GcsConfig {
&& self.bucket == other.bucket
&& self.scope == other.scope
&& self.credential_path.expose_secret() == other.credential_path.expose_secret()
&& self.credential.expose_secret() == other.credential.expose_secret()
&& self.endpoint == other.endpoint
&& self.cache == other.cache
}
@@ -243,6 +246,7 @@ impl Default for GcsConfig {
bucket: String::default(),
scope: String::default(),
credential_path: SecretString::from(String::default()),
credential: SecretString::from(String::default()),
endpoint: String::default(),
cache: ObjectStorageCacheConfig::default(),
}


@@ -16,6 +16,7 @@
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use catalog::memory::MemoryCatalogManager;
use common_base::Plugins;
@@ -32,6 +33,7 @@ use common_wal::config::DatanodeWalConfig;
use file_engine::engine::FileRegionEngine;
use futures_util::TryStreamExt;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::kafka::{default_index_file, GlobalIndexCollector};
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::MetaClientRef;
use metric_engine::engine::MetricEngine;
@@ -64,7 +66,7 @@ use crate::event_listener::{
use crate::greptimedb_telemetry::get_greptimedb_telemetry_task;
use crate::heartbeat::HeartbeatTask;
use crate::region_server::{DummyTableProviderFactory, RegionServer};
use crate::store;
use crate::store::{self, new_object_store_without_cache};
/// Datanode service.
pub struct Datanode {
@@ -398,15 +400,37 @@ impl DatanodeBuilder {
)
.await
.context(BuildMitoEngineSnafu)?,
DatanodeWalConfig::Kafka(kafka_config) => MitoEngine::new(
&opts.storage.data_home,
config,
Self::build_kafka_log_store(kafka_config).await?,
object_store_manager,
plugins,
)
.await
.context(BuildMitoEngineSnafu)?,
DatanodeWalConfig::Kafka(kafka_config) => {
if kafka_config.create_index && opts.node_id.is_none() {
warn!("The WAL index creation only available in distributed mode.")
}
let global_index_collector = if kafka_config.create_index && opts.node_id.is_some()
{
let operator = new_object_store_without_cache(
&opts.storage.store,
&opts.storage.data_home,
)
.await?;
let path = default_index_file(opts.node_id.unwrap());
Some(Self::build_global_index_collector(
kafka_config.dump_index_interval,
operator,
path,
))
} else {
None
};
MitoEngine::new(
&opts.storage.data_home,
config,
Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
object_store_manager,
plugins,
)
.await
.context(BuildMitoEngineSnafu)?
}
};
Ok(mito_engine)
}
@@ -438,14 +462,26 @@ impl DatanodeBuilder {
Ok(Arc::new(logstore))
}
/// Builds [KafkaLogStore].
async fn build_kafka_log_store(config: &DatanodeKafkaConfig) -> Result<Arc<KafkaLogStore>> {
KafkaLogStore::try_new(config)
/// Builds [`KafkaLogStore`].
async fn build_kafka_log_store(
config: &DatanodeKafkaConfig,
global_index_collector: Option<GlobalIndexCollector>,
) -> Result<Arc<KafkaLogStore>> {
KafkaLogStore::try_new(config, global_index_collector)
.await
.map_err(Box::new)
.context(OpenLogStoreSnafu)
.map(Arc::new)
}
/// Builds [`GlobalIndexCollector`]
fn build_global_index_collector(
dump_index_interval: Duration,
operator: object_store::ObjectStore,
path: String,
) -> GlobalIndexCollector {
GlobalIndexCollector::new(dump_index_interval, operator, path)
}
}
/// Open all regions belong to this datanode.


@@ -395,6 +395,20 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to setup plugin"))]
SetupPlugin {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to start plugin"))]
StartPlugin {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -442,9 +456,12 @@ impl ErrorExt for Error {
AsyncTaskExecute { source, .. } => source.status_code(),
CreateDir { .. } | RemoveDir { .. } | ShutdownInstance { .. } | DataFusion { .. } => {
StatusCode::Internal
}
CreateDir { .. }
| RemoveDir { .. }
| ShutdownInstance { .. }
| DataFusion { .. }
| SetupPlugin { .. }
| StartPlugin { .. } => StatusCode::Internal,
RegionNotFound { .. } => StatusCode::RegionNotFound,
RegionNotReady { .. } => StatusCode::RegionNotReady,


@@ -29,18 +29,18 @@ use common_telemetry::{info, warn};
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
use object_store::services::Fs;
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
use object_store::{Error, HttpClient, ObjectStore, ObjectStoreBuilder};
use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder};
use snafu::prelude::*;
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
use crate::error::{self, Result};
pub(crate) async fn new_object_store(
store: ObjectStoreConfig,
pub(crate) async fn new_raw_object_store(
store: &ObjectStoreConfig,
data_home: &str,
) -> Result<ObjectStore> {
let data_home = normalize_dir(data_home);
let object_store = match &store {
let object_store = match store {
ObjectStoreConfig::File(file_config) => {
fs::new_fs_object_store(&data_home, file_config).await
}
@@ -51,27 +51,61 @@ pub(crate) async fn new_object_store(
}
ObjectStoreConfig::Gcs(gcs_config) => gcs::new_gcs_object_store(gcs_config).await,
}?;
Ok(object_store)
}
fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
object_store.layer(
RetryLayer::new()
.with_jitter()
.with_notify(PrintDetailedError),
)
}
pub(crate) async fn new_object_store_without_cache(
store: &ObjectStoreConfig,
data_home: &str,
) -> Result<ObjectStore> {
let object_store = new_raw_object_store(store, data_home).await?;
// Enable retry layer and cache layer for non-fs object storages
let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
let object_store = create_object_store_with_cache(object_store, &store).await?;
object_store.layer(
RetryLayer::new()
.with_jitter()
.with_notify(PrintDetailedError),
)
// Adds retry layer
with_retry_layers(object_store)
} else {
object_store
};
let store = with_instrument_layers(object_store, true);
Ok(store)
let object_store = with_instrument_layers(object_store, true);
Ok(object_store)
}
async fn create_object_store_with_cache(
object_store: ObjectStore,
store_config: &ObjectStoreConfig,
pub(crate) async fn new_object_store(
store: ObjectStoreConfig,
data_home: &str,
) -> Result<ObjectStore> {
let object_store = new_raw_object_store(&store, data_home).await?;
// Enable retry layer and cache layer for non-fs object storages
let object_store = if !matches!(store, ObjectStoreConfig::File(..)) {
let object_store = if let Some(cache_layer) = build_cache_layer(&store).await? {
// Adds cache layer
object_store.layer(cache_layer)
} else {
object_store
};
// Adds retry layer
with_retry_layers(object_store)
} else {
object_store
};
let object_store = with_instrument_layers(object_store, true);
Ok(object_store)
}
async fn build_cache_layer(
store_config: &ObjectStoreConfig,
) -> Result<Option<LruCacheLayer<impl Access>>> {
let (cache_path, cache_capacity) = match store_config {
ObjectStoreConfig::S3(s3_config) => {
let path = s3_config.cache.cache_path.as_ref();
@@ -112,11 +146,11 @@ async fn create_object_store_with_cache(
let atomic_temp_dir = join_dir(path, ".tmp/");
clean_temp_dir(&atomic_temp_dir)?;
let cache_store = {
let mut builder = Fs::default();
builder.root(path).atomic_write_dir(&atomic_temp_dir);
builder.build().context(error::InitBackendSnafu)?
};
let cache_store = Fs::default()
.root(path)
.atomic_write_dir(&atomic_temp_dir)
.build()
.context(error::InitBackendSnafu)?;
let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize)
.await
@@ -127,9 +161,9 @@ async fn create_object_store_with_cache(
path, cache_capacity
);
Ok(object_store.layer(cache_layer))
Ok(Some(cache_layer))
} else {
Ok(object_store)
Ok(None)
}
}
@@ -175,7 +209,6 @@ pub(crate) fn build_http_client() -> Result<HttpClient> {
HttpClient::build(http_builder).context(error::InitBackendSnafu)
}
struct PrintDetailedError;
// PrintDetailedError is a retry interceptor that prints the error in Debug format when retrying.


@@ -30,8 +30,7 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
azblob_config.container, &root
);
let mut builder = Azblob::default();
let _ = builder
let mut builder = Azblob::default()
.root(&root)
.container(&azblob_config.container)
.endpoint(&azblob_config.endpoint)
@@ -40,8 +39,8 @@ pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Res
.http_client(build_http_client()?);
if let Some(token) = &azblob_config.sas_token {
let _ = builder.sas_token(token);
}
builder = builder.sas_token(token);
};
Ok(ObjectStore::new(builder)
.context(error::InitBackendSnafu)?


@@ -35,8 +35,9 @@ pub(crate) async fn new_fs_object_store(
let atomic_write_dir = join_dir(data_home, ".tmp/");
store::clean_temp_dir(&atomic_write_dir)?;
let mut builder = Fs::default();
let _ = builder.root(data_home).atomic_write_dir(&atomic_write_dir);
let builder = Fs::default()
.root(data_home)
.atomic_write_dir(&atomic_write_dir);
let object_store = ObjectStore::new(builder)
.context(error::InitBackendSnafu)?


@@ -29,12 +29,12 @@ pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<Objec
gcs_config.bucket, &root
);
let mut builder = Gcs::default();
builder
let builder = Gcs::default()
.root(&root)
.bucket(&gcs_config.bucket)
.scope(&gcs_config.scope)
.credential_path(gcs_config.credential_path.expose_secret())
.credential(gcs_config.credential.expose_secret())
.endpoint(&gcs_config.endpoint)
.http_client(build_http_client()?);


@@ -29,8 +29,7 @@ pub(crate) async fn new_oss_object_store(oss_config: &OssConfig) -> Result<Objec
oss_config.bucket, &root
);
let mut builder = Oss::default();
let _ = builder
let builder = Oss::default()
.root(&root)
.bucket(&oss_config.bucket)
.endpoint(&oss_config.endpoint)


@@ -30,8 +30,7 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
s3_config.bucket, &root
);
let mut builder = S3::default();
let _ = builder
let mut builder = S3::default()
.root(&root)
.bucket(&s3_config.bucket)
.access_key_id(s3_config.access_key_id.expose_secret())
@@ -39,11 +38,11 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
.http_client(build_http_client()?);
if s3_config.endpoint.is_some() {
let _ = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
}
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
};
if s3_config.region.is_some() {
let _ = builder.region(s3_config.region.as_ref().unwrap());
}
builder = builder.region(s3_config.region.as_ref().unwrap());
};
Ok(ObjectStore::new(builder)
.context(error::InitBackendSnafu)?


@@ -111,6 +111,24 @@ macro_rules! define_duration_with_unit {
val.0.value()
}
}
impl TryFrom<Value> for Option<[<Duration $unit>]> {
type Error = $crate::error::Error;
#[inline]
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
match from {
Value::Duration(v) if v.unit() == TimeUnit::$unit => {
Ok(Some([<Duration $unit>](v)))
},
Value::Null => Ok(None),
_ => $crate::error::TryFromValueSnafu {
reason: format!("{:?} is not a {}", from, stringify!([<Duration $unit>])),
}
.fail(),
}
}
}
}
};
}


@@ -106,6 +106,24 @@ macro_rules! define_interval_with_unit {
val.0.[<to_ $native_ty>]()
}
}
impl TryFrom<Value> for Option<[<Interval $unit>]> {
type Error = $crate::error::Error;
#[inline]
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
match from {
Value::Interval(v) if v.unit() == common_time::interval::IntervalUnit::$unit => {
Ok(Some([<Interval $unit>](v)))
},
Value::Null => Ok(None),
_ => $crate::error::TryFromValueSnafu {
reason: format!("{:?} is not a {}", from, stringify!([<Interval $unit>])),
}
.fail(),
}
}
}
}
};
}


@@ -109,6 +109,24 @@ macro_rules! define_time_with_unit {
val.0.value()
}
}
impl TryFrom<Value> for Option<[<Time $unit>]> {
type Error = $crate::error::Error;
#[inline]
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
match from {
Value::Time(v) if *v.unit() == TimeUnit::$unit => {
Ok(Some([<Time $unit>](v)))
},
Value::Null => Ok(None),
_ => $crate::error::TryFromValueSnafu {
reason: format!("{:?} is not a {}", from, stringify!([<Time $unit>])),
}
.fail(),
}
}
}
}
};
}


@@ -111,6 +111,24 @@ macro_rules! define_timestamp_with_unit {
val.0.value()
}
}
impl TryFrom<Value> for Option<[<Timestamp $unit>]> {
type Error = $crate::error::Error;
#[inline]
fn try_from(from: Value) -> std::result::Result<Self, Self::Error> {
match from {
Value::Timestamp(v) if v.unit() == TimeUnit::$unit => {
Ok(Some([<Timestamp $unit>](v)))
},
Value::Null => Ok(None),
_ => $crate::error::TryFromValueSnafu {
reason: format!("{:?} is not a {}", from, stringify!([<Timestamp $unit>])),
}
.fail(),
}
}
}
}
};
}
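// Illustrative usage (not part of this diff): the impls generated by these macros let a
// possibly-NULL `Value` convert to an `Option` wrapper in one step; the same pattern is
// produced for the duration, interval, time, and timestamp units. Assumes this sits in
// the same module as the macro invocations, where `TimestampSecond` is defined.
fn value_to_second_timestamp(
    value: Value,
) -> std::result::Result<Option<TimestampSecond>, crate::error::Error> {
    Option::<TimestampSecond>::try_from(value)
}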


@@ -27,6 +27,7 @@ use snafu::{OptionExt, ResultExt};
use crate::data_type::ConcreteDataType;
use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Result};
use crate::prelude::DataType;
use crate::scalars::{Scalar, ScalarVectorBuilder};
use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{
@@ -367,6 +368,16 @@ impl Helper {
})
}
/// Try to cast a slice of values into a vector, failing if the type is not the same across all values.
pub fn try_from_row_into_vector(row: &[Value], dt: &ConcreteDataType) -> Result<VectorRef> {
let mut builder = dt.create_mutable_vector(row.len());
for val in row {
builder.try_push_value_ref(val.as_value_ref())?;
}
let vector = builder.to_vector();
Ok(vector)
}
/// Try to cast slice of `arrays` to vectors.
pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
arrays.iter().map(Self::try_into_vector).collect()
@@ -681,4 +692,48 @@ mod tests {
assert_eq!(Value::Interval(Interval::from_i128(2000)), vector.get(i));
}
}
fn check_try_from_row_to_vector(row: Vec<Value>, dt: &ConcreteDataType) {
let vector = Helper::try_from_row_into_vector(&row, dt).unwrap();
for (i, item) in row.iter().enumerate().take(vector.len()) {
assert_eq!(*item, vector.get(i));
}
}
fn check_into_and_from(array: impl Array + 'static) {
let array: ArrayRef = Arc::new(array);
let vector = Helper::try_into_vector(array.clone()).unwrap();
assert_eq!(&array, &vector.to_arrow_array());
let row: Vec<Value> = (0..array.len()).map(|i| vector.get(i)).collect();
let dt = vector.data_type();
check_try_from_row_to_vector(row, &dt);
}
#[test]
fn test_try_from_row_to_vector() {
check_into_and_from(NullArray::new(2));
check_into_and_from(BooleanArray::from(vec![true, false]));
check_into_and_from(Int8Array::from(vec![1, 2, 3]));
check_into_and_from(Int16Array::from(vec![1, 2, 3]));
check_into_and_from(Int32Array::from(vec![1, 2, 3]));
check_into_and_from(Int64Array::from(vec![1, 2, 3]));
check_into_and_from(UInt8Array::from(vec![1, 2, 3]));
check_into_and_from(UInt16Array::from(vec![1, 2, 3]));
check_into_and_from(UInt32Array::from(vec![1, 2, 3]));
check_into_and_from(UInt64Array::from(vec![1, 2, 3]));
check_into_and_from(Float32Array::from(vec![1.0, 2.0, 3.0]));
check_into_and_from(Float64Array::from(vec![1.0, 2.0, 3.0]));
check_into_and_from(StringArray::from(vec!["hello", "world"]));
check_into_and_from(Date32Array::from(vec![1, 2, 3]));
check_into_and_from(Date64Array::from(vec![1, 2, 3]));
check_into_and_from(TimestampSecondArray::from(vec![1, 2, 3]));
check_into_and_from(TimestampMillisecondArray::from(vec![1, 2, 3]));
check_into_and_from(TimestampMicrosecondArray::from(vec![1, 2, 3]));
check_into_and_from(TimestampNanosecondArray::from(vec![1, 2, 3]));
check_into_and_from(Time32SecondArray::from(vec![1, 2, 3]));
check_into_and_from(Time32MillisecondArray::from(vec![1, 2, 3]));
check_into_and_from(Time64MicrosecondArray::from(vec![1, 2, 3]));
check_into_and_from(Time64NanosecondArray::from(vec![1, 2, 3]));
}
}


@@ -90,7 +90,8 @@ impl RegionEngine for FileRegionEngine {
request: ScanRequest,
) -> Result<RegionScannerRef, BoxedError> {
let stream = self.handle_query(region_id, request).await?;
let scanner = Box::new(SinglePartitionScanner::new(stream));
// We don't support enabling append mode for file engine.
let scanner = Box::new(SinglePartitionScanner::new(stream, false));
Ok(scanner)
}


@@ -26,8 +26,7 @@ use store_api::metadata::ColumnMetadata;
pub fn new_test_object_store(prefix: &str) -> (TempDir, ObjectStore) {
let dir = create_temp_dir(prefix);
let store_dir = dir.path().to_string_lossy();
let mut builder = Fs::default();
let _ = builder.root(&store_dir);
let builder = Fs::default().root(&store_dir);
(dir, ObjectStore::new(builder).unwrap().finish())
}


@@ -9,6 +9,7 @@ workspace = true
[dependencies]
api.workspace = true
arrow.workspace = true
arrow-schema.workspace = true
async-recursion = "1.0"
async-trait.workspace = true
@@ -44,12 +45,14 @@ greptime-proto.workspace = true
# otherwise it is the same with upstream repo
hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "main" }
itertools.workspace = true
lazy_static.workspace = true
meta-client.workspace = true
minstant = "0.1.7"
nom = "7.1.3"
num-traits = "0.2"
operator.workspace = true
partition.workspace = true
prometheus.workspace = true
prost.workspace = true
query.workspace = true
serde.workspace = true


@@ -51,6 +51,9 @@ use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::error::{ExternalSnafu, InternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
use crate::expr::GlobalId;
use crate::metrics::{
METRIC_FLOW_INPUT_BUF_SIZE, METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_RUN_INTERVAL_MS,
};
use crate::repr::{self, DiffRow, Row, BATCH_SIZE};
use crate::transform::sql_to_flow_plan;
@@ -193,6 +196,15 @@ pub enum DiffRequest {
Delete(Vec<(Row, repr::Timestamp)>),
}
impl DiffRequest {
pub fn len(&self) -> usize {
match self {
Self::Insert(v) => v.len(),
Self::Delete(v) => v.len(),
}
}
}
/// Iterate through the diff rows and group consecutive rows with the same diff type into requests
pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
let mut reqs = Vec::new();
@@ -544,6 +556,7 @@ impl FlowWorkerManager {
let new_wait = BATCH_SIZE * 1000 / avg_spd.max(1); //in ms
let new_wait = Duration::from_millis(new_wait as u64).min(default_interval);
trace!("Wait for {} ms, row_cnt={}", new_wait.as_millis(), row_cnt);
METRIC_FLOW_RUN_INTERVAL_MS.set(new_wait.as_millis() as i64);
since_last_run = tokio::time::Instant::now();
tokio::time::sleep(new_wait).await;
}
@@ -575,7 +588,7 @@ impl FlowWorkerManager {
}
}
// check row send and rows remain in send buf
let (flush_res, buf_len) = if blocking {
let (flush_res, _buf_len) = if blocking {
let ctx = self.node_context.read().await;
(ctx.flush_all_sender().await, ctx.get_send_buf_size().await)
} else {
@@ -585,16 +598,19 @@ impl FlowWorkerManager {
}
};
match flush_res {
Ok(r) => row_cnt += r,
Ok(r) => {
common_telemetry::trace!("Flushed {} rows", r);
row_cnt += r;
// send buf is likely to be somewhere empty now, wait
if r < BATCH_SIZE / 2 {
break;
}
}
Err(err) => {
common_telemetry::error!("Flush send buf errors: {:?}", err);
break;
}
};
// if not enough rows, break
if buf_len < BATCH_SIZE {
break;
}
}
Ok(row_cnt)
@@ -606,13 +622,17 @@ impl FlowWorkerManager {
region_id: RegionId,
rows: Vec<DiffRow>,
) -> Result<(), Error> {
debug!(
"Handling write request for region_id={:?} with {} rows",
region_id,
rows.len()
);
let rows_len = rows.len();
let table_id = region_id.table_id();
METRIC_FLOW_INPUT_BUF_SIZE.add(rows_len as _);
let _timer = METRIC_FLOW_INSERT_ELAPSED
.with_label_values(&[table_id.to_string().as_str()])
.start_timer();
self.node_context.read().await.send(table_id, rows).await?;
debug!(
"Handling write request for table_id={} with {} rows",
table_id, rows_len
);
Ok(())
}
}


@@ -30,6 +30,7 @@ use store_api::storage::RegionId;
use crate::adapter::FlowWorkerManager;
use crate::error::InternalSnafu;
use crate::metrics::METRIC_FLOW_TASK_COUNT;
use crate::repr::{self, DiffRow};
fn to_meta_err(err: crate::error::Error) -> common_meta::error::Error {
@@ -78,6 +79,7 @@ impl Flownode for FlowWorkerManager {
)
.await
.map_err(to_meta_err)?;
METRIC_FLOW_TASK_COUNT.inc();
Ok(FlowResponse {
affected_flows: ret
.map(|id| greptime_proto::v1::FlowId { id: id as u32 })
@@ -92,6 +94,7 @@ impl Flownode for FlowWorkerManager {
self.remove_flow(flow_id.id as u64)
.await
.map_err(to_meta_err)?;
METRIC_FLOW_TASK_COUNT.dec();
Ok(Default::default())
}
Some(flow_request::Body::Flush(FlushFlow {


@@ -15,6 +15,7 @@
//! Node context, prone to change with every incoming request
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::atomic::AtomicUsize;
use std::sync::Arc;
use common_telemetry::debug;
@@ -27,7 +28,8 @@ use crate::adapter::{FlowId, TableName, TableSource};
use crate::error::{Error, EvalSnafu, TableNotFoundSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::GlobalId;
use crate::repr::{DiffRow, RelationDesc, BROADCAST_CAP};
use crate::metrics::METRIC_FLOW_INPUT_BUF_SIZE;
use crate::repr::{DiffRow, RelationDesc, BROADCAST_CAP, SEND_BUF_CAP};
/// A context that holds the information of the dataflow
#[derive(Default, Debug)]
@@ -67,18 +69,20 @@ pub struct FlownodeContext {
pub struct SourceSender {
// TODO(discord9): make it all Vec<DiffRow>?
sender: broadcast::Sender<DiffRow>,
send_buf_tx: mpsc::UnboundedSender<Vec<DiffRow>>,
send_buf_rx: RwLock<mpsc::UnboundedReceiver<Vec<DiffRow>>>,
send_buf_tx: mpsc::Sender<Vec<DiffRow>>,
send_buf_rx: RwLock<mpsc::Receiver<Vec<DiffRow>>>,
send_buf_row_cnt: AtomicUsize,
}
impl Default for SourceSender {
fn default() -> Self {
let (send_buf_tx, send_buf_rx) = mpsc::unbounded_channel();
let (send_buf_tx, send_buf_rx) = mpsc::channel(SEND_BUF_CAP);
Self {
// TODO(discord9): find a better way than increasing this to prevent lagging and hence missing input data
sender: broadcast::Sender::new(BROADCAST_CAP * 2),
send_buf_tx,
send_buf_rx: RwLock::new(send_buf_rx),
send_buf_row_cnt: AtomicUsize::new(0),
}
}
}
@@ -94,15 +98,18 @@ impl SourceSender {
/// until the send buf is empty or the broadcast channel is full
pub async fn try_flush(&self) -> Result<usize, Error> {
let mut row_cnt = 0;
let mut iterations = 0;
while iterations < Self::MAX_ITERATIONS {
loop {
let mut send_buf = self.send_buf_rx.write().await;
// if inner sender channel is empty or send buf is empty, there
// is nothing to do for now, just break
if self.sender.len() >= BROADCAST_CAP || send_buf.is_empty() {
break;
}
// TODO(discord9): send rows instead so it's just moving a point
if let Some(rows) = send_buf.recv().await {
let len = rows.len();
self.send_buf_row_cnt
.fetch_sub(len, std::sync::atomic::Ordering::SeqCst);
for row in rows {
self.sender
.send(row)
@@ -116,10 +123,10 @@ impl SourceSender {
row_cnt += 1;
}
}
iterations += 1;
}
if row_cnt > 0 {
debug!("Send {} rows", row_cnt);
METRIC_FLOW_INPUT_BUF_SIZE.sub(row_cnt as _);
debug!(
"Remaining Send buf.len() = {}",
self.send_buf_rx.read().await.len()
@@ -131,13 +138,12 @@ impl SourceSender {
/// return number of rows it actual send(including what's in the buffer)
pub async fn send_rows(&self, rows: Vec<DiffRow>) -> Result<usize, Error> {
self.send_buf_tx.send(rows).map_err(|e| {
self.send_buf_tx.send(rows).await.map_err(|e| {
crate::error::InternalSnafu {
reason: format!("Failed to send row, error = {:?}", e),
}
.build()
})?;
Ok(0)
}
}
@@ -153,7 +159,8 @@ impl FlownodeContext {
.with_context(|| TableNotFoundSnafu {
name: table_id.to_string(),
})?;
// debug!("FlownodeContext::send: trying to send {} rows", rows.len());
debug!("FlownodeContext::send: trying to send {} rows", rows.len());
sender.send_rows(rows).await
}
@@ -169,6 +176,7 @@ impl FlownodeContext {
}
/// Return the total number of rows across all send buffers
/// TODO(discord9): remove this since we can't get correct row cnt anyway
pub async fn get_send_buf_size(&self) -> usize {
let mut sum = 0;
for sender in self.source_sender.values() {


@@ -16,32 +16,21 @@
//!
//! And the [`Context`] is the environment for the render process, it contains all the necessary information for the render process
use std::cell::RefCell;
use std::collections::{BTreeMap, VecDeque};
use std::ops::Range;
use std::rc::Rc;
use std::collections::BTreeMap;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use hydroflow::futures::SinkExt;
use hydroflow::lattices::cc_traits::Get;
use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use snafu::OptionExt;
use super::state::Scheduler;
use crate::compute::state::DataflowState;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, EvalSnafu, InvalidQuerySnafu, NotImplementedSnafu, PlanSnafu};
use crate::expr::error::{DataTypeSnafu, InternalSnafu};
use crate::expr::{
self, EvalError, GlobalId, LocalId, MapFilterProject, MfpPlan, SafeMfpPlan, ScalarExpr,
};
use crate::plan::{AccumulablePlan, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, Arrangement};
use crate::compute::types::{Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu};
use crate::expr::{self, GlobalId, LocalId};
use crate::plan::{Plan, TypedPlan};
use crate::repr::{self, DiffRow};
mod map;
mod reduce;
@@ -218,20 +207,17 @@ mod test {
use std::cell::RefCell;
use std::rc::Rc;
use common_time::DateTime;
use datatypes::data_type::ConcreteDataType;
use hydroflow::scheduled::graph::Hydroflow;
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::handoff::VecHandoff;
use pretty_assertions::{assert_eq, assert_ne};
use pretty_assertions::assert_eq;
use super::*;
use crate::expr::BinaryFunc;
use crate::repr::Row;
pub fn run_and_check(
state: &mut DataflowState,
df: &mut Hydroflow,
time_range: Range<i64>,
time_range: std::ops::Range<i64>,
expected: BTreeMap<i64, Vec<DiffRow>>,
output: Rc<RefCell<Vec<DiffRow>>>,
) {


@@ -24,7 +24,7 @@ use crate::compute::state::Scheduler;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::{EvalError, MapFilterProject, MfpPlan, ScalarExpr};
use crate::plan::{Plan, TypedPlan};
use crate::plan::TypedPlan;
use crate::repr::{self, DiffRow, KeyValDiffRow, Row};
use crate::utils::ArrangeHandler;
@@ -206,8 +206,6 @@ fn eval_mfp_core(
#[cfg(test)]
mod test {
use std::cell::RefCell;
use std::rc::Rc;
use datatypes::data_type::ConcreteDataType;
use hydroflow::scheduled::graph::Hydroflow;
@@ -216,6 +214,7 @@ mod test {
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
use crate::compute::state::DataflowState;
use crate::expr::{self, BinaryFunc, GlobalId};
use crate::plan::Plan;
use crate::repr::{ColumnType, RelationType};
/// test if temporal filter works properly


@@ -18,17 +18,15 @@ use std::ops::Range;
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use hydroflow::scheduled::graph_ext::GraphExt;
use hydroflow::scheduled::port::{PortCtx, SEND};
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use crate::compute::render::{Context, SubgraphArg};
use crate::compute::state::Scheduler;
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::error::{DataAlreadyExpiredSnafu, DataTypeSnafu, InternalSnafu};
use crate::expr::{AggregateExpr, EvalError, ScalarExpr};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, Plan, ReducePlan, TypedPlan};
use crate::expr::{EvalError, ScalarExpr};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};
@@ -790,8 +788,6 @@ fn from_val_to_slice_idx(
// TODO(discord9): add tests for accum ser/de
#[cfg(test)]
mod test {
use std::cell::RefCell;
use std::rc::Rc;
use common_time::{DateTime, Interval, Timestamp};
use datatypes::data_type::{ConcreteDataType, ConcreteDataType as CDT};
@@ -800,7 +796,10 @@ mod test {
use super::*;
use crate::compute::render::test::{get_output_handle, harness_test_ctx, run_and_check};
use crate::compute::state::DataflowState;
use crate::expr::{self, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc};
use crate::expr::{
self, AggregateExpr, AggregateFunc, BinaryFunc, GlobalId, MapFilterProject, UnaryFunc,
};
use crate::plan::Plan;
use crate::repr::{ColumnType, RelationType};
/// SELECT sum(number) FROM numbers_with_ts GROUP BY tumble(ts, '1 second', '2021-07-01 00:00:00')


@@ -16,7 +16,7 @@
use std::collections::{BTreeMap, VecDeque};
use common_telemetry::{debug, info};
use common_telemetry::debug;
use hydroflow::scheduled::graph_ext::GraphExt;
use itertools::Itertools;
use snafu::OptionExt;
@@ -27,7 +27,7 @@ use crate::compute::render::Context;
use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff};
use crate::error::{Error, PlanSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::{EvalError, GlobalId};
use crate::expr::EvalError;
use crate::repr::{DiffRow, Row, BROADCAST_CAP};
#[allow(clippy::mutable_key_type)]


@@ -13,7 +13,7 @@
// limitations under the License.
use std::cell::RefCell;
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::collections::{BTreeMap, VecDeque};
use std::rc::Rc;
use hydroflow::scheduled::graph::Hydroflow;


@@ -22,12 +22,11 @@ use hydroflow::scheduled::handoff::TeeingHandoff;
use hydroflow::scheduled::port::RecvPort;
use hydroflow::scheduled::SubgraphId;
use itertools::Itertools;
use tokio::sync::{Mutex, RwLock};
use tokio::sync::Mutex;
use crate::compute::render::Context;
use crate::expr::{EvalError, ScalarExpr};
use crate::repr::DiffRow;
use crate::utils::{ArrangeHandler, Arrangement};
use crate::utils::ArrangeHandler;
pub type Toff<T = DiffRow> = TeeingHandoff<T>;


@@ -83,6 +83,14 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to list flows in flownode={id:?}"))]
ListFlows {
id: Option<common_meta::FlownodeId>,
source: common_meta::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Flow already exist, id={id}"))]
FlowAlreadyExist {
id: FlowId,
@@ -214,7 +222,8 @@ impl ErrorExt for Error {
}
Self::TableNotFound { .. }
| Self::TableNotFoundMeta { .. }
| Self::FlowNotFound { .. } => StatusCode::TableNotFound,
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::InvalidQueryProst { .. }
| &Self::InvalidQuery { .. }
| &Self::Plan { .. }


@@ -14,6 +14,7 @@
//! for declare Expression in dataflow, including map, reduce, id and join(TODO!) etc.
mod df_func;
pub(crate) mod error;
mod func;
mod id;
@@ -22,9 +23,92 @@ mod relation;
mod scalar;
mod signature;
pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
use datatypes::prelude::DataType;
use datatypes::vectors::VectorRef;
pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn};
pub(crate) use error::{EvalError, InvalidArgumentSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
use itertools::Itertools;
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use scalar::{DfScalarFunction, RawDfScalarFn, ScalarExpr, TypedExpr};
pub(crate) use scalar::{ScalarExpr, TypedExpr};
use snafu::{ensure, ResultExt};
use crate::expr::error::DataTypeSnafu;
/// A batch of vectors with the same length but without schema, only useful in dataflow
pub struct Batch {
batch: Vec<VectorRef>,
row_count: usize,
}
impl Batch {
pub fn new(batch: Vec<VectorRef>, row_count: usize) -> Self {
Self { batch, row_count }
}
pub fn batch(&self) -> &[VectorRef] {
&self.batch
}
pub fn row_count(&self) -> usize {
self.row_count
}
/// Slices the `Batch`, returning a new `Batch`.
///
/// # Panics
/// This function panics if `offset + length > self.row_count()`.
pub fn slice(&self, offset: usize, length: usize) -> Batch {
let batch = self
.batch()
.iter()
.map(|v| v.slice(offset, length))
.collect_vec();
Batch::new(batch, length)
}
/// append another batch to self
pub fn append_batch(&mut self, other: Batch) -> Result<(), EvalError> {
ensure!(
self.batch.len() == other.batch.len(),
InvalidArgumentSnafu {
reason: format!(
"Expect two batch to have same numbers of column, found {} and {} columns",
self.batch.len(),
other.batch.len()
)
}
);
let batch_builders = self
.batch
.iter()
.map(|v| {
v.data_type()
.create_mutable_vector(self.row_count() + other.row_count())
})
.collect_vec();
let mut result = vec![];
let zelf_row_count = self.row_count();
let other_row_count = other.row_count();
for (idx, mut builder) in batch_builders.into_iter().enumerate() {
builder
.extend_slice_of(self.batch()[idx].as_ref(), 0, zelf_row_count)
.context(DataTypeSnafu {
msg: "Failed to extend vector",
})?;
builder
.extend_slice_of(other.batch()[idx].as_ref(), 0, other_row_count)
.context(DataTypeSnafu {
msg: "Failed to extend vector",
})?;
result.push(builder.to_vector());
}
self.batch = result;
self.row_count = zelf_row_count + other_row_count;
Ok(())
}
}
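// Illustrative sketch (not part of this diff): `slice` produces a copy with a consistent
// `row_count`, while `append_batch` refuses to merge batches whose column counts differ.
fn concat_batches(first: &Batch, second: Batch) -> Result<Batch, EvalError> {
    let mut merged = first.slice(0, first.row_count());
    merged.append_batch(second)?;
    Ok(merged)
}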


@@ -0,0 +1,293 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Porting Datafusion scalar function to our scalar function to be used in dataflow
use std::sync::Arc;
use arrow::array::RecordBatchOptions;
use bytes::BytesMut;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::debug;
use datafusion_physical_expr::PhysicalExpr;
use datatypes::data_type::DataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use prost::Message;
use snafu::{IntoError, ResultExt};
use substrait::error::{DecodeRelSnafu, EncodeRelSnafu};
use substrait::substrait_proto_df::proto::expression::ScalarFunction;
use crate::error::Error;
use crate::expr::error::{
ArrowSnafu, DatafusionSnafu as EvalDatafusionSnafu, EvalError, ExternalSnafu,
InvalidArgumentSnafu,
};
use crate::expr::{Batch, ScalarExpr};
use crate::repr::RelationDesc;
use crate::transform::{from_scalar_fn_to_df_fn_impl, FunctionExtensions};
/// A way to represent a scalar function that is implemented in Datafusion
#[derive(Debug, Clone)]
pub struct DfScalarFunction {
/// The raw bytes encoded datafusion scalar function
pub(crate) raw_fn: RawDfScalarFn,
// TODO(discord9): directly from datafusion expr
/// The implementation of the function
pub(crate) fn_impl: Arc<dyn PhysicalExpr>,
/// The input schema of the function
pub(crate) df_schema: Arc<datafusion_common::DFSchema>,
}
impl DfScalarFunction {
pub fn new(raw_fn: RawDfScalarFn, fn_impl: Arc<dyn PhysicalExpr>) -> Result<Self, Error> {
Ok(Self {
df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
raw_fn,
fn_impl,
})
}
pub async fn try_from_raw_fn(raw_fn: RawDfScalarFn) -> Result<Self, Error> {
Ok(Self {
fn_impl: raw_fn.get_fn_impl().await?,
df_schema: Arc::new(raw_fn.input_schema.to_df_schema()?),
raw_fn,
})
}
/// Evaluate a batch of expressions using input values
pub fn eval_batch(&self, batch: &Batch, exprs: &[ScalarExpr]) -> Result<VectorRef, EvalError> {
let row_count = batch.row_count();
let batch: Vec<_> = exprs
.iter()
.map(|expr| expr.eval_batch(batch))
.collect::<Result<_, _>>()?;
let schema = self.df_schema.inner().clone();
let arrays = batch
.iter()
.map(|array| array.to_arrow_array())
.collect::<Vec<_>>();
let rb = DfRecordBatch::try_new_with_options(schema, arrays, &RecordBatchOptions::new().with_row_count(Some(row_count))).map_err(|err| {
ArrowSnafu {
context:
"Failed to create RecordBatch from values when eval_batch datafusion scalar function",
}
.into_error(err)
})?;
let len = rb.num_rows();
let res = self.fn_impl.evaluate(&rb).map_err(|err| {
EvalDatafusionSnafu {
raw: err,
context: "Failed to evaluate datafusion scalar function",
}
.build()
})?;
let res = common_query::columnar_value::ColumnarValue::try_from(&res)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let res_vec = res
.try_into_vector(len)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
Ok(res_vec)
}
/// eval a list of expressions using input values
fn eval_args(values: &[Value], exprs: &[ScalarExpr]) -> Result<Vec<Value>, EvalError> {
exprs
.iter()
.map(|expr| expr.eval(values))
.collect::<Result<_, _>>()
}
// TODO(discord9): add RecordBatch support
pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result<Value, EvalError> {
// first eval exprs to construct values to feed to datafusion
let values: Vec<_> = Self::eval_args(values, exprs)?;
if values.is_empty() {
return InvalidArgumentSnafu {
reason: "values is empty".to_string(),
}
.fail();
}
// TODO(discord9): make cols all array length of one
let mut cols = vec![];
for (idx, typ) in self
.raw_fn
.input_schema
.typ()
.column_types
.iter()
.enumerate()
{
let typ = typ.scalar_type();
let mut array = typ.create_mutable_vector(1);
array.push_value_ref(values[idx].as_value_ref());
cols.push(array.to_vector().to_arrow_array());
}
let schema = self.df_schema.inner().clone();
let rb = DfRecordBatch::try_new_with_options(
schema,
cols,
&RecordBatchOptions::new().with_row_count(Some(1)),
)
.map_err(|err| {
ArrowSnafu {
context:
"Failed to create RecordBatch from values when eval datafusion scalar function",
}
.into_error(err)
})?;
let res = self.fn_impl.evaluate(&rb).map_err(|err| {
EvalDatafusionSnafu {
raw: err,
context: "Failed to evaluate datafusion scalar function",
}
.build()
})?;
let res = common_query::columnar_value::ColumnarValue::try_from(&res)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let res_vec = res
.try_into_vector(1)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let res_val = res_vec
.try_get(0)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
Ok(res_val)
}
}
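// Illustrative usage (not part of this diff): `eval_batch` mirrors `eval` but runs the
// wrapped Datafusion expression over every row of a `Batch` at once; assuming here that
// the function's single argument comes from column 0.
fn eval_over_batch(func: &DfScalarFunction, batch: &Batch) -> Result<VectorRef, EvalError> {
    func.eval_batch(batch, &[ScalarExpr::Column(0)])
}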
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawDfScalarFn {
/// The raw bytes encoded datafusion scalar function
pub(crate) f: bytes::BytesMut,
/// The input schema of the function
pub(crate) input_schema: RelationDesc,
/// Extension contains mapping from function reference to function name
pub(crate) extensions: FunctionExtensions,
}
impl RawDfScalarFn {
pub fn from_proto(
f: &substrait::substrait_proto_df::proto::expression::ScalarFunction,
input_schema: RelationDesc,
extensions: FunctionExtensions,
) -> Result<Self, Error> {
let mut buf = BytesMut::new();
f.encode(&mut buf)
.context(EncodeRelSnafu)
.map_err(BoxedError::new)
.context(crate::error::ExternalSnafu)?;
Ok(Self {
f: buf,
input_schema,
extensions,
})
}
async fn get_fn_impl(&self) -> Result<Arc<dyn PhysicalExpr>, Error> {
let f = ScalarFunction::decode(&mut self.f.as_ref())
.context(DecodeRelSnafu)
.map_err(BoxedError::new)
.context(crate::error::ExternalSnafu)?;
debug!("Decoded scalar function: {:?}", f);
let input_schema = &self.input_schema;
let extensions = &self.extensions;
from_scalar_fn_to_df_fn_impl(&f, input_schema, extensions).await
}
}
impl std::cmp::PartialEq for DfScalarFunction {
fn eq(&self, other: &Self) -> bool {
self.raw_fn.eq(&other.raw_fn)
}
}
// can't derive Eq because Arc<dyn PhysicalExpr> doesn't implement Eq, so implement it manually
impl std::cmp::Eq for DfScalarFunction {}
impl std::cmp::PartialOrd for DfScalarFunction {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl std::cmp::Ord for DfScalarFunction {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.raw_fn.cmp(&other.raw_fn)
}
}
impl std::hash::Hash for DfScalarFunction {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.raw_fn.hash(state);
}
}
#[cfg(test)]
mod test {
use datatypes::prelude::ConcreteDataType;
use substrait::substrait_proto_df::proto::expression::literal::LiteralType;
use substrait::substrait_proto_df::proto::expression::{Literal, RexType};
use substrait::substrait_proto_df::proto::function_argument::ArgType;
use substrait::substrait_proto_df::proto::{Expression, FunctionArgument};
use super::*;
use crate::repr::{ColumnType, RelationType};
#[tokio::test]
async fn test_df_scalar_function() {
let raw_scalar_func = ScalarFunction {
function_reference: 0,
arguments: vec![FunctionArgument {
arg_type: Some(ArgType::Value(Expression {
rex_type: Some(RexType::Literal(Literal {
nullable: false,
type_variation_reference: 0,
literal_type: Some(LiteralType::I64(-1)),
})),
})),
}],
output_type: None,
..Default::default()
};
let input_schema = RelationDesc::try_new(
RelationType::new(vec![ColumnType::new_nullable(
ConcreteDataType::null_datatype(),
)]),
vec!["null_column".to_string()],
)
.unwrap();
let extensions = FunctionExtensions::from_iter(vec![(0, "abs")]);
let raw_fn = RawDfScalarFn::from_proto(&raw_scalar_func, input_schema, extensions).unwrap();
let df_func = DfScalarFunction::try_from_raw_fn(raw_fn).await.unwrap();
assert_eq!(
df_func
.eval(&[Value::Null], &[ScalarExpr::Column(0)])
.unwrap(),
Value::Int64(1)
);
}
}

Some files were not shown because too many files have changed in this diff.