ingest jsonbench data

parse partial struct json datatype in create sql
chore: expose symbols (#7451 )
2025-12-23 06:30:05 +00:00 · 2025-12-22 19:32:03 +08:00 · 2025-12-22 05:39:03 +00:00 · 2025-12-22 05:13:39 +00:00 · 2025-12-19 13:16:15 +00:00 · 2025-12-19 07:36:44 +00:00
96 changed files with 3889 additions and 1293 deletions
--- a/.github/scripts/create-version.sh
+++ b/.github/scripts/create-version.sh
@@ -49,6 +49,17 @@ function create_version() {
      echo "GITHUB_REF_NAME is empty in push event" >&2
      exit 1
    fi
+    
+    # For tag releases, ensure GITHUB_REF_NAME matches the version in Cargo.toml
+    CARGO_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
+    EXPECTED_REF_NAME="v${CARGO_VERSION}"
+    
+    if [ "$GITHUB_REF_NAME" != "$EXPECTED_REF_NAME" ]; then
+      echo "Error: GITHUB_REF_NAME '$GITHUB_REF_NAME' does not match Cargo.toml version 'v${CARGO_VERSION}'" >&2
+      echo "Expected tag name: '$EXPECTED_REF_NAME'" >&2
+      exit 1
+    fi
+    
    echo "$GITHUB_REF_NAME"
  elif [ "$GITHUB_EVENT_NAME" = workflow_dispatch ]; then
    echo "$NEXT_RELEASE_VERSION-$(git rev-parse --short HEAD)-$(date "+%Y%m%d-%s")"
--- a/.github/workflows/check-git-deps.yml
+++ b/.github/workflows/check-git-deps.yml
@@ -0,0 +1,154 @@
+name: Check Git Dependencies on Main Branch
+
+on:
+    pull_request:
+        branches: [main]
+        paths:
+            - 'Cargo.toml'
+    push:
+        branches: [main]
+        paths:
+            - 'Cargo.toml'
+
+jobs:
+    check-git-deps:
+        runs-on: ubuntu-latest
+
+        steps:
+            - name: Checkout repository
+              uses: actions/checkout@v6
+
+            - name: Check git dependencies
+              env:
+                  WHITELIST_DEPS: "greptime-proto,meter-core,meter-macros"
+              run: |
+                  #!/bin/bash
+                  set -e
+
+                  echo "Checking whitelisted git dependencies..."
+
+                  # Check whether a pinned commit is reachable from a repository's main branch.
+                  # Arguments: $1 - git URL to clone; $2 - commit hash to look up.
+                  # Outputs:   progress and PASS/FAIL lines on stdout.
+                  # Returns:   0 if the commit is an ancestor of the main branch, 1 otherwise.
+                  check_commit_on_main() {
+                      local repo_url="$1"
+                      local commit="$2"
+                      # Declared apart from assignment so `local` cannot mask a command failure.
+                      local repo_name temp_dir main_branch branch_name
+                      local status=1
+
+                      repo_name=$(basename "$repo_url" .git)
+                      echo "Checking $repo_name"
+                      echo "Repo: $repo_url"
+                      echo "Commit: $commit"
+
+                      # Clone into a throwaway directory that is removed at the single exit point below.
+                      if ! temp_dir=$(mktemp -d); then
+                          echo "FAIL: Failed to create a temporary directory"
+                          return 1
+                      fi
+
+                      # `git -C` is used instead of `cd` so the caller's working directory never changes.
+                      if git clone "$repo_url" "$temp_dir" 2>/dev/null; then
+                          # Prefer origin/main, then origin/master, then the remote's default branch.
+                          main_branch="main"
+                          if ! git -C "$temp_dir" rev-parse --verify origin/main >/dev/null 2>&1; then
+                              if git -C "$temp_dir" rev-parse --verify origin/master >/dev/null 2>&1; then
+                                  main_branch="master"
+                              else
+                                  # If symbolic-ref fails (origin/HEAD unset), main_branch ends up empty.
+                                  main_branch=$(git -C "$temp_dir" symbolic-ref refs/remotes/origin/HEAD | sed 's@^refs/remotes/origin/@@')
+                              fi
+                          fi
+
+                          echo "Main branch: $main_branch"
+
+                          # An empty name means no default branch could be resolved; report that explicitly.
+                          if [[ -z "$main_branch" ]]; then
+                              echo "FAIL: Could not determine the main branch of $repo_url"
+                          elif ! git -C "$temp_dir" cat-file -e "$commit" 2>/dev/null; then
+                              echo "FAIL: Commit $commit not found in repository"
+                          elif git -C "$temp_dir" merge-base --is-ancestor "$commit" "origin/$main_branch" 2>/dev/null; then
+                              echo "PASS: Commit $commit is on $main_branch branch"
+                              status=0
+                          else
+                              echo "FAIL: Commit $commit is NOT on $main_branch branch"
+
+                              # Best-effort hint: name the first remote branch that contains the commit.
+                              branch_name=$(git -C "$temp_dir" branch -r --contains "$commit" 2>/dev/null | head -1 | sed 's/^[[:space:]]*origin\///' | sed 's/[[:space:]]*$//')
+                              if [[ -n "$branch_name" ]]; then
+                                  echo "Found on branch: $branch_name"
+                              fi
+                          fi
+                      else
+                          echo "FAIL: Failed to clone $repo_url"
+                      fi
+
+                      # Single cleanup point for every outcome above.
+                      rm -rf -- "$temp_dir"
+                      return "$status"
+                  }
+
+                  # Extract whitelisted git dependencies from Cargo.toml
+                  echo "Extracting git dependencies from Cargo.toml..."
+
+                  # Create temporary array to store dependencies
+                  declare -a deps=()
+
+                  # Build an awk alternation (dep1|dep2|...) from the comma-separated whitelist
+                  IFS=',' read -ra WHITELIST <<< "$WHITELIST_DEPS"
+                  awk_pattern=""
+                  for dep in "${WHITELIST[@]}"; do
+                      if [[ -n "$awk_pattern" ]]; then
+                          awk_pattern="$awk_pattern|"
+                      fi
+                      awk_pattern="$awk_pattern$dep"
+                  done
+
+                  # Collect "<git url> <rev>" pairs. The alternation is parenthesized in awk so ".*git = " applies to every whitelisted name, not only the last; 3-argument match() requires GNU awk.
+                  while IFS= read -r line; do
+                      if [[ -n "$line" ]]; then
+                          deps+=("$line")
+                      fi
+                  done < <(awk -v pattern="$awk_pattern" '
+                  $0 ~ "(" pattern ").*git = \"https:/" {
+                      match($0, /git = "([^"]+)"/, arr)
+                      git_url = arr[1]
+                      if (match($0, /rev = "([^"]+)"/, rev_arr)) {
+                          rev = rev_arr[1]
+                          print git_url " " rev
+                      } else {
+                          # Check next line for rev
+                          getline
+                          if (match($0, /rev = "([^"]+)"/, rev_arr)) {
+                              rev = rev_arr[1]
+                              print git_url " " rev
+                          }
+                      }
+                  }
+                  ' Cargo.toml)
+
+                  echo "Found ${#deps[@]} dependencies to check:"
+                  for dep in "${deps[@]}"; do
+                      echo "  $dep"
+                  done
+
+                  failed=0
+
+                  for dep in "${deps[@]}"; do
+                      read -r repo_url commit <<< "$dep"
+                      if ! check_commit_on_main "$repo_url" "$commit"; then
+                          failed=1
+                      fi
+                  done
+
+                  echo "Check completed."
+
+                  if [[ $failed -eq 1 ]]; then
+                      echo "ERROR: Some git dependencies are not on their main branches!"
+                      echo "Please update the commits to point to main branch commits."
+                      exit 1
+                  else
+                      echo "SUCCESS: All git dependencies are on their main branches!"
+                  fi
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1786,6 +1786,7 @@ dependencies = [
 "common-recordbatch",
 "common-runtime",
 "common-telemetry",
+ "common-test-util",
 "common-time",
 "common-version",
 "common-wal",
@@ -2579,10 +2580,12 @@ dependencies = [
 name = "common-sql"
 version = "1.0.0-beta.3"
 dependencies = [
+ "arrow-schema",
 "common-base",
 "common-decimal",
 "common-error",
 "common-macro",
+ "common-telemetry",
 "common-time",
 "datafusion-sql",
 "datatypes",
@@ -4633,8 +4636,9 @@ dependencies = [

 [[package]]
 name = "etcd-client"
-version = "0.15.0"
-source = "git+https://github.com/GreptimeTeam/etcd-client?rev=f62df834f0cffda355eba96691fe1a9a332b75a7#f62df834f0cffda355eba96691fe1a9a332b75a7"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88365f1a5671eb2f7fc240adb216786bc6494b38ce15f1d26ad6eaa303d5e822"
 dependencies = [
 "http 1.3.1",
 "prost 0.13.5",
@@ -5459,7 +5463,7 @@ dependencies = [
 [[package]]
 name = "greptime-proto"
 version = "0.1.0"
-source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=0423fa30203187c75e2937a668df1da699c8b96c#0423fa30203187c75e2937a668df1da699c8b96c"
+source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=173efe5ec62722089db7c531c0b0d470a072b915#173efe5ec62722089db7c531c0b0d470a072b915"
 dependencies = [
 "prost 0.13.5",
 "prost-types 0.13.5",
@@ -9318,9 +9322,9 @@ dependencies = [

 [[package]]
 name = "pgwire"
-version = "0.36.3"
+version = "0.37.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70a2bcdcc4b20a88e0648778ecf00415bbd5b447742275439c22176835056f99"
+checksum = "02d86d57e732d40382ceb9bfea80901d839bae8571aa11c06af9177aed9dfb6c"
 dependencies = [
 "async-trait",
 "base64 0.22.1",
@@ -9339,6 +9343,7 @@ dependencies = [
 "ryu",
 "serde",
 "serde_json",
+ "smol_str",
 "stringprep",
 "thiserror 2.0.17",
 "tokio",
@@ -11503,10 +11508,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"

 [[package]]
 name = "serde"
-version = "1.0.219"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
+ "serde_core",
 "serde_derive",
 ]

@@ -11521,10 +11527,19 @@ dependencies = [
 ]

 [[package]]
-name = "serde_derive"
-version = "1.0.219"
+name = "serde_core"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -11999,6 +12014,16 @@ dependencies = [
 "serde",
 ]

+[[package]]
+name = "smol_str"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3498b0a27f93ef1402f20eefacfaa1691272ac4eca1cdc8c596cb0a245d6cbf5"
+dependencies = [
+ "borsh",
+ "serde_core",
+]
+
 [[package]]
 name = "snafu"
 version = "0.7.5"
@@ -12204,7 +12229,7 @@ dependencies = [
 [[package]]
 name = "sqlparser"
 version = "0.58.0"
-source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
+source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
 dependencies = [
 "lazy_static",
 "log",
@@ -12228,7 +12253,7 @@ dependencies = [
 [[package]]
 name = "sqlparser_derive"
 version = "0.3.0"
-source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=4b519a5caa95472cc3988f5556813a583dd35af1#4b519a5caa95472cc3988f5556813a583dd35af1"
+source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=a0ce2bc6eb3e804532932f39833c32432f5c9a39#a0ce2bc6eb3e804532932f39833c32432f5c9a39"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -143,14 +143,14 @@ derive_builder = "0.20"
 derive_more = { version = "2.1", features = ["full"] }
 dotenv = "0.15"
 either = "1.15"
-etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62df834f0cffda355eba96691fe1a9a332b75a7", features = [
+etcd-client = { version = "0.16.1", features = [
    "tls",
    "tls-roots",
 ] }
 fst = "0.4.7"
 futures = "0.3"
 futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0423fa30203187c75e2937a668df1da699c8b96c" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "173efe5ec62722089db7c531c0b0d470a072b915" }
 hex = "0.4"
 http = "1"
 humantime = "2.1"
@@ -332,7 +332,7 @@ datafusion-physical-plan = { git = "https://github.com/GreptimeTeam/datafusion.g
 datafusion-datasource = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
 datafusion-sql = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
 datafusion-substrait = { git = "https://github.com/GreptimeTeam/datafusion.git", rev = "fd4b2abcf3c3e43e94951bda452c9fd35243aab0" }
-sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "4b519a5caa95472cc3988f5556813a583dd35af1" }                           # branch = "v0.58.x"
+sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "a0ce2bc6eb3e804532932f39833c32432f5c9a39" }                           # branch = "v0.58.x"

 [profile.release]
 debug = 1
--- a/config/config.md
+++ b/config/config.md
@@ -83,6 +83,8 @@
 | `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
 | `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
 | `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
@@ -352,6 +354,7 @@
 | `region_failure_detector_initialization_delay` | String | `10m` | The delay before starting region failure detection.<br/>This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.<br/>Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled. |
 | `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
 | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
+| `heartbeat_interval` | String | `3s` | Base heartbeat interval for calculating distributed time constants.<br/>The frontend heartbeat interval is 6 times of the base heartbeat interval.<br/>The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.<br/>e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.<br/>If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly. |
 | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
 | `runtime` | -- | -- | The runtime options. |
 | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -361,12 +364,18 @@
 | `backend_tls.cert_path` | String | `""` | Path to client certificate file (for client authentication)<br/>Like "/path/to/client.crt" |
 | `backend_tls.key_path` | String | `""` | Path to client private key file (for client authentication)<br/>Like "/path/to/client.key" |
 | `backend_tls.ca_cert_path` | String | `""` | Path to CA certificate file (for server certificate verification)<br/>Required when using custom CAs or self-signed certificates<br/>Leave empty to use system root certificates only<br/>Like "/path/to/ca.crt" |
+| `backend_client` | -- | -- | The backend client options.<br/>Currently, only applicable when using etcd as the metadata store. |
+| `backend_client.keep_alive_timeout` | String | `3s` | The keep alive timeout for backend client. |
+| `backend_client.keep_alive_interval` | String | `10s` | The keep alive interval for backend client. |
+| `backend_client.connect_timeout` | String | `3s` | The connect timeout for backend client. |
 | `grpc` | -- | -- | The gRPC server options. |
 | `grpc.bind_addr` | String | `127.0.0.1:3002` | The address to bind the gRPC server. |
 | `grpc.server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
 | `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
 | `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
 | `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
+| `grpc.http2_keep_alive_interval` | String | `10s` | The server side HTTP/2 keep-alive interval |
+| `grpc.http2_keep_alive_timeout` | String | `3s` | The server side HTTP/2 keep-alive timeout. |
 | `http` | -- | -- | The HTTP server options. |
 | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
 | `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -476,6 +485,8 @@
 | `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
 | `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
 | `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.connect_timeout` | String | `3s` | The connect timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
+| `wal.timeout` | String | `3s` | The timeout for kafka client.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
 | `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -169,6 +169,14 @@ recovery_parallelism = 2
 ## **It's only used when the provider is `kafka`**.
 broker_endpoints = ["127.0.0.1:9092"]

+## The connect timeout for kafka client.
+## **It's only used when the provider is `kafka`**.
+#+ connect_timeout = "3s"
+
+## The timeout for kafka client.
+## **It's only used when the provider is `kafka`**.
+#+ timeout = "3s"
+
 ## The max size of a single producer batch.
 ## Warning: Kafka has a default limit of 1MB per message in a topic.
 ## **It's only used when the provider is `kafka`**.
@@ -225,6 +233,7 @@ overwrite_entry_start_id = false
 # endpoint = "https://s3.amazonaws.com"
 # region = "us-west-2"
 # enable_virtual_host_style = false
+# disable_ec2_metadata = false

 # Example of using Oss as the storage.
 # [storage]
--- a/config/frontend.example.toml
+++ b/config/frontend.example.toml
@@ -131,7 +131,6 @@ key_path = ""
 ## For now, gRPC tls config does not support auto reload.
 watch = false

-
 ## MySQL server options.
 [mysql]
 ## Whether to enable.
--- a/config/metasrv.example.toml
+++ b/config/metasrv.example.toml
@@ -71,6 +71,13 @@ allow_region_failover_on_local_wal = false
 ## Max allowed idle time before removing node info from metasrv memory.
 node_max_idle_time = "24hours"

+## Base heartbeat interval for calculating distributed time constants.
+## The frontend heartbeat interval is 6 times of the base heartbeat interval.
+## The flownode/datanode heartbeat interval is 1 times of the base heartbeat interval.
+## e.g., If the base heartbeat interval is 3s, the frontend heartbeat interval is 18s, the flownode/datanode heartbeat interval is 3s.
+## If you change this value, you need to change the heartbeat interval of the flownode/frontend/datanode accordingly.
+#+ heartbeat_interval = "3s"
+
 ## Whether to enable greptimedb telemetry. Enabled by default.
 #+ enable_telemetry = true

@@ -109,6 +116,16 @@ key_path = ""
 ## Like "/path/to/ca.crt"
 ca_cert_path = ""

+## The backend client options.
+## Currently, only applicable when using etcd as the metadata store.
+#+ [backend_client]
+## The keep alive timeout for backend client.
+#+ keep_alive_timeout = "3s"
+## The keep alive interval for backend client.
+#+ keep_alive_interval = "10s"
+## The connect timeout for backend client.
+#+ connect_timeout = "3s"
+
 ## The gRPC server options.
 [grpc]
 ## The address to bind the gRPC server.
@@ -123,6 +140,10 @@ runtime_size = 8
 max_recv_message_size = "512MB"
 ## The maximum send message size for gRPC server.
 max_send_message_size = "512MB"
+## The server side HTTP/2 keep-alive interval
+#+ http2_keep_alive_interval = "10s"
+## The server side HTTP/2 keep-alive timeout.
+#+ http2_keep_alive_timeout = "3s"

 ## The HTTP server options.
 [http]
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -230,6 +230,14 @@ recovery_parallelism = 2
 ## **It's only used when the provider is `kafka`**.
 broker_endpoints = ["127.0.0.1:9092"]

+## The connect timeout for kafka client.
+## **It's only used when the provider is `kafka`**.
+#+ connect_timeout = "3s"
+
+## The timeout for kafka client.
+## **It's only used when the provider is `kafka`**.
+#+ timeout = "3s"
+
 ## Automatically create topics for WAL.
 ## Set to `true` to automatically create topics for WAL.
 ## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
@@ -332,6 +340,7 @@ max_running_procedures = 128
 # endpoint = "https://s3.amazonaws.com"
 # region = "us-west-2"
 # enable_virtual_host_style = false
+# disable_ec2_metadata = false

 # Example of using Oss as the storage.
 # [storage]
--- a/src/cli/Cargo.toml
+++ b/src/cli/Cargo.toml
@@ -67,6 +67,7 @@ tracing-appender.workspace = true

 [dev-dependencies]
 common-meta = { workspace = true, features = ["testing"] }
+common-test-util.workspace = true
 common-version.workspace = true
 serde.workspace = true
 tempfile.workspace = true
--- a/src/cli/src/common.rs
+++ b/src/cli/src/common.rs
@@ -15,5 +15,8 @@
 mod object_store;
 mod store;

-pub use object_store::{ObjectStoreConfig, new_fs_object_store};
+pub use object_store::{
+    ObjectStoreConfig, PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection,
+    PrefixedS3Connection, new_fs_object_store,
+};
 pub use store::StoreConfig;
--- a/src/cli/src/common/object_store.rs
+++ b/src/cli/src/common/object_store.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_base::secrets::SecretString;
+use common_base::secrets::{ExposeSecret, SecretString};
 use common_error::ext::BoxedError;
 use object_store::services::{Azblob, Fs, Gcs, Oss, S3};
 use object_store::util::{with_instrument_layers, with_retry_layers};
@@ -22,9 +22,69 @@ use snafu::ResultExt;

 use crate::error::{self};

+/// Converts a CLI-side field value into the type stored on the target connection struct.
+/// This enables `Option<SecretString>` (CLI) -> `SecretString` (target) conversions, so the CLI
+/// layer can still distinguish "not provided" (`None`) from "provided but empty" (`Some("")`).
+trait IntoField<T> {
+    fn into_field(self) -> T;
+}
+
+/// Identity conversion: a value whose CLI type already equals the target type is returned as-is.
+impl<T> IntoField<T> for T {
+    fn into_field(self) -> T {
+        self
+    }
+}
+
+/// Convert `Option<SecretString>` to `SecretString`; `None` becomes `SecretString::default()`.
+impl IntoField<SecretString> for Option<SecretString> {
+    fn into_field(self) -> SecretString {
+        self.unwrap_or_default()
+    }
+}
+
+/// Decides whether a CLI field carries no meaningful value.
+///
+/// **`is_empty()`**: true when the field should be reported as missing
+/// - Called on required fields once a backend has been enabled
+/// - `None`, `Some("")`, `false`, or `""` all count as empty
+trait FieldValidator {
+    /// Returns `true` if the field has no meaningful value.
+    fn is_empty(&self) -> bool;
+}
+
+/// `String`: empty exactly when the string has zero length.
+impl FieldValidator for String {
+    fn is_empty(&self) -> bool {
+        String::is_empty(self)
+    }
+}
+
+/// `bool`: `false` counts as empty, `true` as provided.
+impl FieldValidator for bool {
+    fn is_empty(&self) -> bool {
+        !*self
+    }
+}
+
+/// `Option<String>`: empty when absent or when the contained string is empty.
+impl FieldValidator for Option<String> {
+    fn is_empty(&self) -> bool {
+        self.as_deref().is_none_or(str::is_empty)
+    }
+}
+
+/// `Option<SecretString>`: empty when absent or when the exposed secret is empty,
+/// i.e. `Some("")` is treated the same as "not provided".
+impl FieldValidator for Option<SecretString> {
+    fn is_empty(&self) -> bool {
+        self.as_ref().is_none_or(|secret| secret.expose_secret().is_empty())
+    }
+}
+
 macro_rules! wrap_with_clap_prefix {
    (
-        $new_name:ident, $prefix:literal, $base:ty, {
+        $new_name:ident, $prefix:literal, $enable_flag:literal, $base:ty, {
            $( $( #[doc = $doc:expr] )? $( #[alias = $alias:literal] )? $field:ident : $type:ty $( = $default:expr )? ),* $(,)?
        }
    ) => {
@@ -34,15 +94,16 @@ macro_rules! wrap_with_clap_prefix {
                $(
                    $( #[doc = $doc] )?
                    $( #[clap(alias = $alias)] )?
-                    #[clap(long $(, default_value_t = $default )? )]
-                    [<$prefix $field>]: $type,
+                    #[clap(long, requires = $enable_flag $(, default_value_t = $default )? )]
+                    pub [<$prefix $field>]: $type,
                )*
            }

            impl From<$new_name> for $base {
                fn from(w: $new_name) -> Self {
                    Self {
-                        $( $field: w.[<$prefix $field>] ),*
+                        // Use into_field() to handle Option<SecretString> -> SecretString conversion
+                        $( $field: w.[<$prefix $field>].into_field() ),*
                    }
                }
            }
@@ -50,9 +111,90 @@ macro_rules! wrap_with_clap_prefix {
    };
 }

+/// Macro for declarative backend validation. The expansion is a block that evaluates to
+/// `Ok(())`, or `return`s an `Err` from the enclosing function when required fields are empty.
+/// # Validation Rules
+///
+/// For each storage backend (S3, OSS, GCS, Azblob), this macro validates:
+/// **When `enable` is true** (e.g., `--s3`): All required fields must be non-empty
+///
+/// Note: When backend is disabled, clap's `requires` attribute ensures no configuration
+/// fields can be provided at parse time.
+///
+/// # Syntax
+///
+/// ```ignore
+/// validate_backend!(
+///     enable: self.enable_s3,
+///     name: "S3",
+///     required: [(field1, "name1"), (field2, "name2"), ...],
+///     custom_validator: |missing| { ... }  // optional
+/// )
+/// ```
+///
+/// # Arguments
+///
+/// - `enable`: Boolean expression indicating if backend is enabled
+/// - `name`: Human-readable backend name for error messages (lowercased for the `--flag` hint)
+/// - `required`: Array of (field_ref, field_name) tuples checked via `FieldValidator::is_empty`
+/// - `custom_validator`: Optional closure called with `&mut` the list of missing field names
+///
+/// # Example
+///
+/// ```ignore
+/// validate_backend!(
+///     enable: self.enable_s3,
+///     name: "S3",
+///     required: [
+///         (&self.s3.s3_bucket, "bucket"),
+///         (&self.s3.s3_access_key_id, "access key ID"),
+///     ]
+/// )
+/// ```
+macro_rules! validate_backend {
+    (
+        enable: $enable:expr,
+        name: $backend_name:expr,
+        required: [ $( ($field:expr, $field_name:expr) ),* $(,)? ]
+        $(, custom_validator: $custom_validator:expr)?
+    ) => {{
+        if $enable {
+            // Collect the display name of every required field that is empty
+            let mut missing = Vec::new();
+            $(
+                if FieldValidator::is_empty($field) {
+                    missing.push($field_name);
+                }
+            )*
+
+            // Let the optional custom validator append further names to `missing`
+            $(
+                $custom_validator(&mut missing);
+            )?
+
+            if !missing.is_empty() {
+                return Err(BoxedError::new(
+                    error::MissingConfigSnafu {
+                        msg: format!(
+                            "{} {} must be set when --{} is enabled.",
+                            $backend_name,
+                            missing.join(", "),
+                            $backend_name.to_lowercase()
+                        ),
+                    }
+                    .build(),
+                ));
+            }
+        }
+
+        Ok(())
+    }};
+}
+
 wrap_with_clap_prefix! {
    PrefixedAzblobConnection,
    "azblob-",
+    "enable_azblob",
    AzblobConnection,
    {
        #[doc = "The container of the object store."]
@@ -60,9 +202,9 @@ wrap_with_clap_prefix! {
        #[doc = "The root of the object store."]
        root: String = Default::default(),
        #[doc = "The account name of the object store."]
-        account_name: SecretString = Default::default(),
+        account_name: Option<SecretString>,
        #[doc = "The account key of the object store."]
-        account_key: SecretString = Default::default(),
+        account_key: Option<SecretString>,
        #[doc = "The endpoint of the object store."]
        endpoint: String = Default::default(),
        #[doc = "The SAS token of the object store."]
@@ -70,9 +212,33 @@ wrap_with_clap_prefix! {
    }
 }

+impl PrefixedAzblobConnection {
+    /// Checks that container, root, account name and endpoint are non-empty, and that an account
+    /// key is present when no SAS token is given; otherwise returns an error naming what is missing.
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        validate_backend!(
+            enable: true,
+            name: "AzBlob",
+            required: [
+                (&self.azblob_container, "container"),
+                (&self.azblob_root, "root"),
+                (&self.azblob_account_name, "account name"),
+                (&self.azblob_endpoint, "endpoint"),
+            ],
+            custom_validator: |missing_fields: &mut Vec<&str>| {
+                // The account key is only demanded when no SAS token was supplied.
+                let has_sas_token = self.azblob_sas_token.is_some();
+                if !has_sas_token && self.azblob_account_key.is_empty() {
+                    missing_fields.push("account key (when sas_token is not provided)");
+                }
+            }
+        )
+    }
+}
+
 wrap_with_clap_prefix! {
    PrefixedS3Connection,
    "s3-",
+    "enable_s3",
    S3Connection,
    {
        #[doc = "The bucket of the object store."]
@@ -80,21 +246,39 @@ wrap_with_clap_prefix! {
        #[doc = "The root of the object store."]
        root: String = Default::default(),
        #[doc = "The access key ID of the object store."]
-        access_key_id: SecretString = Default::default(),
+        access_key_id: Option<SecretString>,
        #[doc = "The secret access key of the object store."]
-        secret_access_key: SecretString = Default::default(),
+        secret_access_key: Option<SecretString>,
        #[doc = "The endpoint of the object store."]
        endpoint: Option<String>,
        #[doc = "The region of the object store."]
        region: Option<String>,
        #[doc = "Enable virtual host style for the object store."]
        enable_virtual_host_style: bool = Default::default(),
+        #[doc = "Disable EC2 metadata service for the object store."]
+        disable_ec2_metadata: bool = Default::default(),
+    }
+}
+
+impl PrefixedS3Connection {
+    /// Validates the S3 connection options through `validate_backend!`.
+    ///
+    /// The bucket, access key ID, secret access key and region are listed as required
+    /// fields; root, endpoint and the boolean switches are not checked here.
+    // NOTE(review): static access keys are mandatory here; confirm this is intended for
+    // deployments that presumably rely on instance credentials (see `disable_ec2_metadata`).
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        validate_backend!(
+            enable: true,
+            name: "S3",
+            required: [
+                (&self.s3_bucket, "bucket"),
+                (&self.s3_access_key_id, "access key ID"),
+                (&self.s3_secret_access_key, "secret access key"),
+                (&self.s3_region, "region"),
+            ]
+        )
     }
 }

 wrap_with_clap_prefix! {
    PrefixedOssConnection,
    "oss-",
+    "enable_oss",
    OssConnection,
    {
        #[doc = "The bucket of the object store."]
@@ -102,17 +286,33 @@ wrap_with_clap_prefix! {
        #[doc = "The root of the object store."]
        root: String = Default::default(),
        #[doc = "The access key ID of the object store."]
-        access_key_id: SecretString = Default::default(),
+        access_key_id: Option<SecretString>,
        #[doc = "The access key secret of the object store."]
-        access_key_secret: SecretString = Default::default(),
+        access_key_secret: Option<SecretString>,
        #[doc = "The endpoint of the object store."]
        endpoint: String = Default::default(),
    }
 }

+impl PrefixedOssConnection {
+    /// Validates the OSS connection options through `validate_backend!`.
+    ///
+    /// The bucket, access key ID, access key secret and endpoint are listed as
+    /// required fields; the root is not checked here.
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        validate_backend!(
+            enable: true,
+            name: "OSS",
+            required: [
+                (&self.oss_bucket, "bucket"),
+                (&self.oss_access_key_id, "access key ID"),
+                (&self.oss_access_key_secret, "access key secret"),
+                (&self.oss_endpoint, "endpoint"),
+            ]
+        )
+    }
+}
+
 wrap_with_clap_prefix! {
    PrefixedGcsConnection,
    "gcs-",
+    "enable_gcs",
    GcsConnection,
    {
        #[doc = "The root of the object store."]
@@ -122,40 +322,72 @@ wrap_with_clap_prefix! {
        #[doc = "The scope of the object store."]
        scope: String = Default::default(),
        #[doc = "The credential path of the object store."]
-        credential_path: SecretString = Default::default(),
+        credential_path: Option<SecretString>,
        #[doc = "The credential of the object store."]
-        credential: SecretString = Default::default(),
+        credential: Option<SecretString>,
        #[doc = "The endpoint of the object store."]
        endpoint: String = Default::default(),
    }
 }

-/// common config for object store.
+impl PrefixedGcsConnection {
+    /// Validates the GCS connection options through `validate_backend!`.
+    ///
+    /// Only the bucket, root and scope are listed as required fields; credentials and
+    /// the endpoint are deliberately left optional (see the note inside the invocation).
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        validate_backend!(
+            enable: true,
+            name: "GCS",
+            required: [
+                (&self.gcs_bucket, "bucket"),
+                (&self.gcs_root, "root"),
+                (&self.gcs_scope, "scope"),
+            ]
+            // No custom_validator needed: GCS supports Application Default Credentials (ADC)
+            // where neither credential_path nor credential is required.
+            // Endpoint is also optional (defaults to https://storage.googleapis.com).
+        )
+    }
+}
+
+/// Common config for object store.
+///
+/// # Dependency Enforcement
+///
+/// Each backend's configuration fields (e.g., `--s3-bucket`) require the corresponding
+/// enable flag (e.g., `--s3`) to be present. This is enforced by `clap` at parse time
+/// using the `requires` attribute.
+///
+/// For example, attempting to use `--s3-bucket my-bucket` without `--s3` will result in:
+/// ```text
+/// error: The argument '--s3-bucket <BUCKET>' requires '--s3'
+/// ```
+///
+/// This ensures that users cannot accidentally provide backend-specific configuration
+/// without explicitly enabling that backend.
 #[derive(clap::Parser, Debug, Clone, PartialEq, Default)]
+#[clap(group(clap::ArgGroup::new("storage_backend").required(false).multiple(false)))]
 pub struct ObjectStoreConfig {
    /// Whether to use S3 object store.
-    #[clap(long, alias = "s3")]
+    #[clap(long = "s3", group = "storage_backend")]
    pub enable_s3: bool,

    #[clap(flatten)]
    pub s3: PrefixedS3Connection,

    /// Whether to use OSS.
-    #[clap(long, alias = "oss")]
+    #[clap(long = "oss", group = "storage_backend")]
    pub enable_oss: bool,

    #[clap(flatten)]
    pub oss: PrefixedOssConnection,

    /// Whether to use GCS.
-    #[clap(long, alias = "gcs")]
+    #[clap(long = "gcs", group = "storage_backend")]
    pub enable_gcs: bool,

    #[clap(flatten)]
    pub gcs: PrefixedGcsConnection,

    /// Whether to use Azure Blob.
-    #[clap(long, alias = "azblob")]
+    #[clap(long = "azblob", group = "storage_backend")]
    pub enable_azblob: bool,

    #[clap(flatten)]
@@ -173,52 +405,66 @@ pub fn new_fs_object_store(root: &str) -> std::result::Result<ObjectStore, Boxed
    Ok(with_instrument_layers(object_store, false))
 }

+/// Generates a `pub fn $method(&self)` that builds an `ObjectStore` for one backend.
+///
+/// The generated method converts the prefixed CLI connection stored in `self.$field`
+/// into `$conn_type`, logs it, creates the `$service_type` builder from it and wraps
+/// the finished store in the retry and instrument layers.
+macro_rules! gen_object_store_builder {
+    ($method:ident, $field:ident, $conn_type:ty, $service_type:ty) => {
+        pub fn $method(&self) -> Result<ObjectStore, BoxedError> {
+            let connection = <$conn_type>::from(self.$field.clone());
+            common_telemetry::info!(
+                "Building object store with {}: {:?}",
+                stringify!($field),
+                connection
+            );
+
+            let service = <$service_type>::from(&connection);
+            let store = ObjectStore::new(service)
+                .context(error::InitBackendSnafu)
+                .map_err(BoxedError::new)?
+                .finish();
+
+            // Retry layer goes on first, instrumentation wraps it.
+            let store = with_retry_layers(store);
+            Ok(with_instrument_layers(store, false))
+        }
+    };
+}
+
 impl ObjectStoreConfig {
+    // One builder method per backend, generated by `gen_object_store_builder!`.
+    gen_object_store_builder!(build_s3, s3, S3Connection, S3);
+
+    gen_object_store_builder!(build_oss, oss, OssConnection, Oss);
+
+    gen_object_store_builder!(build_gcs, gcs, GcsConnection, Gcs);
+
+    gen_object_store_builder!(build_azblob, azblob, AzblobConnection, Azblob);
+
+    /// Validates the connection options of every backend whose enable flag is set.
+    ///
+    /// Backends are checked in the order S3, OSS, GCS, Azure Blob and the first
+    /// validation error is returned. Backends that are not enabled are skipped.
+    pub fn validate(&self) -> Result<(), BoxedError> {
+        if self.enable_s3 {
+            self.s3.validate()?;
+        }
+        if self.enable_oss {
+            self.oss.validate()?;
+        }
+        if self.enable_gcs {
+            self.gcs.validate()?;
+        }
+        if self.enable_azblob {
+            self.azblob.validate()?;
+        }
+        Ok(())
+    }
+
     /// Builds the object store from the config.
+    ///
+    /// Runs `validate()` first, then builds the store of the first enabled backend
+    /// (checked in the order S3, OSS, GCS, Azure Blob). Returns `Ok(None)` when no
+    /// backend is enabled.
     pub fn build(&self) -> Result<Option<ObjectStore>, BoxedError> {
-        let object_store = if self.enable_s3 {
-            let s3 = S3Connection::from(self.s3.clone());
-            common_telemetry::info!("Building object store with s3: {:?}", s3);
-            Some(
-                ObjectStore::new(S3::from(&s3))
-                    .context(error::InitBackendSnafu)
-                    .map_err(BoxedError::new)?
-                    .finish(),
-            )
+        self.validate()?;
+
+        if self.enable_s3 {
+            self.build_s3().map(Some)
         } else if self.enable_oss {
-            let oss = OssConnection::from(self.oss.clone());
-            common_telemetry::info!("Building object store with oss: {:?}", oss);
-            Some(
-                ObjectStore::new(Oss::from(&oss))
-                    .context(error::InitBackendSnafu)
-                    .map_err(BoxedError::new)?
-                    .finish(),
-            )
+            self.build_oss().map(Some)
         } else if self.enable_gcs {
-            let gcs = GcsConnection::from(self.gcs.clone());
-            common_telemetry::info!("Building object store with gcs: {:?}", gcs);
-            Some(
-                ObjectStore::new(Gcs::from(&gcs))
-                    .context(error::InitBackendSnafu)
-                    .map_err(BoxedError::new)?
-                    .finish(),
-            )
+            self.build_gcs().map(Some)
         } else if self.enable_azblob {
-            let azblob = AzblobConnection::from(self.azblob.clone());
-            common_telemetry::info!("Building object store with azblob: {:?}", azblob);
-            Some(
-                ObjectStore::new(Azblob::from(&azblob))
-                    .context(error::InitBackendSnafu)
-                    .map_err(BoxedError::new)?
-                    .finish(),
-            )
+            self.build_azblob().map(Some)
         } else {
-            None
-        };
-
-        let object_store = object_store
-            .map(|object_store| with_instrument_layers(with_retry_layers(object_store), false));
-
-        Ok(object_store)
+            Ok(None)
+        }
     }
 }
--- a/src/cli/src/common/store.rs
+++ b/src/cli/src/common/store.rs
@@ -19,7 +19,7 @@ use common_error::ext::BoxedError;
 use common_meta::kv_backend::KvBackendRef;
 use common_meta::kv_backend::chroot::ChrootKvBackend;
 use common_meta::kv_backend::etcd::EtcdStore;
-use meta_srv::metasrv::BackendImpl;
+use meta_srv::metasrv::{BackendClientOptions, BackendImpl};
 use meta_srv::utils::etcd::create_etcd_client_with_tls;
 use servers::tls::{TlsMode, TlsOption};

@@ -112,9 +112,13 @@ impl StoreConfig {
            let kvbackend = match self.backend {
                BackendImpl::EtcdStore => {
                    let tls_config = self.tls_config();
-                    let etcd_client = create_etcd_client_with_tls(store_addrs, tls_config.as_ref())
-                        .await
-                        .map_err(BoxedError::new)?;
+                    let etcd_client = create_etcd_client_with_tls(
+                        store_addrs,
+                        &BackendClientOptions::default(),
+                        tls_config.as_ref(),
+                    )
+                    .await
+                    .map_err(BoxedError::new)?;
                    Ok(EtcdStore::with_etcd_client(etcd_client, max_txn_ops))
                }
                #[cfg(feature = "pg_kvbackend")]
--- a/src/cli/src/data.rs
+++ b/src/cli/src/data.rs
@@ -14,6 +14,7 @@

 mod export;
 mod import;
+mod storage_export;

 use clap::Subcommand;
 use client::DEFAULT_CATALOG_NAME;
--- a/src/cli/src/data/export.rs
+++ b/src/cli/src/data/export.rs
--- a/src/cli/src/data/storage_export.rs
+++ b/src/cli/src/data/storage_export.rs
@@ -0,0 +1,373 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::path::PathBuf;
+
+use common_base::secrets::{ExposeSecret, SecretString};
+use common_error::ext::BoxedError;
+
+use crate::common::{
+    PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection, PrefixedS3Connection,
+};
+
+/// Borrows the plain-text value of an optional secret.
+///
+/// Yields the empty string when no secret is set.
+fn expose_optional_secret(secret: &Option<SecretString>) -> &str {
+    match secret {
+        Some(value) => value.expose_secret().as_str(),
+        None => "",
+    }
+}
+
+/// Turns a root directory into a URI path segment.
+///
+/// An empty root yields an empty string; any other root is prefixed with `/`.
+fn format_root_path(root: &str) -> String {
+    match root {
+        "" => String::new(),
+        non_empty => format!("/{}", non_empty),
+    }
+}
+
+/// Replaces every occurrence of each non-empty secret in `sql` with `[REDACTED]`.
+///
+/// Secrets are masked longest-first: if one secret is a substring of another (for
+/// example a key ID that is a prefix of the secret key), masking the shorter one first
+/// would split the longer one and leave its remainder readable in the output.
+/// Empty secrets are ignored.
+fn mask_secrets(mut sql: String, secrets: &[&str]) -> String {
+    let mut ordered: Vec<&str> = secrets
+        .iter()
+        .copied()
+        .filter(|secret| !secret.is_empty())
+        .collect();
+    // Stable sort: secrets of equal length keep their original relative order.
+    ordered.sort_by_key(|secret| std::cmp::Reverse(secret.len()));
+    for secret in ordered {
+        sql = sql.replace(secret, "[REDACTED]");
+    }
+    sql
+}
+
+/// Assembles `<scheme>://<bucket>[/<root>]/<path>`.
+///
+/// The root segment is produced by `format_root_path`, so it is omitted entirely
+/// when `root` is empty.
+fn format_uri(scheme: &str, bucket: &str, root: &str, path: &str) -> String {
+    let root_segment = format_root_path(root);
+    [scheme, "://", bucket, root_segment.as_str(), "/", path].concat()
+}
+
+/// Trait for storage backends that can be used for data export.
+///
+/// Implemented in this file for the local file system and for the S3, OSS, GCS and
+/// Azure Blob connections, as well as for the `StorageType` enum that wraps them.
+pub trait StorageExport: Send + Sync {
+    /// Generate the storage path for COPY DATABASE command.
+    ///
+    /// Returns `(path, connection_string)`. `path` is the target location for the given
+    /// `catalog` and `schema`; `connection_string` is the ` CONNECTION (...)` clause to
+    /// append to the statement, or an empty string when no clause is needed.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String);
+
+    /// Format the output path for logging purposes.
+    ///
+    /// `file_path` is appended to the backend's base location.
+    fn format_output_path(&self, file_path: &str) -> String;
+
+    /// Mask sensitive information in SQL commands for safe logging.
+    ///
+    /// Returns a copy of `sql` with the backend's credentials replaced.
+    fn mask_sensitive_info(&self, sql: &str) -> String;
+}
+
+/// Declares a cloneable backend struct `$name` that wraps a `$config` connection.
+///
+/// The generated `new` constructor calls `validate()` on the connection and only
+/// returns the backend when validation succeeds.
+macro_rules! define_backend {
+    ($name:ident, $config:ty) => {
+        #[derive(Clone)]
+        pub struct $name {
+            config: $config,
+        }
+
+        impl $name {
+            pub fn new(config: $config) -> Result<Self, BoxedError> {
+                config.validate().map(|()| Self { config })
+            }
+        }
+    };
+}
+
+/// Export backend that writes to a directory on the local file system.
+#[derive(Clone)]
+pub struct FsBackend {
+    // Base directory that exported files are placed under.
+    output_dir: String,
+}
+
+impl FsBackend {
+    /// Creates a backend rooted at `output_dir`; the value is stored as given.
+    pub fn new(output_dir: String) -> Self {
+        FsBackend { output_dir }
+    }
+}
+
+impl StorageExport for FsBackend {
+    /// Returns `<output_dir>/<catalog>/<schema>/` and an empty connection clause.
+    ///
+    /// Panics via `unreachable!` when `output_dir` is empty.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        if self.output_dir.is_empty() {
+            unreachable!("output_dir must be set when not using remote storage")
+        }
+        let mut target = PathBuf::from(&self.output_dir);
+        target.push(catalog);
+        target.push(format!("{schema}/"));
+        (target.to_string_lossy().into_owned(), String::new())
+    }
+
+    /// Joins the output directory and `file_path` with a `/`.
+    fn format_output_path(&self, file_path: &str) -> String {
+        [self.output_dir.as_str(), file_path].join("/")
+    }
+
+    /// The local backend embeds no credentials, so `sql` is returned unchanged.
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        sql.to_owned()
+    }
+}
+
+define_backend!(S3Backend, PrefixedS3Connection);
+
+impl StorageExport for S3Backend {
+    /// Returns the `s3://` URI for `catalog`/`schema` and the `CONNECTION (...)` clause.
+    ///
+    /// The clause always carries the access key ID and secret access key; region and
+    /// endpoint are appended only when they are configured.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        let cfg = &self.config;
+        let sub_path = format!("{}/{}/", catalog, schema);
+        let location = format_uri("s3", &cfg.s3_bucket, &cfg.s3_root, &sub_path);
+
+        let key_id = expose_optional_secret(&cfg.s3_access_key_id);
+        let secret_key = expose_optional_secret(&cfg.s3_secret_access_key);
+        let mut options = vec![
+            format!("ACCESS_KEY_ID='{}'", key_id),
+            format!("SECRET_ACCESS_KEY='{}'", secret_key),
+        ];
+        options.extend(
+            cfg.s3_region
+                .iter()
+                .map(|region| format!("REGION='{}'", region)),
+        );
+        options.extend(
+            cfg.s3_endpoint
+                .iter()
+                .map(|endpoint| format!("ENDPOINT='{}'", endpoint)),
+        );
+
+        let clause = format!(" CONNECTION ({})", options.join(", "));
+        (location, clause)
+    }
+
+    /// Returns the `s3://` URI of `file_path` below the configured bucket and root.
+    fn format_output_path(&self, file_path: &str) -> String {
+        let cfg = &self.config;
+        format_uri("s3", &cfg.s3_bucket, &cfg.s3_root, file_path)
+    }
+
+    /// Redacts the access key ID and the secret access key from `sql`.
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        let key_id = expose_optional_secret(&self.config.s3_access_key_id);
+        let secret_key = expose_optional_secret(&self.config.s3_secret_access_key);
+        mask_secrets(sql.to_string(), &[key_id, secret_key])
+    }
+}
+
+define_backend!(OssBackend, PrefixedOssConnection);
+
+impl StorageExport for OssBackend {
+    /// Returns the `oss://` URI for `catalog`/`schema` and the `CONNECTION (...)` clause
+    /// carrying the access key ID and the access key secret.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        let cfg = &self.config;
+        let sub_path = format!("{}/{}/", catalog, schema);
+        let location = format_uri("oss", &cfg.oss_bucket, &cfg.oss_root, &sub_path);
+
+        let key_id = expose_optional_secret(&cfg.oss_access_key_id);
+        let key_secret = expose_optional_secret(&cfg.oss_access_key_secret);
+        let clause = format!(
+            " CONNECTION (ACCESS_KEY_ID='{}', ACCESS_KEY_SECRET='{}')",
+            key_id, key_secret
+        );
+
+        (location, clause)
+    }
+
+    /// Returns the `oss://` URI of `file_path` below the configured bucket and root.
+    fn format_output_path(&self, file_path: &str) -> String {
+        let cfg = &self.config;
+        format_uri("oss", &cfg.oss_bucket, &cfg.oss_root, file_path)
+    }
+
+    /// Redacts the access key ID and the access key secret from `sql`.
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        let key_id = expose_optional_secret(&self.config.oss_access_key_id);
+        let key_secret = expose_optional_secret(&self.config.oss_access_key_secret);
+        mask_secrets(sql.to_string(), &[key_id, key_secret])
+    }
+}
+
+define_backend!(GcsBackend, PrefixedGcsConnection);
+
+impl StorageExport for GcsBackend {
+    /// Returns the `gcs://` URI for `catalog`/`schema` and the connection clause.
+    ///
+    /// Credential path, credential and endpoint are each included only when non-empty;
+    /// when none of them is set the clause is an empty string.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        let cfg = &self.config;
+        let sub_path = format!("{}/{}/", catalog, schema);
+        let location = format_uri("gcs", &cfg.gcs_bucket, &cfg.gcs_root, &sub_path);
+
+        // Candidate options in the order they appear in the clause.
+        let candidates = [
+            (
+                "CREDENTIAL_PATH",
+                expose_optional_secret(&cfg.gcs_credential_path),
+            ),
+            ("CREDENTIAL", expose_optional_secret(&cfg.gcs_credential)),
+            ("ENDPOINT", cfg.gcs_endpoint.as_str()),
+        ];
+        let options: Vec<String> = candidates
+            .into_iter()
+            .filter(|(_, value)| !value.is_empty())
+            .map(|(key, value)| format!("{}='{}'", key, value))
+            .collect();
+
+        let clause = match options.as_slice() {
+            [] => String::new(),
+            parts => format!(" CONNECTION ({})", parts.join(", ")),
+        };
+
+        (location, clause)
+    }
+
+    /// Returns the `gcs://` URI of `file_path` below the configured bucket and root.
+    fn format_output_path(&self, file_path: &str) -> String {
+        let cfg = &self.config;
+        format_uri("gcs", &cfg.gcs_bucket, &cfg.gcs_root, file_path)
+    }
+
+    /// Redacts the credential path and the credential from `sql`.
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        let credential_path = expose_optional_secret(&self.config.gcs_credential_path);
+        let credential = expose_optional_secret(&self.config.gcs_credential);
+        mask_secrets(sql.to_string(), &[credential_path, credential])
+    }
+}
+
+define_backend!(AzblobBackend, PrefixedAzblobConnection);
+
+impl StorageExport for AzblobBackend {
+    /// Returns the `azblob://` URI for `catalog`/`schema` and the `CONNECTION (...)`
+    /// clause.
+    ///
+    /// The clause always carries the account name and account key; the SAS token is
+    /// appended only when it is configured.
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        let azblob_path = format_uri(
+            "azblob",
+            &self.config.azblob_container,
+            &self.config.azblob_root,
+            &format!("{}/{}/", catalog, schema),
+        );
+
+        let mut connection_options = vec![
+            format!(
+                "ACCOUNT_NAME='{}'",
+                expose_optional_secret(&self.config.azblob_account_name)
+            ),
+            format!(
+                "ACCOUNT_KEY='{}'",
+                expose_optional_secret(&self.config.azblob_account_key)
+            ),
+        ];
+
+        if let Some(sas_token) = &self.config.azblob_sas_token {
+            connection_options.push(format!("SAS_TOKEN='{}'", sas_token));
+        }
+
+        let connection_str = format!(" CONNECTION ({})", connection_options.join(", "));
+        (azblob_path, connection_str)
+    }
+
+    /// Returns the `azblob://` URI of `file_path` below the configured container and root.
+    fn format_output_path(&self, file_path: &str) -> String {
+        format_uri(
+            "azblob",
+            &self.config.azblob_container,
+            &self.config.azblob_root,
+            file_path,
+        )
+    }
+
+    /// Redacts the account name, the account key and the SAS token from `sql`.
+    ///
+    /// The SAS token is embedded in the `CONNECTION` clause by `get_storage_path`, so
+    /// it has to be masked together with the other credentials.
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        mask_secrets(
+            sql.to_string(),
+            &[
+                expose_optional_secret(&self.config.azblob_account_name),
+                expose_optional_secret(&self.config.azblob_account_key),
+                self.config.azblob_sas_token.as_deref().unwrap_or(""),
+            ],
+        )
+    }
+}
+
+/// Storage backend used for an export, with one variant per backend defined in this file.
+#[derive(Clone)]
+pub enum StorageType {
+    /// Local file system.
+    Fs(FsBackend),
+    /// S3 object storage.
+    S3(S3Backend),
+    /// OSS object storage.
+    Oss(OssBackend),
+    /// GCS object storage.
+    Gcs(GcsBackend),
+    /// Azure Blob storage.
+    Azblob(AzblobBackend),
+}
+
+impl StorageType {
+    /// Borrows the wrapped backend as a `StorageExport` trait object.
+    fn as_export(&self) -> &dyn StorageExport {
+        match self {
+            StorageType::Fs(backend) => backend,
+            StorageType::S3(backend) => backend,
+            StorageType::Oss(backend) => backend,
+            StorageType::Gcs(backend) => backend,
+            StorageType::Azblob(backend) => backend,
+        }
+    }
+}
+
+/// Every method forwards to the backend held by the active variant.
+impl StorageExport for StorageType {
+    fn get_storage_path(&self, catalog: &str, schema: &str) -> (String, String) {
+        self.as_export().get_storage_path(catalog, schema)
+    }
+
+    fn format_output_path(&self, file_path: &str) -> String {
+        self.as_export().format_output_path(file_path)
+    }
+
+    fn mask_sensitive_info(&self, sql: &str) -> String {
+        self.as_export().mask_sensitive_info(sql)
+    }
+}
+
+impl StorageType {
+    /// Returns true if the storage backend is remote (not local filesystem).
+    ///
+    /// Every variant other than `StorageType::Fs` counts as remote.
+    pub fn is_remote_storage(&self) -> bool {
+        !matches!(self, StorageType::Fs(_))
+    }
+}
--- a/src/cli/src/error.rs
+++ b/src/cli/src/error.rs
@@ -253,12 +253,6 @@ pub enum Error {
        error: ObjectStoreError,
    },

-    #[snafu(display("S3 config need be set"))]
-    S3ConfigNotSet {
-        #[snafu(implicit)]
-        location: Location,
-    },
-
    #[snafu(display("Output directory not set"))]
    OutputDirNotSet {
        #[snafu(implicit)]
@@ -364,9 +358,9 @@ impl ErrorExt for Error {

            Error::Other { source, .. } => source.status_code(),
            Error::OpenDal { .. } | Error::InitBackend { .. } => StatusCode::Internal,
-            Error::S3ConfigNotSet { .. }
-            | Error::OutputDirNotSet { .. }
-            | Error::EmptyStoreAddrs { .. } => StatusCode::InvalidArguments,
+            Error::OutputDirNotSet { .. } | Error::EmptyStoreAddrs { .. } => {
+                StatusCode::InvalidArguments
+            }

            Error::BuildRuntime { source, .. } => source.status_code(),

--- a/src/cmd/src/metasrv.rs
+++ b/src/cmd/src/metasrv.rs
@@ -20,6 +20,7 @@ use async_trait::async_trait;
 use clap::Parser;
 use common_base::Plugins;
 use common_config::Configurable;
+use common_meta::distributed_time_constants::init_distributed_time_constants;
 use common_telemetry::info;
 use common_telemetry::logging::{DEFAULT_LOGGING_DIR, TracingOptions};
 use common_version::{short_version, verbose_version};
@@ -327,6 +328,7 @@ impl StartCommand {
        log_versions(verbose_version(), short_version(), APP_NAME);
        maybe_activate_heap_profile(&opts.component.memory);
        create_resource_limit_metrics(APP_NAME);
+        init_distributed_time_constants(opts.component.heartbeat_interval);

        info!("Metasrv start command: {:#?}", self);

--- a/src/common/config/src/error.rs
+++ b/src/common/config/src/error.rs
@@ -59,15 +59,6 @@ pub enum Error {
        location: Location,
    },

-    #[snafu(display("Failed to canonicalize path: {}", path))]
-    CanonicalizePath {
-        path: String,
-        #[snafu(source)]
-        error: std::io::Error,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
    #[snafu(display("Invalid path '{}': expected a file, not a directory", path))]
    InvalidPath {
        path: String,
@@ -82,8 +73,7 @@ impl ErrorExt for Error {
            Error::TomlFormat { .. }
            | Error::LoadLayeredConfig { .. }
            | Error::FileWatch { .. }
-            | Error::InvalidPath { .. }
-            | Error::CanonicalizePath { .. } => StatusCode::InvalidArguments,
+            | Error::InvalidPath { .. } => StatusCode::InvalidArguments,
            Error::SerdeJson { .. } => StatusCode::Unexpected,
        }
    }
--- a/src/common/config/src/file_watcher.rs
+++ b/src/common/config/src/file_watcher.rs
@@ -30,7 +30,7 @@ use common_telemetry::{error, info, warn};
 use notify::{EventKind, RecursiveMode, Watcher};
 use snafu::ResultExt;

-use crate::error::{CanonicalizePathSnafu, FileWatchSnafu, InvalidPathSnafu, Result};
+use crate::error::{FileWatchSnafu, InvalidPathSnafu, Result};

 /// Configuration for the file watcher behavior.
 #[derive(Debug, Clone, Default)]
@@ -41,15 +41,10 @@ pub struct FileWatcherConfig {

 impl FileWatcherConfig {
    pub fn new() -> Self {
-        Self::default()
+        Default::default()
    }

-    pub fn with_modify_and_create(mut self) -> Self {
-        self.include_remove_events = false;
-        self
-    }
-
-    pub fn with_remove_events(mut self) -> Self {
+    pub fn include_remove_events(mut self) -> Self {
        self.include_remove_events = true;
        self
    }
@@ -93,11 +88,8 @@ impl FileWatcherBuilder {
                path: path.display().to_string(),
            }
        );
-        // Canonicalize the path for reliable comparison with event paths
-        let canonical = path.canonicalize().context(CanonicalizePathSnafu {
-            path: path.display().to_string(),
-        })?;
-        self.file_paths.push(canonical);
+
+        self.file_paths.push(path.to_path_buf());
        Ok(self)
    }

@@ -144,7 +136,6 @@ impl FileWatcherBuilder {
        }

        let config = self.config;
-        let watched_files: HashSet<PathBuf> = self.file_paths.iter().cloned().collect();

        info!(
            "Spawning file watcher for paths: {:?} (watching parent directories)",
@@ -165,25 +156,7 @@ impl FileWatcherBuilder {
                            continue;
                        }

-                        // Check if any of the event paths match our watched files
-                        let is_watched_file = event.paths.iter().any(|event_path| {
-                            // Try to canonicalize the event path for comparison
-                            // If the file was deleted, canonicalize will fail, so we also
-                            // compare the raw path
-                            if let Ok(canonical) = event_path.canonicalize()
-                                && watched_files.contains(&canonical)
-                            {
-                                return true;
-                            }
-                            // For deleted files, compare using the raw path
-                            watched_files.contains(event_path)
-                        });
-
-                        if !is_watched_file {
-                            continue;
-                        }
-
-                        info!(?event.kind, ?event.paths, "Detected file change");
+                        info!(?event.kind, ?event.paths, "Detected folder change");
                        callback();
                    }
                    Err(err) => {
@@ -301,55 +274,4 @@ mod tests {
            "Watcher should have detected file recreation"
        );
    }
-
-    #[test]
-    fn test_file_watcher_ignores_other_files() {
-        common_telemetry::init_default_ut_logging();
-
-        let dir = create_temp_dir("test_file_watcher_other");
-        let watched_file = dir.path().join("watched.txt");
-        let other_file = dir.path().join("other.txt");
-
-        // Create both files
-        std::fs::write(&watched_file, "watched content").unwrap();
-        std::fs::write(&other_file, "other content").unwrap();
-
-        let counter = Arc::new(AtomicUsize::new(0));
-        let counter_clone = counter.clone();
-
-        FileWatcherBuilder::new()
-            .watch_path(&watched_file)
-            .unwrap()
-            .config(FileWatcherConfig::new())
-            .spawn(move || {
-                counter_clone.fetch_add(1, Ordering::SeqCst);
-            })
-            .unwrap();
-
-        // Give watcher time to start
-        std::thread::sleep(Duration::from_millis(100));
-
-        // Modify the other file - should NOT trigger callback
-        std::fs::write(&other_file, "modified other content").unwrap();
-
-        // Wait for potential event
-        std::thread::sleep(Duration::from_millis(500));
-
-        assert_eq!(
-            counter.load(Ordering::SeqCst),
-            0,
-            "Watcher should not have detected changes to other files"
-        );
-
-        // Now modify the watched file - SHOULD trigger callback
-        std::fs::write(&watched_file, "modified watched content").unwrap();
-
-        // Wait for the event to be processed
-        std::thread::sleep(Duration::from_millis(500));
-
-        assert!(
-            counter.load(Ordering::SeqCst) >= 1,
-            "Watcher should have detected change to watched file"
-        );
-    }
 }
--- a/src/common/datasource/src/object_store/s3.rs
+++ b/src/common/datasource/src/object_store/s3.rs
@@ -27,6 +27,7 @@ const SECRET_ACCESS_KEY: &str = "secret_access_key";
 const SESSION_TOKEN: &str = "session_token";
 const REGION: &str = "region";
 const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";
+const DISABLE_EC2_METADATA: &str = "disable_ec2_metadata";

 pub fn is_supported_in_s3(key: &str) -> bool {
    [
@@ -36,6 +37,7 @@ pub fn is_supported_in_s3(key: &str) -> bool {
        SESSION_TOKEN,
        REGION,
        ENABLE_VIRTUAL_HOST_STYLE,
+        DISABLE_EC2_METADATA,
    ]
    .contains(&key)
 }
@@ -82,6 +84,21 @@ pub fn build_s3_backend(
        }
    }

+    if let Some(disable_str) = connection.get(DISABLE_EC2_METADATA) {
+        let disable = disable_str.as_str().parse::<bool>().map_err(|e| {
+            error::InvalidConnectionSnafu {
+                msg: format!(
+                    "failed to parse the option {}={}, {}",
+                    DISABLE_EC2_METADATA, disable_str, e
+                ),
+            }
+            .build()
+        })?;
+        if disable {
+            builder = builder.disable_ec2_metadata();
+        }
+    }
+
    // TODO(weny): Consider finding a better way to eliminate duplicate code.
    Ok(ObjectStore::new(builder)
        .context(error::BuildBackendSnafu)?
@@ -109,6 +126,7 @@ mod tests {
        assert!(is_supported_in_s3(SESSION_TOKEN));
        assert!(is_supported_in_s3(REGION));
        assert!(is_supported_in_s3(ENABLE_VIRTUAL_HOST_STYLE));
+        assert!(is_supported_in_s3(DISABLE_EC2_METADATA));
        assert!(!is_supported_in_s3("foo"))
    }
 }
--- a/src/common/function/Cargo.toml
+++ b/src/common/function/Cargo.toml
@@ -19,7 +19,7 @@ arc-swap = "1.0"
 arrow.workspace = true
 arrow-schema.workspace = true
 async-trait.workspace = true
-bincode = "1.3"
+bincode = "=1.3.3"
 catalog.workspace = true
 chrono.workspace = true
 common-base.workspace = true
--- a/src/common/memory-manager/src/error.rs
+++ b/src/common/memory-manager/src/error.rs
@@ -13,6 +13,7 @@
 // limitations under the License.

 use std::any::Any;
+use std::time::Duration;

 use common_error::ext::ErrorExt;
 use common_error::status_code::StatusCode;
@@ -35,6 +36,14 @@ pub enum Error {

    #[snafu(display("Memory semaphore unexpectedly closed"))]
    MemorySemaphoreClosed,
+
+    #[snafu(display(
+        "Timeout waiting for memory quota: requested {requested_bytes} bytes, waited {waited:?}"
+    ))]
+    MemoryAcquireTimeout {
+        requested_bytes: u64,
+        waited: Duration,
+    },
 }

 impl ErrorExt for Error {
@@ -44,6 +53,7 @@ impl ErrorExt for Error {
        match self {
            MemoryLimitExceeded { .. } => StatusCode::RuntimeResourcesExhausted,
            MemorySemaphoreClosed => StatusCode::Unexpected,
+            MemoryAcquireTimeout { .. } => StatusCode::RuntimeResourcesExhausted,
        }
    }

--- a/src/common/memory-manager/src/granularity.rs
+++ b/src/common/memory-manager/src/granularity.rs
@@ -0,0 +1,168 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt;
+
+/// Amount of memory represented by a single semaphore permit.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum PermitGranularity {
+    /// 1 KB (1024 bytes) per permit.
+    ///
+    /// Use for:
+    /// - HTTP/gRPC request limiting (small, high-concurrency operations)
+    /// - Small batch operations
+    /// - Scenarios requiring fine-grained fairness
+    Kilobyte,
+
+    /// 1 MB (1024 * 1024 bytes) per permit; this is the `Default` variant.
+    ///
+    /// Use for:
+    /// - Query execution memory management
+    /// - Compaction memory control
+    /// - Large, long-running operations
+    #[default]
+    Megabyte,
+}
+
+impl PermitGranularity {
+    /// Returns the number of bytes one permit stands for (1024 or 1024 * 1024).
+    #[inline]
+    pub const fn bytes(self) -> u64 {
+        match self {
+            Self::Kilobyte => 1024,
+            Self::Megabyte => 1024 * 1024,
+        }
+    }
+
+    /// Returns a short human-readable label ("1KB" or "1MB"); the `Display` impl prints the same text.
+    pub const fn as_str(self) -> &'static str {
+        match self {
+            Self::Kilobyte => "1KB",
+            Self::Megabyte => "1MB",
+        }
+    }
+
+    /// Converts a byte count into the number of permits needed to cover it.
+    ///
+    /// Rounds up (ceiling division) so the requested bytes are fully covered; 0 bytes maps to 0 permits.
+    /// The result is clamped to both `Semaphore::MAX_PERMITS` and `u32::MAX`; saturating math avoids overflow near `u64::MAX`.
+    #[inline]
+    pub fn bytes_to_permits(self, bytes: u64) -> u32 {
+        use tokio::sync::Semaphore;
+
+        let granularity_bytes = self.bytes();
+        bytes
+            .saturating_add(granularity_bytes - 1)
+            .saturating_div(granularity_bytes)
+            .min(Semaphore::MAX_PERMITS as u64)
+            .min(u32::MAX as u64) as u32
+    }
+
+    /// Converts a permit count back to bytes (permits * bytes per permit), using a saturating multiply.
+    #[inline]
+    pub fn permits_to_bytes(self, permits: u32) -> u64 {
+        (permits as u64).saturating_mul(self.bytes())
+    }
+}
+
+impl fmt::Display for PermitGranularity {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.as_str()) // prints the same label as `as_str`: "1KB" or "1MB"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_bytes_to_permits_kilobyte() {
+        let granularity = PermitGranularity::Kilobyte;
+
+        // Exact multiples of 1024 bytes map one-to-one onto permits
+        assert_eq!(granularity.bytes_to_permits(1024), 1);
+        assert_eq!(granularity.bytes_to_permits(2048), 2);
+        assert_eq!(granularity.bytes_to_permits(10 * 1024), 10);
+
+        // Any partial kilobyte rounds up to the next whole permit
+        assert_eq!(granularity.bytes_to_permits(1), 1);
+        assert_eq!(granularity.bytes_to_permits(1025), 2);
+        assert_eq!(granularity.bytes_to_permits(2047), 2);
+    }
+
+    #[test]
+    fn test_bytes_to_permits_megabyte() {
+        let granularity = PermitGranularity::Megabyte;
+
+        // Exact multiples of 1024 * 1024 bytes map one-to-one onto permits
+        assert_eq!(granularity.bytes_to_permits(1024 * 1024), 1);
+        assert_eq!(granularity.bytes_to_permits(2 * 1024 * 1024), 2);
+
+        // Any partial megabyte rounds up to the next whole permit
+        assert_eq!(granularity.bytes_to_permits(1), 1);
+        assert_eq!(granularity.bytes_to_permits(1024), 1);
+        assert_eq!(granularity.bytes_to_permits(1024 * 1024 + 1), 2);
+    }
+
+    #[test]
+    fn test_bytes_to_permits_zero_bytes() {
+        assert_eq!(PermitGranularity::Kilobyte.bytes_to_permits(0), 0);
+        assert_eq!(PermitGranularity::Megabyte.bytes_to_permits(0), 0);
+    }
+
+    #[test]
+    fn test_bytes_to_permits_clamps_to_maximum() {
+        use tokio::sync::Semaphore;
+
+        let max_permits = (Semaphore::MAX_PERMITS as u64).min(u32::MAX as u64) as u32;
+
+        assert_eq!(
+            PermitGranularity::Kilobyte.bytes_to_permits(u64::MAX),
+            max_permits
+        );
+        assert_eq!(
+            PermitGranularity::Megabyte.bytes_to_permits(u64::MAX),
+            max_permits
+        );
+    }
+
+    #[test]
+    fn test_permits_to_bytes() {
+        assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(1), 1024);
+        assert_eq!(PermitGranularity::Kilobyte.permits_to_bytes(10), 10 * 1024);
+
+        assert_eq!(PermitGranularity::Megabyte.permits_to_bytes(1), 1024 * 1024);
+        assert_eq!(
+            PermitGranularity::Megabyte.permits_to_bytes(10),
+            10 * 1024 * 1024
+        );
+    }
+
+    #[test]
+    fn test_round_trip_conversion() {
+        // Kilobyte: bytes -> permits -> bytes (result is rounded up to a whole permit)
+        let kb = PermitGranularity::Kilobyte;
+        let permits = kb.bytes_to_permits(1500);
+        let bytes = kb.permits_to_bytes(permits);
+        assert!(bytes >= 1500); // Must cover original request
+        assert_eq!(bytes, 2048); // 2 permits of 1024 bytes
+
+        // Megabyte: bytes -> permits -> bytes (result is rounded up to a whole permit)
+        let mb = PermitGranularity::Megabyte;
+        let permits = mb.bytes_to_permits(1500);
+        let bytes = mb.permits_to_bytes(permits);
+        assert!(bytes >= 1500);
+        assert_eq!(bytes, 1024 * 1024); // 1 permit of 1024 * 1024 bytes
+    }
+}
--- a/src/common/memory-manager/src/guard.rs
+++ b/src/common/memory-manager/src/guard.rs
@@ -17,7 +17,7 @@ use std::{fmt, mem};
 use common_telemetry::debug;
 use tokio::sync::{OwnedSemaphorePermit, TryAcquireError};

-use crate::manager::{MemoryMetrics, MemoryQuota, bytes_to_permits, permits_to_bytes};
+use crate::manager::{MemoryMetrics, MemoryQuota};

 /// Guard representing a slice of reserved memory.
 pub struct MemoryGuard<M: MemoryMetrics> {
@@ -49,7 +49,9 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
    pub fn granted_bytes(&self) -> u64 {
        match &self.state {
            GuardState::Unlimited => 0,
-            GuardState::Limited { permit, .. } => permits_to_bytes(permit.num_permits() as u32),
+            GuardState::Limited { permit, quota } => {
+                quota.permits_to_bytes(permit.num_permits() as u32)
+            }
        }
    }

@@ -65,7 +67,7 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
                    return true;
                }

-                let additional_permits = bytes_to_permits(bytes);
+                let additional_permits = quota.bytes_to_permits(bytes);

                match quota
                    .semaphore
@@ -99,11 +101,12 @@ impl<M: MemoryMetrics> MemoryGuard<M> {
                    return true;
                }

-                let release_permits = bytes_to_permits(bytes);
+                let release_permits = quota.bytes_to_permits(bytes);

                match permit.split(release_permits as usize) {
                    Some(released_permit) => {
-                        let released_bytes = permits_to_bytes(released_permit.num_permits() as u32);
+                        let released_bytes =
+                            quota.permits_to_bytes(released_permit.num_permits() as u32);
                        drop(released_permit);
                        quota.update_in_use_metric();
                        debug!("Early released {} bytes from memory guard", released_bytes);
@@ -121,7 +124,7 @@ impl<M: MemoryMetrics> Drop for MemoryGuard<M> {
        if let GuardState::Limited { permit, quota } =
            mem::replace(&mut self.state, GuardState::Unlimited)
        {
-            let bytes = permits_to_bytes(permit.num_permits() as u32);
+            let bytes = quota.permits_to_bytes(permit.num_permits() as u32);
            drop(permit);
            quota.update_in_use_metric();
            debug!("Released memory: {} bytes", bytes);
--- a/src/common/memory-manager/src/lib.rs
+++ b/src/common/memory-manager/src/lib.rs
@@ -19,6 +19,7 @@
 //! share the same allocation logic while using their own metrics.

 mod error;
+mod granularity;
 mod guard;
 mod manager;
 mod policy;
@@ -27,8 +28,9 @@ mod policy;
 mod tests;

 pub use error::{Error, Result};
+pub use granularity::PermitGranularity;
 pub use guard::MemoryGuard;
-pub use manager::{MemoryManager, MemoryMetrics, PERMIT_GRANULARITY_BYTES};
+pub use manager::{MemoryManager, MemoryMetrics};
 pub use policy::{DEFAULT_MEMORY_WAIT_TIMEOUT, OnExhaustedPolicy};

 /// No-op metrics implementation for testing.
--- a/src/common/memory-manager/src/manager.rs
+++ b/src/common/memory-manager/src/manager.rs
@@ -17,11 +17,12 @@ use std::sync::Arc;
 use snafu::ensure;
 use tokio::sync::{Semaphore, TryAcquireError};

-use crate::error::{MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result};
+use crate::error::{
+    MemoryAcquireTimeoutSnafu, MemoryLimitExceededSnafu, MemorySemaphoreClosedSnafu, Result,
+};
+use crate::granularity::PermitGranularity;
 use crate::guard::MemoryGuard;
-
-/// Minimum bytes controlled by one semaphore permit.
-pub const PERMIT_GRANULARITY_BYTES: u64 = 1 << 20; // 1 MB
+use crate::policy::OnExhaustedPolicy;

 /// Trait for recording memory usage metrics.
 pub trait MemoryMetrics: Clone + Send + Sync + 'static {
@@ -40,6 +41,7 @@ pub struct MemoryManager<M: MemoryMetrics> {
 pub(crate) struct MemoryQuota<M: MemoryMetrics> {
    pub(crate) semaphore: Arc<Semaphore>,
    pub(crate) limit_permits: u32,
+    pub(crate) granularity: PermitGranularity,
    pub(crate) metrics: M,
 }

@@ -47,19 +49,25 @@ impl<M: MemoryMetrics> MemoryManager<M> {
    /// Creates a new memory manager with the given limit in bytes.
    /// `limit_bytes = 0` disables the limit.
    pub fn new(limit_bytes: u64, metrics: M) -> Self {
+        Self::with_granularity(limit_bytes, PermitGranularity::default(), metrics)
+    }
+
+    /// Creates a new memory manager with specified granularity.
+    pub fn with_granularity(limit_bytes: u64, granularity: PermitGranularity, metrics: M) -> Self {
        if limit_bytes == 0 {
            metrics.set_limit(0);
            return Self { quota: None };
        }

-        let limit_permits = bytes_to_permits(limit_bytes);
-        let limit_aligned_bytes = permits_to_bytes(limit_permits);
+        let limit_permits = granularity.bytes_to_permits(limit_bytes);
+        let limit_aligned_bytes = granularity.permits_to_bytes(limit_permits);
        metrics.set_limit(limit_aligned_bytes as i64);

        Self {
            quota: Some(MemoryQuota {
                semaphore: Arc::new(Semaphore::new(limit_permits as usize)),
                limit_permits,
+                granularity,
                metrics,
            }),
        }
@@ -69,7 +77,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
    pub fn limit_bytes(&self) -> u64 {
        self.quota
            .as_ref()
-            .map(|quota| permits_to_bytes(quota.limit_permits))
+            .map(|quota| quota.permits_to_bytes(quota.limit_permits))
            .unwrap_or(0)
    }

@@ -77,7 +85,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
    pub fn used_bytes(&self) -> u64 {
        self.quota
            .as_ref()
-            .map(|quota| permits_to_bytes(quota.used_permits()))
+            .map(|quota| quota.permits_to_bytes(quota.used_permits()))
            .unwrap_or(0)
    }

@@ -85,7 +93,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
    pub fn available_bytes(&self) -> u64 {
        self.quota
            .as_ref()
-            .map(|quota| permits_to_bytes(quota.available_permits_clamped()))
+            .map(|quota| quota.permits_to_bytes(quota.available_permits_clamped()))
            .unwrap_or(0)
    }

@@ -98,13 +106,13 @@ impl<M: MemoryMetrics> MemoryManager<M> {
        match &self.quota {
            None => Ok(MemoryGuard::unlimited()),
            Some(quota) => {
-                let permits = bytes_to_permits(bytes);
+                let permits = quota.bytes_to_permits(bytes);

                ensure!(
                    permits <= quota.limit_permits,
                    MemoryLimitExceededSnafu {
                        requested_bytes: bytes,
-                        limit_bytes: permits_to_bytes(quota.limit_permits),
+                        limit_bytes: self.limit_bytes()
                    }
                );

@@ -125,7 +133,7 @@ impl<M: MemoryMetrics> MemoryManager<M> {
        match &self.quota {
            None => Some(MemoryGuard::unlimited()),
            Some(quota) => {
-                let permits = bytes_to_permits(bytes);
+                let permits = quota.bytes_to_permits(bytes);

                match quota.semaphore.clone().try_acquire_many_owned(permits) {
                    Ok(permit) => {
@@ -140,9 +148,56 @@ impl<M: MemoryMetrics> MemoryManager<M> {
            }
        }
    }
+
+    /// Acquires `bytes` of memory, handling exhaustion according to `policy`.
+    ///
+    /// - For `OnExhaustedPolicy::Wait`: Runs `acquire` under `tokio::time::timeout`, waiting at most `timeout`; errors from `acquire` are passed through
+    /// - For `OnExhaustedPolicy::Fail`: Calls `try_acquire` once and returns immediately if memory is not available
+    ///
+    /// # Errors
+    /// - `MemoryLimitExceeded`: Requested bytes exceed the total limit (both policies), or memory is currently exhausted (Fail policy only)
+    /// - `MemoryAcquireTimeout`: Timeout elapsed while waiting for memory (Wait policy only); `waited` is the configured timeout, not a measured duration
+    /// - `MemorySemaphoreClosed`: The internal semaphore is unexpectedly closed (rare, indicates system issue)
+    pub async fn acquire_with_policy(
+        &self,
+        bytes: u64,
+        policy: OnExhaustedPolicy,
+    ) -> Result<MemoryGuard<M>> {
+        match policy {
+            OnExhaustedPolicy::Wait { timeout } => {
+                match tokio::time::timeout(timeout, self.acquire(bytes)).await {
+                    Ok(Ok(guard)) => Ok(guard),
+                    Ok(Err(e)) => Err(e),
+                    Err(_elapsed) => {
+                        // The timer fired before `acquire` completed; report the configured timeout
+                        MemoryAcquireTimeoutSnafu {
+                            requested_bytes: bytes,
+                            waited: timeout,
+                        }
+                        .fail()
+                    }
+                }
+            }
+            OnExhaustedPolicy::Fail => self.try_acquire(bytes).ok_or_else(|| {
+                MemoryLimitExceededSnafu {
+                    requested_bytes: bytes,
+                    limit_bytes: self.limit_bytes(),
+                }
+                .build()
+            }),
+        }
+    }
 }

 impl<M: MemoryMetrics> MemoryQuota<M> {
+    pub(crate) fn bytes_to_permits(&self, bytes: u64) -> u32 {
+        self.granularity.bytes_to_permits(bytes) // delegates to this quota's configured granularity
+    }
+
+    pub(crate) fn permits_to_bytes(&self, permits: u32) -> u64 {
+        self.granularity.permits_to_bytes(permits) // delegates to this quota's configured granularity
+    }
+
    pub(crate) fn used_permits(&self) -> u32 {
        self.limit_permits
            .saturating_sub(self.available_permits_clamped())
@@ -155,19 +210,7 @@ impl<M: MemoryMetrics> MemoryQuota<M> {
    }

    pub(crate) fn update_in_use_metric(&self) {
-        let bytes = permits_to_bytes(self.used_permits());
+        let bytes = self.permits_to_bytes(self.used_permits());
        self.metrics.set_in_use(bytes as i64);
    }
 }
-
-pub(crate) fn bytes_to_permits(bytes: u64) -> u32 {
-    bytes
-        .saturating_add(PERMIT_GRANULARITY_BYTES - 1)
-        .saturating_div(PERMIT_GRANULARITY_BYTES)
-        .min(Semaphore::MAX_PERMITS as u64)
-        .min(u32::MAX as u64) as u32
-}
-
-pub(crate) fn permits_to_bytes(permits: u32) -> u64 {
-    (permits as u64).saturating_mul(PERMIT_GRANULARITY_BYTES)
-}
--- a/src/common/memory-manager/src/tests.rs
+++ b/src/common/memory-manager/src/tests.rs
@@ -14,7 +14,10 @@

 use tokio::time::{Duration, sleep};

-use crate::{MemoryManager, NoOpMetrics, PERMIT_GRANULARITY_BYTES};
+use crate::{MemoryManager, NoOpMetrics, PermitGranularity};
+
+// Helper constant for tests - use default Megabyte granularity
+const PERMIT_GRANULARITY_BYTES: u64 = PermitGranularity::Megabyte.bytes();

 #[test]
 fn test_try_acquire_unlimited() {
--- a/src/common/meta/src/distributed_time_constants.rs
+++ b/src/common/meta/src/distributed_time_constants.rs
@@ -12,27 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::sync::OnceLock;
 use std::time::Duration;

-use etcd_client::ConnectOptions;
-
-/// Heartbeat interval time (is the basic unit of various time).
-pub const HEARTBEAT_INTERVAL_MILLIS: u64 = 3000;
-
-/// The frontend will also send heartbeats to Metasrv, sending an empty
-/// heartbeat every HEARTBEAT_INTERVAL_MILLIS * 6 seconds.
-pub const FRONTEND_HEARTBEAT_INTERVAL_MILLIS: u64 = HEARTBEAT_INTERVAL_MILLIS * 6;
-
-/// The lease seconds of a region. It's set by 3 heartbeat intervals
-/// (HEARTBEAT_INTERVAL_MILLIS × 3), plus some extra buffer (1 second).
-pub const REGION_LEASE_SECS: u64 =
-    Duration::from_millis(HEARTBEAT_INTERVAL_MILLIS * 3).as_secs() + 1;
-
-/// When creating table or region failover, a target node needs to be selected.
-/// If the node's lease has expired, the `Selector` will not select it.
-pub const DATANODE_LEASE_SECS: u64 = REGION_LEASE_SECS;
-
-pub const FLOWNODE_LEASE_SECS: u64 = DATANODE_LEASE_SECS;
+pub const BASE_HEARTBEAT_INTERVAL: Duration = Duration::from_secs(3); // base interval used by `DistributedTimeConstants::default()`

 /// The lease seconds of metasrv leader.
 pub const META_LEASE_SECS: u64 = 5;
@@ -52,14 +35,6 @@ pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_INTERVAL_SECS: Duration = Duration::from_
 /// The keep-alive timeout of the heartbeat channel.
 pub const HEARTBEAT_CHANNEL_KEEP_ALIVE_TIMEOUT_SECS: Duration = Duration::from_secs(5);

-/// The default options for the etcd client.
-pub fn default_etcd_client_options() -> ConnectOptions {
-    ConnectOptions::new()
-        .with_keep_alive_while_idle(true)
-        .with_keep_alive(Duration::from_secs(15), Duration::from_secs(5))
-        .with_connect_timeout(Duration::from_secs(10))
-}
-
 /// The default mailbox round-trip timeout.
 pub const MAILBOX_RTT_SECS: u64 = 1;

@@ -68,3 +43,60 @@ pub const TOPIC_STATS_REPORT_INTERVAL_SECS: u64 = 15;

 /// The retention seconds of topic stats.
 pub const TOPIC_STATS_RETENTION_SECS: u64 = TOPIC_STATS_REPORT_INTERVAL_SECS * 100;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+/// Heartbeat intervals and region/node lease durations; `from_heartbeat_interval` derives them all from one heartbeat interval.
+pub struct DistributedTimeConstants {
+    pub heartbeat_interval: Duration,
+    pub frontend_heartbeat_interval: Duration,
+    pub region_lease: Duration,
+    pub datanode_lease: Duration,
+    pub flownode_lease: Duration,
+}
+
+/// Returns the frontend heartbeat interval, which is 6 times the base heartbeat interval.
+pub fn frontend_heartbeat_interval(base_heartbeat_interval: Duration) -> Duration {
+    base_heartbeat_interval * 6
+}
+
+impl DistributedTimeConstants {
+    /// Derives every constant from `heartbeat_interval`: region lease = 3 intervals + 1 second; datanode and flownode leases equal the region lease.
+    pub fn from_heartbeat_interval(heartbeat_interval: Duration) -> Self {
+        let region_lease = heartbeat_interval * 3 + Duration::from_secs(1);
+        let datanode_lease = region_lease;
+        let flownode_lease = datanode_lease;
+        Self {
+            heartbeat_interval,
+            frontend_heartbeat_interval: frontend_heartbeat_interval(heartbeat_interval),
+            region_lease,
+            datanode_lease,
+            flownode_lease,
+        }
+    }
+}
+
+impl Default for DistributedTimeConstants {
+    fn default() -> Self {
+        Self::from_heartbeat_interval(BASE_HEARTBEAT_INTERVAL) // defaults are derived from the 3-second base interval
+    }
+}
+
+static DEFAULT_DISTRIBUTED_TIME_CONSTANTS: OnceLock<DistributedTimeConstants> = OnceLock::new();
+
+/// Returns the process-wide constants; if none have been set yet, this call initializes them from `DistributedTimeConstants::default()`.
+pub fn default_distributed_time_constants() -> &'static DistributedTimeConstants {
+    DEFAULT_DISTRIBUTED_TIME_CONSTANTS.get_or_init(Default::default)
+}
+
+/// Sets the process-wide constants derived from `base_heartbeat_interval`; panics if they are already set (including by a prior `default_distributed_time_constants()` call).
+pub fn init_distributed_time_constants(base_heartbeat_interval: Duration) {
+    let distributed_time_constants =
+        DistributedTimeConstants::from_heartbeat_interval(base_heartbeat_interval);
+    DEFAULT_DISTRIBUTED_TIME_CONSTANTS
+        .set(distributed_time_constants)
+        .expect("Failed to set default distributed time constants");
+    common_telemetry::info!(
+        "Initialized default distributed time constants: {:#?}",
+        distributed_time_constants
+    );
+}
--- a/src/common/meta/src/wal_options_allocator/topic_creator.rs
+++ b/src/common/meta/src/wal_options_allocator/topic_creator.rs
@@ -14,7 +14,7 @@

 use common_telemetry::{debug, error, info};
 use common_wal::config::kafka::common::{
-    DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT, KafkaConnectionConfig, KafkaTopicConfig,
+    DEFAULT_BACKOFF_CONFIG, KafkaConnectionConfig, KafkaTopicConfig,
 };
 use rskafka::client::error::Error as RsKafkaError;
 use rskafka::client::error::ProtocolError::TopicAlreadyExists;
@@ -211,7 +211,8 @@ pub async fn build_kafka_client(connection: &KafkaConnectionConfig) -> Result<Cl
    // Builds an kafka controller client for creating topics.
    let mut builder = ClientBuilder::new(connection.broker_endpoints.clone())
        .backoff_config(DEFAULT_BACKOFF_CONFIG)
-        .connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
+        .connect_timeout(Some(connection.connect_timeout))
+        .timeout(Some(connection.timeout));
    if let Some(sasl) = &connection.sasl {
        builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
    };
--- a/src/common/sql/Cargo.toml
+++ b/src/common/sql/Cargo.toml
@@ -5,10 +5,12 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+arrow-schema.workspace = true
 common-base.workspace = true
 common-decimal.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
+common-telemetry.workspace = true
 common-time.workspace = true
 datafusion-sql.workspace = true
 datatypes.workspace = true
--- a/src/common/sql/src/convert.rs
+++ b/src/common/sql/src/convert.rs
@@ -14,11 +14,12 @@

 use std::str::FromStr;

+use arrow_schema::extension::ExtensionType;
 use common_time::Timestamp;
 use common_time::timezone::Timezone;
-use datatypes::json::JsonStructureSettings;
+use datatypes::extension::json::JsonExtensionType;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::schema::ColumnDefaultConstraint;
+use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
 use datatypes::types::{JsonFormat, parse_string_to_jsonb, parse_string_to_vector_type_value};
 use datatypes::value::{OrderedF32, OrderedF64, Value};
 use snafu::{OptionExt, ResultExt, ensure};
@@ -124,13 +125,14 @@ pub(crate) fn sql_number_to_value(data_type: &ConcreteDataType, n: &str) -> Resu
 /// If `auto_string_to_numeric` is true, tries to cast the string value to numeric values,
 /// and returns error if the cast fails.
 pub fn sql_value_to_value(
-    column_name: &str,
-    data_type: &ConcreteDataType,
+    column_schema: &ColumnSchema,
    sql_val: &SqlValue,
    timezone: Option<&Timezone>,
    unary_op: Option<UnaryOperator>,
    auto_string_to_numeric: bool,
 ) -> Result<Value> {
+    let column_name = &column_schema.name;
+    let data_type = &column_schema.data_type;
    let mut value = match sql_val {
        SqlValue::Number(n, _) => sql_number_to_value(data_type, n)?,
        SqlValue::Null => Value::Null,
@@ -146,13 +148,9 @@ pub fn sql_value_to_value(

            (*b).into()
        }
-        SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => parse_string_to_value(
-            column_name,
-            s.clone(),
-            data_type,
-            timezone,
-            auto_string_to_numeric,
-        )?,
+        SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => {
+            parse_string_to_value(column_schema, s.clone(), timezone, auto_string_to_numeric)?
+        }
        SqlValue::HexStringLiteral(s) => {
            // Should not directly write binary into json column
            ensure!(
@@ -244,12 +242,12 @@ pub fn sql_value_to_value(
 }

 pub(crate) fn parse_string_to_value(
-    column_name: &str,
+    column_schema: &ColumnSchema,
    s: String,
-    data_type: &ConcreteDataType,
    timezone: Option<&Timezone>,
    auto_string_to_numeric: bool,
 ) -> Result<Value> {
+    let data_type = &column_schema.data_type;
    if auto_string_to_numeric && let Some(value) = auto_cast_to_numeric(&s, data_type)? {
        return Ok(value);
    }
@@ -257,7 +255,7 @@ pub(crate) fn parse_string_to_value(
    ensure!(
        data_type.is_stringifiable(),
        ColumnTypeMismatchSnafu {
-            column_name,
+            column_name: column_schema.name.clone(),
            expect: data_type.clone(),
            actual: ConcreteDataType::string_datatype(),
        }
@@ -303,23 +301,21 @@ pub(crate) fn parse_string_to_value(
            }
        }
        ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())),
-        ConcreteDataType::Json(j) => {
-            match &j.format {
-                JsonFormat::Jsonb => {
-                    let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
-                    Ok(Value::Binary(v.into()))
-                }
-                JsonFormat::Native(_inner) => {
-                    // Always use the structured version at this level.
-                    let serde_json_value =
-                        serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
-                    let json_structure_settings = JsonStructureSettings::Structured(None);
-                    json_structure_settings
-                        .encode(serde_json_value)
-                        .context(DatatypeSnafu)
-                }
+        ConcreteDataType::Json(j) => match &j.format {
+            JsonFormat::Jsonb => {
+                let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
+                Ok(Value::Binary(v.into()))
            }
-        }
+            JsonFormat::Native(_) => {
+                let extension_type: Option<JsonExtensionType> =
+                    column_schema.extension_type().context(DatatypeSnafu)?;
+                let json_structure_settings = extension_type
+                    .and_then(|x| x.metadata().json_structure_settings.clone())
+                    .unwrap_or_default();
+                let v = serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
+                json_structure_settings.encode(v).context(DatatypeSnafu)
+            }
+        },
        ConcreteDataType::Vector(d) => {
            let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?;
            Ok(Value::Binary(v.into()))
@@ -417,305 +413,265 @@ mod test {

    use super::*;

+    macro_rules! call_parse_string_to_value { // Test helper: wraps the column name and data type in a `ColumnSchema`, then calls `parse_string_to_value`.
+        ($column_name: expr, $input: expr, $data_type: expr) => { // Short form: forwards with `None` as the timezone.
+            call_parse_string_to_value!($column_name, $input, $data_type, None)
+        };
+        ($column_name: expr, $input: expr, $data_type: expr, timezone = $timezone: expr) => { // Keyword form; listed before the catch-all arm because `timezone = x` also parses as an (assignment) expression.
+            call_parse_string_to_value!($column_name, $input, $data_type, Some($timezone))
+        };
+        ($column_name: expr, $input: expr, $data_type: expr, $timezone: expr) => {{ // Base form: the two arms above forward here with an `Option` timezone.
+            let column_schema = ColumnSchema::new($column_name, $data_type, true); // NOTE(review): third argument is presumably `is_nullable` -- confirm.
+            parse_string_to_value(&column_schema, $input, $timezone, true) // Last argument is hard-coded to `true` (presumably the auto string-to-numeric flag -- confirm); no arm passes `false`.
+        }};
+    }
+
    #[test]
-    fn test_string_to_value_auto_numeric() {
+    fn test_string_to_value_auto_numeric() -> Result<()> {
        // Test string to boolean with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "true".to_string(),
-            &ConcreteDataType::boolean_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::boolean_datatype()
+        )?;
        assert_eq!(Value::Boolean(true), result);

        // Test invalid string to boolean with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_boolean".to_string(),
-            &ConcreteDataType::boolean_datatype(),
-            None,
-            true,
+            ConcreteDataType::boolean_datatype()
        );
        assert!(result.is_err());

        // Test string to int8
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "42".to_string(),
-            &ConcreteDataType::int8_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::int8_datatype()
+        )?;
        assert_eq!(Value::Int8(42), result);

        // Test invalid string to int8 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_an_int8".to_string(),
-            &ConcreteDataType::int8_datatype(),
-            None,
-            true,
+            ConcreteDataType::int8_datatype()
        );
        assert!(result.is_err());

        // Test string to int16
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "1000".to_string(),
-            &ConcreteDataType::int16_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::int16_datatype()
+        )?;
        assert_eq!(Value::Int16(1000), result);

        // Test invalid string to int16 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_an_int16".to_string(),
-            &ConcreteDataType::int16_datatype(),
-            None,
-            true,
+            ConcreteDataType::int16_datatype()
        );
        assert!(result.is_err());

        // Test string to int32
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "100000".to_string(),
-            &ConcreteDataType::int32_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::int32_datatype()
+        )?;
        assert_eq!(Value::Int32(100000), result);

        // Test invalid string to int32 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_an_int32".to_string(),
-            &ConcreteDataType::int32_datatype(),
-            None,
-            true,
+            ConcreteDataType::int32_datatype()
        );
        assert!(result.is_err());

        // Test string to int64
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "1000000".to_string(),
-            &ConcreteDataType::int64_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::int64_datatype()
+        )?;
        assert_eq!(Value::Int64(1000000), result);

        // Test invalid string to int64 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_an_int64".to_string(),
-            &ConcreteDataType::int64_datatype(),
-            None,
-            true,
+            ConcreteDataType::int64_datatype()
        );
        assert!(result.is_err());

        // Test string to uint8
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "200".to_string(),
-            &ConcreteDataType::uint8_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::uint8_datatype()
+        )?;
        assert_eq!(Value::UInt8(200), result);

        // Test invalid string to uint8 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_uint8".to_string(),
-            &ConcreteDataType::uint8_datatype(),
-            None,
-            true,
+            ConcreteDataType::uint8_datatype()
        );
        assert!(result.is_err());

        // Test string to uint16
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "60000".to_string(),
-            &ConcreteDataType::uint16_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::uint16_datatype()
+        )?;
        assert_eq!(Value::UInt16(60000), result);

        // Test invalid string to uint16 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_uint16".to_string(),
-            &ConcreteDataType::uint16_datatype(),
-            None,
-            true,
+            ConcreteDataType::uint16_datatype()
        );
        assert!(result.is_err());

        // Test string to uint32
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "4000000000".to_string(),
-            &ConcreteDataType::uint32_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::uint32_datatype()
+        )?;
        assert_eq!(Value::UInt32(4000000000), result);

        // Test invalid string to uint32 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_uint32".to_string(),
-            &ConcreteDataType::uint32_datatype(),
-            None,
-            true,
+            ConcreteDataType::uint32_datatype()
        );
        assert!(result.is_err());

        // Test string to uint64
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "18446744073709551615".to_string(),
-            &ConcreteDataType::uint64_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::uint64_datatype()
+        )?;
        assert_eq!(Value::UInt64(18446744073709551615), result);

        // Test invalid string to uint64 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_uint64".to_string(),
-            &ConcreteDataType::uint64_datatype(),
-            None,
-            true,
+            ConcreteDataType::uint64_datatype()
        );
        assert!(result.is_err());

        // Test string to float32
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "3.5".to_string(),
-            &ConcreteDataType::float32_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::float32_datatype()
+        )?;
        assert_eq!(Value::Float32(OrderedF32::from(3.5)), result);

        // Test invalid string to float32 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_float32".to_string(),
-            &ConcreteDataType::float32_datatype(),
-            None,
-            true,
+            ConcreteDataType::float32_datatype()
        );
        assert!(result.is_err());

        // Test string to float64
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "3.5".to_string(),
-            &ConcreteDataType::float64_datatype(),
-            None,
-            true,
-        )
-        .unwrap();
+            ConcreteDataType::float64_datatype()
+        )?;
        assert_eq!(Value::Float64(OrderedF64::from(3.5)), result);

        // Test invalid string to float64 with auto cast
-        let result = parse_string_to_value(
+        let result = call_parse_string_to_value!(
            "col",
            "not_a_float64".to_string(),
-            &ConcreteDataType::float64_datatype(),
-            None,
-            true,
+            ConcreteDataType::float64_datatype()
        );
        assert!(result.is_err());
+        Ok(())
    }

-    #[test]
-    fn test_sql_value_to_value() {
-        let sql_val = SqlValue::Null;
-        assert_eq!(
-            Value::Null,
-            sql_value_to_value(
-                "a",
-                &ConcreteDataType::float64_datatype(),
-                &sql_val,
-                None,
+    macro_rules! call_sql_value_to_value { // Test helper: wraps the column name and data type in a `ColumnSchema`, then calls `sql_value_to_value`.
+        ($column_name: expr, $data_type: expr, $sql_value: expr) => { // Short form: no timezone, no unary operator, auto string-to-numeric flag `false`.
+            call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, false)
+        };
+        ($column_name: expr, $data_type: expr, $sql_value: expr, timezone = $timezone: expr) => { // `timezone = tz`: forwards `Some(tz)`; the other two options keep the short-form defaults.
+            call_sql_value_to_value!(
+                $column_name,
+                $data_type,
+                $sql_value,
+                Some($timezone),
                None,
                false
            )
-            .unwrap()
+        };
+        ($column_name: expr, $data_type: expr, $sql_value: expr, unary_op = $unary_op: expr) => { // `unary_op = op`: forwards `Some(op)`; the other two options keep the short-form defaults.
+            call_sql_value_to_value!(
+                $column_name,
+                $data_type,
+                $sql_value,
+                None,
+                Some($unary_op),
+                false
+            )
+        };
+        ($column_name: expr, $data_type: expr, $sql_value: expr, auto_string_to_numeric) => { // Bare `auto_string_to_numeric` keyword: same as the short form but with the final flag `true`.
+            call_sql_value_to_value!($column_name, $data_type, $sql_value, None, None, true)
+        };
+        ($column_name: expr, $data_type: expr, $sql_value: expr, $timezone: expr, $unary_op: expr, $auto_string_to_numeric: expr) => {{
+            let column_schema = ColumnSchema::new($column_name, $data_type, true); // Base form: every other arm forwards here. NOTE(review): third argument is presumably `is_nullable` -- confirm.
+            sql_value_to_value(
+                &column_schema,
+                $sql_value,
+                $timezone,
+                $unary_op,
+                $auto_string_to_numeric,
+            )
+        }};
+    }
+
+    #[test]
+    fn test_sql_value_to_value() -> Result<()> {
+        let sql_val = SqlValue::Null;
+        assert_eq!(
+            Value::Null,
+            call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
        );

        let sql_val = SqlValue::Boolean(true);
        assert_eq!(
            Value::Boolean(true),
-            sql_value_to_value(
-                "a",
-                &ConcreteDataType::boolean_datatype(),
-                &sql_val,
-                None,
-                None,
-                false
-            )
-            .unwrap()
+            call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val)?
        );

        let sql_val = SqlValue::Number("3.0".to_string(), false);
        assert_eq!(
            Value::Float64(OrderedFloat(3.0)),
-            sql_value_to_value(
-                "a",
-                &ConcreteDataType::float64_datatype(),
-                &sql_val,
-                None,
-                None,
-                false
-            )
-            .unwrap()
+            call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val)?
        );

        let sql_val = SqlValue::Number("3.0".to_string(), false);
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::boolean_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::boolean_datatype(), &sql_val);
        assert!(v.is_err());
        assert!(format!("{v:?}").contains("Failed to parse number '3.0' to boolean column type"));

        let sql_val = SqlValue::Boolean(true);
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::float64_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::float64_datatype(), &sql_val);
        assert!(v.is_err());
        assert!(
            format!("{v:?}").contains(
@@ -725,41 +681,18 @@ mod test {
        );

        let sql_val = SqlValue::HexStringLiteral("48656c6c6f20776f726c6421".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::binary_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        )
-        .unwrap();
+        let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
        assert_eq!(Value::Binary(Bytes::from(b"Hello world!".as_slice())), v);

        let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::binary_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        )
-        .unwrap();
+        let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val)?;
        assert_eq!(
            Value::Binary(Bytes::from(b"MorningMyFriends".as_slice())),
            v
        );

        let sql_val = SqlValue::HexStringLiteral("9AF".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::binary_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
        assert!(v.is_err());
        assert!(
            format!("{v:?}").contains("odd number of digits"),
@@ -767,38 +700,16 @@ mod test {
        );

        let sql_val = SqlValue::HexStringLiteral("AG".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::binary_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::binary_datatype(), &sql_val);
        assert!(v.is_err());
        assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",);

        let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::json_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val);
        assert!(v.is_err());

        let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::json_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        )
-        .unwrap();
+        let v = call_sql_value_to_value!("a", ConcreteDataType::json_datatype(), &sql_val)?;
        assert_eq!(
            Value::Binary(Bytes::from(
                jsonb::parse_value(r#"{"a":"b"}"#.as_bytes())
@@ -808,16 +719,15 @@ mod test {
            )),
            v
        );
+        Ok(())
    }

    #[test]
    fn test_parse_json_to_jsonb() {
-        match parse_string_to_value(
+        match call_parse_string_to_value!(
            "json_col",
            r#"{"a": "b"}"#.to_string(),
-            &ConcreteDataType::json_datatype(),
-            None,
-            false,
+            ConcreteDataType::json_datatype()
        ) {
            Ok(Value::Binary(b)) => {
                assert_eq!(
@@ -833,12 +743,10 @@ mod test {
        }

        assert!(
-            parse_string_to_value(
+            call_parse_string_to_value!(
                "json_col",
                r#"Nicola Kovac is the best rifler in the world"#.to_string(),
-                &ConcreteDataType::json_datatype(),
-                None,
-                false,
+                ConcreteDataType::json_datatype()
            )
            .is_err()
        )
@@ -878,13 +786,10 @@ mod test {

    #[test]
    fn test_parse_date_literal() {
-        let value = sql_value_to_value(
+        let value = call_sql_value_to_value!(
            "date",
-            &ConcreteDataType::date_datatype(),
-            &SqlValue::DoubleQuotedString("2022-02-22".to_string()),
-            None,
-            None,
-            false,
+            ConcreteDataType::date_datatype(),
+            &SqlValue::DoubleQuotedString("2022-02-22".to_string())
        )
        .unwrap();
        assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
@@ -895,13 +800,11 @@ mod test {
        }

        // with timezone
-        let value = sql_value_to_value(
+        let value = call_sql_value_to_value!(
            "date",
-            &ConcreteDataType::date_datatype(),
+            ConcreteDataType::date_datatype(),
            &SqlValue::DoubleQuotedString("2022-02-22".to_string()),
-            Some(&Timezone::from_tz_string("+07:00").unwrap()),
-            None,
-            false,
+            timezone = &Timezone::from_tz_string("+07:00").unwrap()
        )
        .unwrap();
        assert_eq!(ConcreteDataType::date_datatype(), value.data_type());
@@ -913,16 +816,12 @@ mod test {
    }

    #[test]
-    fn test_parse_timestamp_literal() {
-        match parse_string_to_value(
+    fn test_parse_timestamp_literal() -> Result<()> {
+        match call_parse_string_to_value!(
            "timestamp_col",
            "2022-02-22T00:01:01+08:00".to_string(),
-            &ConcreteDataType::timestamp_millisecond_datatype(),
-            None,
-            false,
-        )
-        .unwrap()
-        {
+            ConcreteDataType::timestamp_millisecond_datatype()
+        )? {
            Value::Timestamp(ts) => {
                assert_eq!(1645459261000, ts.value());
                assert_eq!(TimeUnit::Millisecond, ts.unit());
@@ -932,15 +831,11 @@ mod test {
            }
        }

-        match parse_string_to_value(
+        match call_parse_string_to_value!(
            "timestamp_col",
            "2022-02-22T00:01:01+08:00".to_string(),
-            &ConcreteDataType::timestamp_datatype(TimeUnit::Second),
-            None,
-            false,
-        )
-        .unwrap()
-        {
+            ConcreteDataType::timestamp_datatype(TimeUnit::Second)
+        )? {
            Value::Timestamp(ts) => {
                assert_eq!(1645459261, ts.value());
                assert_eq!(TimeUnit::Second, ts.unit());
@@ -950,15 +845,11 @@ mod test {
            }
        }

-        match parse_string_to_value(
+        match call_parse_string_to_value!(
            "timestamp_col",
            "2022-02-22T00:01:01+08:00".to_string(),
-            &ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
-            None,
-            false,
-        )
-        .unwrap()
-        {
+            ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond)
+        )? {
            Value::Timestamp(ts) => {
                assert_eq!(1645459261000000, ts.value());
                assert_eq!(TimeUnit::Microsecond, ts.unit());
@@ -968,15 +859,11 @@ mod test {
            }
        }

-        match parse_string_to_value(
+        match call_parse_string_to_value!(
            "timestamp_col",
            "2022-02-22T00:01:01+08:00".to_string(),
-            &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
-            None,
-            false,
-        )
-        .unwrap()
-        {
+            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
+        )? {
            Value::Timestamp(ts) => {
                assert_eq!(1645459261000000000, ts.value());
                assert_eq!(TimeUnit::Nanosecond, ts.unit());
@@ -987,26 +874,21 @@ mod test {
        }

        assert!(
-            parse_string_to_value(
+            call_parse_string_to_value!(
                "timestamp_col",
                "2022-02-22T00:01:01+08".to_string(),
-                &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
-                None,
-                false,
+                ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond)
            )
            .is_err()
        );

        // with timezone
-        match parse_string_to_value(
+        match call_parse_string_to_value!(
            "timestamp_col",
            "2022-02-22T00:01:01".to_string(),
-            &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
-            Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()),
-            false,
-        )
-        .unwrap()
-        {
+            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
+            timezone = &Timezone::from_tz_string("Asia/Shanghai").unwrap()
+        )? {
            Value::Timestamp(ts) => {
                assert_eq!(1645459261000000000, ts.value());
                assert_eq!("2022-02-21 16:01:01+0000", ts.to_iso8601_string());
@@ -1016,51 +898,42 @@ mod test {
                unreachable!()
            }
        }
+        Ok(())
    }

    #[test]
    fn test_parse_placeholder_value() {
        assert!(
-            sql_value_to_value(
+            call_sql_value_to_value!(
                "test",
-                &ConcreteDataType::string_datatype(),
+                ConcreteDataType::string_datatype(),
+                &SqlValue::Placeholder("default".into())
+            )
+            .is_err()
+        );
+        assert!(
+            call_sql_value_to_value!(
+                "test",
+                ConcreteDataType::string_datatype(),
                &SqlValue::Placeholder("default".into()),
-                None,
-                None,
-                false
+                unary_op = UnaryOperator::Minus
            )
            .is_err()
        );
        assert!(
-            sql_value_to_value(
+            call_sql_value_to_value!(
                "test",
-                &ConcreteDataType::string_datatype(),
-                &SqlValue::Placeholder("default".into()),
-                None,
-                Some(UnaryOperator::Minus),
-                false
-            )
-            .is_err()
-        );
-        assert!(
-            sql_value_to_value(
-                "test",
-                &ConcreteDataType::uint16_datatype(),
+                ConcreteDataType::uint16_datatype(),
                &SqlValue::Number("3".into(), false),
-                None,
-                Some(UnaryOperator::Minus),
-                false
+                unary_op = UnaryOperator::Minus
            )
            .is_err()
        );
        assert!(
-            sql_value_to_value(
+            call_sql_value_to_value!(
                "test",
-                &ConcreteDataType::uint16_datatype(),
-                &SqlValue::Number("3".into(), false),
-                None,
-                None,
-                false
+                ConcreteDataType::uint16_datatype(),
+                &SqlValue::Number("3".into(), false)
            )
            .is_ok()
        );
@@ -1070,77 +943,60 @@ mod test {
    fn test_auto_string_to_numeric() {
        // Test with auto_string_to_numeric=true
        let sql_val = SqlValue::SingleQuotedString("123".to_string());
-        let v = sql_value_to_value(
+        let v = call_sql_value_to_value!(
            "a",
-            &ConcreteDataType::int32_datatype(),
+            ConcreteDataType::int32_datatype(),
            &sql_val,
-            None,
-            None,
-            true,
+            auto_string_to_numeric
        )
        .unwrap();
        assert_eq!(Value::Int32(123), v);

        // Test with a float string
        let sql_val = SqlValue::SingleQuotedString("3.5".to_string());
-        let v = sql_value_to_value(
+        let v = call_sql_value_to_value!(
            "a",
-            &ConcreteDataType::float64_datatype(),
+            ConcreteDataType::float64_datatype(),
            &sql_val,
-            None,
-            None,
-            true,
+            auto_string_to_numeric
        )
        .unwrap();
        assert_eq!(Value::Float64(OrderedFloat(3.5)), v);

        // Test with auto_string_to_numeric=false
        let sql_val = SqlValue::SingleQuotedString("123".to_string());
-        let v = sql_value_to_value(
-            "a",
-            &ConcreteDataType::int32_datatype(),
-            &sql_val,
-            None,
-            None,
-            false,
-        );
+        let v = call_sql_value_to_value!("a", ConcreteDataType::int32_datatype(), &sql_val);
        assert!(v.is_err());

        // Test with an invalid numeric string but auto_string_to_numeric=true
        // Should return an error now with the new auto_cast_to_numeric behavior
        let sql_val = SqlValue::SingleQuotedString("not_a_number".to_string());
-        let v = sql_value_to_value(
+        let v = call_sql_value_to_value!(
            "a",
-            &ConcreteDataType::int32_datatype(),
+            ConcreteDataType::int32_datatype(),
            &sql_val,
-            None,
-            None,
-            true,
+            auto_string_to_numeric
        );
        assert!(v.is_err());

        // Test with boolean type
        let sql_val = SqlValue::SingleQuotedString("true".to_string());
-        let v = sql_value_to_value(
+        let v = call_sql_value_to_value!(
            "a",
-            &ConcreteDataType::boolean_datatype(),
+            ConcreteDataType::boolean_datatype(),
            &sql_val,
-            None,
-            None,
-            true,
+            auto_string_to_numeric
        )
        .unwrap();
        assert_eq!(Value::Boolean(true), v);

        // Non-numeric types should still be handled normally
        let sql_val = SqlValue::SingleQuotedString("hello".to_string());
-        let v = sql_value_to_value(
+        let v = call_sql_value_to_value!(
            "a",
-            &ConcreteDataType::string_datatype(),
+            ConcreteDataType::string_datatype(),
            &sql_val,
-            None,
-            None,
-            true,
+            auto_string_to_numeric
        );
        assert!(v.is_ok());
    }
--- a/src/common/sql/src/default_constraint.rs
+++ b/src/common/sql/src/default_constraint.rs
@@ -14,8 +14,8 @@

 use common_time::timezone::Timezone;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::schema::ColumnDefaultConstraint;
 use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN};
+use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
 use snafu::ensure;
 use sqlparser::ast::ValueWithSpan;
 pub use sqlparser::ast::{
@@ -47,9 +47,12 @@ pub fn parse_column_default_constraint(
        );

        let default_constraint = match &opt.option {
-            ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value(
-                sql_value_to_value(column_name, data_type, &v.value, timezone, None, false)?,
-            ),
+            ColumnOption::Default(Expr::Value(v)) => {
+                let schema = ColumnSchema::new(column_name, data_type.clone(), true);
+                ColumnDefaultConstraint::Value(sql_value_to_value(
+                    &schema, &v.value, timezone, None, false,
+                )?)
+            }
            ColumnOption::Default(Expr::Function(func)) => {
                let mut func = format!("{func}").to_lowercase();
                // normalize CURRENT_TIMESTAMP to CURRENT_TIMESTAMP()
@@ -80,8 +83,7 @@ pub fn parse_column_default_constraint(

                if let Expr::Value(v) = &**expr {
                    let value = sql_value_to_value(
-                        column_name,
-                        data_type,
+                        &ColumnSchema::new(column_name, data_type.clone(), true),
                        &v.value,
                        timezone,
                        Some(*op),
--- a/src/common/telemetry/src/metric.rs
+++ b/src/common/telemetry/src/metric.rs
@@ -71,6 +71,7 @@ pub fn convert_metric_to_write_request(
                        timestamp,
                    }],
                    exemplars: vec![],
+                    histograms: vec![],
                }),
                MetricType::GAUGE => timeseries.push(TimeSeries {
                    labels: convert_label(m.get_label(), mf_name, None),
@@ -79,6 +80,7 @@ pub fn convert_metric_to_write_request(
                        timestamp,
                    }],
                    exemplars: vec![],
+                    histograms: vec![],
                }),
                MetricType::HISTOGRAM => {
                    let h = m.get_histogram();
@@ -97,6 +99,7 @@ pub fn convert_metric_to_write_request(
                                timestamp,
                            }],
                            exemplars: vec![],
+                            histograms: vec![],
                        });
                        if upper_bound.is_sign_positive() && upper_bound.is_infinite() {
                            inf_seen = true;
@@ -114,6 +117,7 @@ pub fn convert_metric_to_write_request(
                                timestamp,
                            }],
                            exemplars: vec![],
+                            histograms: vec![],
                        });
                    }
                    timeseries.push(TimeSeries {
@@ -127,6 +131,7 @@ pub fn convert_metric_to_write_request(
                            timestamp,
                        }],
                        exemplars: vec![],
+                        histograms: vec![],
                    });
                    timeseries.push(TimeSeries {
                        labels: convert_label(
@@ -139,6 +144,7 @@ pub fn convert_metric_to_write_request(
                            timestamp,
                        }],
                        exemplars: vec![],
+                        histograms: vec![],
                    });
                }
                MetricType::SUMMARY => {
@@ -155,6 +161,7 @@ pub fn convert_metric_to_write_request(
                                timestamp,
                            }],
                            exemplars: vec![],
+                            histograms: vec![],
                        });
                    }
                    timeseries.push(TimeSeries {
@@ -168,6 +175,7 @@ pub fn convert_metric_to_write_request(
                            timestamp,
                        }],
                        exemplars: vec![],
+                        histograms: vec![],
                    });
                    timeseries.push(TimeSeries {
                        labels: convert_label(
@@ -180,6 +188,7 @@ pub fn convert_metric_to_write_request(
                            timestamp,
                        }],
                        exemplars: vec![],
+                        histograms: vec![],
                    });
                }
                MetricType::UNTYPED => {
@@ -274,7 +283,7 @@ mod test {

        assert_eq!(
            format!("{:?}", write_quest.timeseries),
-            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }]"#
+            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
        );

        let gauge_opts = Opts::new("test_gauge", "test help")
@@ -288,7 +297,7 @@ mod test {
        let write_quest = convert_metric_to_write_request(mf, None, 0);
        assert_eq!(
            format!("{:?}", write_quest.timeseries),
-            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [] }]"#
+            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_gauge" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 42.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
        );
    }

@@ -305,20 +314,20 @@ mod test {
            .iter()
            .map(|x| format!("{:?}", x))
            .collect();
-        let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }"#;
+        let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.005" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.01" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.025" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.05" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.1" }], samples: [Sample { value: 0.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.25" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "0.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "2.5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "5" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "10" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_bucket" }, Label { name: "a", value: "1" }, Label { name: "le", value: "+Inf" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_sum" }, Label { name: "a", value: "1" }], samples: [Sample { value: 0.25, timestamp: 0 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" }, Label { name: "a", value: "1" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }"#;
        assert_eq!(write_quest_str.join("\n"), ans);
    }

@@ -355,10 +364,10 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_histogram_count" },
            .iter()
            .map(|x| format!("{:?}", x))
            .collect();
-        let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [] }
-TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [] }"#;
+        let ans = r#"TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "50" }], samples: [Sample { value: 3.0, timestamp: 20 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_summary" }, Label { name: "quantile", value: "100" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_summary_sum" }], samples: [Sample { value: 15.0, timestamp: 20 }], exemplars: [], histograms: [] }
+TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }], samples: [Sample { value: 5.0, timestamp: 20 }], exemplars: [], histograms: [] }"#;
        assert_eq!(write_quest_str.join("\n"), ans);
    }

@@ -385,11 +394,11 @@ TimeSeries { labels: [Label { name: "__name__", value: "test_summary_count" }],
        let write_quest2 = convert_metric_to_write_request(mf, Some(&filter), 0);
        assert_eq!(
            format!("{:?}", write_quest1.timeseries),
-            r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
+            r#"[TimeSeries { labels: [Label { name: "__name__", value: "filter_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 1.0, timestamp: 0 }], exemplars: [], histograms: [] }, TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
        );
        assert_eq!(
            format!("{:?}", write_quest2.timeseries),
-            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [] }]"#
+            r#"[TimeSeries { labels: [Label { name: "__name__", value: "test_counter" }, Label { name: "a", value: "1" }, Label { name: "b", value: "2" }], samples: [Sample { value: 2.0, timestamp: 0 }], exemplars: [], histograms: [] }]"#
        );
    }
 }
--- a/src/common/wal/src/config.rs
+++ b/src/common/wal/src/config.rs
@@ -206,6 +206,8 @@ mod tests {
                    client_cert_path: None,
                    client_key_path: None,
                }),
+                connect_timeout: Duration::from_secs(3),
+                timeout: Duration::from_secs(3),
            },
            kafka_topic: KafkaTopicConfig {
                num_topics: 32,
@@ -239,6 +241,8 @@ mod tests {
                    client_cert_path: None,
                    client_key_path: None,
                }),
+                connect_timeout: Duration::from_secs(3),
+                timeout: Duration::from_secs(3),
            },
            max_batch_bytes: ReadableSize::mb(1),
            consumer_wait_timeout: Duration::from_millis(100),
--- a/src/common/wal/src/config/kafka/common.rs
+++ b/src/common/wal/src/config/kafka/common.rs
@@ -36,9 +36,6 @@ pub const DEFAULT_BACKOFF_CONFIG: BackoffConfig = BackoffConfig {
    deadline: Some(Duration::from_secs(3)),
 };

-/// The default connect timeout for kafka client.
-pub const DEFAULT_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
-
 /// Default interval for auto WAL pruning.
 pub const DEFAULT_AUTO_PRUNE_INTERVAL: Duration = Duration::from_mins(30);
 /// Default limit for concurrent auto pruning tasks.
@@ -167,6 +164,12 @@ pub struct KafkaConnectionConfig {
    pub sasl: Option<KafkaClientSasl>,
    /// Client TLS config
    pub tls: Option<KafkaClientTls>,
+    /// The connect timeout for kafka client.
+    #[serde(with = "humantime_serde")]
+    pub connect_timeout: Duration,
+    /// The timeout for kafka client.
+    #[serde(with = "humantime_serde")]
+    pub timeout: Duration,
 }

 impl Default for KafkaConnectionConfig {
@@ -175,6 +178,8 @@ impl Default for KafkaConnectionConfig {
            broker_endpoints: vec![BROKER_ENDPOINT.to_string()],
            sasl: None,
            tls: None,
+            connect_timeout: Duration::from_secs(3),
+            timeout: Duration::from_secs(3),
        }
    }
 }
--- a/src/frontend/src/frontend.rs
+++ b/src/frontend/src/frontend.rs
@@ -157,7 +157,6 @@ mod tests {
    use common_error::from_header_to_err_code_msg;
    use common_error::status_code::StatusCode;
    use common_grpc::channel_manager::ChannelManager;
-    use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
    use common_meta::heartbeat::handler::HandlerGroupExecutor;
    use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
    use common_meta::heartbeat::handler::suspend::SuspendHandler;
@@ -400,6 +399,10 @@ mod tests {
                ..Default::default()
            },
            meta_client: Some(meta_client_options.clone()),
+            heartbeat: HeartbeatOptions {
+                interval: Duration::from_secs(1),
+                ..Default::default()
+            },
            ..Default::default()
        };

@@ -409,7 +412,8 @@ mod tests {
        let meta_client = create_meta_client(&meta_client_options, server.clone()).await;
        let frontend = create_frontend(&options, meta_client).await?;

-        tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
+        let frontend_heartbeat_interval = options.heartbeat.interval;
+        tokio::time::sleep(frontend_heartbeat_interval).await;
        // initial state: not suspend:
        assert!(!frontend.instance.is_suspended());
        verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;
@@ -426,7 +430,7 @@ mod tests {

        // make heartbeat server returned "suspend" instruction,
        server.suspend.store(true, Ordering::Relaxed);
-        tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
+        tokio::time::sleep(frontend_heartbeat_interval).await;
        // ... then the frontend is suspended:
        assert!(frontend.instance.is_suspended());
        verify_suspend_state_by_http(
@@ -442,7 +446,7 @@ mod tests {

        // make heartbeat server NOT returned "suspend" instruction,
        server.suspend.store(false, Ordering::Relaxed);
-        tokio::time::sleep(Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS)).await;
+        tokio::time::sleep(frontend_heartbeat_interval).await;
        // ... then frontend's suspend state is cleared:
        assert!(!frontend.instance.is_suspended());
        verify_suspend_state_by_http(&frontend, Ok(r#"[{"records":{"schema":{"column_schemas":[{"name":"Int64(1)","data_type":"Int64"}]},"rows":[[1]],"total_rows":1}}]"#)).await;
--- a/src/log-store/src/kafka/client_manager.rs
+++ b/src/log-store/src/kafka/client_manager.rs
@@ -16,7 +16,7 @@ use std::collections::HashMap;
 use std::sync::Arc;

 use common_wal::config::kafka::DatanodeKafkaConfig;
-use common_wal::config::kafka::common::{DEFAULT_BACKOFF_CONFIG, DEFAULT_CONNECT_TIMEOUT};
+use common_wal::config::kafka::common::DEFAULT_BACKOFF_CONFIG;
 use dashmap::DashMap;
 use rskafka::client::ClientBuilder;
 use rskafka::client::partition::{Compression, PartitionClient, UnknownTopicHandling};
@@ -79,7 +79,8 @@ impl ClientManager {
        // Sets backoff config for the top-level kafka client and all clients constructed by it.
        let mut builder = ClientBuilder::new(config.connection.broker_endpoints.clone())
            .backoff_config(DEFAULT_BACKOFF_CONFIG)
-            .connect_timeout(Some(DEFAULT_CONNECT_TIMEOUT));
+            .connect_timeout(Some(config.connection.connect_timeout))
+            .timeout(Some(config.connection.timeout));
        if let Some(sasl) = &config.connection.sasl {
            builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
        };
--- a/src/meta-srv/src/bootstrap.rs
+++ b/src/meta-srv/src/bootstrap.rs
@@ -14,7 +14,6 @@

 use std::net::SocketAddr;
 use std::sync::Arc;
-use std::time::Duration;

 use api::v1::meta::cluster_server::ClusterServer;
 use api::v1::meta::heartbeat_server::HeartbeatServer;
@@ -60,11 +59,6 @@ use crate::service::admin::admin_axum_router;
 use crate::utils::etcd::create_etcd_client_with_tls;
 use crate::{Result, error};

-/// The default keep-alive interval for gRPC.
-const DEFAULT_GRPC_KEEP_ALIVE_INTERVAL: Duration = Duration::from_secs(10);
-/// The default keep-alive timeout for gRPC.
-const DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(10);
-
 pub struct MetasrvInstance {
    metasrv: Arc<Metasrv>,

@@ -255,8 +249,8 @@ pub fn router(metasrv: Arc<Metasrv>) -> Router {
        // for admin services
        .accept_http1(true)
        // For quick network failures detection.
-        .http2_keepalive_interval(Some(DEFAULT_GRPC_KEEP_ALIVE_INTERVAL))
-        .http2_keepalive_timeout(Some(DEFAULT_GRPC_KEEP_ALIVE_TIMEOUT));
+        .http2_keepalive_interval(Some(metasrv.options().grpc.http2_keep_alive_interval))
+        .http2_keepalive_timeout(Some(metasrv.options().grpc.http2_keep_alive_timeout));
    let router = add_compressed_service!(router, HeartbeatServer::from_arc(metasrv.clone()));
    let router = add_compressed_service!(router, StoreServer::from_arc(metasrv.clone()));
    let router = add_compressed_service!(router, ClusterServer::from_arc(metasrv.clone()));
@@ -273,8 +267,12 @@ pub async fn metasrv_builder(
        (Some(kv_backend), _) => (kv_backend, None),
        (None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None),
        (None, BackendImpl::EtcdStore) => {
-            let etcd_client =
-                create_etcd_client_with_tls(&opts.store_addrs, opts.backend_tls.as_ref()).await?;
+            let etcd_client = create_etcd_client_with_tls(
+                &opts.store_addrs,
+                &opts.backend_client,
+                opts.backend_tls.as_ref(),
+            )
+            .await?;
            let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
            let election = EtcdElection::with_etcd_client(
                &opts.grpc.server_addr,
--- a/src/meta-srv/src/discovery.rs
+++ b/src/meta-srv/src/discovery.rs
@@ -16,13 +16,9 @@ pub mod lease;
 pub mod node_info;
 pub mod utils;

-use std::time::Duration;
-
 use api::v1::meta::heartbeat_request::NodeWorkloads;
 use common_error::ext::BoxedError;
-use common_meta::distributed_time_constants::{
-    DATANODE_LEASE_SECS, FLOWNODE_LEASE_SECS, FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
-};
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_meta::error::Result;
 use common_meta::peer::{Peer, PeerDiscovery, PeerResolver};
 use common_meta::{DatanodeId, FlownodeId};
@@ -38,7 +34,7 @@ impl PeerDiscovery for MetaPeerClient {
        utils::alive_frontends(
            &DefaultSystemTimer,
            self,
-            Duration::from_millis(FRONTEND_HEARTBEAT_INTERVAL_MILLIS),
+            default_distributed_time_constants().frontend_heartbeat_interval,
        )
        .await
        .map_err(BoxedError::new)
@@ -52,7 +48,7 @@ impl PeerDiscovery for MetaPeerClient {
        utils::alive_datanodes(
            &DefaultSystemTimer,
            self,
-            Duration::from_secs(DATANODE_LEASE_SECS),
+            default_distributed_time_constants().datanode_lease,
            filter,
        )
        .await
@@ -67,7 +63,7 @@ impl PeerDiscovery for MetaPeerClient {
        utils::alive_flownodes(
            &DefaultSystemTimer,
            self,
-            Duration::from_secs(FLOWNODE_LEASE_SECS),
+            default_distributed_time_constants().flownode_lease,
            filter,
        )
        .await
--- a/src/meta-srv/src/discovery/lease.rs
+++ b/src/meta-srv/src/discovery/lease.rs
@@ -102,7 +102,7 @@ mod tests {
    use api::v1::meta::heartbeat_request::NodeWorkloads;
    use api::v1::meta::{DatanodeWorkloads, FlownodeWorkloads};
    use common_meta::cluster::{FrontendStatus, NodeInfo, NodeInfoKey, NodeStatus, Role};
-    use common_meta::distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS;
+    use common_meta::distributed_time_constants::default_distributed_time_constants;
    use common_meta::kv_backend::ResettableKvBackendRef;
    use common_meta::peer::{Peer, PeerDiscovery};
    use common_meta::rpc::store::PutRequest;
@@ -473,8 +473,10 @@ mod tests {
        let client = create_meta_peer_client();
        let in_memory = client.memory_backend();

+        let frontend_heartbeat_interval =
+            default_distributed_time_constants().frontend_heartbeat_interval;
        let last_activity_ts =
-            current_time_millis() - FRONTEND_HEARTBEAT_INTERVAL_MILLIS as i64 - 1000;
+            current_time_millis() - frontend_heartbeat_interval.as_millis() as i64 - 1000;
        let active_frontend_node = NodeInfo {
            peer: Peer {
                id: 0,
--- a/src/meta-srv/src/failure_detector.rs
+++ b/src/meta-srv/src/failure_detector.rs
@@ -15,7 +15,6 @@
 use std::collections::VecDeque;
 use std::time::Duration;

-use common_meta::distributed_time_constants;
 use serde::{Deserialize, Serialize};

 const FIRST_HEARTBEAT_ESTIMATE_MILLIS: i64 = 1000;
@@ -79,9 +78,7 @@ impl Default for PhiAccrualFailureDetectorOptions {
        Self {
            threshold: 8_f32,
            min_std_deviation: Duration::from_millis(100),
-            acceptable_heartbeat_pause: Duration::from_secs(
-                distributed_time_constants::DATANODE_LEASE_SECS,
-            ),
+            acceptable_heartbeat_pause: Duration::from_secs(10),
        }
    }
 }
--- a/src/meta-srv/src/handler/region_lease_handler.rs
+++ b/src/meta-srv/src/handler/region_lease_handler.rs
@@ -134,7 +134,7 @@ mod test {
    use std::sync::Arc;

    use common_meta::datanode::{RegionManifestInfo, RegionStat, Stat};
-    use common_meta::distributed_time_constants;
+    use common_meta::distributed_time_constants::default_distributed_time_constants;
    use common_meta::key::TableMetadataManager;
    use common_meta::key::table_route::TableRouteValue;
    use common_meta::key::test_utils::new_test_table_info;
@@ -236,7 +236,7 @@ mod test {
        let opening_region_keeper = Arc::new(MemoryRegionKeeper::default());

        let handler = RegionLeaseHandler::new(
-            distributed_time_constants::REGION_LEASE_SECS,
+            default_distributed_time_constants().region_lease.as_secs(),
            table_metadata_manager.clone(),
            opening_region_keeper.clone(),
            None,
@@ -266,7 +266,7 @@ mod test {

        assert_eq!(
            acc.region_lease.as_ref().unwrap().lease_seconds,
-            distributed_time_constants::REGION_LEASE_SECS
+            default_distributed_time_constants().region_lease.as_secs()
        );

        assert_region_lease(
@@ -300,7 +300,7 @@ mod test {

        assert_eq!(
            acc.region_lease.as_ref().unwrap().lease_seconds,
-            distributed_time_constants::REGION_LEASE_SECS
+            default_distributed_time_constants().region_lease.as_secs()
        );

        assert_region_lease(
@@ -379,7 +379,7 @@ mod test {
        });

        let handler = RegionLeaseHandler::new(
-            distributed_time_constants::REGION_LEASE_SECS,
+            default_distributed_time_constants().region_lease.as_secs(),
            table_metadata_manager.clone(),
            Default::default(),
            None,
@@ -461,7 +461,7 @@ mod test {
            ..Default::default()
        });
        let handler = RegionLeaseHandler::new(
-            distributed_time_constants::REGION_LEASE_SECS,
+            default_distributed_time_constants().region_lease.as_secs(),
            table_metadata_manager.clone(),
            Default::default(),
            None,
--- a/src/meta-srv/src/metasrv.rs
+++ b/src/meta-srv/src/metasrv.rs
@@ -27,7 +27,7 @@ use common_event_recorder::EventRecorderOptions;
 use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
 use common_meta::cache_invalidator::CacheInvalidatorRef;
 use common_meta::ddl_manager::DdlManagerRef;
-use common_meta::distributed_time_constants;
+use common_meta::distributed_time_constants::{self, default_distributed_time_constants};
 use common_meta::key::TableMetadataManagerRef;
 use common_meta::key::runtime_switch::RuntimeSwitchManagerRef;
 use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef};
@@ -121,6 +121,27 @@ impl Default for StatsPersistenceOptions {
    }
 }

+#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
+#[serde(default)] // fields missing from the input are taken from `Default::default()`
+pub struct BackendClientOptions { // client settings for the metadata-store backend
+    #[serde(with = "humantime_serde")]
+    pub keep_alive_timeout: Duration, // default: 3s
+    #[serde(with = "humantime_serde")]
+    pub keep_alive_interval: Duration, // default: 10s
+    #[serde(with = "humantime_serde")]
+    pub connect_timeout: Duration, // default: 3s
+}
+
+impl Default for BackendClientOptions {
+    fn default() -> Self { // also the fallback for omitted fields, via `#[serde(default)]` on the struct
+        Self {
+            keep_alive_interval: Duration::from_secs(10), // listed first here; the struct declares `keep_alive_timeout` first
+            keep_alive_timeout: Duration::from_secs(3),
+            connect_timeout: Duration::from_secs(3),
+        }
+    }
+}
+
 #[derive(Clone, PartialEq, Serialize, Deserialize)]
 #[serde(default)]
 pub struct MetasrvOptions {
@@ -136,12 +157,22 @@ pub struct MetasrvOptions {
    /// Only applicable when using PostgreSQL or MySQL as the metadata store
    #[serde(default)]
    pub backend_tls: Option<TlsOption>,
+    /// The backend client options.
+    /// Currently, only applicable when using etcd as the metadata store.
+    #[serde(default)]
+    pub backend_client: BackendClientOptions,
    /// The type of selector.
    pub selector: SelectorType,
    /// Whether to use the memory store.
    pub use_memory_store: bool,
    /// Whether to enable region failover.
    pub enable_region_failover: bool,
+    /// The base heartbeat interval.
+    ///
+    /// This value is used to calculate the distributed time constants for components.
+    /// e.g., the region lease time is `heartbeat_interval * 3 + Duration::from_secs(1)`.
+    #[serde(with = "humantime_serde")]
+    pub heartbeat_interval: Duration,
    /// The delay before starting region failure detection.
    /// This delay helps prevent Metasrv from triggering unnecessary region failovers before all Datanodes are fully started.
    /// Especially useful when the cluster is not deployed with GreptimeDB Operator and maintenance mode is not enabled.
@@ -240,7 +271,9 @@ impl fmt::Debug for MetasrvOptions {
            .field("tracing", &self.tracing)
            .field("backend", &self.backend)
            .field("event_recorder", &self.event_recorder)
-            .field("stats_persistence", &self.stats_persistence);
+            .field("stats_persistence", &self.stats_persistence)
+            .field("heartbeat_interval", &self.heartbeat_interval)
+            .field("backend_client", &self.backend_client);

        #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
        debug_struct.field("meta_table_name", &self.meta_table_name);
@@ -270,6 +303,7 @@ impl Default for MetasrvOptions {
            selector: SelectorType::default(),
            use_memory_store: false,
            enable_region_failover: false,
+            heartbeat_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
            region_failure_detector_initialization_delay: Duration::from_secs(10 * 60),
            allow_region_failover_on_local_wal: false,
            grpc: GrpcOptions {
@@ -307,6 +341,7 @@ impl Default for MetasrvOptions {
            event_recorder: EventRecorderOptions::default(),
            stats_persistence: StatsPersistenceOptions::default(),
            gc: GcSchedulerOptions::default(),
+            backend_client: BackendClientOptions::default(),
        }
    }
 }
@@ -747,7 +782,7 @@ impl Metasrv {
            &DefaultSystemTimer,
            self.meta_peer_client.as_ref(),
            peer_id,
-            Duration::from_secs(distributed_time_constants::DATANODE_LEASE_SECS),
+            default_distributed_time_constants().datanode_lease,
        )
        .await
    }
--- a/src/meta-srv/src/metasrv/builder.rs
+++ b/src/meta-srv/src/metasrv/builder.rs
@@ -29,7 +29,7 @@ use common_meta::ddl::{
    DdlContext, NoopRegionFailureDetectorControl, RegionFailureDetectorControllerRef,
 };
 use common_meta::ddl_manager::{DdlManager, DdlManagerConfiguratorRef};
-use common_meta::distributed_time_constants::{self};
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_meta::key::TableMetadataManager;
 use common_meta::key::flow::FlowMetadataManager;
 use common_meta::key::flow::flow_state::FlowStateManager;
@@ -513,7 +513,7 @@ impl MetasrvBuilder {
            Some(handler_group_builder) => handler_group_builder,
            None => {
                let region_lease_handler = RegionLeaseHandler::new(
-                    distributed_time_constants::REGION_LEASE_SECS,
+                    default_distributed_time_constants().region_lease.as_secs(),
                    table_metadata_manager.clone(),
                    memory_region_keeper.clone(),
                    customized_region_lease_renewer,
--- a/src/meta-srv/src/procedure/region_migration.rs
+++ b/src/meta-srv/src/procedure/region_migration.rs
@@ -921,7 +921,7 @@ mod tests {
    use std::assert_matches::assert_matches;
    use std::sync::Arc;

-    use common_meta::distributed_time_constants::REGION_LEASE_SECS;
+    use common_meta::distributed_time_constants::default_distributed_time_constants;
    use common_meta::instruction::Instruction;
    use common_meta::key::test_utils::new_test_table_info;
    use common_meta::rpc::router::{Region, RegionRoute};
@@ -1192,8 +1192,10 @@ mod tests {
            .run_once()
            .await;

+        let region_lease = default_distributed_time_constants().region_lease.as_secs();
+
        // Ensure it didn't run into the slow path.
-        assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS / 2);
+        assert!(timer.elapsed().as_secs() < region_lease / 2);

        runner.suite.verify_table_metadata().await;
    }
@@ -1539,8 +1541,9 @@ mod tests {
            .run_once()
            .await;

+        let region_lease = default_distributed_time_constants().region_lease.as_secs();
        // Ensure it didn't run into the slow path.
-        assert!(timer.elapsed().as_secs() < REGION_LEASE_SECS);
+        assert!(timer.elapsed().as_secs() < region_lease);
        runner.suite.verify_table_metadata().await;
    }
 }
--- a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs
+++ b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs
@@ -13,11 +13,10 @@
 // limitations under the License.

 use std::any::Any;
-use std::time::Duration;

 use api::v1::meta::MailboxMessage;
 use common_meta::RegionIdent;
-use common_meta::distributed_time_constants::REGION_LEASE_SECS;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_meta::instruction::{Instruction, InstructionReply, SimpleReply};
 use common_procedure::{Context as ProcedureContext, Status};
 use common_telemetry::{info, warn};
@@ -30,9 +29,6 @@ use crate::procedure::region_migration::migration_end::RegionMigrationEnd;
 use crate::procedure::region_migration::{Context, State};
 use crate::service::mailbox::Channel;

-/// Uses lease time of a region as the timeout of closing a downgraded region.
-const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
-
 #[derive(Debug, Serialize, Deserialize)]
 pub struct CloseDowngradedRegion;

@@ -112,7 +108,7 @@ impl CloseDowngradedRegion {
        let ch = Channel::Datanode(downgrade_leader_datanode.id);
        let receiver = ctx
            .mailbox
-            .send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT)
+            .send(&ch, msg, default_distributed_time_constants().region_lease)
            .await?;

        match receiver.await {
--- a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs
+++ b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs
@@ -17,7 +17,7 @@ use std::time::Duration;

 use api::v1::meta::MailboxMessage;
 use common_error::ext::BoxedError;
-use common_meta::distributed_time_constants::REGION_LEASE_SECS;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_meta::instruction::{
    DowngradeRegion, DowngradeRegionReply, DowngradeRegionsReply, Instruction, InstructionReply,
 };
@@ -64,7 +64,7 @@ impl State for DowngradeLeaderRegion {
        let now = Instant::now();
        // Ensures the `leader_region_lease_deadline` must exist after recovering.
        ctx.volatile_ctx
-            .set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
+            .set_leader_region_lease_deadline(default_distributed_time_constants().region_lease);

        match self.downgrade_region_with_retry(ctx).await {
            Ok(_) => {
@@ -277,14 +277,14 @@ impl DowngradeLeaderRegion {
        if let Some(last_connection_at) = last_connection_at {
            let now = current_time_millis();
            let elapsed = now - last_connection_at;
-            let region_lease = Duration::from_secs(REGION_LEASE_SECS);
+            let region_lease = default_distributed_time_constants().region_lease;

            // It's safe to update the region leader lease deadline here because:
            // 1. The old region leader has already been marked as downgraded in metadata,
            //    which means any attempts to renew its lease will be rejected.
            // 2. The pusher disconnect time record only gets removed when the datanode (from_peer)
            //    establishes a new heartbeat connection stream.
-            if elapsed >= (REGION_LEASE_SECS * 1000) as i64 {
+            if elapsed >= (region_lease.as_secs() * 1000) as i64 {
                ctx.volatile_ctx.reset_leader_region_lease_deadline();
                info!(
                    "Datanode {}({}) has been disconnected for longer than the region lease period ({:?}), reset leader region lease deadline to None, region: {:?}",
@@ -697,7 +697,8 @@ mod tests {
        let procedure_ctx = new_procedure_context();
        let (next, _) = state.next(&mut ctx, &procedure_ctx).await.unwrap();
        let elapsed = timer.elapsed().as_secs();
-        assert!(elapsed < REGION_LEASE_SECS / 2);
+        let region_lease = default_distributed_time_constants().region_lease.as_secs();
+        assert!(elapsed < region_lease / 2);
        assert_eq!(
            ctx.volatile_ctx
                .leader_region_last_entry_ids
--- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs
+++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs
@@ -14,11 +14,10 @@

 use std::any::Any;
 use std::ops::Div;
-use std::time::Duration;

 use api::v1::meta::MailboxMessage;
 use common_meta::RegionIdent;
-use common_meta::distributed_time_constants::REGION_LEASE_SECS;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
 use common_meta::key::datanode_table::RegionInfo;
 use common_procedure::{Context as ProcedureContext, Status};
@@ -33,9 +32,6 @@ use crate::procedure::region_migration::flush_leader_region::PreFlushRegion;
 use crate::procedure::region_migration::{Context, State};
 use crate::service::mailbox::Channel;

-/// Uses lease time of a region as the timeout of opening a candidate region.
-const OPEN_CANDIDATE_REGION_TIMEOUT: Duration = Duration::from_secs(REGION_LEASE_SECS);
-
 #[derive(Debug, Serialize, Deserialize)]
 pub struct OpenCandidateRegion;

@@ -157,7 +153,9 @@ impl OpenCandidateRegion {
                .context(error::ExceededDeadlineSnafu {
                    operation: "Open candidate region",
                })?;
-        let operation_timeout = operation_timeout.div(2).max(OPEN_CANDIDATE_REGION_TIMEOUT);
+        let operation_timeout = operation_timeout
+            .div(2)
+            .max(default_distributed_time_constants().region_lease);
        let ch = Channel::Datanode(candidate.id);
        let now = Instant::now();
        let receiver = ctx.mailbox.send(&ch, msg, operation_timeout).await?;
--- a/src/meta-srv/src/service/heartbeat.rs
+++ b/src/meta-srv/src/service/heartbeat.rs
@@ -99,6 +99,7 @@ impl heartbeat_server::Heartbeat for Metasrv {
                            error!("Client disconnected: broken pipe");
                            break;
                        }
+                        error!(err; "Sending heartbeat response error");

                        if tx.send(Err(err)).await.is_err() {
                            info!("ReceiverStream was dropped; shutting down");
--- a/src/meta-srv/src/utils/etcd.rs
+++ b/src/meta-srv/src/utils/etcd.rs
@@ -12,17 +12,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use common_meta::distributed_time_constants::default_etcd_client_options;
 use common_meta::kv_backend::etcd::create_etcd_tls_options;
-use etcd_client::Client;
+use etcd_client::{Client, ConnectOptions};
 use servers::tls::{TlsMode, TlsOption};
 use snafu::ResultExt;

 use crate::error::{self, BuildTlsOptionsSnafu, Result};
+use crate::metasrv::BackendClientOptions;

 /// Creates an etcd client with TLS configuration.
 pub async fn create_etcd_client_with_tls(
    store_addrs: &[String],
+    client_options: &BackendClientOptions,
    tls_config: Option<&TlsOption>,
 ) -> Result<Client> {
    let etcd_endpoints = store_addrs
@@ -31,7 +32,12 @@ pub async fn create_etcd_client_with_tls(
        .filter(|x| !x.is_empty())
        .collect::<Vec<_>>();

-    let mut connect_options = default_etcd_client_options();
+    let mut connect_options = ConnectOptions::new()
+        .with_keep_alive_while_idle(true)
+        .with_keep_alive(
+            client_options.keep_alive_interval,
+            client_options.keep_alive_timeout,
+        );
    if let Some(tls_config) = tls_config
        && let Some(tls_options) = create_etcd_tls_options(&convert_tls_option(tls_config))
            .context(BuildTlsOptionsSnafu)?
--- a/src/mito-codec/src/index.rs
+++ b/src/mito-codec/src/index.rs
@@ -48,7 +48,7 @@ impl IndexValueCodec {
    ) -> Result<()> {
        ensure!(!value.is_null(), IndexEncodeNullSnafu);

-        if field.data_type().is_string() {
+        if field.encode_data_type().is_string() {
            let value = value
                .try_into_string()
                .context(FieldTypeMismatchSnafu)?
--- a/src/mito-codec/src/row_converter/dense.rs
+++ b/src/mito-codec/src/row_converter/dense.rs
@@ -57,15 +57,20 @@ impl SortField {
        &self.data_type
    }

-    pub fn estimated_size(&self) -> usize {
+    /// Returns the physical data type used to encode the field.
+    ///
+    /// For example, a dictionary field will be encoded as its value type.
+    pub fn encode_data_type(&self) -> &ConcreteDataType {
        match &self.data_type {
-            ConcreteDataType::Dictionary(dict_type) => {
-                Self::estimated_size_by_type(dict_type.value_type())
-            }
-            data_type => Self::estimated_size_by_type(data_type),
+            ConcreteDataType::Dictionary(dict_type) => dict_type.value_type(),
+            _ => &self.data_type,
        }
    }

+    pub fn estimated_size(&self) -> usize {
+        Self::estimated_size_by_type(self.encode_data_type())
+    }
+
    fn estimated_size_by_type(data_type: &ConcreteDataType) -> usize {
        match data_type {
            ConcreteDataType::Boolean(_) => 2,
@@ -98,12 +103,7 @@ impl SortField {
        serializer: &mut Serializer<&mut Vec<u8>>,
        value: &ValueRef,
    ) -> Result<()> {
-        match self.data_type() {
-            ConcreteDataType::Dictionary(dict_type) => {
-                Self::serialize_by_type(dict_type.value_type(), serializer, value)
-            }
-            data_type => Self::serialize_by_type(data_type, serializer, value),
-        }
+        Self::serialize_by_type(self.encode_data_type(), serializer, value)
    }

    fn serialize_by_type(
@@ -194,12 +194,7 @@ impl SortField {

    /// Deserialize a value from the deserializer.
    pub fn deserialize<B: Buf>(&self, deserializer: &mut Deserializer<B>) -> Result<Value> {
-        match &self.data_type {
-            ConcreteDataType::Dictionary(dict_type) => {
-                Self::deserialize_by_type(dict_type.value_type(), deserializer)
-            }
-            data_type => Self::deserialize_by_type(data_type, deserializer),
-        }
+        Self::deserialize_by_type(self.encode_data_type(), deserializer)
    }

    fn deserialize_by_type<B: Buf>(
@@ -301,12 +296,7 @@ impl SortField {
            return Ok(1);
        }

-        match &self.data_type {
-            ConcreteDataType::Dictionary(dict_type) => {
-                Self::skip_deserialize_by_type(dict_type.value_type(), bytes, deserializer)
-            }
-            data_type => Self::skip_deserialize_by_type(data_type, bytes, deserializer),
-        }
+        Self::skip_deserialize_by_type(self.encode_data_type(), bytes, deserializer)
    }

    fn skip_deserialize_by_type(
--- a/src/mito2/src/compaction/task.rs
+++ b/src/mito2/src/compaction/task.rs
@@ -25,7 +25,7 @@ use tokio::sync::mpsc;
 use crate::compaction::compactor::{CompactionRegion, Compactor};
 use crate::compaction::memory_manager::{CompactionMemoryGuard, CompactionMemoryManager};
 use crate::compaction::picker::{CompactionTask, PickerOutput};
-use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu, MemoryAcquireFailedSnafu};
+use crate::error::{CompactRegionSnafu, CompactionMemoryExhaustedSnafu};
 use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
 use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_MEMORY_WAIT, COMPACTION_STAGE_ELAPSED};
 use crate::region::RegionRoleState;
@@ -95,80 +95,16 @@ impl CompactionTaskImpl {
    async fn acquire_memory_with_policy(&self) -> error::Result<CompactionMemoryGuard> {
        let region_id = self.compaction_region.region_id;
        let requested_bytes = self.estimated_memory_bytes;
-        let limit_bytes = self.memory_manager.limit_bytes();
+        let policy = self.memory_policy;

-        if limit_bytes > 0 && requested_bytes > limit_bytes {
-            warn!(
-                "Compaction for region {} requires {} bytes but limit is {} bytes; cannot satisfy request",
-                region_id, requested_bytes, limit_bytes
-            );
-            return Err(CompactionMemoryExhaustedSnafu {
+        let _timer = COMPACTION_MEMORY_WAIT.start_timer();
+        self.memory_manager
+            .acquire_with_policy(requested_bytes, policy)
+            .await
+            .context(CompactionMemoryExhaustedSnafu {
                region_id,
-                required_bytes: requested_bytes,
-                limit_bytes,
-                policy: "exceed_limit".to_string(),
-            }
-            .build());
-        }
-
-        match self.memory_policy {
-            OnExhaustedPolicy::Wait {
-                timeout: wait_timeout,
-            } => {
-                let timer = COMPACTION_MEMORY_WAIT.start_timer();
-
-                match tokio::time::timeout(
-                    wait_timeout,
-                    self.memory_manager.acquire(requested_bytes),
-                )
-                .await
-                {
-                    Ok(Ok(guard)) => {
-                        timer.observe_duration();
-                        Ok(guard)
-                    }
-                    Ok(Err(e)) => {
-                        timer.observe_duration();
-                        Err(e).with_context(|_| MemoryAcquireFailedSnafu {
-                            region_id,
-                            policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
-                        })
-                    }
-                    Err(_) => {
-                        timer.observe_duration();
-                        warn!(
-                            "Compaction for region {} waited {:?} for {} bytes but timed out",
-                            region_id, wait_timeout, requested_bytes
-                        );
-                        CompactionMemoryExhaustedSnafu {
-                            region_id,
-                            required_bytes: requested_bytes,
-                            limit_bytes,
-                            policy: format!("wait_timeout({}ms)", wait_timeout.as_millis()),
-                        }
-                        .fail()
-                    }
-                }
-            }
-            OnExhaustedPolicy::Fail => {
-                // Try to acquire, fail immediately if not available
-                self.memory_manager
-                    .try_acquire(requested_bytes)
-                    .ok_or_else(|| {
-                        warn!(
-                            "Compaction memory exhausted for region {} (policy=fail, need {} bytes, limit {} bytes)",
-                            region_id, requested_bytes, limit_bytes
-                        );
-                        CompactionMemoryExhaustedSnafu {
-                            region_id,
-                            required_bytes: requested_bytes,
-                            limit_bytes,
-                            policy: "fail".to_string(),
-                        }
-                        .build()
-                    })
-            }
-        }
+                policy: format!("{policy:?}"),
+            })
    }

    /// Remove expired ssts files, update manifest immediately
--- a/src/mito2/src/engine/basic_test.rs
+++ b/src/mito2/src/engine/basic_test.rs
@@ -872,9 +872,9 @@ StorageSstEntry { file_path: "test/11_0000000002/index/<file_id>.puffin", file_s
 StorageSstEntry { file_path: "test/22_0000000042/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
 StorageSstEntry { file_path: "test/22_0000000042/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }"#).await;
    test_list_ssts_with_format(true, r#"
-ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
-ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
-ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(292), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, 
+ManifestSstEntry { table_dir: "test/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000001/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000001/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
+ManifestSstEntry { table_dir: "test/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/11_0000000002/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/11_0000000002/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
+ManifestSstEntry { table_dir: "test/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test/22_0000000042/<file_id>.parquet", file_size: 2837, index_file_path: Some("test/22_0000000042/index/<file_id>.puffin"), index_file_size: Some(250), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9000::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }"#, 
 r#"
 StorageSstEntry { file_path: "test/11_0000000001/<file_id>.parquet", file_size: None, last_modified_ms: None, node_id: None }
 StorageSstEntry { file_path: "test/11_0000000001/index/<file_id>.puffin", file_size: None, last_modified_ms: None, node_id: None }
--- a/src/mito2/src/error.rs
+++ b/src/mito2/src/error.rs
@@ -1042,20 +1042,8 @@ pub enum Error {
    #[snafu(display("Manual compaction is override by following operations."))]
    ManualCompactionOverride {},

-    #[snafu(display(
-        "Compaction memory limit exceeded for region {region_id}: required {required_bytes} bytes, limit {limit_bytes} bytes (policy: {policy})",
-    ))]
+    #[snafu(display("Compaction memory exhausted for region {region_id} (policy: {policy})",))]
    CompactionMemoryExhausted {
-        region_id: RegionId,
-        required_bytes: u64,
-        limit_bytes: u64,
-        policy: String,
-        #[snafu(implicit)]
-        location: Location,
-    },
-
-    #[snafu(display("Failed to acquire memory for region {region_id} (policy: {policy})"))]
-    MemoryAcquireFailed {
        region_id: RegionId,
        policy: String,
        #[snafu(source)]
@@ -1359,9 +1347,7 @@ impl ErrorExt for Error {

            ManualCompactionOverride {} => StatusCode::Cancelled,

-            CompactionMemoryExhausted { .. } => StatusCode::RuntimeResourcesExhausted,
-
-            MemoryAcquireFailed { source, .. } => source.status_code(),
+            CompactionMemoryExhausted { source, .. } => source.status_code(),

            IncompatibleWalProviderChange { .. } => StatusCode::InvalidArguments,

--- a/src/mito2/src/flush.rs
+++ b/src/mito2/src/flush.rs
@@ -801,7 +801,8 @@ fn memtable_flat_sources(
            if last_iter_rows > min_flush_rows {
                let maybe_dedup = merge_and_dedup(
                    &schema,
-                    options,
+                    options.append_mode,
+                    options.merge_mode(),
                    field_column_start,
                    std::mem::replace(&mut input_iters, Vec::with_capacity(num_ranges)),
                )?;
@@ -813,7 +814,13 @@ fn memtable_flat_sources(

        // Handle remaining iters.
        if !input_iters.is_empty() {
-            let maybe_dedup = merge_and_dedup(&schema, options, field_column_start, input_iters)?;
+            let maybe_dedup = merge_and_dedup(
+                &schema,
+                options.append_mode,
+                options.merge_mode(),
+                field_column_start,
+                input_iters,
+            )?;

            flat_sources.sources.push(FlatSource::Iter(maybe_dedup));
        }
@@ -822,19 +829,64 @@ fn memtable_flat_sources(
    Ok(flat_sources)
 }

-fn merge_and_dedup(
+/// Merges multiple record batch iterators and applies deduplication based on the specified mode.
+///
+/// This function is used during the flush process to combine data from multiple memtable ranges
+/// into a single stream while handling duplicate records according to the configured merge strategy.
+///
+/// # Arguments
+///
+/// * `schema` - The Arrow schema reference that defines the structure of the record batches
+/// * `append_mode` - When true, no deduplication is performed and all records are preserved.
+///   This is used for append-only workloads where duplicate handling is not required.
+/// * `merge_mode` - The strategy used for deduplication when not in append mode:
+///   - `MergeMode::LastRow`: Keeps the last record for each primary key
+///   - `MergeMode::LastNonNull`: Keeps the last non-null values for each field
+/// * `field_column_start` - The starting column index for fields in the record batch.
+///   Used when the merge mode is `MergeMode::LastNonNull` to identify which columns
+///   contain field values versus primary key columns.
+/// * `input_iters` - A vector of record batch iterators to be merged and deduplicated
+///
+/// # Returns
+///
+/// Returns a boxed record batch iterator that yields the merged and potentially deduplicated
+/// record batches.
+///
+/// # Behavior
+///
+/// 1. Creates a `FlatMergeIterator` to merge all input iterators in sorted order based on
+///    primary key and timestamp
+/// 2. If `append_mode` is true, returns the merge iterator directly without deduplication
+/// 3. If `append_mode` is false, wraps the merge iterator with a `FlatDedupIterator` that
+///    applies the specified merge mode:
+///    - `LastRow`: Removes duplicate rows, keeping only the last one
+///    - `LastNonNull`: Removes duplicates but preserves the last non-null value for each field
+///
+/// # Examples
+///
+/// ```ignore
+/// let merged_iter = merge_and_dedup(
+///     &schema,
+///     false,  // not append mode, apply dedup
+///     MergeMode::LastRow,
+///     2,  // fields start at column 2 after primary key columns
+///     vec![iter1, iter2, iter3],
+/// )?;
+/// ```
+pub fn merge_and_dedup(
    schema: &SchemaRef,
-    options: &RegionOptions,
+    append_mode: bool,
+    merge_mode: MergeMode,
    field_column_start: usize,
    input_iters: Vec<BoxedRecordBatchIterator>,
 ) -> Result<BoxedRecordBatchIterator> {
    let merge_iter = FlatMergeIterator::new(schema.clone(), input_iters, DEFAULT_READ_BATCH_SIZE)?;
-    let maybe_dedup = if options.append_mode {
+    let maybe_dedup = if append_mode {
        // No dedup in append mode
        Box::new(merge_iter) as _
    } else {
        // Dedup according to merge mode.
-        match options.merge_mode() {
+        match merge_mode {
            MergeMode::LastRow => {
                Box::new(FlatDedupIterator::new(merge_iter, FlatLastRow::new(false))) as _
            }
--- a/src/mito2/src/gc.rs
+++ b/src/mito2/src/gc.rs
@@ -540,7 +540,7 @@ impl LocalGcWorker {
    fn filter_deletable_files(
        &self,
        entries: Vec<Entry>,
-        in_use_filenames: &HashSet<&FileId>,
+        in_use_filenames: &HashSet<FileId>,
        may_linger_filenames: &HashSet<&FileId>,
        eligible_for_removal: &HashSet<&FileId>,
        unknown_file_may_linger_until: chrono::DateTime<chrono::Utc>,
@@ -641,9 +641,6 @@ impl LocalGcWorker {
            .flatten()
            .collect::<HashSet<_>>();

-        // in use filenames, include sst and index files
-        let in_use_filenames = in_used.iter().collect::<HashSet<_>>();
-
        // When full_file_listing is false, skip expensive list operations and only delete
        // files that are tracked in recently_removed_files
        if !self.full_file_listing {
@@ -653,7 +650,7 @@ impl LocalGcWorker {
            // 3. Have passed the lingering time
            let files_to_delete: Vec<FileId> = eligible_for_removal
                .iter()
-                .filter(|file_id| !in_use_filenames.contains(*file_id))
+                .filter(|file_id| !in_used.contains(*file_id))
                .map(|&f| *f)
                .collect();

@@ -672,7 +669,7 @@ impl LocalGcWorker {
        let (all_unused_files_ready_for_delete, all_in_exist_linger_files) = self
            .filter_deletable_files(
                all_entries,
-                &in_use_filenames,
+                in_used,
                &may_linger_filenames,
                &eligible_for_removal,
                unknown_file_may_linger_until,
--- a/src/mito2/src/sst/parquet.rs
+++ b/src/mito2/src/sst/parquet.rs
@@ -95,21 +95,32 @@ mod tests {
    use std::collections::HashSet;
    use std::sync::Arc;

-    use api::v1::OpType;
+    use api::v1::{OpType, SemanticType};
+    use common_function::function::FunctionRef;
+    use common_function::function_factory::ScalarFunctionFactory;
+    use common_function::scalars::matches::MatchesFunction;
+    use common_function::scalars::matches_term::MatchesTermFunction;
    use common_time::Timestamp;
    use datafusion_common::{Column, ScalarValue};
+    use datafusion_expr::expr::ScalarFunction;
    use datafusion_expr::{BinaryExpr, Expr, Literal, Operator, col, lit};
    use datatypes::arrow;
    use datatypes::arrow::array::{
-        ArrayRef, BinaryDictionaryBuilder, RecordBatch, StringDictionaryBuilder,
+        ArrayRef, BinaryDictionaryBuilder, RecordBatch, StringArray, StringDictionaryBuilder,
        TimestampMillisecondArray, UInt8Array, UInt64Array,
    };
    use datatypes::arrow::datatypes::{DataType, Field, Schema, UInt32Type};
+    use datatypes::prelude::ConcreteDataType;
+    use datatypes::schema::{FulltextAnalyzer, FulltextBackend, FulltextOptions};
+    use object_store::ObjectStore;
    use parquet::arrow::AsyncArrowWriter;
    use parquet::basic::{Compression, Encoding, ZstdLevel};
    use parquet::file::metadata::KeyValue;
    use parquet::file::properties::WriterProperties;
+    use store_api::codec::PrimaryKeyEncoding;
+    use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
    use store_api::region_request::PathType;
+    use store_api::storage::{ColumnSchema, RegionId};
    use table::predicate::Predicate;
    use tokio_util::compat::FuturesAsyncWriteCompatExt;

@@ -122,6 +133,7 @@ mod tests {
    use crate::sst::file::{FileHandle, FileMeta, RegionFileId, RegionIndexId};
    use crate::sst::file_purger::NoopFilePurger;
    use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplierBuilder;
+    use crate::sst::index::fulltext_index::applier::builder::FulltextIndexApplierBuilder;
    use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
    use crate::sst::index::{IndexBuildType, Indexer, IndexerBuilder, IndexerBuilderImpl};
    use crate::sst::parquet::format::PrimaryKeyWriteFormat;
@@ -133,11 +145,13 @@ mod tests {
    use crate::test_util::sst_util::{
        assert_parquet_metadata_eq, build_test_binary_test_region_metadata, new_batch_by_range,
        new_batch_with_binary, new_batch_with_custom_sequence, new_primary_key, new_source,
-        sst_file_handle, sst_file_handle_with_file_id, sst_region_metadata,
+        new_sparse_primary_key, sst_file_handle, sst_file_handle_with_file_id, sst_region_metadata,
+        sst_region_metadata_with_encoding,
    };
    use crate::test_util::{TestEnv, check_reader_result};

    const FILE_DIR: &str = "/";
+    const REGION_ID: RegionId = RegionId::new(0, 0);

    #[derive(Clone)]
    struct FixedPathProvider {
@@ -1064,6 +1078,154 @@ mod tests {
        FlatSource::Iter(Box::new(batches.into_iter().map(Ok)))
    }

+    /// Builds a flat-format `RecordBatch` of `end - start` rows sharing one sparse-encoded primary key.
+    /// Unlike `new_record_batch_by_range`, no per-tag columns are added; `field_0` and `ts` take `start..end`.
+    fn new_record_batch_by_range_sparse(
+        tags: &[&str],
+        start: usize,
+        end: usize,
+        metadata: &Arc<RegionMetadata>,
+    ) -> RecordBatch {
+        assert!(end >= start);
+        let flat_schema = to_flat_sst_arrow_schema(
+            metadata,
+            &FlatSchemaOptions::from_encoding(PrimaryKeyEncoding::Sparse),
+        );
+
+        let num_rows = end - start;
+        let mut columns: Vec<ArrayRef> = Vec::new();
+
+        // NOTE: Individual primary key columns (tag_0, tag_1) are NOT included in sparse format
+
+        // Add field column (field_0), valued start..end
+        let field_values: Vec<u64> = (start..end).map(|v| v as u64).collect();
+        columns.push(Arc::new(UInt64Array::from(field_values)) as ArrayRef);
+
+        // Add time index column (ts), valued start..end
+        let timestamps: Vec<i64> = (start..end).map(|v| v as i64).collect();
+        columns.push(Arc::new(TimestampMillisecondArray::from(timestamps)) as ArrayRef);
+
+        // Add encoded primary key column using sparse encoding
+        let table_id = 1u32; // Test table ID
+        let tsid = 100u64; // TSID shared by every row in this batch
+        let pk = new_sparse_primary_key(tags, metadata, table_id, tsid);
+
+        let mut pk_builder = BinaryDictionaryBuilder::<UInt32Type>::new();
+        for _ in 0..num_rows {
+            pk_builder.append(&pk).unwrap();
+        }
+        columns.push(Arc::new(pk_builder.finish()) as ArrayRef);
+
+        // Add sequence column (constant 1000 for every row)
+        columns.push(Arc::new(UInt64Array::from_value(1000, num_rows)) as ArrayRef);
+
+        // Add op_type column (every row is a Put)
+        columns.push(Arc::new(UInt8Array::from_value(OpType::Put as u8, num_rows)) as ArrayRef);
+
+        RecordBatch::try_new(flat_schema, columns).unwrap()
+    }
+
+    /// Builds a flush-type `IndexerBuilderImpl` for tests (write cache off, inverted index `segment_row_count` = 1).
+    fn create_test_indexer_builder(
+        env: &TestEnv,
+        object_store: ObjectStore,
+        file_path: RegionFilePathFactory,
+        metadata: Arc<RegionMetadata>,
+        row_group_size: usize,
+    ) -> IndexerBuilderImpl {
+        let puffin_manager = env.get_puffin_manager().build(object_store, file_path);
+        let intermediate_manager = env.get_intermediate_manager();
+
+        IndexerBuilderImpl {
+            build_type: IndexBuildType::Flush,
+            metadata,
+            row_group_size,
+            puffin_manager,
+            write_cache_enabled: false,
+            intermediate_manager,
+            index_options: IndexOptions {
+                inverted_index: InvertedIndexOptions {
+                    segment_row_count: 1,
+                    ..Default::default()
+                },
+            },
+            inverted_index_config: Default::default(),
+            fulltext_index_config: Default::default(),
+            bloom_filter_index_config: Default::default(),
+        }
+    }
+
+    /// Writes `flat_source` with a `ParquetWriter` (flush metrics) and returns the first `SstInfo`; unwraps on write error.
+    async fn write_flat_sst(
+        object_store: ObjectStore,
+        metadata: Arc<RegionMetadata>,
+        indexer_builder: IndexerBuilderImpl,
+        file_path: RegionFilePathFactory,
+        flat_source: FlatSource,
+        write_opts: &WriteOptions,
+    ) -> SstInfo {
+        let mut metrics = Metrics::new(WriteType::Flush);
+        let mut writer = ParquetWriter::new_with_object_store(
+            object_store,
+            metadata,
+            IndexConfig::default(),
+            indexer_builder,
+            file_path,
+            &mut metrics,
+        )
+        .await;
+
+        writer
+            .write_all_flat(flat_source, write_opts)
+            .await
+            .unwrap()
+            .remove(0)
+    }
+
+    /// Builds a level-0 `FileHandle` from `info`, taking region id and partition expr from `metadata`, with a `NoopFilePurger`.
+    fn create_file_handle_from_sst_info(
+        info: &SstInfo,
+        metadata: &Arc<RegionMetadata>,
+    ) -> FileHandle {
+        FileHandle::new(
+            FileMeta {
+                region_id: metadata.region_id,
+                file_id: info.file_id,
+                time_range: info.time_range,
+                level: 0,
+                file_size: info.file_size,
+                max_row_group_uncompressed_size: info.max_row_group_uncompressed_size,
+                available_indexes: info.index_metadata.build_available_indexes(),
+                indexes: info.index_metadata.build_indexes(),
+                index_file_size: info.index_metadata.file_size,
+                index_version: 0,
+                num_row_groups: info.num_row_groups,
+                num_rows: info.num_rows as u64,
+                sequence: None,
+                partition_expr: match &metadata.partition_expr {
+                    Some(json_str) => partition::expr::PartitionExpr::from_json_str(json_str)
+                        .expect("partition expression should be valid JSON"),
+                    None => None,
+                },
+                num_series: 0,
+            },
+            Arc::new(NoopFilePurger),
+        )
+    }
+
+    /// Builds a shared `CacheManager` for tests with each index/puffin size option below set to `1024 * 1024`.
+    fn create_test_cache() -> Arc<CacheManager> {
+        Arc::new(
+            CacheManager::builder()
+                .index_result_cache_size(1024 * 1024)
+                .index_metadata_size(1024 * 1024)
+                .index_content_page_size(1024 * 1024)
+                .index_content_size(1024 * 1024)
+                .puffin_metadata_size(1024 * 1024)
+                .build(),
+        )
+    }
+
    #[tokio::test]
    async fn test_write_flat_with_index() {
        let mut env = TestEnv::new().await;
@@ -1238,4 +1400,709 @@ mod tests {
            assert_eq!(*override_batch, expected_batch);
        }
    }
+
+    #[tokio::test]
+    async fn test_write_flat_read_with_inverted_index() {
+        let mut env = TestEnv::new().await;
+        let object_store = env.init_object_store_manager();
+        let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
+        let metadata = Arc::new(sst_region_metadata());
+        let row_group_size = 100;
+
+        // Create flat format RecordBatches with non-overlapping timestamp ranges
+        // Each batch has 50 rows, so every two batches fill one row group (row_group_size = 100)
+        // Data: ts tag_0 tag_1
+        // RG 0:   0-50  [a, d]
+        // RG 0:  50-100 [b, d]
+        // RG 1: 100-150 [c, d]
+        // RG 1: 150-200 [c, f]
+        let flat_batches = vec![
+            new_record_batch_by_range(&["a", "d"], 0, 50),
+            new_record_batch_by_range(&["b", "d"], 50, 100),
+            new_record_batch_by_range(&["c", "d"], 100, 150),
+            new_record_batch_by_range(&["c", "f"], 150, 200),
+        ];
+
+        let flat_source = new_flat_source_from_record_batches(flat_batches);
+
+        let write_opts = WriteOptions {
+            row_group_size,
+            ..Default::default()
+        };
+
+        let indexer_builder = create_test_indexer_builder(
+            &env,
+            object_store.clone(),
+            file_path.clone(),
+            metadata.clone(),
+            row_group_size,
+        );
+
+        let info = write_flat_sst(
+            object_store.clone(),
+            metadata.clone(),
+            indexer_builder,
+            file_path.clone(),
+            flat_source,
+            &write_opts,
+        )
+        .await;
+        assert_eq!(200, info.num_rows);
+        assert!(info.file_size > 0);
+        assert!(info.index_metadata.file_size > 0);
+
+        let handle = create_file_handle_from_sst_info(&info, &metadata);
+
+        let cache = create_test_cache();
+
+        // Filter by tag_0 = "b"
+        // Expected: only the 50 rows with tag_0="b" (second half of RG 0)
+        let preds = vec![col("tag_0").eq(lit("b"))];
+        let inverted_index_applier = InvertedIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            &metadata,
+            HashSet::from_iter([0]),
+            env.get_puffin_manager(),
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_inverted_index_cache(cache.inverted_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .inverted_index_appliers([inverted_index_applier.clone(), None])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // Verify selection contains only RG 0 (tag_0="b", ts 0-100)
+        assert_eq!(selection.row_group_count(), 1);
+        assert_eq!(50, selection.get(0).unwrap().row_count());
+
+        // Verify filtering metrics: expects 1 RG pruned by min-max and 50 rows pruned by the inverted index
+        assert_eq!(metrics.filter_metrics.rg_total, 2);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 1);
+        assert_eq!(metrics.filter_metrics.rg_inverted_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rows_inverted_filtered, 50);
+    }
+
+    #[tokio::test]
+    async fn test_write_flat_read_with_bloom_filter() {
+        let mut env = TestEnv::new().await;
+        let object_store = env.init_object_store_manager();
+        let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
+        let metadata = Arc::new(sst_region_metadata());
+        let row_group_size = 100;
+
+        // Create flat format RecordBatches with non-overlapping timestamp ranges
+        // Each batch has 50 rows, so every two batches fill one row group (row_group_size = 100)
+        // Data: ts tag_0 tag_1
+        // RG 0:   0-50  [a, d]
+        // RG 0:  50-100 [b, e]
+        // RG 1: 100-150 [c, d]
+        // RG 1: 150-200 [c, f]
+        let flat_batches = vec![
+            new_record_batch_by_range(&["a", "d"], 0, 50),
+            new_record_batch_by_range(&["b", "e"], 50, 100),
+            new_record_batch_by_range(&["c", "d"], 100, 150),
+            new_record_batch_by_range(&["c", "f"], 150, 200),
+        ];
+
+        let flat_source = new_flat_source_from_record_batches(flat_batches);
+
+        let write_opts = WriteOptions {
+            row_group_size,
+            ..Default::default()
+        };
+
+        let indexer_builder = create_test_indexer_builder(
+            &env,
+            object_store.clone(),
+            file_path.clone(),
+            metadata.clone(),
+            row_group_size,
+        );
+
+        let info = write_flat_sst(
+            object_store.clone(),
+            metadata.clone(),
+            indexer_builder,
+            file_path.clone(),
+            flat_source,
+            &write_opts,
+        )
+        .await;
+        assert_eq!(200, info.num_rows);
+        assert!(info.file_size > 0);
+        assert!(info.index_metadata.file_size > 0);
+
+        let handle = create_file_handle_from_sst_info(&info, &metadata);
+
+        let cache = create_test_cache();
+
+        // Filter by ts >= 50 AND ts < 200 AND tag_1 = "d"
+        // Expected: RG 0 (ts 0-100) and RG 1 (ts 100-200), both have tag_1="d"
+        let preds = vec![
+            col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(50), None))),
+            col("ts").lt(lit(ScalarValue::TimestampMillisecond(Some(200), None))),
+            col("tag_1").eq(lit("d")),
+        ];
+        let bloom_filter_applier = BloomFilterIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            &metadata,
+            env.get_puffin_manager(),
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_bloom_filter_index_cache(cache.bloom_filter_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // Verify selection contains RG 0 and RG 1, 50 rows selected in each
+        assert_eq!(selection.row_group_count(), 2);
+        assert_eq!(50, selection.get(0).unwrap().row_count());
+        assert_eq!(50, selection.get(1).unwrap().row_count());
+
+        // Verify filtering metrics: expects no RG pruned and 100 rows pruned by the bloom filter
+        assert_eq!(metrics.filter_metrics.rg_total, 2);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rg_bloom_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rows_bloom_filtered, 100);
+    }
+
+    #[tokio::test]
+    async fn test_write_flat_read_with_inverted_index_sparse() {
+        common_telemetry::init_default_ut_logging();
+
+        let mut env = TestEnv::new().await;
+        let object_store = env.init_object_store_manager();
+        let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
+        let metadata = Arc::new(sst_region_metadata_with_encoding(
+            PrimaryKeyEncoding::Sparse,
+        ));
+        let row_group_size = 100;
+
+        // Create flat format RecordBatches with non-overlapping timestamp ranges
+        // Each batch has 50 rows, so every two batches fill one row group (row_group_size = 100)
+        // Data: ts tag_0 tag_1
+        // RG 0:   0-50  [a, d]
+        // RG 0:  50-100 [b, d]
+        // RG 1: 100-150 [c, d]
+        // RG 1: 150-200 [c, f]
+        let flat_batches = vec![
+            new_record_batch_by_range_sparse(&["a", "d"], 0, 50, &metadata),
+            new_record_batch_by_range_sparse(&["b", "d"], 50, 100, &metadata),
+            new_record_batch_by_range_sparse(&["c", "d"], 100, 150, &metadata),
+            new_record_batch_by_range_sparse(&["c", "f"], 150, 200, &metadata),
+        ];
+
+        let flat_source = new_flat_source_from_record_batches(flat_batches);
+
+        let write_opts = WriteOptions {
+            row_group_size,
+            ..Default::default()
+        };
+
+        let indexer_builder = create_test_indexer_builder(
+            &env,
+            object_store.clone(),
+            file_path.clone(),
+            metadata.clone(),
+            row_group_size,
+        );
+
+        let info = write_flat_sst(
+            object_store.clone(),
+            metadata.clone(),
+            indexer_builder,
+            file_path.clone(),
+            flat_source,
+            &write_opts,
+        )
+        .await;
+        assert_eq!(200, info.num_rows);
+        assert!(info.file_size > 0);
+        assert!(info.index_metadata.file_size > 0);
+
+        let handle = create_file_handle_from_sst_info(&info, &metadata);
+
+        let cache = create_test_cache();
+
+        // Filter by tag_0 = "b"
+        // Expected: only the 50 rows with tag_0="b" (second half of RG 0)
+        let preds = vec![col("tag_0").eq(lit("b"))];
+        let inverted_index_applier = InvertedIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            &metadata,
+            HashSet::from_iter([0]),
+            env.get_puffin_manager(),
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_inverted_index_cache(cache.inverted_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .inverted_index_appliers([inverted_index_applier.clone(), None])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // RG 0 has 50 matching rows (tag_0="b")
+        assert_eq!(selection.row_group_count(), 1);
+        assert_eq!(50, selection.get(0).unwrap().row_count());
+
+        // Verify filtering metrics
+        // Note: With sparse encoding, tag columns aren't stored separately,
+        // so minmax filtering on tags doesn't work (only inverted index)
+        assert_eq!(metrics.filter_metrics.rg_total, 2);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0); // No minmax stats for tags in sparse format
+        assert_eq!(metrics.filter_metrics.rg_inverted_filtered, 1);
+        assert_eq!(metrics.filter_metrics.rows_inverted_filtered, 150);
+    }
+
+    #[tokio::test]
+    async fn test_write_flat_read_with_bloom_filter_sparse() {
+        let mut env = TestEnv::new().await;
+        let object_store = env.init_object_store_manager();
+        let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
+        let metadata = Arc::new(sst_region_metadata_with_encoding(
+            PrimaryKeyEncoding::Sparse,
+        ));
+        let row_group_size = 100;
+
+        // Create flat format RecordBatches with non-overlapping timestamp ranges
+        // Each batch has 50 rows, so every two batches fill one row group (row_group_size = 100)
+        // Data: ts tag_0 tag_1
+        // RG 0:   0-50  [a, d]
+        // RG 0:  50-100 [b, e]
+        // RG 1: 100-150 [c, d]
+        // RG 1: 150-200 [c, f]
+        let flat_batches = vec![
+            new_record_batch_by_range_sparse(&["a", "d"], 0, 50, &metadata),
+            new_record_batch_by_range_sparse(&["b", "e"], 50, 100, &metadata),
+            new_record_batch_by_range_sparse(&["c", "d"], 100, 150, &metadata),
+            new_record_batch_by_range_sparse(&["c", "f"], 150, 200, &metadata),
+        ];
+
+        let flat_source = new_flat_source_from_record_batches(flat_batches);
+
+        let write_opts = WriteOptions {
+            row_group_size,
+            ..Default::default()
+        };
+
+        let indexer_builder = create_test_indexer_builder(
+            &env,
+            object_store.clone(),
+            file_path.clone(),
+            metadata.clone(),
+            row_group_size,
+        );
+
+        let info = write_flat_sst(
+            object_store.clone(),
+            metadata.clone(),
+            indexer_builder,
+            file_path.clone(),
+            flat_source,
+            &write_opts,
+        )
+        .await;
+        assert_eq!(200, info.num_rows);
+        assert!(info.file_size > 0);
+        assert!(info.index_metadata.file_size > 0);
+
+        let handle = create_file_handle_from_sst_info(&info, &metadata);
+
+        let cache = create_test_cache();
+
+        // Filter by ts >= 50 AND ts < 200 AND tag_1 = "d"
+        // Expected: RG 0 (ts 0-100) and RG 1 (ts 100-200), both have tag_1="d"
+        let preds = vec![
+            col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(50), None))),
+            col("ts").lt(lit(ScalarValue::TimestampMillisecond(Some(200), None))),
+            col("tag_1").eq(lit("d")),
+        ];
+        let bloom_filter_applier = BloomFilterIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            &metadata,
+            env.get_puffin_manager(),
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_bloom_filter_index_cache(cache.bloom_filter_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // Verify selection contains RG 0 and RG 1, 50 rows selected in each
+        assert_eq!(selection.row_group_count(), 2);
+        assert_eq!(50, selection.get(0).unwrap().row_count());
+        assert_eq!(50, selection.get(1).unwrap().row_count());
+
+        // Verify filtering metrics: expects no RG pruned and 100 rows pruned by the bloom filter
+        assert_eq!(metrics.filter_metrics.rg_total, 2);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rg_bloom_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rows_bloom_filtered, 100);
+    }
+
+    /// Creates region metadata for testing fulltext indexes with both the Bloom and Tantivy backends.
+    /// Schema: tag_0 (tag, primary key), text_bloom (Bloom fulltext), text_tantivy (Tantivy fulltext), field_0, ts
+    fn fulltext_region_metadata() -> RegionMetadata {
+        let mut builder = RegionMetadataBuilder::new(REGION_ID);
+        builder
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    "tag_0".to_string(),
+                    ConcreteDataType::string_datatype(),
+                    true,
+                ),
+                semantic_type: SemanticType::Tag,
+                column_id: 0,
+            })
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    "text_bloom".to_string(),
+                    ConcreteDataType::string_datatype(),
+                    true,
+                )
+                .with_fulltext_options(FulltextOptions {
+                    enable: true,
+                    analyzer: FulltextAnalyzer::English,
+                    case_sensitive: false,
+                    backend: FulltextBackend::Bloom,
+                    granularity: 1,
+                    false_positive_rate_in_10000: 50,
+                })
+                .unwrap(),
+                semantic_type: SemanticType::Field,
+                column_id: 1,
+            })
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    "text_tantivy".to_string(),
+                    ConcreteDataType::string_datatype(),
+                    true,
+                )
+                .with_fulltext_options(FulltextOptions {
+                    enable: true,
+                    analyzer: FulltextAnalyzer::English,
+                    case_sensitive: false,
+                    backend: FulltextBackend::Tantivy,
+                    granularity: 1,
+                    false_positive_rate_in_10000: 50,
+                })
+                .unwrap(),
+                semantic_type: SemanticType::Field,
+                column_id: 2,
+            })
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    "field_0".to_string(),
+                    ConcreteDataType::uint64_datatype(),
+                    true,
+                ),
+                semantic_type: SemanticType::Field,
+                column_id: 3,
+            })
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    "ts".to_string(),
+                    ConcreteDataType::timestamp_millisecond_datatype(),
+                    false,
+                ),
+                semantic_type: SemanticType::Timestamp,
+                column_id: 4,
+            })
+            .primary_key(vec![0]);
+        builder.build().unwrap()
+    }
+
+    /// Builds a flat-format `RecordBatch` of `end - start` rows for `fulltext_region_metadata`; tag and texts repeat per row.
+    fn new_fulltext_record_batch_by_range(
+        tag: &str,
+        text_bloom: &str,
+        text_tantivy: &str,
+        start: usize,
+        end: usize,
+    ) -> RecordBatch {
+        assert!(end >= start);
+        let metadata = Arc::new(fulltext_region_metadata());
+        let flat_schema = to_flat_sst_arrow_schema(&metadata, &FlatSchemaOptions::default());
+
+        let num_rows = end - start;
+        let mut columns = Vec::new();
+
+        // Add primary key column (tag_0) as dictionary array
+        let mut tag_builder = StringDictionaryBuilder::<UInt32Type>::new();
+        for _ in 0..num_rows {
+            tag_builder.append_value(tag);
+        }
+        columns.push(Arc::new(tag_builder.finish()) as ArrayRef);
+
+        // Add text_bloom field (fulltext with bloom backend)
+        let text_bloom_values: Vec<_> = (0..num_rows).map(|_| text_bloom).collect();
+        columns.push(Arc::new(StringArray::from(text_bloom_values)));
+
+        // Add text_tantivy field (fulltext with tantivy backend)
+        let text_tantivy_values: Vec<_> = (0..num_rows).map(|_| text_tantivy).collect();
+        columns.push(Arc::new(StringArray::from(text_tantivy_values)));
+
+        // Add field column (field_0), valued start..end
+        let field_values: Vec<u64> = (start..end).map(|v| v as u64).collect();
+        columns.push(Arc::new(UInt64Array::from(field_values)));
+
+        // Add time index column (ts), valued start..end
+        let timestamps: Vec<i64> = (start..end).map(|v| v as i64).collect();
+        columns.push(Arc::new(TimestampMillisecondArray::from(timestamps)));
+
+        // Add encoded primary key column
+        let pk = new_primary_key(&[tag]);
+        let mut pk_builder = BinaryDictionaryBuilder::<UInt32Type>::new();
+        for _ in 0..num_rows {
+            pk_builder.append(&pk).unwrap();
+        }
+        columns.push(Arc::new(pk_builder.finish()));
+
+        // Add sequence column (constant 1000 for every row)
+        columns.push(Arc::new(UInt64Array::from_value(1000, num_rows)));
+
+        // Add op_type column (every row is a Put)
+        columns.push(Arc::new(UInt8Array::from_value(
+            OpType::Put as u8,
+            num_rows,
+        )));
+
+        RecordBatch::try_new(flat_schema, columns).unwrap()
+    }
+
+    #[tokio::test]
+    async fn test_write_flat_read_with_fulltext_index() {
+        let mut env = TestEnv::new().await;
+        let object_store = env.init_object_store_manager();
+        let file_path = RegionFilePathFactory::new(FILE_DIR.to_string(), PathType::Bare);
+        let metadata = Arc::new(fulltext_region_metadata());
+        let row_group_size = 50;
+
+        // Create flat format RecordBatches with different text content (one 50-row batch per row group)
+        // RG 0:   0-50  tag="a", bloom="hello world", tantivy="quick brown fox"
+        // RG 1:  50-100 tag="b", bloom="hello world", tantivy="quick brown fox"
+        // RG 2: 100-150 tag="c", bloom="goodbye world", tantivy="lazy dog"
+        // RG 3: 150-200 tag="d", bloom="goodbye world", tantivy="lazy dog"
+        let flat_batches = vec![
+            new_fulltext_record_batch_by_range("a", "hello world", "quick brown fox", 0, 50),
+            new_fulltext_record_batch_by_range("b", "hello world", "quick brown fox", 50, 100),
+            new_fulltext_record_batch_by_range("c", "goodbye world", "lazy dog", 100, 150),
+            new_fulltext_record_batch_by_range("d", "goodbye world", "lazy dog", 150, 200),
+        ];
+
+        let flat_source = new_flat_source_from_record_batches(flat_batches);
+
+        let write_opts = WriteOptions {
+            row_group_size,
+            ..Default::default()
+        };
+
+        let indexer_builder = create_test_indexer_builder(
+            &env,
+            object_store.clone(),
+            file_path.clone(),
+            metadata.clone(),
+            row_group_size,
+        );
+
+        let mut info = write_flat_sst(
+            object_store.clone(),
+            metadata.clone(),
+            indexer_builder,
+            file_path.clone(),
+            flat_source,
+            &write_opts,
+        )
+        .await;
+        assert_eq!(200, info.num_rows);
+        assert!(info.file_size > 0);
+        assert!(info.index_metadata.file_size > 0);
+
+        // Verify fulltext indexes were created
+        assert!(info.index_metadata.fulltext_index.index_size > 0);
+        assert_eq!(info.index_metadata.fulltext_index.row_count, 200);
+        // text_bloom (column_id 1) and text_tantivy (column_id 2); sort first so the comparison is order-independent
+        info.index_metadata.fulltext_index.columns.sort_unstable();
+        assert_eq!(info.index_metadata.fulltext_index.columns, vec![1, 2]);
+
+        assert_eq!(
+            (
+                Timestamp::new_millisecond(0),
+                Timestamp::new_millisecond(199)
+            ),
+            info.time_range
+        );
+
+        let handle = create_file_handle_from_sst_info(&info, &metadata);
+
+        let cache = create_test_cache();
+
+        // Closures that build the scalar functions used in the fulltext predicates below
+        let matches_func = || {
+            Arc::new(
+                ScalarFunctionFactory::from(Arc::new(MatchesFunction::default()) as FunctionRef)
+                    .provide(Default::default()),
+            )
+        };
+
+        let matches_term_func = || {
+            Arc::new(
+                ScalarFunctionFactory::from(
+                    Arc::new(MatchesTermFunction::default()) as FunctionRef,
+                )
+                .provide(Default::default()),
+            )
+        };
+
+        // Test 1: Filter by text_bloom field using matches_term (bloom backend)
+        // Expected: RG 0 and RG 1 (rows 0-100), whose text_bloom contains the term "hello"
+        let preds = vec![Expr::ScalarFunction(ScalarFunction {
+            args: vec![col("text_bloom"), "hello".lit()],
+            func: matches_term_func(),
+        })];
+
+        let fulltext_applier = FulltextIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            env.get_puffin_manager(),
+            &metadata,
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_bloom_filter_cache(cache.bloom_filter_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .fulltext_index_appliers([None, fulltext_applier.clone()])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // Verify selection contains RG 0 and RG 1 (text_bloom="hello world")
+        assert_eq!(selection.row_group_count(), 2);
+        assert_eq!(50, selection.get(0).unwrap().row_count());
+        assert_eq!(50, selection.get(1).unwrap().row_count());
+
+        // Verify filtering metrics: expects 2 of 4 RGs (100 rows) pruned by the fulltext index
+        assert_eq!(metrics.filter_metrics.rg_total, 4);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rg_fulltext_filtered, 2);
+        assert_eq!(metrics.filter_metrics.rows_fulltext_filtered, 100);
+
+        // Test 2: Filter by text_tantivy field using matches (tantivy backend)
+        // Expected: RG 2 and RG 3 (rows 100-200), whose text_tantivy contains "lazy"
+        let preds = vec![Expr::ScalarFunction(ScalarFunction {
+            args: vec![col("text_tantivy"), "lazy".lit()],
+            func: matches_func(),
+        })];
+
+        let fulltext_applier = FulltextIndexApplierBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            object_store.clone(),
+            env.get_puffin_manager(),
+            &metadata,
+        )
+        .with_puffin_metadata_cache(cache.puffin_metadata_cache().cloned())
+        .with_bloom_filter_cache(cache.bloom_filter_index_cache().cloned())
+        .build(&preds)
+        .unwrap()
+        .map(Arc::new);
+
+        let builder = ParquetReaderBuilder::new(
+            FILE_DIR.to_string(),
+            PathType::Bare,
+            handle.clone(),
+            object_store.clone(),
+        )
+        .flat_format(true)
+        .predicate(Some(Predicate::new(preds)))
+        .fulltext_index_appliers([None, fulltext_applier.clone()])
+        .cache(CacheStrategy::EnableAll(cache.clone()));
+
+        let mut metrics = ReaderMetrics::default();
+        let (_context, selection) = builder.build_reader_input(&mut metrics).await.unwrap();
+
+        // Verify selection contains RG 2 and RG 3 (text_tantivy="lazy dog")
+        assert_eq!(selection.row_group_count(), 2);
+        assert_eq!(50, selection.get(2).unwrap().row_count());
+        assert_eq!(50, selection.get(3).unwrap().row_count());
+
+        // Verify filtering metrics: expects 2 of 4 RGs (100 rows) pruned by the fulltext index
+        assert_eq!(metrics.filter_metrics.rg_total, 4);
+        assert_eq!(metrics.filter_metrics.rg_minmax_filtered, 0);
+        assert_eq!(metrics.filter_metrics.rg_fulltext_filtered, 2);
+        assert_eq!(metrics.filter_metrics.rows_fulltext_filtered, 100);
+    }
 }
--- a/src/mito2/src/test_util/scheduler_util.rs
+++ b/src/mito2/src/test_util/scheduler_util.rs
@@ -29,7 +29,7 @@ use tokio::sync::mpsc::Sender;
 use crate::access_layer::{AccessLayer, AccessLayerRef};
 use crate::cache::CacheManager;
 use crate::compaction::CompactionScheduler;
-use crate::compaction::memory_manager::{CompactionMemoryManager, new_compaction_memory_manager};
+use crate::compaction::memory_manager::new_compaction_memory_manager;
 use crate::config::MitoConfig;
 use crate::error::Result;
 use crate::flush::FlushScheduler;
--- a/src/mito2/src/test_util/sst_util.rs
+++ b/src/mito2/src/test_util/sst_util.rs
@@ -27,6 +27,10 @@ use parquet::file::metadata::ParquetMetaData;
 use store_api::metadata::{
    ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
 };
+use store_api::metric_engine_consts::{
+    DATA_SCHEMA_TABLE_ID_COLUMN_NAME, DATA_SCHEMA_TSID_COLUMN_NAME,
+};
+use store_api::storage::consts::ReservedColumnId;
 use store_api::storage::{FileId, RegionId};

 use crate::read::{Batch, BatchBuilder, Source};
@@ -36,11 +40,44 @@ use crate::test_util::{VecBatchReader, new_batch_builder, new_noop_file_purger};
 /// Test region id.
 const REGION_ID: RegionId = RegionId::new(0, 0);

-/// Creates a new region metadata for testing SSTs.
+/// Creates a new region metadata for testing SSTs with specified encoding.
 ///
-/// Schema: tag_0, tag_1, field_0, ts
-pub fn sst_region_metadata() -> RegionMetadata {
+/// Dense schema: tag_0, tag_1, field_0, ts
+/// Sparse schema: __table_id, __tsid, tag_0, tag_1, field_0, ts
+pub fn sst_region_metadata_with_encoding(
+    encoding: store_api::codec::PrimaryKeyEncoding,
+) -> RegionMetadata {
    let mut builder = RegionMetadataBuilder::new(REGION_ID);
+
+    // For sparse encoding, add internal columns first
+    if encoding == store_api::codec::PrimaryKeyEncoding::Sparse {
+        builder
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    DATA_SCHEMA_TABLE_ID_COLUMN_NAME.to_string(),
+                    ConcreteDataType::uint32_datatype(),
+                    false,
+                )
+                .with_skipping_options(SkippingIndexOptions {
+                    granularity: 1,
+                    ..Default::default()
+                })
+                .unwrap(),
+                semantic_type: SemanticType::Tag,
+                column_id: ReservedColumnId::table_id(),
+            })
+            .push_column_metadata(ColumnMetadata {
+                column_schema: ColumnSchema::new(
+                    DATA_SCHEMA_TSID_COLUMN_NAME.to_string(),
+                    ConcreteDataType::uint64_datatype(),
+                    false,
+                ),
+                semantic_type: SemanticType::Tag,
+                column_id: ReservedColumnId::tsid(),
+            });
+    }
+
+    // Add user-defined columns (tag_0, tag_1, field_0, ts)
    builder
        .push_column_metadata(ColumnMetadata {
            column_schema: ColumnSchema::new(
@@ -83,12 +120,32 @@ pub fn sst_region_metadata() -> RegionMetadata {
            ),
            semantic_type: SemanticType::Timestamp,
            column_id: 3,
-        })
-        .primary_key(vec![0, 1]);
+        });
+
+    // Set primary key based on encoding
+    if encoding == store_api::codec::PrimaryKeyEncoding::Sparse {
+        builder.primary_key(vec![
+            ReservedColumnId::table_id(),
+            ReservedColumnId::tsid(),
+            0, // tag_0
+            1, // tag_1
+        ]);
+    } else {
+        builder.primary_key(vec![0, 1]); // Dense: just user tags
+    }
+
+    builder.primary_key_encoding(encoding);
    builder.build().unwrap()
 }

-/// Encodes a primary key for specific tags.
+/// Builds the region metadata for SST tests with dense primary-key encoding.
+/// Schema: tag_0, tag_1, field_0, ts
+pub fn sst_region_metadata() -> RegionMetadata {
+    let encoding = store_api::codec::PrimaryKeyEncoding::Dense;
+    sst_region_metadata_with_encoding(encoding)
+}
+
+/// Encodes a primary key for specific tags using dense encoding.
 pub fn new_primary_key(tags: &[&str]) -> Vec<u8> {
    let fields = (0..tags.len())
        .map(|idx| {
@@ -104,6 +161,31 @@ pub fn new_primary_key(tags: &[&str]) -> Vec<u8> {
        .unwrap()
 }

+/// Encodes a sparse-format primary key: the reserved `table_id` and `tsid`
+/// columns first, then `tags[i]` under column id `i` for every tag given
+/// (any number of tags is accepted; none are required or dropped).
+pub fn new_sparse_primary_key(
+    tags: &[&str],
+    metadata: &Arc<RegionMetadata>,
+    table_id: u32,
+    tsid: u64,
+) -> Vec<u8> {
+    use mito_codec::row_converter::PrimaryKeyCodec;
+
+    let codec = mito_codec::row_converter::SparsePrimaryKeyCodec::new(metadata);
+
+    // Internal columns go first; user tags follow in positional order.
+    let mut values = vec![
+        (ReservedColumnId::table_id(), ValueRef::UInt32(table_id)),
+        (ReservedColumnId::tsid(), ValueRef::UInt64(tsid)),
+    ];
+    values.extend((0u32..).zip(tags).map(|(id, tag)| (id, ValueRef::String(*tag))));
+
+    let mut buffer = Vec::new();
+    codec.encode_value_refs(&values, &mut buffer).unwrap();
+    buffer
+}
+
 /// Creates a [Source] from `batches`.
 pub fn new_source(batches: &[Batch]) -> Source {
    let reader = VecBatchReader::new(batches);
--- a/src/object-store/src/config.rs
+++ b/src/object-store/src/config.rs
@@ -16,7 +16,6 @@ use std::time::Duration;

 use common_base::readable_size::ReadableSize;
 use common_base::secrets::{ExposeSecret, SecretString};
-use common_telemetry::tracing::warn;
 use opendal::services::{Azblob, Gcs, Oss, S3};
 use serde::{Deserialize, Serialize};

@@ -118,23 +117,25 @@ pub struct S3Connection {
    /// By default, opendal will send API to https://s3.us-east-1.amazonaws.com/bucket_name
    /// Enabled, opendal will send API to https://bucket_name.s3.us-east-1.amazonaws.com
    pub enable_virtual_host_style: bool,
+    /// Disable EC2 metadata service.
+    /// By default, opendal loads credentials from the EC2 instance metadata service
+    /// when access key id and secret access key are not provided.
+    /// If set to `true`, opendal will *NOT* use EC2 metadata service.
+    pub disable_ec2_metadata: bool,
 }

 impl From<&S3Connection> for S3 {
    fn from(connection: &S3Connection) -> Self {
        let root = util::normalize_dir(&connection.root);

-        let mut builder = S3::default().root(&root).bucket(&connection.bucket);
+        let mut builder = S3::default()
+            .root(&root)
+            .bucket(&connection.bucket)
+            .access_key_id(connection.access_key_id.expose_secret())
+            .secret_access_key(connection.secret_access_key.expose_secret());

-        if !connection.access_key_id.expose_secret().is_empty()
-            && !connection.secret_access_key.expose_secret().is_empty()
-        {
-            builder = builder
-                .access_key_id(connection.access_key_id.expose_secret())
-                .secret_access_key(connection.secret_access_key.expose_secret());
-        } else {
-            warn!("No access key id or secret access key provided, using anonymous access");
-            builder = builder.allow_anonymous().disable_ec2_metadata();
+        if connection.disable_ec2_metadata {
+            builder = builder.disable_ec2_metadata();
        }

        if let Some(endpoint) = &connection.endpoint {
--- a/src/operator/src/req_convert/insert/stmt_to_region.rs
+++ b/src/operator/src/req_convert/insert/stmt_to_region.rs
@@ -410,8 +410,7 @@ fn sql_value_to_value(
        })?
    } else {
        common_sql::convert::sql_value_to_value(
-            column,
-            &column_schema.data_type,
+            column_schema,
            sql_val,
            timezone,
            None,
--- a/src/operator/src/statement.rs
+++ b/src/operator/src/statement.rs
@@ -52,6 +52,7 @@ use common_time::Timestamp;
 use common_time::range::TimestampRange;
 use datafusion_expr::LogicalPlan;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::schema::ColumnSchema;
 use humantime::format_duration;
 use itertools::Itertools;
 use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
@@ -644,11 +645,20 @@ impl StatementExecutor {
            })?
            .unit();

+        let start_column = ColumnSchema::new(
+            "range_start",
+            ConcreteDataType::timestamp_datatype(time_unit),
+            false,
+        );
+        let end_column = ColumnSchema::new(
+            "range_end",
+            ConcreteDataType::timestamp_datatype(time_unit),
+            false,
+        );
        let mut time_ranges = Vec::with_capacity(sql_values_time_range.len());
        for (start, end) in sql_values_time_range {
            let start = common_sql::convert::sql_value_to_value(
-                "range_start",
-                &ConcreteDataType::timestamp_datatype(time_unit),
+                &start_column,
                start,
                Some(&query_ctx.timezone()),
                None,
@@ -667,8 +677,7 @@ impl StatementExecutor {
            })?;

            let end = common_sql::convert::sql_value_to_value(
-                "range_end",
-                &ConcreteDataType::timestamp_datatype(time_unit),
+                &end_column,
                end,
                Some(&query_ctx.timezone()),
                None,
--- a/src/operator/src/statement/admin.rs
+++ b/src/operator/src/statement/admin.rs
@@ -242,8 +242,12 @@ fn values_to_vectors_by_exact_types(
    args.iter()
        .zip(exact_types.iter())
        .map(|(value, data_type)| {
-            let data_type = &ConcreteDataType::from_arrow_type(data_type);
-            let value = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None, false)
+            let schema = ColumnSchema::new(
+                DUMMY_COLUMN,
+                ConcreteDataType::from_arrow_type(data_type),
+                true,
+            );
+            let value = sql_value_to_value(&schema, value, tz, None, false)
                .context(error::SqlCommonSnafu)?;

            Ok(value_to_vector(value))
@@ -260,10 +264,12 @@ fn values_to_vectors_by_valid_types(
    args.iter()
        .map(|value| {
            for data_type in valid_types {
-                let data_type = &ConcreteDataType::from_arrow_type(data_type);
-                if let Ok(value) =
-                    sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None, false)
-                {
+                let schema = ColumnSchema::new(
+                    DUMMY_COLUMN,
+                    ConcreteDataType::from_arrow_type(data_type),
+                    true,
+                );
+                if let Ok(value) = sql_value_to_value(&schema, value, tz, None, false) {
                    return Ok(value_to_vector(value));
                }
            }
--- a/src/operator/src/statement/ddl.rs
+++ b/src/operator/src/statement/ddl.rs
@@ -50,7 +50,7 @@ use common_time::{Timestamp, Timezone};
 use datafusion_common::tree_node::TreeNodeVisitor;
 use datafusion_expr::LogicalPlan;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::schema::{RawSchema, Schema};
+use datatypes::schema::{ColumnSchema, RawSchema, Schema};
 use datatypes::value::Value;
 use partition::expr::{Operand, PartitionExpr, RestrictedOp};
 use partition::multi_dim::MultiDimPartitionRule;
@@ -2001,8 +2001,7 @@ fn convert_value(
    unary_op: Option<UnaryOperator>,
 ) -> Result<Value> {
    sql_value_to_value(
-        "<NONAME>",
-        &data_type,
+        &ColumnSchema::new("<NONAME>", data_type, true),
        value,
        Some(timezone),
        unary_op,
--- a/src/servers/Cargo.toml
+++ b/src/servers/Cargo.toml
@@ -87,7 +87,7 @@ operator.workspace = true
 otel-arrow-rust.workspace = true
 parking_lot.workspace = true
 pg_interval = "0.4"
-pgwire = { version = "0.36.3", default-features = false, features = [
+pgwire = { version = "0.37", default-features = false, features = [
    "server-api-ring",
    "pg-ext-types",
 ] }
--- a/src/servers/src/grpc.rs
+++ b/src/servers/src/grpc.rs
@@ -81,6 +81,12 @@ pub struct GrpcOptions {
    /// Default to `None`, means infinite.
    #[serde(with = "humantime_serde")]
    pub max_connection_age: Option<Duration>,
+    /// The HTTP/2 keep-alive interval.
+    #[serde(with = "humantime_serde")]
+    pub http2_keep_alive_interval: Duration,
+    /// The HTTP/2 keep-alive timeout.
+    #[serde(with = "humantime_serde")]
+    pub http2_keep_alive_timeout: Duration,
 }

 impl GrpcOptions {
@@ -144,6 +150,8 @@ impl Default for GrpcOptions {
            runtime_size: 8,
            tls: TlsOption::default(),
            max_connection_age: None,
+            http2_keep_alive_interval: Duration::from_secs(10),
+            http2_keep_alive_timeout: Duration::from_secs(3),
        }
    }
 }
@@ -164,6 +172,8 @@ impl GrpcOptions {
            runtime_size: 8,
            tls: TlsOption::default(),
            max_connection_age: None,
+            http2_keep_alive_interval: Duration::from_secs(10),
+            http2_keep_alive_timeout: Duration::from_secs(3),
        }
    }

--- a/src/servers/src/heartbeat_options.rs
+++ b/src/servers/src/heartbeat_options.rs
@@ -34,12 +34,10 @@ impl HeartbeatOptions {
    pub fn frontend_default() -> Self {
        Self {
            // Frontend can send heartbeat with a longer interval.
-            interval: Duration::from_millis(
-                distributed_time_constants::FRONTEND_HEARTBEAT_INTERVAL_MILLIS,
-            ),
-            retry_interval: Duration::from_millis(
-                distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS,
+            interval: distributed_time_constants::frontend_heartbeat_interval(
+                distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
            ),
+            retry_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
        }
    }
 }
@@ -47,10 +45,8 @@ impl HeartbeatOptions {
 impl Default for HeartbeatOptions {
    fn default() -> Self {
        Self {
-            interval: Duration::from_millis(distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS),
-            retry_interval: Duration::from_millis(
-                distributed_time_constants::HEARTBEAT_INTERVAL_MILLIS,
-            ),
+            interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
+            retry_interval: distributed_time_constants::BASE_HEARTBEAT_INTERVAL,
        }
    }
 }
--- a/src/servers/src/mysql/helper.rs
+++ b/src/servers/src/mysql/helper.rs
@@ -22,6 +22,7 @@ use common_time::{Date, Timestamp};
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_expr::LogicalPlan;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::schema::ColumnSchema;
 use datatypes::types::TimestampType;
 use datatypes::value::{self, Value};
 use itertools::Itertools;
@@ -254,9 +255,10 @@ pub fn convert_value(param: &ParamValue, t: &ConcreteDataType) -> Result<ScalarV
 /// Convert an MySQL expression to a scalar value.
 /// It automatically handles the conversion of strings to numeric values.
 pub fn convert_expr_to_scalar_value(param: &Expr, t: &ConcreteDataType) -> Result<ScalarValue> {
+    let column_schema = ColumnSchema::new("", t.clone(), true);
    match param {
        Expr::Value(v) => {
-            let v = sql_value_to_value("", t, &v.value, None, None, true);
+            let v = sql_value_to_value(&column_schema, &v.value, None, None, true);
            match v {
                Ok(v) => v
                    .try_to_scalar_value(t)
@@ -268,7 +270,7 @@ pub fn convert_expr_to_scalar_value(param: &Expr, t: &ConcreteDataType) -> Resul
            }
        }
        Expr::UnaryOp { op, expr } if let Expr::Value(v) = &**expr => {
-            let v = sql_value_to_value("", t, &v.value, None, Some(*op), true);
+            let v = sql_value_to_value(&column_schema, &v.value, None, Some(*op), true);
            match v {
                Ok(v) => v
                    .try_to_scalar_value(t)
--- a/src/servers/src/postgres/fixtures.rs
+++ b/src/servers/src/postgres/fixtures.rs
@@ -28,13 +28,13 @@ fn build_string_data_rows(
    schema: Arc<Vec<FieldInfo>>,
    rows: Vec<Vec<String>>,
 ) -> Vec<PgWireResult<DataRow>> {
+    let mut encoder = DataRowEncoder::new(schema.clone());
    rows.iter()
        .map(|row| {
-            let mut encoder = DataRowEncoder::new(schema.clone());
            for value in row {
                encoder.encode_field(&Some(value))?;
            }
-            encoder.finish()
+            Ok(encoder.take_row())
        })
        .collect()
 }
--- a/src/servers/src/postgres/handler.rs
+++ b/src/servers/src/postgres/handler.rs
@@ -262,6 +262,26 @@ impl QueryParser for DefaultQueryParser {
            })
        }
    }
+
+    /// Unused hook that always returns an error.
+    ///
+    /// Describe requests are handled by our own `ExtendedQueryHandler` implementation.
+    fn get_parameter_types(&self, _stmt: &Self::Statement) -> PgWireResult<Vec<Type>> {
+        let reason = "get_parameter_types is not expected to be called";
+        Err(PgWireError::ApiError(reason.into()))
+    }
+
+    /// Unused hook that always returns an error.
+    ///
+    /// Describe requests are handled by our own `ExtendedQueryHandler` implementation.
+    fn get_result_schema(
+        &self,
+        _stmt: &Self::Statement,
+        _column_format: Option<&Format>,
+    ) -> PgWireResult<Vec<FieldInfo>> {
+        let reason = "get_result_schema is not expected to be called";
+        Err(PgWireError::ApiError(reason.into()))
+    }
 }

 #[async_trait]
--- a/src/servers/src/postgres/types.rs
+++ b/src/servers/src/postgres/types.rs
@@ -395,13 +395,13 @@ impl Iterator for RecordBatchRowIterator {
    type Item = PgWireResult<DataRow>;

    fn next(&mut self) -> Option<Self::Item> {
+        let mut encoder = DataRowEncoder::new(self.pg_schema.clone());
        if self.i < self.record_batch.num_rows() {
-            let mut encoder = DataRowEncoder::new(self.pg_schema.clone());
            if let Err(e) = self.encode_row(self.i, &mut encoder) {
                return Some(Err(e));
            }
            self.i += 1;
-            Some(encoder.finish())
+            Some(Ok(encoder.take_row()))
        } else {
            None
        }
--- a/src/servers/src/prom_store.rs
+++ b/src/servers/src/prom_store.rs
@@ -26,7 +26,7 @@ use arrow::datatypes::{Float64Type, TimestampMillisecondType};
 use common_grpc::precision::Precision;
 use common_query::prelude::{greptime_timestamp, greptime_value};
 use common_recordbatch::{RecordBatch, RecordBatches};
-use common_telemetry::tracing;
+use common_telemetry::{tracing, warn};
 use datafusion::dataframe::DataFrame;
 use datafusion::prelude::{Expr, col, lit, regexp_match};
 use datafusion_common::ScalarValue;
@@ -415,6 +415,10 @@ pub fn to_grpc_row_insert_requests(request: &WriteRequest) -> Result<(RowInsertR
                table_data.add_row(one_row);
            }
        }
+
+        if !series.histograms.is_empty() {
+            warn!("Native histograms are not supported yet, data ignored");
+        }
    }

    Ok(multi_table_data.into_row_insert_requests())
--- a/src/servers/src/tls.rs
+++ b/src/servers/src/tls.rs
@@ -362,13 +362,13 @@ mod tests {
                cert_path: "/path/to/cert_path".to_string(),
                key_path: "/path/to/key_path".to_string(),
                ca_cert_path: String::new(),
-                watch: false
+                watch: false,
            },
            TlsOption::new(
                Some(Disable),
                Some("/path/to/cert_path".to_string()),
                Some("/path/to/key_path".to_string()),
-                false
+                false,
            )
        );
    }
--- a/src/sql/src/dialect.rs
+++ b/src/sql/src/dialect.rs
@@ -40,4 +40,8 @@ impl Dialect for GreptimeDbDialect {
    fn supports_filter_during_aggregation(&self) -> bool {
        true
    }
+
+    fn supports_struct_literal(&self) -> bool {
+        true // NOTE(review): presumably lets `fields = Struct<...>` options parse; confirm
+    }
 }
--- a/src/sql/src/error.rs
+++ b/src/sql/src/error.rs
@@ -208,9 +208,9 @@ pub enum Error {
        location: Location,
    },

-    #[snafu(display("Invalid expr as option value, error: {error}"))]
-    InvalidExprAsOptionValue {
-        error: String,
+    #[snafu(display("Invalid JSON structure setting, reason: {reason}"))]
+    InvalidJsonStructureSetting {
+        reason: String,
        #[snafu(implicit)]
        location: Location,
    },
@@ -373,7 +373,7 @@ impl ErrorExt for Error {
            }

            InvalidColumnOption { .. }
-            | InvalidExprAsOptionValue { .. }
+            | InvalidJsonStructureSetting { .. }
            | InvalidDatabaseName { .. }
            | InvalidDatabaseOption { .. }
            | ColumnTypeMismatch { .. }
--- a/src/sql/src/parsers/create_parser/json.rs
+++ b/src/sql/src/parsers/create_parser/json.rs
@@ -40,16 +40,17 @@ pub(super) fn parse_json_datatype_options(parser: &mut Parser<'_>) -> Result<Opt

 #[cfg(test)]
 mod tests {
-    use sqlparser::ast::DataType;
+    use sqlparser::ast::{DataType, Expr, Ident, StructField};

    use crate::dialect::GreptimeDbDialect;
    use crate::parser::{ParseOptions, ParserContext};
    use crate::statements::OptionMap;
    use crate::statements::create::{
-        Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FORMAT,
-        JSON_OPT_UNSTRUCTURED_KEYS,
+        Column, JSON_FORMAT_FULL_STRUCTURED, JSON_FORMAT_PARTIAL, JSON_FORMAT_RAW, JSON_OPT_FIELDS,
+        JSON_OPT_FORMAT, JSON_OPT_UNSTRUCTURED_KEYS,
    };
    use crate::statements::statement::Statement;
+    use crate::util::OptionValue;

    #[test]
    fn test_parse_json_datatype_options() {
@@ -77,6 +78,41 @@ mod tests {

        let sql = r#"
 CREATE TABLE json_data (
+    my_json JSON(format = "partial", fields = Struct<i Int, "o.a" String, "o.b" String, `x.y.z` Float64>),
+    ts TIMESTAMP TIME INDEX,
+)"#;
+        let options = parse(sql).unwrap();
+        assert_eq!(options.len(), 1);
+        let option = options.value(JSON_OPT_FIELDS);
+        let expected = Some(&OptionValue::new(Expr::Struct {
+            values: vec![],
+            fields: vec![
+                StructField {
+                    field_name: Some(Ident::new("i")),
+                    field_type: DataType::Int(None),
+                    options: None,
+                },
+                StructField {
+                    field_name: Some(Ident::with_quote('"', "o.a")),
+                    field_type: DataType::String(None),
+                    options: None,
+                },
+                StructField {
+                    field_name: Some(Ident::with_quote('"', "o.b")),
+                    field_type: DataType::String(None),
+                    options: None,
+                },
+                StructField {
+                    field_name: Some(Ident::with_quote('`', "x.y.z")),
+                    field_type: DataType::Float64,
+                    options: None,
+                },
+            ],
+        }));
+        assert_eq!(option, expected);
+
+        let sql = r#"
+CREATE TABLE json_data (
    my_json JSON(format = "partial", unstructured_keys = ["k", "foo.bar", "a.b.c"]),
    ts TIMESTAMP TIME INDEX,
 )"#;
--- a/src/sql/src/statements.rs
+++ b/src/sql/src/statements.rs
@@ -40,6 +40,7 @@ use api::v1::SemanticType;
 use common_sql::default_constraint::parse_column_default_constraint;
 use common_time::timezone::Timezone;
 use datatypes::extension::json::{JsonExtensionType, JsonMetadata};
+use datatypes::json::JsonStructureSettings;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::{COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema};
 use datatypes::types::json_type::JsonNativeType;
@@ -281,8 +282,17 @@ pub fn sql_data_type_to_concrete_data_type(
            }
        },
        SqlDataType::JSON => {
-            let format = if column_extensions.json_datatype_options.is_some() {
-                JsonFormat::Native(Box::new(JsonNativeType::Null))
+            let format = if let Some(x) = column_extensions.build_json_structure_settings()? {
+                if let Some(fields) = match x {
+                    JsonStructureSettings::Structured(fields) => fields,
+                    JsonStructureSettings::UnstructuredRaw => None,
+                    JsonStructureSettings::PartialUnstructuredByKey { fields, .. } => fields,
+                } {
+                    let datatype = &ConcreteDataType::Struct(fields);
+                    JsonFormat::Native(Box::new(datatype.into()))
+                } else {
+                    JsonFormat::Native(Box::new(JsonNativeType::Null))
+                }
            } else {
                JsonFormat::Jsonb
            };
--- a/src/sql/src/statements/create.rs
+++ b/src/sql/src/statements/create.rs
@@ -14,27 +14,30 @@

 use std::collections::{HashMap, HashSet};
 use std::fmt::{Display, Formatter};
+use std::sync::Arc;

 use common_catalog::consts::FILE_ENGINE;
+use datatypes::data_type::ConcreteDataType;
 use datatypes::json::JsonStructureSettings;
 use datatypes::schema::{
    FulltextOptions, SkippingIndexOptions, VectorDistanceMetric, VectorIndexEngineType,
    VectorIndexOptions,
 };
+use datatypes::types::StructType;
 use itertools::Itertools;
 use serde::Serialize;
-use snafu::ResultExt;
+use snafu::{OptionExt, ResultExt};
 use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
 use sqlparser_derive::{Visit, VisitMut};

 use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
 use crate::error::{
-    InvalidFlowQuerySnafu, InvalidSqlSnafu, Result, SetFulltextOptionSnafu,
-    SetSkippingIndexOptionSnafu,
+    InvalidFlowQuerySnafu, InvalidJsonStructureSettingSnafu, InvalidSqlSnafu, Result,
+    SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu,
 };
-use crate::statements::OptionMap;
 use crate::statements::statement::Statement;
 use crate::statements::tql::Tql;
+use crate::statements::{OptionMap, sql_data_type_to_concrete_data_type};
 use crate::util::OptionValue;

 const LINE_SEP: &str = ",\n";
@@ -44,6 +47,7 @@ pub const VECTOR_OPT_DIM: &str = "dim";

 pub const JSON_OPT_UNSTRUCTURED_KEYS: &str = "unstructured_keys";
 pub const JSON_OPT_FORMAT: &str = "format";
+pub(crate) const JSON_OPT_FIELDS: &str = "fields";
 pub const JSON_FORMAT_FULL_STRUCTURED: &str = "structured";
 pub const JSON_FORMAT_RAW: &str = "raw";
 pub const JSON_FORMAT_PARTIAL: &str = "partial";
@@ -346,14 +350,51 @@ impl ColumnExtensions {
            })
            .unwrap_or_default();

+        let fields = if let Some(value) = options.value(JSON_OPT_FIELDS) {
+            let fields = value
+                .as_struct_fields()
+                .context(InvalidJsonStructureSettingSnafu {
+                    reason: format!(r#"expect "{JSON_OPT_FIELDS}" a struct, actual: "{value}""#,),
+                })?;
+            let fields = fields
+                .iter()
+                .map(|field| {
+                    let name = field.field_name.as_ref().map(|x| x.value.clone()).context(
+                        InvalidJsonStructureSettingSnafu {
+                            reason: format!(r#"missing field name in "{field}""#),
+                        },
+                    )?;
+                    let datatype = sql_data_type_to_concrete_data_type(
+                        &field.field_type,
+                        &Default::default(),
+                    )?;
+                    Ok(datatypes::types::StructField::new(name, datatype, true))
+                })
+                .collect::<Result<_>>()?;
+            Some(StructType::new(Arc::new(fields)))
+        } else {
+            None
+        };
+
        options
            .get(JSON_OPT_FORMAT)
            .map(|format| match format {
-                JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(None)),
-                JSON_FORMAT_PARTIAL => Ok(JsonStructureSettings::PartialUnstructuredByKey {
-                    fields: None,
-                    unstructured_keys,
-                }),
+                JSON_FORMAT_FULL_STRUCTURED => Ok(JsonStructureSettings::Structured(fields)),
+                JSON_FORMAT_PARTIAL => {
+                    let fields = fields.map(|fields| {
+                        let mut fields = Arc::unwrap_or_clone(fields.fields());
+                        fields.push(datatypes::types::StructField::new(
+                            JsonStructureSettings::RAW_FIELD.to_string(),
+                            ConcreteDataType::string_datatype(),
+                            true,
+                        ));
+                        StructType::new(Arc::new(fields))
+                    });
+                    Ok(JsonStructureSettings::PartialUnstructuredByKey {
+                        fields,
+                        unstructured_keys,
+                    })
+                }
                JSON_FORMAT_RAW => Ok(JsonStructureSettings::UnstructuredRaw),
                _ => InvalidSqlSnafu {
                    msg: format!("unknown JSON datatype 'format': {format}"),
--- a/src/sql/src/util.rs
+++ b/src/sql/src/util.rs
@@ -17,14 +17,14 @@ use std::fmt::{Display, Formatter};

 use itertools::Itertools;
 use serde::Serialize;
-use snafu::ensure;
 use sqlparser::ast::{
-    Array, Expr, Ident, ObjectName, SetExpr, SqlOption, TableFactor, Value, ValueWithSpan,
+    Array, Expr, Ident, ObjectName, SetExpr, SqlOption, StructField, TableFactor, Value,
+    ValueWithSpan,
 };
 use sqlparser_derive::{Visit, VisitMut};

 use crate::ast::ObjectNamePartExt;
-use crate::error::{InvalidExprAsOptionValueSnafu, InvalidSqlSnafu, Result};
+use crate::error::{InvalidSqlSnafu, Result};
 use crate::statements::create::SqlOrTql;

 /// Format an [ObjectName] without any quote of its idents.
@@ -52,14 +52,8 @@ pub fn format_raw_object_name(name: &ObjectName) -> String {
 pub struct OptionValue(Expr);

 impl OptionValue {
-    fn try_new(expr: Expr) -> Result<Self> {
-        ensure!(
-            matches!(expr, Expr::Value(_) | Expr::Identifier(_) | Expr::Array(_)),
-            InvalidExprAsOptionValueSnafu {
-                error: format!("{expr} not accepted")
-            }
-        );
-        Ok(Self(expr))
+    pub fn new(expr: Expr) -> Self {
+        Self(expr)
    }

    fn expr_as_string(expr: &Expr) -> Option<&str> {
@@ -106,6 +100,13 @@ impl OptionValue {
            _ => None,
        }
    }
+
+    pub(crate) fn as_struct_fields(&self) -> Option<&[StructField]> {
+        if let Expr::Struct { fields, .. } = &self.0 {
+            return Some(fields);
+        }
+        None // not a struct expression
+    }
 }

 impl From<String> for OptionValue {
@@ -155,7 +156,7 @@ pub fn parse_option_string(option: SqlOption) -> Result<(String, OptionValue)> {
        }
        .fail();
    };
-    let v = OptionValue::try_new(value)?;
+    let v = OptionValue::new(value);
    let k = key.value.to_lowercase();
    Ok((k, v))
 }
--- a/tests-fuzz/targets/failover/fuzz_failover_metric_regions.rs
+++ b/tests-fuzz/targets/failover/fuzz_failover_metric_regions.rs
@@ -19,7 +19,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use arbitrary::{Arbitrary, Unstructured};
-use common_meta::distributed_time_constants;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_telemetry::info;
 use libfuzzer_sys::fuzz_target;
 use rand::{Rng, SeedableRng};
@@ -254,10 +254,7 @@ async fn execute_failover(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
    recover_pod_failure(ctx.kube.clone(), &ctx.namespace, &chaos_name).await?;
    wait_for_all_datanode_online(ctx.greptime.clone(), Duration::from_secs(60)).await;

-    tokio::time::sleep(Duration::from_secs(
-        distributed_time_constants::REGION_LEASE_SECS,
-    ))
-    .await;
+    tokio::time::sleep(default_distributed_time_constants().region_lease).await;
    // Validates value rows
    info!("Validates num of rows");

--- a/tests-fuzz/targets/failover/fuzz_failover_mito_regions.rs
+++ b/tests-fuzz/targets/failover/fuzz_failover_mito_regions.rs
@@ -19,7 +19,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use arbitrary::{Arbitrary, Unstructured};
-use common_meta::distributed_time_constants;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_telemetry::info;
 use common_time::util::current_time_millis;
 use futures::future::try_join_all;
@@ -322,10 +322,7 @@ async fn execute_failover(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
    recover_pod_failure(ctx.kube.clone(), &ctx.namespace, &chaos_name).await?;
    wait_for_all_datanode_online(ctx.greptime.clone(), Duration::from_secs(60)).await;

-    tokio::time::sleep(Duration::from_secs(
-        distributed_time_constants::REGION_LEASE_SECS,
-    ))
-    .await;
+    tokio::time::sleep(default_distributed_time_constants().region_lease).await;
    // Validates value rows
    info!("Validates num of rows");
    for (table_ctx, expected_rows) in table_ctxs.iter().zip(affected_rows) {
--- a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs
+++ b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs
@@ -19,7 +19,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use arbitrary::{Arbitrary, Unstructured};
-use common_meta::distributed_time_constants;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_telemetry::info;
 use libfuzzer_sys::fuzz_target;
 use rand::{Rng, SeedableRng};
@@ -275,10 +275,7 @@ async fn migrate_regions(ctx: &FuzzContext, migrations: &[Migration]) -> Result<
        wait_for_migration(ctx, migration, &procedure_id).await;
    }

-    tokio::time::sleep(Duration::from_secs(
-        distributed_time_constants::REGION_LEASE_SECS,
-    ))
-    .await;
+    tokio::time::sleep(default_distributed_time_constants().region_lease).await;

    Ok(())
 }
--- a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs
+++ b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs
@@ -19,7 +19,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use arbitrary::{Arbitrary, Unstructured};
-use common_meta::distributed_time_constants;
+use common_meta::distributed_time_constants::default_distributed_time_constants;
 use common_telemetry::info;
 use libfuzzer_sys::fuzz_target;
 use rand::{Rng, SeedableRng};
@@ -274,10 +274,7 @@ async fn migrate_regions(ctx: &FuzzContext, migrations: &[Migration]) -> Result<
        .await;
    }

-    tokio::time::sleep(Duration::from_secs(
-        distributed_time_constants::REGION_LEASE_SECS,
-    ))
-    .await;
+    tokio::time::sleep(default_distributed_time_constants().region_lease).await;

    Ok(())
 }
--- a/tests-integration/resources/jsonbench-head-10.ndjson
+++ b/tests-integration/resources/jsonbench-head-10.ndjson
@@ -0,0 +1,10 @@
+{"did":"did:plc:yj3sjq3blzpynh27cumnp5ks","time_us":1732206349000167,"kind":"commit","commit":{"rev":"3lbhtytnn2k2f","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtyteurk2y","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.095Z","langs":["en"],"reply":{"parent":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"},"root":{"cid":"bafyreibfglofvqou2yiqvwzk4rcgkhhxrbunyemshdjledgwymimqkg24e","uri":"at://did:plc:6tr6tuzlx2db3rduzr2d6r24/app.bsky.feed.post/3lbhqo2rtys2z"}},"text":"aaaaah.  LIght shines in a corner of WTF...."},"cid":"bafyreidblutgvj75o4q4akzyyejedjj6l3it6hgqwee6jpwv2wqph5fsgm"}}
+{"did":"did:plc:3i4xf2v4wcnyktgv6satke64","time_us":1732206349000644,"kind":"commit","commit":{"rev":"3lbhuvzds6d2a","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdked2a","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.221Z","subject":{"cid":"bafyreidjvrcmckkm765mct5fph36x7kupkfo35rjklbf2k76xkzwyiauge","uri":"at://did:plc:azrv4rcbws6kmcga4fsbphg2/app.bsky.feed.post/3lbgjdpbiec2l"}},"cid":"bafyreia5l5vrkh5oj4cjyhcqby2dprhyvcyofo2q5562tijlae2pzih23m"}}
+{"did":"did:plc:gccfnqqizz4urhchsaie6jft","time_us":1732206349001108,"kind":"commit","commit":{"rev":"3lbhuvze3gi2u","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhuvzdtmi2u","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:27:40.923Z","subject":"did:plc:r7cdh4sgzqbfdc6wcdxxti7c"},"cid":"bafyreiew2p6cgirfaj45qoenm4fgumib7xoloclrap3jgkz5es7g7kby3i"}}
+{"did":"did:plc:msxqf3twq7abtdw7dbfskphk","time_us":1732206349001372,"kind":"commit","commit":{"rev":"3lbhueija5p22","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhueiizcx22","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:15:58.232Z","subject":{"cid":"bafyreiavpshyqzrlo5m7fqodjhs6jevweqnif4phasiwimv4a7mnsqi2fe","uri":"at://did:plc:fusulxqc52zbrc75fi6xrcof/app.bsky.feed.post/3lbhskq5zn22f"}},"cid":"bafyreidjix4dauj2afjlbzmhj3a7gwftcevvmmy6edww6vrjdbst26rkby"}}
+{"did":"did:plc:l5o3qjrmfztir54cpwlv2eme","time_us":1732206349001905,"kind":"commit","commit":{"rev":"3lbhtytohxc2o","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhtytjqzk2q","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:09:27.254Z","langs":["en"],"reply":{"parent":{"cid":"bafyreih35fe2jj3gchmgk4amold4l6sfxd2sby5wrg3jrws5fkdypxrbg4","uri":"at://did:plc:6wx2gg5yqgvmlu35r6y3bk6d/app.bsky.feed.post/3lbhtj2eb4s2o"},"root":{"cid":"bafyreifipyt3vctd4ptuoicvio7rbr5xvjv4afwuggnd2prnmn55mu6luu","uri":"at://did:plc:474ldquxwzrlcvjhhbbk2wte/app.bsky.feed.post/3lbhdzrynik27"}},"text":"okay i take mine back because I hadn’t heard this one yet^^"},"cid":"bafyreigzdsdne3z2xxcakgisieyj7y47hj6eg7lj6v4q25ah5q2qotu5ku"}}
+{"did":"did:plc:jkaaf5j2yb2pvpx3ualm3vbh","time_us":1732206349002758,"kind":"commit","commit":{"rev":"3lbhudfo3yi2w","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhudfnw4y2w","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:15:21.495Z","subject":"did:plc:amsdn2tbjxo3xrwqneqhh4cm"},"cid":"bafyreiaa2vsdr4ckwjg4jq47zfd7mewidywfz3qh3dmglcd6ozi4xwdega"}}
+{"did":"did:plc:tdwz2h4id5dxezvohftsmffu","time_us":1732206349003106,"kind":"commit","commit":{"rev":"3lbhujcp4ix2n","operation":"create","collection":"app.bsky.graph.follow","rkey":"3lbhujcoxmp2n","record":{"$type":"app.bsky.graph.follow","createdAt":"2024-11-21T16:18:39.913Z","subject":"did:plc:gf3vum7insztt5rxrpxdz2id"},"cid":"bafyreihaatlpar3abtx6ck3kde2ksic6zzflk4ppduhf6dxurytqrv33ni"}}
+{"did":"did:plc:cdsd346mwow7aj3tgfkwsct3","time_us":1732206349003461,"kind":"commit","commit":{"rev":"3lbhus5vior2t","operation":"create","collection":"app.bsky.feed.repost","rkey":"3lbhus5vbtz2t","record":{"$type":"app.bsky.feed.repost","createdAt":"2024-11-21T16:23:36.714Z","subject":{"cid":"bafyreieaacfiobnuqvjhhsndyi5s3fd6krbzdduxsyrzfv43kczpcmkl6y","uri":"at://did:plc:o5q6dynpme4ndolc3heztasm/app.bsky.feed.post/3lbfli3qsoc2o"}},"cid":"bafyreid5ycocp5zq2g7fcx2xxzxrbafuh7b5qhtwuwiomzo6vqila2cbpu"}}
+{"did":"did:plc:s4bwqchfzm6gjqfeb6mexgbu","time_us":1732206349003907,"kind":"commit","commit":{"rev":"3lbhuvzeccx2w","operation":"create","collection":"app.bsky.feed.post","rkey":"3lbhuvxf4qs2m","record":{"$type":"app.bsky.feed.post","createdAt":"2024-11-21T16:25:44.376Z","langs":["en"],"reply":{"parent":{"cid":"bafyreiaev27cfcxxvn2pdhrwwquzwgclujnulzbcfnn4p4fwgb6migjhw4","uri":"at://did:plc:zec6cslvgc3hhdatrhk6pq5p/app.bsky.feed.post/3lbhujvds4c2b"},"root":{"cid":"bafyreif7qjxhvecwnhlynijj6pf47jwvtkahsz3zh2kaipwu2bw2dxwaqq","uri":"at://did:plc:s4bwqchfzm6gjqfeb6mexgbu/app.bsky.feed.post/3lbhug53kkk2m"}},"text":"ㅤ\nㅤㅤ⌜  Blinking. She hadn't realized she spoke out loud.  ⌟\n\nㅤ‘ It was nothing like that  —  .  I was only thinking  . . .  ’\n\nㅤㅤ⌜  Trailing off, her mind occupied.  ⌟\nㅤ"},"cid":"bafyreibugobcike72y4zxvdyz2oopyt6ywwqfielcwojkb27p7s6rlomgm"}}
+{"did":"did:plc:hbc74dlsxhq53kp5oxges6d7","time_us":1732206349004769,"kind":"commit","commit":{"rev":"3lbhuvzedg52j","operation":"create","collection":"app.bsky.feed.like","rkey":"3lbhuvzdyof2j","record":{"$type":"app.bsky.feed.like","createdAt":"2024-11-21T16:25:46.167Z","subject":{"cid":"bafyreiaumopip75nzx2xjbugtwemdppsyx54bd2odf6q45f3o7xkocgari","uri":"at://did:plc:ig2jv6gqup4t7gdq2pmanknw/app.bsky.feed.post/3lbhuvtlaec2c"}},"cid":"bafyreidjk2svg2fdjiiwohmfmvp3hdxhpb33ycnixzbkyib5m6cocindxq"}}
--- a/tests-integration/src/grpc.rs
+++ b/tests-integration/src/grpc.rs
@@ -76,11 +76,10 @@ mod test {

    use super::*;
    use crate::standalone::GreptimeDbStandaloneBuilder;
+    use crate::test_util::execute_sql;
    use crate::tests;
    use crate::tests::MockDistributedInstance;
-    use crate::tests::test_util::{
-        MockInstance, both_instances_cases, distributed, execute_sql, standalone,
-    };
+    use crate::tests::test_util::{MockInstance, both_instances_cases, distributed, standalone};

    #[tokio::test(flavor = "multi_thread")]
    async fn test_distributed_handle_ddl_request() {
--- a/tests-integration/src/test_util.rs
+++ b/tests-integration/src/test_util.rs
@@ -24,6 +24,7 @@ use common_base::Plugins;
 use common_config::Configurable;
 use common_meta::key::catalog_name::CatalogNameKey;
 use common_meta::key::schema_name::SchemaNameKey;
+use common_query::Output;
 use common_runtime::runtime::BuilderBuild;
 use common_runtime::{Builder as RuntimeBuilder, Runtime};
 use common_test_util::ports;
@@ -747,3 +748,10 @@ pub(crate) async fn prepare_another_catalog_and_schema(instance: &Instance) {
        .await
        .unwrap();
 }
+
+/// Runs `sql` against `instance` using a context from `QueryContext::arc()` and
+/// returns the first result.
+///
+/// # Panics
+///
+/// Panics if the query produces no result at all, or if the first result is an
+/// error; in the latter case the panic message names the offending SQL so a
+/// failing test can be diagnosed without re-running it.
+pub async fn execute_sql(instance: &Arc<Instance>, sql: &str) -> Output {
+    SqlQueryHandler::do_query(instance.as_ref(), sql, QueryContext::arc())
+        .await
+        .remove(0)
+        .unwrap_or_else(|e| panic!("Failed to execute sql: {sql}, error: {e:?}"))
+}
--- a/tests-integration/src/tests.rs
+++ b/tests-integration/src/tests.rs
@@ -18,7 +18,7 @@ mod instance_noop_wal_test;
 mod instance_test;
 mod promql_test;
 mod reconcile_table;
-pub(crate) mod test_util;
+pub mod test_util;

 use std::collections::HashMap;
 use std::sync::Arc;
--- a/tests-integration/src/tests/gc.rs
+++ b/tests-integration/src/tests/gc.rs
@@ -27,8 +27,8 @@ use store_api::storage::RegionId;
 use table::metadata::TableId;

 use crate::cluster::GreptimeDbClusterBuilder;
-use crate::test_util::{StorageType, TempDirGuard, get_test_store_config};
-use crate::tests::test_util::{MockInstanceBuilder, TestContext, execute_sql, wait_procedure};
+use crate::test_util::{StorageType, TempDirGuard, execute_sql, get_test_store_config};
+use crate::tests::test_util::{MockInstanceBuilder, TestContext, wait_procedure};

 /// Helper function to get table route information for GC procedure
 async fn get_table_route(
--- a/tests-integration/src/tests/instance_noop_wal_test.rs
+++ b/tests-integration/src/tests/instance_noop_wal_test.rs
@@ -18,9 +18,8 @@ use common_test_util::recordbatch::check_output_stream;
 use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};

 use crate::cluster::GreptimeDbClusterBuilder;
-use crate::tests::test_util::{
-    MockInstanceBuilder, RebuildableMockInstance, TestContext, execute_sql,
-};
+use crate::test_util::execute_sql;
+use crate::tests::test_util::{MockInstanceBuilder, RebuildableMockInstance, TestContext};

 pub(crate) async fn distributed_with_noop_wal() -> TestContext {
    common_telemetry::init_default_ut_logging();
--- a/tests-integration/src/tests/reconcile_table.rs
+++ b/tests-integration/src/tests/reconcile_table.rs
@@ -23,9 +23,10 @@ use common_test_util::recordbatch::check_output_stream;
 use table::table_reference::TableReference;

 use crate::cluster::GreptimeDbClusterBuilder;
+use crate::test_util::execute_sql;
 use crate::tests::test_util::{
-    MockInstanceBuilder, RebuildableMockInstance, TestContext, dump_kvbackend, execute_sql,
-    restore_kvbackend, try_execute_sql, wait_procedure,
+    MockInstanceBuilder, RebuildableMockInstance, TestContext, dump_kvbackend, restore_kvbackend,
+    try_execute_sql, wait_procedure,
 };

 const CREATE_MONITOR_TABLE_SQL: &str = r#"
--- a/tests-integration/src/tests/test_util.rs
+++ b/tests-integration/src/tests/test_util.rs
@@ -439,10 +439,6 @@ pub fn find_testing_resource(path: &str) -> String {
    prepare_path(&p)
 }

-pub async fn execute_sql(instance: &Arc<Instance>, sql: &str) -> Output {
-    execute_sql_with(instance, sql, QueryContext::arc()).await
-}
-
 pub async fn try_execute_sql(instance: &Arc<Instance>, sql: &str) -> Result<Output> {
    try_execute_sql_with(instance, sql, QueryContext::arc()).await
 }
@@ -455,16 +451,6 @@ pub async fn try_execute_sql_with(
    instance.do_query(sql, query_ctx).await.remove(0)
 }

-pub async fn execute_sql_with(
-    instance: &Arc<Instance>,
-    sql: &str,
-    query_ctx: QueryContextRef,
-) -> Output {
-    try_execute_sql_with(instance, sql, query_ctx)
-        .await
-        .unwrap_or_else(|e| panic!("Failed to execute sql: {sql}, error: {e:?}"))
-}
-
 /// Dump the kv backend to a vector of key-value pairs.
 pub async fn dump_kvbackend(kv_backend: &KvBackendRef) -> Vec<(Vec<u8>, Vec<u8>)> {
    let req = RangeRequest::new().with_range(vec![0], vec![0]);
--- a/tests-integration/tests/http.rs
+++ b/tests-integration/tests/http.rs
@@ -1397,6 +1397,8 @@ max_recv_message_size = "512MiB"
 max_send_message_size = "512MiB"
 flight_compression = "arrow_ipc"
 runtime_size = 8
+http2_keep_alive_interval = "10s"
+http2_keep_alive_timeout = "3s"

 [grpc.tls]
 mode = "disable"
@@ -1586,6 +1588,7 @@ fn drop_lines_with_inconsistent_results(input: String) -> String {
        "endpoint =",
        "region =",
        "enable_virtual_host_style =",
+        "disable_ec2_metadata =",
        "cache_path =",
        "cache_capacity =",
        "memory_pool_size =",
--- a/tests-integration/tests/jsonbench.rs
+++ b/tests-integration/tests/jsonbench.rs
@@ -0,0 +1,94 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::io::BufRead;
+use std::sync::Arc;
+use std::{fs, io};
+
+use common_test_util::find_workspace_path;
+use frontend::instance::Instance;
+use tests_integration::standalone::GreptimeDbStandaloneBuilder;
+use tests_integration::test_util::execute_sql;
+
+/// End-to-end check: create the `bluesky` table on a standalone instance,
+/// verify its schema, then insert the JSONBench sample rows.
+#[tokio::test]
+async fn test_load_jsonbench_data() {
+    common_telemetry::init_default_ut_logging();
+
+    let builder = GreptimeDbStandaloneBuilder::new("test_load_jsonbench_data");
+    let standalone = builder.build().await;
+    let fe = standalone.fe_instance();
+
+    // Order matters: the table must exist before it is described or written to.
+    create_table(fe).await;
+    desc_table(fe).await;
+    insert_data(fe).await.unwrap();
+}
+
+/// Loads the bundled JSONBench sample (`jsonbench-head-10.ndjson`) into the
+/// `bluesky` table, issuing one `INSERT` per non-empty line.
+///
+/// The 1-based line number is used as the `ts` value. Returns an error if the
+/// file cannot be opened or read; panics if an insert does not report exactly
+/// one affected row.
+async fn insert_data(frontend: &Arc<Instance>) -> io::Result<()> {
+    let file = fs::File::open(find_workspace_path(
+        "tests-integration/resources/jsonbench-head-10.ndjson",
+    ))?;
+    let reader = io::BufReader::new(file);
+    for (i, line) in reader.lines().enumerate() {
+        let line = line?;
+        if line.is_empty() {
+            continue;
+        }
+        // The JSON document is embedded in a single-quoted SQL string literal, so any
+        // single quote inside it must be doubled to keep the statement well-formed.
+        let json = line.replace('\'', "''");
+        let sql = format!(
+            "INSERT INTO bluesky (ts, data) VALUES ({}, '{json}')",
+            i + 1,
+        );
+        let output = execute_sql(frontend, &sql).await;
+        let output = output.data.pretty_print().await;
+        assert_eq!(output, "Affected Rows: 1", "unexpected result for line {}", i + 1);
+    }
+    Ok(())
+}
+
+/// Asserts that `DESC TABLE bluesky` renders exactly the expected schema table.
+///
+/// The expected `data` type lists the declared struct fields plus a `_raw`
+/// String entry; `ts` is the primary-key timestamp column.
+async fn desc_table(frontend: &Arc<Instance>) {
+    let sql = "DESC TABLE bluesky";
+    // Border rows of the pretty-printed table start with `+`, like every other
+    // junction in the row.
+    let expected = r#"
++--------+----------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
+| Column | Type                                                                                                                                         | Key | Null | Default | Semantic Type |
++--------+----------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+
+| data   | Json<Object{"_raw": String, "commit.collection": String, "commit.operation": String, "did": String, "kind": String, "time_us": Number(I64)}> |     | YES  |         | FIELD         |
+| ts     | TimestampMillisecond                                                                                                                         | PRI | NO   |         | TIMESTAMP     |
++--------+----------------------------------------------------------------------------------------------------------------------------------------------+-----+------+---------+---------------+"#;
+    let output = execute_sql(frontend, sql).await;
+    let output = output.data.pretty_print().await;
+    // The raw string starts with a newline for readability; trim it before comparing.
+    assert_eq!(output, expected.trim());
+}
+
+/// Creates the `bluesky` table: a JSON `data` column declared with the
+/// "partial" format and an explicit struct of fields, plus `ts` as the time
+/// index. Panics unless the statement reports zero affected rows.
+async fn create_table(frontend: &Arc<Instance>) {
+    let ddl = r#"
+CREATE TABLE bluesky (
+  "data" JSON (
+    format = "partial",
+    fields = Struct<
+      kind String,
+      "commit.operation" String,
+      "commit.collection" String,
+      did String,
+      time_us Bigint
+    >,
+  ),
+  ts Timestamp TIME INDEX,
+)
+"#;
+    let rendered = execute_sql(frontend, ddl).await.data.pretty_print().await;
+    assert_eq!(rendered, "Affected Rows: 0");
+}
--- a/tests-integration/tests/main.rs
+++ b/tests-integration/tests/main.rs
@@ -16,6 +16,7 @@
 mod grpc;
 #[macro_use]
 mod http;
+mod jsonbench;
 #[macro_use]
 mod sql;
 #[macro_use]
Author	SHA1	Message	Date
luofucong	497dfde90b	ingest jsonbench data parse partial struct json datatype in create sql	2025-12-22 19:32:03 +08:00
Lei, HUANG	a8b512dded	chore: expose symbols (#7451 ) * chore/expose-symbols: ### Commit Message Enhance `merge_and_dedup` Functionality in `flush.rs` - Function Signature Update: Modified the `merge_and_dedup` function to accept `append_mode` and `merge_mode` as separate parameters instead of using `options`. - Function Accessibility: Changed the visibility of `merge_and_dedup` to `pub` to allow external access. - Function Calls Update: Updated calls to `merge_and_dedup` within `memtable_flat_sources` to align with the new function signature, passing `options.append_mode` and `options.merge_mode()` directly. Signed-off-by: Lei, HUANG <mrsatangel@gmail.com> * chore/expose-symbols: ### Add Merge and Deduplication Functionality - File: `src/mito2/src/flush.rs` - Introduced `merge_and_dedup` function to merge multiple record batch iterators and apply deduplication based on specified modes. - Added detailed documentation for the function, explaining its arguments, behavior, and usage examples. Signed-off-by: Lei, HUANG <mrsatangel@gmail.com> --------- Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>	2025-12-22 05:39:03 +00:00
Ning Sun	bd8ffd3db9	feat: pgwire 0.37 (#7443 )	2025-12-22 05:13:39 +00:00
discord9	c0652f6dd5	chore: release push check against Cargo.toml (#7426 ) Signed-off-by: discord9 <discord9@163.com>	2025-12-19 13:16:15 +00:00
Yingwen	fed6cb0806	fix: flat format use correct encoding in indexer for tags (#7440 ) * test: add inverted and skipping test Signed-off-by: evenyag <realevenyag@gmail.com> * test: Add tests for fulltext index Signed-off-by: evenyag <realevenyag@gmail.com> * fix: index dictionary type in correct encoding in flat format Signed-off-by: evenyag <realevenyag@gmail.com> * refactor: use encode_data_type() in SortField Signed-off-by: evenyag <realevenyag@gmail.com> * refactor: refine imports Signed-off-by: evenyag <realevenyag@gmail.com> * test: add tests for sparse encoding Signed-off-by: evenyag <realevenyag@gmail.com> * chore: remove logs Signed-off-by: evenyag <realevenyag@gmail.com> * test: update list test Signed-off-by: evenyag <realevenyag@gmail.com> * test: simplify tests Signed-off-by: evenyag <realevenyag@gmail.com> --------- Signed-off-by: evenyag <realevenyag@gmail.com>	2025-12-19 07:36:44 +00:00
discord9	69659211f6	chore: fix bincode version (#7445 ) Signed-off-by: discord9 <discord9@163.com>	2025-12-19 07:36:28 +00:00
LFC	6332d91884	test: reduce execution time of test `test_suspend_frontend` (#7444 ) Signed-off-by: luofucong <luofc@foxmail.com>	2025-12-19 07:25:36 +00:00
Weny Xu	4d66bd96b8	feat: make distributed time constants and client timeouts configurable (#7433 ) Signed-off-by: WenyXu <wenymedia@gmail.com>	2025-12-19 02:23:20 +00:00
Ning Sun	2f4a15ec40	ci: ensure commits from main branch for whitelisted git dependencies (#7434 ) * chore: update proto to include native histogram * ci: add a CI check to ensure whitelisted dependencies are using their main branch * chore: add changes to Cargo.toml to trigger CI * chore: update proto * test: update test to include histogram	2025-12-18 14:10:33 +00:00
Lanqing Yang	658332fe68	chore(mito): nit remove extra hashset in gc workers (#7399 ) chore(mito): remove extra hashset in gc workers Signed-off-by: lyang24 <lanqingy93@gmail.com>	2025-12-18 13:09:32 +00:00
shuiyisong	c088d361a4	chore: expose `disable_ec2_metadata` option (#7439 ) chore: add option for disable ec2 metadata Signed-off-by: shuiyisong <xixing.sys@gmail.com>	2025-12-18 11:55:08 +00:00
shuiyisong	a85864067e	chore: remove `canonicalize` (#7430 ) * chore: remove canonicalize Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: add match file name option Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: update field name Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: modify tls option Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: update config file Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: update config md Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: update option to `enable_filename_match` Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: address CR issues Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: remove option Signed-off-by: shuiyisong <xixing.sys@gmail.com> * chore: remove unused test Signed-off-by: shuiyisong <xixing.sys@gmail.com> --------- Signed-off-by: shuiyisong <xixing.sys@gmail.com>	2025-12-18 09:39:10 +00:00
LFC	0df69c95aa	chore: use official etcd-client (#7432 ) Signed-off-by: luofucong <luofc@foxmail.com>	2025-12-18 06:25:48 +00:00
McKnight22	72eede8b38	refactor(cli): unify storage configuration for export command (#7280 ) * refactor(cli): unify storage configuration for export command - Utilize ObjectStoreConfig to unify storage configuration for export command - Support export command for Fs, S3, OSS, GCS and Azblob - Fix the Display implementation for SecretString always returned the string "SecretString([REDACTED])" even when the internal secret was empty. Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - Change the encapsulation permissions of each configuration options for every storage backend to public access. Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: WenyXu <wenymedia@gmail.com> * refactor(cli): unify storage configuration for export command - Update the implementation of ObjectStoreConfig::build_xxx() using macro solutions Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: WenyXu <wenymedia@gmail.com> * refactor(cli): unify storage configuration for export command - Introduce config validation for each storage type Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - Enable trait-based polymorphism for storage type handling (from inherent impl to trait impl) - Extract helper functions to reduce code duplication Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - Improve SecretString handling and validation (Distinguishing between "not provided" and "empty string") - Add validation when using filesystem storage Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - Refactor storage field validation with macro Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - support GCS Application Default Credentials (like GKE, Cloud 
Run, or local development with ) in export (Enabling ADC without validating or to be present) (Making optional in GCS validation (defaults to https://storage.googleapis.com)) Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command This commit refactors the validation logic for object store configurations in the CLI to leverage clap features and reduce boilerplate. Key changes: - Update wrap_with_clap_prefix macro to use clap's requires attribute. This ensures that storage-specific options (e.g., --s3-bucket) are only accepted when the corresponding backend is enabled (e.g., --s3). - Simplify FieldValidator trait by removing the is_provided method, as dependency checks are now handled by clap. - Introduce validate_backend! macro to standardize the validation of required fields for enabled backends. - Refactor ExportCommand to remove explicit validation calls (validate_s3, etc.) and rely on the validation within backend constructors. - Add integration tests for ExportCommand to verify build success with S3, OSS, GCS, and Azblob configurations. 
Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * refactor(cli): unify storage configuration for export command - Use macros to simplify storage export implementation Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: WenyXu <wenymedia@gmail.com> * refactor(cli): unify storage configuration for export command - Rollback StorageExport trait implementation to not using macro for better code clarity and maintainability - Introduce format_uri helper function to unify URI formatting logic - Fix OSS URI path bug inherited from legacy code Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: WenyXu <wenymedia@gmail.com> * refactor(cli): unify storage configuration for export command - Remove unnecessary async_trait Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: jeremyhi <jiachun_feng@proton.me> --------- Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: WenyXu <wenymedia@gmail.com> Co-authored-by: jeremyhi <jiachun_feng@proton.me>	2025-12-18 03:16:53 +00:00
jeremyhi	95eccd6cde	feat: introduce granularity for memory manager (#7416 ) * feat: introduce granularity for memory manager Signed-off-by: jeremyhi <fengjiachun@gmail.com> * chore: add unit test Signed-off-by: jeremyhi <fengjiachun@gmail.com> * chore: remove granularity getter for mamanger Signed-off-by: jeremyhi <fengjiachun@gmail.com> * Update src/common/memory-manager/src/manager.rs Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com> * feat: acquire_with_policy for manager Signed-off-by: jeremyhi <fengjiachun@gmail.com> --------- Signed-off-by: jeremyhi <fengjiachun@gmail.com> Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>	2025-12-17 11:08:51 +00:00