diff --git a/.github/workflows/dev-build.yml b/.github/workflows/dev-build.yml
index d03fbeff14..c3af006f54 100644
--- a/.github/workflows/dev-build.yml
+++ b/.github/workflows/dev-build.yml
@@ -30,7 +30,7 @@ on:
       linux_arm64_runner:
         type: choice
         description: The runner uses to build linux-arm64 artifacts
-        default: ec2-c6g.4xlarge-arm64
+        default: ec2-c6g.8xlarge-arm64
         options:
           - ec2-c6g.xlarge-arm64 # 4C8G
           - ec2-c6g.2xlarge-arm64 # 8C16G
diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml
index 14ebb6e715..54af32a94b 100644
--- a/.github/workflows/nightly-build.yml
+++ b/.github/workflows/nightly-build.yml
@@ -27,7 +27,7 @@ on:
       linux_arm64_runner:
         type: choice
         description: The runner uses to build linux-arm64 artifacts
-        default: ec2-c6g.4xlarge-arm64
+        default: ec2-c6g.8xlarge-arm64
         options:
           - ec2-c6g.xlarge-arm64 # 4C8G
           - ec2-c6g.2xlarge-arm64 # 8C16G
diff --git a/.github/workflows/nightly-jsonbench.yaml b/.github/workflows/nightly-jsonbench.yaml
index 3667ee26a6..a9ce4dd363 100644
--- a/.github/workflows/nightly-jsonbench.yaml
+++ b/.github/workflows/nightly-jsonbench.yaml
@@ -1,19 +1,81 @@
 name: Nightly JSONBench
 
 on:
-  schedule:
-    # Trigger at 00:00(Asia/Shanghai) on every weekday.
-    - cron: "0 16 * * 0-4"
+  workflow_run:
+    workflows: [ "GreptimeDB Nightly Build" ]
+    types: [ completed ]
   workflow_dispatch:
+    inputs:
+      run_id:
+        description: The nightly build workflow run id to download GreptimeDB artifacts from
+        required: true
+        type: string
+
+permissions:
+  actions: read
+  contents: read
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
 
 jobs:
+  resolve-artifact:
+    name: Resolve GreptimeDB nightly artifact
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    runs-on: ubuntu-latest
+    outputs:
+      artifact-name: ${{ steps.find-artifact.outputs.artifact-name }}
+      run-id: ${{ steps.resolve-run-id.outputs.run-id }}
+    steps:
+      - name: Resolve nightly build run id
+        id: resolve-run-id
+        shell: bash
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }}
+          INPUT_RUN_ID: ${{ inputs.run_id }}
+        run: |
+          set -euo pipefail
+
+          # Manual dispatches name the build run explicitly; workflow_run
+          # triggers use the id of the nightly build that just completed.
+          case "${EVENT_NAME}" in
+            workflow_dispatch) candidate="${INPUT_RUN_ID}" ;;
+            *) candidate="${WORKFLOW_RUN_ID}" ;;
+          esac
+
+          # Only a purely numeric id is accepted before it is published.
+          [[ "${candidate}" =~ ^[0-9]+$ ]] \
+            || { echo "Invalid workflow run id: ${candidate}"; exit 1; }
+
+          echo "run-id=${candidate}" >> "${GITHUB_OUTPUT}"
+
+      - name: Find GreptimeDB nightly artifact
+        id: find-artifact
+        shell: bash
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RUN_ID: ${{ steps.resolve-run-id.outputs.run-id }}
+        run: |
+          set -euo pipefail
+          # Keep the first matching name; awk (unlike head) drains the pipe, so gh never gets SIGPIPE under pipefail.
+          artifact_name=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}/artifacts" --paginate \
+            --jq '.artifacts[] | select(.name | test("^greptime-linux-arm64-nightly-[0-9]{8}-[0-9a-f]+$")) | .name' \
+            | awk 'NR == 1')
+
+          if [[ -z "${artifact_name}" ]]; then
+            echo "Cannot find linux arm64 nightly artifact in workflow run ${RUN_ID}."
+            exit 1
+          fi
+
+          echo "Download GreptimeDB artifact: ${artifact_name}"
+          echo "artifact-name=${artifact_name}" >> "${GITHUB_OUTPUT}"
+
   allocate-runner:
     name: Allocate runner
-    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    needs: [ resolve-artifact ]
     runs-on: ubuntu-latest
     outputs:
       linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -43,55 +105,50 @@ jobs:
 
   jsonbench:
     name: Run JSONBench
-    if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
-    needs: [ allocate-runner ]
+    if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }}
+    needs: [ resolve-artifact, allocate-runner ]
     runs-on: ${{ needs.allocate-runner.outputs.linux-arm64-runner }}
     timeout-minutes: 120
     env:
-      JSONBENCH_DATA_DIR: /home/runner/data/bluesky
-      JSONBENCH_OUTPUT_PREFIX: _ubuntu-latest
+      JSONBENCH_OUTPUT_PREFIX: _linux-arm64
     steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - name: Download GreptimeDB nightly artifact
+        uses: actions/download-artifact@v4
         with:
-          fetch-depth: 0
-          persist-credentials: false
+          name: ${{ needs.resolve-artifact.outputs.artifact-name }}
+          path: greptimedb-artifact
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          run-id: ${{ needs.resolve-artifact.outputs.run-id }}
 
-      - uses: arduino/setup-protoc@v3
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-
-      - uses: actions-rust-lang/setup-rust-toolchain@v1
-
-      - name: Rust Cache
-        uses: Swatinem/rust-cache@v2
-        with:
-          shared-key: "nightly-jsonbench"
-          cache-all-crates: "true"
-          save-if: ${{ github.ref == 'refs/heads/main' }}
-
-      - name: Build GreptimeDB
-        run: cargo build --profile nightly --bin greptime
-
-      - name: Reclaim disk space
+      - name: Prepare GreptimeDB binary
         shell: bash
         run: |
           set -euo pipefail
 
-          mkdir -p "${RUNNER_TEMP}/greptimedb-bin"
-          cp ./target/nightly/greptime "${RUNNER_TEMP}/greptimedb-bin/greptime"
-          chmod +x "${RUNNER_TEMP}/greptimedb-bin/greptime"
-
-          rm -rf ./target
+          tar -xzf "greptimedb-artifact/${{ needs.resolve-artifact.outputs.artifact-name }}.tar.gz"
+          cp "${{ needs.resolve-artifact.outputs.artifact-name }}/greptime" ./greptime
+          chmod +x ./greptime
+          rm -rf greptimedb-artifact "${{ needs.resolve-artifact.outputs.artifact-name }}"
 
       - name: Run JSONBench
+        env:
+          # TODO(LFC): Change to "3" (100m) when JSON2 ingestion performance is optimized.
+          JSONBENCH_DATASET: "2"
         shell: bash
         run: |
           set -euo pipefail
 
-          cd "${RUNNER_TEMP}"
-          cp "${RUNNER_TEMP}/greptimedb-bin/greptime" ./greptime
-          chmod +x ./greptime
+          export JSONBENCH_DATA_DIR="/root/data/bluesky"
+          echo "Use JSONBench data directory ${JSONBENCH_DATA_DIR}"
+
+          echo "Cloning JSONBench"
+          git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
+          # The dataset choice is piped to download_data.sh on stdin; pass it as a printf argument, never as the format.
+          echo "Downloading JSONBench dataset choice ${JSONBENCH_DATASET} to ${JSONBENCH_DATA_DIR}"
+          mkdir -p "${JSONBENCH_DATA_DIR}"
+          printf '%s\n' "${JSONBENCH_DATASET}" | ./JSONBench/download_data.sh
+          downloaded_files=$(find "${JSONBENCH_DATA_DIR}" -type f | wc -l)
+          echo "Downloaded JSONBench dataset files: ${downloaded_files}"
 
           export GREPTIMEDB_STANDALONE__WAL__DIR=greptimedb_data/wal
           export GREPTIMEDB_STANDALONE__STORAGE__DATA_HOME=greptimedb_data
@@ -100,10 +157,12 @@ jobs:
           export GREPTIMEDB_STANDALONE__HTTP__BODY_LIMIT=1GB
           export GREPTIMEDB_STANDALONE__HTTP__TIMEOUT=500s
 
+          echo "Starting GreptimeDB standalone"
           ./greptime standalone start > greptimedb.log 2>&1 &
           greptime_pid=$!
           trap 'kill "${greptime_pid}" 2>/dev/null || true' EXIT
 
+          echo "Waiting for GreptimeDB health check"
           until curl -s --fail -o /dev/null http://localhost:4000/health; do
             if ! kill -0 "${greptime_pid}" 2>/dev/null; then
               cat greptimedb.log
@@ -111,12 +170,14 @@ jobs:
             fi
             sleep 1
           done
+          echo "GreptimeDB is ready"
 
-          git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench
           cp ./greptime JSONBench/greptimedb/greptime
 
           cd JSONBench/greptimedb
-          ./main.sh 3 "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false
+          echo "Running JSONBench main.sh with dataset choice ${JSONBENCH_DATASET} and install=false"
+          ./main.sh "${JSONBENCH_DATASET}" "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false
+          echo "JSONBench finished"
 
       - name: Upload JSONBench results
         if: always()
@@ -124,21 +185,21 @@ jobs:
         with:
           name: jsonbench-results
           path: |
-            ${{ runner.temp }}/greptimedb.log
-            ${{ runner.temp }}/JSONBench/greptimedb/*.log
-            ${{ runner.temp }}/JSONBench/greptimedb/*.total_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.data_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.index_size
-            ${{ runner.temp }}/JSONBench/greptimedb/*.count
-            ${{ runner.temp }}/JSONBench/greptimedb/*.results_runtime
-            ${{ runner.temp }}/JSONBench/greptimedb/*.query_results
+            ./greptimedb.log
+            ./JSONBench/greptimedb/*.log
+            ./JSONBench/greptimedb/*.total_size
+            ./JSONBench/greptimedb/*.data_size
+            ./JSONBench/greptimedb/*.index_size
+            ./JSONBench/greptimedb/*.count
+            ./JSONBench/greptimedb/*.results_runtime
+            ./JSONBench/greptimedb/*.query_results
           if-no-files-found: ignore
           retention-days: 7
 
   stop-linux-arm64-runner:
     name: Stop Linux ARM64 runner
     # It's always run as the last job in the workflow to make sure that the runner is released.
-    if: ${{ always() }}
+    if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }}
     runs-on: ubuntu-latest
     needs: [
       allocate-runner,
diff --git a/Cargo.lock b/Cargo.lock
index a65159d26a..2485a5ceec 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -79,8 +79,9 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
 dependencies = [
  "cfg-if",
  "const-random",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "once_cell",
+ "serde",
  "version_check",
  "zerocopy",
 ]
@@ -771,7 +772,7 @@ version = "4.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891"
 dependencies = [
- "term",
+ "term 1.0.2",
 ]
 
 [[package]]
@@ -1427,6 +1428,12 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "borrow-or-share"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c"
+
 [[package]]
 name = "borsh"
 version = "1.5.7"
@@ -1525,6 +1532,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "bytecount"
+version = "0.6.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e"
+
 [[package]]
 name = "bytemuck"
 version = "1.23.1"
@@ -1635,7 +1648,7 @@ dependencies = [
  "paste",
  "prometheus 0.14.0",
  "promql-parser",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "session",
@@ -1973,7 +1986,7 @@ dependencies = [
  "partition",
  "paste",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "reqwest 0.13.2",
  "serde",
  "serde_json",
@@ -2020,7 +2033,7 @@ dependencies = [
  "prometheus 0.14.0",
  "prost 0.14.1",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde_json",
  "snafu 0.8.6",
  "store-api",
@@ -2031,6 +2044,15 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "clipboard-win"
+version = "5.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4"
+dependencies = [
+ "error-code",
+]
+
 [[package]]
 name = "clocksource"
 version = "0.8.1"
@@ -2123,7 +2145,7 @@ dependencies = [
  "prometheus 0.14.0",
  "prost 0.14.1",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "reqwest 0.13.2",
  "serde",
@@ -2462,7 +2484,7 @@ dependencies = [
  "hyper-util",
  "lazy_static",
  "prost 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "snafu 0.8.6",
@@ -2581,7 +2603,7 @@ dependencies = [
  "prometheus 0.14.0",
  "prost 0.14.1",
  "prost-types 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "rskafka",
  "rustls",
@@ -2649,7 +2671,7 @@ dependencies = [
  "futures-util",
  "humantime-serde",
  "object-store",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "smallvec",
@@ -2833,7 +2855,7 @@ dependencies = [
  "common-query",
  "common-recordbatch",
  "once_cell",
- "rand 0.9.1",
+ "rand 0.9.4",
  "tempfile",
 ]
 
@@ -2849,7 +2871,7 @@ dependencies = [
  "humantime",
  "humantime-serde",
  "once_cell",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "snafu 0.8.6",
@@ -3594,6 +3616,12 @@ dependencies = [
  "parking_lot_core 0.9.11",
 ]
 
+[[package]]
+name = "data-encoding"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8"
+
 [[package]]
 name = "datafusion"
 version = "53.1.0"
@@ -3638,7 +3666,7 @@ dependencies = [
  "object_store",
  "parking_lot 0.12.4",
  "parquet",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "sqlparser",
  "tempfile",
@@ -3755,7 +3783,7 @@ dependencies = [
  "liblzma",
  "log",
  "object_store",
- "rand 0.9.1",
+ "rand 0.9.4",
  "tokio",
  "tokio-util",
  "url",
@@ -3881,7 +3909,7 @@ dependencies = [
  "log",
  "object_store",
  "parking_lot 0.12.4",
- "rand 0.9.1",
+ "rand 0.9.4",
  "tempfile",
  "url",
 ]
@@ -3944,7 +3972,7 @@ dependencies = [
  "md-5 0.10.6",
  "memchr",
  "num-traits",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "sha2 0.10.9",
  "unicode-segmentation",
@@ -4234,7 +4262,7 @@ dependencies = [
  "datafusion-proto-common",
  "object_store",
  "prost 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
 ]
 
 [[package]]
@@ -4685,6 +4713,27 @@ dependencies = [
  "crypto-common 0.2.1",
 ]
 
+[[package]]
+name = "dirs-next"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
+dependencies = [
+ "cfg-if",
+ "dirs-sys-next",
+]
+
+[[package]]
+name = "dirs-sys-next"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
+dependencies = [
+ "libc",
+ "redox_users",
+ "winapi",
+]
+
 [[package]]
 name = "displaydoc"
 version = "0.2.5"
@@ -4713,14 +4762,14 @@ dependencies = [
 
 [[package]]
 name = "dns-lookup"
-version = "2.0.4"
+version = "3.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc"
+checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d"
 dependencies = [
  "cfg-if",
  "libc",
- "socket2 0.5.10",
- "windows-sys 0.48.0",
+ "socket2 0.6.0",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -4740,31 +4789,30 @@ dependencies = [
 
 [[package]]
 name = "domain"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
+checksum = "8c469892dddfeff64ecfdbc64cf059c77fb0decaeccd4d5d484394bdd6312bac"
 dependencies = [
  "bumpalo",
  "bytes",
  "domain-macros",
  "futures-util",
- "hashbrown 0.14.5",
+ "hashbrown 0.17.1",
+ "jiff",
  "log",
- "moka",
  "octseq",
- "rand 0.8.5",
+ "rand 0.10.1",
  "serde",
  "smallvec",
- "time",
  "tokio",
  "tracing",
 ]
 
 [[package]]
 name = "domain-macros"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
+checksum = "6fef7ef74e413e36d5364db163ca577ccb56f2f74377705d5f920ee3e1544127"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4849,6 +4897,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "email_address"
+version = "0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "ena"
 version = "0.14.3"
@@ -4962,6 +5019,12 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "error-code"
+version = "3.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"
+
 [[package]]
 name = "etcd-client"
 version = "0.17.0"
@@ -5018,6 +5081,12 @@ dependencies = [
  "pin-project-lite",
 ]
 
+[[package]]
+name = "exitcode"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
+
 [[package]]
 name = "fail"
 version = "0.5.1"
@@ -5043,9 +5112,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
 
 [[package]]
 name = "fancy-regex"
-version = "0.14.0"
+version = "0.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
+checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
 dependencies = [
  "bit-set",
  "regex-automata",
@@ -5274,7 +5343,7 @@ dependencies = [
  "prometheus 0.14.0",
  "prost 0.14.1",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "servers",
@@ -5299,6 +5368,17 @@ dependencies = [
  "bitflags 1.3.2",
 ]
 
+[[package]]
+name = "fluent-uri"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc74ac4d8359ae70623506d512209619e5cf8f347124910440dbc221714b328e"
+dependencies = [
+ "borrow-or-share",
+ "ref-cast",
+ "serde",
+]
+
 [[package]]
 name = "flume"
 version = "0.11.1"
@@ -5337,6 +5417,16 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fraction"
+version = "0.15.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e076045bb43dac435333ed5f04caf35c7463631d0dae2deb2638d94dd0a5b872"
+dependencies = [
+ "lazy_static",
+ "num",
+]
+
 [[package]]
 name = "fragile"
 version = "2.0.1"
@@ -5405,7 +5495,7 @@ dependencies = [
  "promql-parser",
  "prost 0.14.1",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "reqwest 0.13.2",
  "serde",
  "serde_json",
@@ -5756,21 +5846,21 @@ dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
  "wasm-bindgen",
 ]
 
 [[package]]
 name = "getrandom"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
 dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
  "r-efi",
- "wasi 0.14.2+wasi-0.2.4",
+ "wasip2",
  "wasm-bindgen",
 ]
 
@@ -5842,9 +5932,9 @@ dependencies = [
 
 [[package]]
 name = "grok"
-version = "2.1.0"
+version = "2.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c52724b609896f661a3f4641dd3a44dc602958ef615857c12d00756b4e9355b"
+checksum = "6ddab6a9c8bb998cb2fc3101fde8ef561b7c4970db3957be7a8eee1e168f666b"
 dependencies = [
  "glob",
  "onig",
@@ -5984,6 +6074,9 @@ name = "hashbrown"
 version = "0.17.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+dependencies = [
+ "allocator-api2",
+]
 
 [[package]]
 name = "hashlink"
@@ -6650,7 +6743,7 @@ dependencies = [
  "pin-project",
  "prost 0.14.1",
  "puffin",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rand_chacha 0.9.0",
  "regex",
  "regex-automata",
@@ -6841,6 +6934,15 @@ dependencies = [
  "derive_utils",
 ]
 
+[[package]]
+name = "ipcrypt-rs"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96e4f67dbfc0f75d7b65953ecf0be3fd84ee0cb1ae72a00a4aa9a2f5518a2c80"
+dependencies = [
+ "aes",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -7016,6 +7118,36 @@ dependencies = [
  "windows-sys 0.45.0",
 ]
 
+[[package]]
+name = "jni"
+version = "0.22.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498"
+dependencies = [
+ "cfg-if",
+ "combine",
+ "jni-macros",
+ "jni-sys 0.4.1",
+ "log",
+ "simd_cesu8",
+ "thiserror 2.0.17",
+ "walkdir",
+ "windows-link 0.2.1",
+]
+
+[[package]]
+name = "jni-macros"
+version = "0.22.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "rustc_version",
+ "simd_cesu8",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "jni-sys"
 version = "0.3.1"
@@ -7050,7 +7182,7 @@ version = "0.1.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "libc",
 ]
 
@@ -7139,11 +7271,38 @@ version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1c6e529149475ca0b2820835d3dce8fcc41c6b943ca608d32f35b449255e4627"
 dependencies = [
- "fluent-uri",
+ "fluent-uri 0.1.4",
  "serde",
  "serde_json",
 ]
 
+[[package]]
+name = "jsonschema"
+version = "0.38.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89f50532ce4a0ba3ae930212908d8ec50e7806065c059fe9c75da2ece6132294"
+dependencies = [
+ "ahash 0.8.12",
+ "bytecount",
+ "data-encoding",
+ "email_address",
+ "fancy-regex",
+ "fraction",
+ "getrandom 0.3.4",
+ "idna",
+ "itoa",
+ "num-cmp",
+ "num-traits",
+ "percent-encoding",
+ "referencing",
+ "regex",
+ "regex-syntax",
+ "serde",
+ "serde_json",
+ "unicode-general-category",
+ "uuid-simd",
+]
+
 [[package]]
 name = "jsonwebtoken"
 version = "10.3.0"
@@ -7337,7 +7496,7 @@ dependencies = [
  "regex-syntax",
  "sha3",
  "string_cache",
- "term",
+ "term 1.0.2",
  "unicode-xid",
  "walkdir",
 ]
@@ -7736,7 +7895,7 @@ dependencies = [
  "protobuf 2.28.0",
  "protobuf-build",
  "raft-engine",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rskafka",
  "serde",
  "serde_json",
@@ -8043,7 +8202,7 @@ dependencies = [
  "futures-util",
  "humantime-serde",
  "meta-srv",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde",
  "serde_json",
  "session",
@@ -8111,7 +8270,7 @@ dependencies = [
  "partition",
  "prometheus 0.14.0",
  "prost 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "rskafka",
  "serde",
@@ -8253,7 +8412,7 @@ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
 dependencies = [
  "libc",
  "log",
- "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasi",
  "windows-sys 0.59.0",
 ]
 
@@ -8343,7 +8502,7 @@ dependencies = [
  "prometheus 0.14.0",
  "prost 0.14.1",
  "puffin",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rayon",
  "regex",
  "roaring",
@@ -8656,6 +8815,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "ndk-context"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
+
 [[package]]
 name = "neli"
 version = "0.6.5"
@@ -8758,6 +8923,15 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "nom-language"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2de2bc5b451bfedaef92c90b8939a8fff5770bdcc1fafd6239d086aab8fa6b29"
+dependencies = [
+ "nom 8.0.0",
+]
+
 [[package]]
 name = "notify"
 version = "8.0.0"
@@ -8841,6 +9015,12 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "num-cmp"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa"
+
 [[package]]
 name = "num-complex"
 version = "0.4.6"
@@ -9001,6 +9181,31 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "objc2"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f"
+dependencies = [
+ "objc2-encode",
+]
+
+[[package]]
+name = "objc2-encode"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
+
+[[package]]
+name = "objc2-foundation"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
+dependencies = [
+ "bitflags 2.11.1",
+ "objc2",
+]
+
 [[package]]
 name = "object"
 version = "0.36.7"
@@ -9040,7 +9245,7 @@ dependencies = [
  "object_store_opendal",
  "opendal",
  "prometheus 0.14.0",
- "rand 0.9.1",
+ "rand 0.9.4",
  "reqwest 0.13.2",
  "serde",
  "snafu 0.8.6",
@@ -9094,9 +9299,9 @@ dependencies = [
 
 [[package]]
 name = "octseq"
-version = "0.5.2"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "126c3ca37c9c44cec575247f43a3e4374d8927684f129d2beeb0d2cef262fe12"
+checksum = "182eab3e1cd9cdc0ecf1ce3342d9844f3dc7d098f0694569bfdf327b612d69fd"
 dependencies = [
  "bytes",
  "serde",
@@ -9552,7 +9757,7 @@ dependencies = [
  "futures-util",
  "opentelemetry 0.30.0",
  "percent-encoding",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde_json",
  "thiserror 2.0.17",
  "tokio",
@@ -9570,7 +9775,7 @@ dependencies = [
  "futures-util",
  "opentelemetry 0.31.0",
  "percent-encoding",
- "rand 0.9.1",
+ "rand 0.9.4",
  "thiserror 2.0.17",
 ]
 
@@ -9748,7 +9953,7 @@ dependencies = [
  "paste",
  "prost 0.14.1",
  "prost-build 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
  "replace_with",
  "serde",
  "smallvec",
@@ -10154,7 +10359,7 @@ dependencies = [
  "md5",
  "pg_interval_2",
  "postgres-types",
- "rand 0.10.0",
+ "rand 0.10.1",
  "rust_decimal",
  "rustls-pki-types",
  "ryu",
@@ -10531,7 +10736,7 @@ dependencies = [
  "hmac",
  "md-5 0.10.6",
  "memchr",
- "rand 0.9.1",
+ "rand 0.9.4",
  "sha2 0.10.9",
  "stringprep",
 ]
@@ -10680,6 +10885,19 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "prettytable-rs"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a"
+dependencies = [
+ "encode_unicode",
+ "is-terminal",
+ "lazy_static",
+ "term 0.7.0",
+ "unicode-width 0.1.14",
+]
+
 [[package]]
 name = "proc-macro-crate"
 version = "1.3.1"
@@ -10845,7 +11063,7 @@ checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40"
 dependencies = [
  "bitflags 2.11.1",
  "num-traits",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rand_chacha 0.9.0",
  "rand_xorshift",
  "regex-syntax",
@@ -11272,7 +11490,7 @@ dependencies = [
  "promql",
  "promql-parser",
  "prost 0.14.1",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "serde",
  "serde_json",
@@ -11345,9 +11563,9 @@ checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
 dependencies = [
  "aws-lc-rs",
  "bytes",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "lru-slab",
- "rand 0.9.1",
+ "rand 0.9.4",
  "ring",
  "rustc-hash 2.1.1",
  "rustls",
@@ -11453,9 +11671,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.9.1"
+version = "0.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.3",
@@ -11463,9 +11681,9 @@ dependencies = [
 
 [[package]]
 name = "rand"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
+checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
 dependencies = [
  "chacha20 0.10.0",
  "getrandom 0.4.1",
@@ -11508,7 +11726,7 @@ version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
 ]
 
 [[package]]
@@ -11601,6 +11819,17 @@ dependencies = [
  "bitflags 2.11.1",
 ]
 
+[[package]]
+name = "redox_users"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
+dependencies = [
+ "getrandom 0.2.16",
+ "libredox",
+ "thiserror 1.0.69",
+]
+
 [[package]]
 name = "ref-cast"
 version = "1.0.24"
@@ -11621,6 +11850,21 @@ dependencies = [
  "syn 2.0.117",
 ]
 
+[[package]]
+name = "referencing"
+version = "0.38.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15a8af0c6bb8eaf8b07cb06fc31ff30ca6fe19fb99afa476c276d8b24f365b0b"
+dependencies = [
+ "ahash 0.8.12",
+ "fluent-uri 0.4.1",
+ "getrandom 0.3.4",
+ "hashbrown 0.16.1",
+ "parking_lot 0.12.4",
+ "percent-encoding",
+ "serde_json",
+]
+
 [[package]]
 name = "regex"
 version = "1.12.2"
@@ -11686,6 +11930,15 @@ version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
 
+[[package]]
+name = "relative-path"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bca40a312222d8ba74837cb474edef44b37f561da5f773981007a10bbaa992b0"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "rend"
 version = "0.4.2"
@@ -11825,6 +12078,7 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "futures-util",
+ "h2 0.4.11",
  "http 1.3.1",
  "http-body 1.0.1",
  "http-body-util",
@@ -11837,6 +12091,7 @@ dependencies = [
  "pin-project-lite",
  "quinn",
  "rustls",
+ "rustls-native-certs 0.8.1",
  "rustls-pki-types",
  "serde",
  "serde_json",
@@ -11898,6 +12153,50 @@ dependencies = [
  "web-sys",
 ]
 
+[[package]]
+name = "reqwest-middleware"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57f17d28a6e6acfe1733fe24bcd30774d13bffa4b8a22535b4c8c98423088d4e"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "http 1.3.1",
+ "reqwest 0.12.28",
+ "serde",
+ "thiserror 1.0.69",
+ "tower-service",
+]
+
+[[package]]
+name = "reqwest-retry"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "105747e3a037fe5bf17458d794de91149e575b6183fc72c85623a44abb9683f5"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "futures",
+ "getrandom 0.2.16",
+ "http 1.3.1",
+ "hyper 1.6.0",
+ "reqwest 0.12.28",
+ "reqwest-middleware",
+ "retry-policies",
+ "thiserror 2.0.17",
+ "tokio",
+ "wasmtimer",
+]
+
+[[package]]
+name = "retry-policies"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc05fbf560421a0357a750cbe78c7ca19d4923918490daabba313d5dbc871e47"
+dependencies = [
+ "rand 0.10.1",
+]
+
 [[package]]
 name = "rgb"
 version = "0.8.50"
@@ -11985,9 +12284,12 @@ dependencies = [
 
 [[package]]
 name = "roxmltree"
-version = "0.20.0"
+version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97"
+checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb"
+dependencies = [
+ "memchr",
+]
 
 [[package]]
 name = "rsa"
@@ -12042,7 +12344,7 @@ dependencies = [
  "integer-encoding 4.0.2",
  "lz4",
  "parking_lot 0.12.4",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rsasl",
  "rustls",
  "snap",
@@ -12088,7 +12390,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "relative-path",
+ "relative-path 1.9.3",
  "rustc_version",
  "syn 2.0.117",
  "unicode-ident",
@@ -12309,7 +12611,7 @@ checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784"
 dependencies = [
  "core-foundation 0.10.1",
  "core-foundation-sys",
- "jni",
+ "jni 0.21.1",
  "log",
  "once_cell",
  "rustls",
@@ -12346,6 +12648,25 @@ version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
 
+[[package]]
+name = "rustyline"
+version = "17.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564"
+dependencies = [
+ "bitflags 2.11.1",
+ "cfg-if",
+ "clipboard-win",
+ "libc",
+ "log",
+ "memchr",
+ "nix 0.30.1",
+ "unicode-segmentation",
+ "unicode-width 0.2.1",
+ "utf8parse",
+ "windows-sys 0.60.2",
+]
+
 [[package]]
 name = "ryu"
 version = "1.0.20"
@@ -12724,6 +13045,19 @@ dependencies = [
  "unsafe-libyaml",
 ]
 
+[[package]]
+name = "serde_yaml_ng"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f"
+dependencies = [
+ "indexmap 2.13.0",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
 [[package]]
 name = "servers"
 version = "1.1.0"
@@ -12820,7 +13154,7 @@ dependencies = [
  "prost 0.14.1",
  "query",
  "quoted-string",
- "rand 0.9.1",
+ "rand 0.9.4",
  "regex",
  "reqwest 0.13.2",
  "rust-embed",
@@ -12998,7 +13332,7 @@ version = "0.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c962f626b54771990066e5435ec8331d1462576cd2d1e62f24076ae014f92112"
 dependencies = [
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "halfbrown",
  "ref-cast",
  "serde",
@@ -13007,6 +13341,16 @@ dependencies = [
  "value-trait",
 ]
 
+[[package]]
+name = "simd_cesu8"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33"
+dependencies = [
+ "rustc_version",
+ "simdutf8",
+]
+
 [[package]]
 name = "simdutf8"
 version = "0.1.5"
@@ -13898,12 +14242,12 @@ dependencies = [
 
 [[package]]
 name = "syslog_loose"
-version = "0.21.0"
+version = "0.22.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "161028c00842709450114c39db3b29f44c898055ed8833bb9b535aba7facf30e"
+checksum = "d6ec4df26907adce53e94eac201a9ba38744baea3bc97f34ffd591d5646231a6"
 dependencies = [
  "chrono",
- "nom 7.1.3",
+ "nom 8.0.0",
 ]
 
 [[package]]
@@ -14147,12 +14491,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
 dependencies = [
  "fastrand",
- "getrandom 0.3.3",
+ "getrandom 0.3.4",
  "once_cell",
  "rustix 1.0.7",
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "term"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
+dependencies = [
+ "dirs-next",
+ "rustversion",
+ "winapi",
+]
+
 [[package]]
 name = "term"
 version = "1.0.2"
@@ -14205,7 +14560,7 @@ dependencies = [
  "nix 0.28.0",
  "partition",
  "paste",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rand_chacha 0.9.0",
  "reqwest 0.13.2",
  "rustls",
@@ -14292,7 +14647,7 @@ dependencies = [
  "plugins",
  "prost 0.14.1",
  "query",
- "rand 0.9.1",
+ "rand 0.9.4",
  "rstest",
  "rstest_reuse",
  "sea-query",
@@ -14582,7 +14937,7 @@ dependencies = [
  "pin-project-lite",
  "postgres-protocol",
  "postgres-types",
- "rand 0.9.1",
+ "rand 0.9.4",
  "socket2 0.5.10",
  "tokio",
  "tokio-util",
@@ -15233,6 +15588,12 @@ version = "0.3.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
 
+[[package]]
+name = "unicode-general-category"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b993bddc193ae5bd0d623b49ec06ac3e9312875fdae725a975c51db1cc1677f"
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.22"
@@ -15366,11 +15727,21 @@ checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb"
 dependencies = [
  "getrandom 0.4.1",
  "js-sys",
- "rand 0.9.1",
+ "rand 0.9.4",
  "serde_core",
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "uuid-simd"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8"
+dependencies = [
+ "outref",
+ "vsimd",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
@@ -15453,9 +15824,9 @@ dependencies = [
 
 [[package]]
 name = "vrl"
-version = "0.25.0"
+version = "0.33.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
+checksum = "925a4d3321b18a200c82c3ec02ee2be2b4bf16db07a5ce7e2a9a888b795ea862"
 dependencies = [
  "aes",
  "aes-siv",
@@ -15485,8 +15856,10 @@ dependencies = [
  "domain",
  "dyn-clone",
  "encoding_rs",
+ "exitcode",
  "fancy-regex",
  "flate2",
+ "getrandom 0.3.4",
  "grok",
  "hex",
  "hmac",
@@ -15496,12 +15869,15 @@ dependencies = [
  "indexmap 2.13.0",
  "indoc",
  "influxdb-line-protocol",
+ "ipcrypt-rs",
  "itertools 0.14.0",
+ "jsonschema",
  "lalrpop",
  "lalrpop-util",
  "lz4_flex 0.11.6",
  "md-5 0.10.6",
- "nom 7.1.3",
+ "nom 8.0.0",
+ "nom-language",
  "ofb",
  "onig",
  "ordered-float 4.6.0",
@@ -15510,20 +15886,27 @@ dependencies = [
  "percent-encoding",
  "pest",
  "pest_derive",
+ "prettytable-rs",
  "prost 0.13.5",
  "prost-reflect",
  "psl",
  "psl-types",
  "publicsuffix",
  "quoted_printable",
- "rand 0.8.5",
+ "rand 0.9.4",
  "regex",
+ "relative-path 2.0.1",
+ "reqwest 0.12.28",
+ "reqwest-middleware",
+ "reqwest-retry",
  "roxmltree",
  "rust_decimal",
+ "rustyline",
  "seahash",
  "serde",
  "serde_json",
  "serde_yaml",
+ "serde_yaml_ng",
  "sha-1",
  "sha2 0.10.9",
  "sha3",
@@ -15531,6 +15914,8 @@ dependencies = [
  "snafu 0.8.6",
  "snap",
  "strip-ansi-escapes",
+ "strum 0.26.3",
+ "strum_macros 0.26.4",
  "syslog_loose",
  "termcolor",
  "thiserror 2.0.17",
@@ -15541,7 +15926,9 @@ dependencies = [
  "url",
  "utf8-width",
  "uuid",
+ "webbrowser",
  "woothee",
+ "xxhash-rust",
  "zstd",
 ]
 
@@ -15585,15 +15972,6 @@ version = "0.11.1+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
-[[package]]
-name = "wasi"
-version = "0.14.2+wasi-0.2.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
-dependencies = [
- "wit-bindgen-rt",
-]
-
 [[package]]
 name = "wasip2"
 version = "1.0.2+wasi-0.2.9"
@@ -15734,6 +16112,20 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "wasmtimer"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b"
+dependencies = [
+ "futures",
+ "js-sys",
+ "parking_lot 0.12.4",
+ "pin-utils",
+ "slab",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "web-sys"
 version = "0.3.95"
@@ -15754,6 +16146,22 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "webbrowser"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fc95580916af1e68ff6a7be07446fc5db73ebf71cf092de939bbf5f7e189f72"
+dependencies = [
+ "core-foundation 0.10.1",
+ "jni 0.22.4",
+ "log",
+ "ndk-context",
+ "objc2",
+ "objc2-foundation",
+ "url",
+ "web-sys",
+]
+
 [[package]]
 name = "webpki"
 version = "0.22.4"
@@ -16040,6 +16448,15 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
@@ -16088,13 +16505,30 @@ dependencies = [
  "windows_aarch64_gnullvm 0.52.6",
  "windows_aarch64_msvc 0.52.6",
  "windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm",
+ "windows_i686_gnullvm 0.52.6",
  "windows_i686_msvc 0.52.6",
  "windows_x86_64_gnu 0.52.6",
  "windows_x86_64_gnullvm 0.52.6",
  "windows_x86_64_msvc 0.52.6",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link 0.2.1",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
 [[package]]
 name = "windows-threading"
 version = "0.1.0"
@@ -16122,6 +16556,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
 
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.42.2"
@@ -16140,6 +16580,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
 
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
 [[package]]
 name = "windows_i686_gnu"
 version = "0.42.2"
@@ -16158,12 +16604,24 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
 
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
 [[package]]
 name = "windows_i686_gnullvm"
 version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
 
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
 [[package]]
 name = "windows_i686_msvc"
 version = "0.42.2"
@@ -16182,6 +16640,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
 
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.42.2"
@@ -16200,6 +16664,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
 
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.42.2"
@@ -16218,6 +16688,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
 
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.42.2"
@@ -16236,6 +16712,12 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
 [[package]]
 name = "winnow"
 version = "0.5.40"
@@ -16283,15 +16765,6 @@ dependencies = [
  "wit-parser",
 ]
 
-[[package]]
-name = "wit-bindgen-rt"
-version = "0.39.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
-dependencies = [
- "bitflags 2.11.1",
-]
-
 [[package]]
 name = "wit-bindgen-rust"
 version = "0.51.0"
@@ -16445,6 +16918,12 @@ dependencies = [
  "rustix 1.0.7",
 ]
 
+[[package]]
+name = "xxhash-rust"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
+
 [[package]]
 name = "yaml-rust"
 version = "0.4.5"
diff --git a/Cargo.toml b/Cargo.toml
index 56200a24d6..ee8d4dcf11 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -259,7 +259,7 @@ tracing-opentelemetry = "0.31.0"
 tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
 typetag = "0.2"
 uuid = { version = "1.17", features = ["serde", "v4", "v7", "fast-rng"] }
-vrl = "0.25"
+vrl = "0.33"
 zstd = "0.13"
 # DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
 
diff --git a/config/config.md b/config/config.md
index d9cffaf122..df06d2153c 100644
--- a/config/config.md
+++ b/config/config.md
@@ -451,6 +451,7 @@
 | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
 | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
 | `max_concurrent_queries` | Integer | `0` | The maximum concurrent queries allowed to be executed. Zero means unlimited. |
+| `concurrent_query_limiter_timeout` | String | `100ms` | Timeout to acquire a permit from the concurrent query limiter when `max_concurrent_queries` is reached. |
 | `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
 | `http` | -- | -- | The HTTP server options. |
 | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
diff --git a/config/datanode.example.toml b/config/datanode.example.toml
index d558918daf..9351c4e85d 100644
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -20,6 +20,9 @@ init_regions_parallelism = 16
 ## The maximum concurrent queries allowed to be executed. Zero means unlimited.
 max_concurrent_queries = 0
 
+## Timeout to acquire a permit from the concurrent query limiter when `max_concurrent_queries` is reached.
+concurrent_query_limiter_timeout = "100ms"
+
 ## Enable telemetry to collect anonymous usage data. Enabled by default.
 #+ enable_telemetry = true
 
diff --git a/src/cli/src/data/export_v2/command.rs b/src/cli/src/data/export_v2/command.rs
index db0f576a4e..bb027bbef1 100644
--- a/src/cli/src/data/export_v2/command.rs
+++ b/src/cli/src/data/export_v2/command.rs
@@ -1077,7 +1077,9 @@ async fn verify_snapshot(storage: &OpenDalStorage) -> Result<VerifyReport> {
             ));
         }
         let data_files = storage.list_files_recursive("data/").await?;
-        if let Some(path) = data_files.first() {
+        // Report the lexicographically smallest path so the message is stable
+        // regardless of listing order across backends.
+        if let Some(path) = data_files.iter().min() {
             report.push_error(format!(
                 "Schema-only snapshot should not contain data files (found '{}')",
                 path
@@ -1103,75 +1105,113 @@ fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary {
     }
 }
 
+/// A data file declared by a completed chunk that is expected to exist in storage.
+#[derive(Debug)]
+struct ChunkFile {
+    chunk_id: u32,
+    path: String,
+}
+
+/// Expected snapshot contents derived purely from the manifest (no object-store IO).
+///
+/// Separating planning from scanning makes it obvious which problems come from
+/// the manifest alone and which require comparing against actual storage.
+#[derive(Debug, Default)]
+struct VerifyPlan {
+    /// Valid data files declared by completed chunks; each must exist in storage.
+    files_to_check: Vec<ChunkFile>,
+    /// All syntactically-safe data paths declared by any chunk, regardless of
+    /// status. Used as the orphan-detection baseline so a listed-but-invalid
+    /// file is not also reported as unexpected.
+    claimed_data_files: HashSet<String>,
+    /// Total data-file references in completed chunks (valid + invalid).
+    data_files_total: usize,
+    /// Problems detectable from the manifest alone.
+    problems: Vec<VerifyProblem>,
+}
+
+/// Actual data files discovered under `data/` (the only object-store IO in
+/// chunk/data-file verification).
+#[derive(Debug)]
+struct VerifyDataScan {
+    existing_data_files: HashSet<String>,
+}
+
+/// Result of reconciling the manifest plan against the storage scan.
+#[derive(Debug, Default)]
+struct VerifyOutcome {
+    data_files_total: usize,
+    data_files_verified: usize,
+    problems: Vec<VerifyProblem>,
+}
+
 async fn verify_chunks_and_data_files(
     storage: &OpenDalStorage,
     report: &mut VerifyReport,
 ) -> Result<()> {
-    let existing_files: HashSet<_> = storage
-        .list_files_recursive("data/")
-        .await?
-        .into_iter()
-        .collect();
-    let mut data_files_total = 0;
-    let mut data_files_verified = 0;
-    let mut problems = Vec::new();
-    let mut seen_chunk_ids = HashSet::new();
-    let mut claimed_data_files = HashSet::new();
+    let plan = build_verify_plan(&report.manifest);
+    let scan = scan_data_files(storage).await?;
+    let outcome = reconcile_plan_with_scan(plan, &scan);
 
-    for chunk in &report.manifest.chunks {
+    report.data_files_total = outcome.data_files_total;
+    report.data_files_verified = outcome.data_files_verified;
+    report.problems.extend(outcome.problems);
+
+    Ok(())
+}
+
+/// Builds the expected-state plan from the manifest. Pure; performs no IO.
+fn build_verify_plan(manifest: &Manifest) -> VerifyPlan {
+    let mut plan = VerifyPlan::default();
+    let mut seen_chunk_ids = HashSet::new();
+
+    for chunk in &manifest.chunks {
         if !seen_chunk_ids.insert(chunk.id) {
-            problems.push(VerifyProblem {
+            plan.problems.push(VerifyProblem {
                 severity: VerifySeverity::Error,
                 message: format!("Chunk {}: duplicate chunk id", chunk.id),
             });
         }
         for file in &chunk.files {
             if let Some(path) = safe_manifest_data_file_path(file) {
-                claimed_data_files.insert(path.to_string());
+                plan.claimed_data_files.insert(path.to_string());
             }
         }
 
         match chunk.status {
             ChunkStatus::Completed => {
                 if chunk.files.is_empty() {
-                    problems.push(VerifyProblem {
+                    plan.problems.push(VerifyProblem {
                         severity: VerifySeverity::Error,
                         message: format!("Chunk {}: completed chunk has no data files", chunk.id),
                     });
                     continue;
                 }
-                let allowed_prefixes = report
-                    .manifest
+                let allowed_prefixes = manifest
                     .schemas
                     .iter()
                     .map(|schema| data_dir_for_schema_chunk(schema, chunk.id))
                     .collect::<Vec<_>>();
                 for file in &chunk.files {
-                    data_files_total += 1;
-                    let Some(path) = valid_manifest_data_file_path(file, &allowed_prefixes) else {
-                        problems.push(VerifyProblem {
+                    plan.data_files_total += 1;
+                    match valid_manifest_data_file_path(file, &allowed_prefixes) {
+                        Some(path) => plan.files_to_check.push(ChunkFile {
+                            chunk_id: chunk.id,
+                            path: path.to_string(),
+                        }),
+                        None => plan.problems.push(VerifyProblem {
                             severity: VerifySeverity::Error,
                             message: format!(
                                 "Chunk {}: invalid data file path '{}'",
                                 chunk.id, file
                             ),
-                        });
-                        continue;
-                    };
-
-                    if existing_files.contains(path) {
-                        data_files_verified += 1;
-                    } else {
-                        problems.push(VerifyProblem {
-                            severity: VerifySeverity::Error,
-                            message: format!("Chunk {}: missing file '{}'", chunk.id, path),
-                        });
+                        }),
                     }
                 }
             }
             ChunkStatus::Skipped => {
                 if !chunk.files.is_empty() {
-                    problems.push(VerifyProblem {
+                    plan.problems.push(VerifyProblem {
                         severity: VerifySeverity::Error,
                         message: format!(
                             "Chunk {}: skipped chunk should not list data files",
@@ -1181,20 +1221,20 @@ async fn verify_chunks_and_data_files(
                 }
             }
             ChunkStatus::Pending => {
-                problems.push(VerifyProblem {
+                plan.problems.push(VerifyProblem {
                     severity: VerifySeverity::Error,
                     message: format!("Chunk {}: status is 'pending'", chunk.id),
                 });
             }
             ChunkStatus::InProgress => {
-                problems.push(VerifyProblem {
+                plan.problems.push(VerifyProblem {
                     severity: VerifySeverity::Error,
                     message: format!("Chunk {}: status is 'in_progress'", chunk.id),
                 });
             }
             ChunkStatus::Failed => {
                 let reason = chunk.error.as_deref().unwrap_or("unknown error");
-                problems.push(VerifyProblem {
+                plan.problems.push(VerifyProblem {
                     severity: VerifySeverity::Error,
                     message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason),
                 });
@@ -1202,20 +1242,60 @@ async fn verify_chunks_and_data_files(
         }
     }
 
-    for path in &existing_files {
-        if !claimed_data_files.contains(path) {
+    plan
+}
+
+/// Lists all data files under `data/`. This is the only object-store IO in
+/// chunk/data-file verification.
+async fn scan_data_files(storage: &OpenDalStorage) -> Result<VerifyDataScan> {
+    let existing_data_files = storage
+        .list_files_recursive("data/")
+        .await?
+        .into_iter()
+        .collect();
+    Ok(VerifyDataScan {
+        existing_data_files,
+    })
+}
+
+/// Reconciles the manifest plan against the storage scan. Pure; performs no IO.
+///
+/// Emits missing-file problems for expected files absent from storage and
+/// unexpected-file problems for storage files no chunk claims. Unexpected files
+/// are sorted by path so output is deterministic regardless of listing order.
+fn reconcile_plan_with_scan(plan: VerifyPlan, scan: &VerifyDataScan) -> VerifyOutcome {
+    let mut problems = plan.problems;
+    let mut data_files_verified = 0;
+
+    for file in &plan.files_to_check {
+        if scan.existing_data_files.contains(&file.path) {
+            data_files_verified += 1;
+        } else {
             problems.push(VerifyProblem {
                 severity: VerifySeverity::Error,
-                message: format!("Unexpected data file '{}' is not listed in manifest", path),
+                message: format!("Chunk {}: missing file '{}'", file.chunk_id, file.path),
             });
         }
     }
 
-    report.data_files_total = data_files_total;
-    report.data_files_verified = data_files_verified;
-    report.problems.extend(problems);
+    let mut orphans: Vec<&String> = scan
+        .existing_data_files
+        .iter()
+        .filter(|path| !plan.claimed_data_files.contains(*path))
+        .collect();
+    orphans.sort();
+    for path in orphans {
+        problems.push(VerifyProblem {
+            severity: VerifySeverity::Error,
+            message: format!("Unexpected data file '{}' is not listed in manifest", path),
+        });
+    }
 
-    Ok(())
+    VerifyOutcome {
+        data_files_total: plan.data_files_total,
+        data_files_verified,
+        problems,
+    }
 }
 
 fn valid_manifest_data_file_path<'a>(
@@ -2294,6 +2374,90 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_build_verify_plan_classifies_chunks_without_io() {
+        let mut manifest = test_manifest(
+            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
+            false,
+            true,
+        );
+        // test_manifest(complete) gives: chunk 1 completed (1 file), chunk 2 skipped.
+        let mut failed = ChunkMeta::new(3, TimeRange::unbounded());
+        failed.mark_failed("boom".to_string());
+        manifest.chunks.push(failed);
+        manifest
+            .chunks
+            .push(ChunkMeta::new(4, TimeRange::unbounded()));
+
+        let plan = build_verify_plan(&manifest);
+
+        assert_eq!(plan.files_to_check.len(), 1);
+        assert_eq!(plan.files_to_check[0].chunk_id, 1);
+        assert_eq!(plan.files_to_check[0].path, "data/public/1/file.parquet");
+        assert_eq!(plan.data_files_total, 1);
+        assert!(
+            plan.claimed_data_files
+                .contains("data/public/1/file.parquet")
+        );
+        assert_eq!(plan.problems.len(), 2);
+        assert!(
+            plan.problems
+                .iter()
+                .any(|problem| problem.message.contains("status is 'failed'"))
+        );
+        assert!(
+            plan.problems
+                .iter()
+                .any(|problem| problem.message.contains("status is 'pending'"))
+        );
+    }
+
+    #[tokio::test]
+    async fn test_verify_snapshot_produces_deterministic_problem_output() {
+        let dir = tempdir().unwrap();
+        let manifest = test_manifest(
+            chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
+            false,
+            true,
+        );
+        write_root_manifest(dir.path(), manifest);
+        write_snapshot_file(dir.path(), "schema/schemas.json", b"[]");
+        write_default_ddl_files(dir.path());
+        write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data");
+        // Many orphan files under a known chunk prefix to stress ordering.
+        for i in 0..50 {
+            write_snapshot_file(
+                dir.path(),
+                &format!("data/public/1/orphan_{:02}.parquet", i),
+                b"x",
+            );
+        }
+
+        let storage = file_storage_for_dir(dir.path());
+        let messages = |report: &VerifyReport| {
+            report
+                .problems
+                .iter()
+                .map(|problem| problem.message.clone())
+                .collect::<Vec<_>>()
+        };
+        let first = messages(&verify_snapshot(&storage).await.unwrap());
+        let second = messages(&verify_snapshot(&storage).await.unwrap());
+
+        // Output is identical across runs despite HashSet-based scanning.
+        assert_eq!(first, second);
+
+        let orphans = first
+            .iter()
+            .filter(|message| message.contains("Unexpected data file"))
+            .cloned()
+            .collect::<Vec<_>>();
+        assert_eq!(orphans.len(), 50);
+        let mut sorted = orphans.clone();
+        sorted.sort();
+        assert_eq!(orphans, sorted);
+    }
+
     fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) {
         let snapshot_dir = root.join(dir);
         std::fs::create_dir_all(&snapshot_dir).unwrap();
diff --git a/src/cmd/src/datanode/scanbench.rs b/src/cmd/src/datanode/scanbench.rs
index b26705991c..b2a715ad31 100644
--- a/src/cmd/src/datanode/scanbench.rs
+++ b/src/cmd/src/datanode/scanbench.rs
@@ -524,6 +524,7 @@ impl ScanbenchCommand {
             options: HashMap::default(),
             skip_wal_replay: !self.enable_wal,
             checkpoint: None,
+            requirements: Default::default(),
         };
 
         engine
diff --git a/src/common/datasource/src/file_format.rs b/src/common/datasource/src/file_format.rs
index e36f94c0d2..d9d7b8b648 100644
--- a/src/common/datasource/src/file_format.rs
+++ b/src/common/datasource/src/file_format.rs
@@ -61,6 +61,7 @@ pub const FORMAT_COMPRESSION_TYPE: &str = "compression_type";
 pub const FORMAT_DELIMITER: &str = "delimiter";
 pub const FORMAT_SCHEMA_INFER_MAX_RECORD: &str = "schema_infer_max_record";
 pub const FORMAT_HAS_HEADER: &str = "has_header";
+pub const FORMAT_SKIP_BAD_RECORDS: &str = "skip_bad_records";
 pub const FORMAT_TYPE: &str = "format";
 pub const FILE_PATTERN: &str = "pattern";
 pub const TIMESTAMP_FORMAT: &str = "timestamp_format";
diff --git a/src/common/datasource/src/file_format/csv.rs b/src/common/datasource/src/file_format/csv.rs
index 77ea553f35..2b39051b48 100644
--- a/src/common/datasource/src/file_format/csv.rs
+++ b/src/common/datasource/src/file_format/csv.rs
@@ -13,15 +13,24 @@
 // limitations under the License.
 
 use std::collections::HashMap;
+use std::io;
 use std::str::FromStr;
+use std::sync::Arc;
+use std::task::Poll;
 
 use arrow::csv::reader::Format;
 use arrow::csv::{self, WriterBuilder};
+use arrow::error::ArrowError;
 use arrow::record_batch::RecordBatch;
-use arrow_schema::Schema;
+use arrow_schema::{Schema, SchemaRef};
 use async_trait::async_trait;
+use bytes::{Buf, Bytes};
 use common_runtime;
+use common_telemetry::warn;
 use datafusion::physical_plan::SendableRecordBatchStream;
+use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
+use futures::StreamExt;
+use futures::stream::BoxStream;
 use object_store::ObjectStore;
 use snafu::ResultExt;
 use tokio_util::compat::FuturesAsyncReadCompatExt;
@@ -34,9 +43,12 @@ use crate::file_format::{self, FileFormat, stream_to_file};
 use crate::share_buffer::SharedBuffer;
 use crate::util::normalize_infer_schema;
 
+const SKIP_BAD_RECORDS_BATCH_SIZE: usize = 1;
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct CsvFormat {
     pub has_header: bool,
+    pub skip_bad_records: bool,
     pub delimiter: u8,
     pub schema_infer_max_record: Option<usize>,
     pub compression_type: CompressionType,
@@ -76,13 +88,11 @@ impl TryFrom<&HashMap<String, String>> for CsvFormat {
                 })?);
         };
         if let Some(has_header) = value.get(file_format::FORMAT_HAS_HEADER) {
-            format.has_header = has_header.parse().map_err(|_| {
-                error::ParseFormatSnafu {
-                    key: file_format::FORMAT_HAS_HEADER,
-                    value: has_header,
-                }
-                .build()
-            })?;
+            format.has_header = parse_bool(file_format::FORMAT_HAS_HEADER, has_header)?;
+        };
+        if let Some(skip_bad_records) = value.get(file_format::FORMAT_SKIP_BAD_RECORDS) {
+            format.skip_bad_records =
+                parse_bool(file_format::FORMAT_SKIP_BAD_RECORDS, skip_bad_records)?;
         };
         if let Some(timestamp_format) = value.get(file_format::TIMESTAMP_FORMAT) {
             format.timestamp_format = Some(timestamp_format.clone());
@@ -97,10 +107,17 @@ impl TryFrom<&HashMap<String, String>> for CsvFormat {
     }
 }
 
+/// Parses a boolean option; any text other than `true`/`false` is rejected.
+fn parse_bool(key: &'static str, value: &str) -> Result<bool> {
+    let parsed = value.parse::<bool>();
+    parsed.map_err(|_| error::ParseFormatSnafu { key, value }.build())
+}
+
 impl Default for CsvFormat {
     fn default() -> Self {
         Self {
             has_header: true,
+            skip_bad_records: false,
             delimiter: b',',
             schema_infer_max_record: Some(file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD),
             compression_type: CompressionType::Uncompressed,
@@ -189,10 +206,136 @@ impl DfRecordBatchEncoder for csv::Writer<SharedBuffer> {
     }
 }
 
+/// Opens the CSV object at `path` and returns a stream that skips selected
+/// record-level parse/cast errors instead of failing the whole scan.
+///
+/// The recovery path decodes one record per batch. That is slower than normal
+/// CSV scanning but isolates every parse/cast failure to a single record:
+/// Arrow's CSV decoder clears buffered rows before type parsing, so a failed
+/// multi-row flush cannot be retried row by row without replaying input bytes.
+pub async fn tolerant_csv_stream(
+    store: &ObjectStore,
+    path: &str,
+    schema: SchemaRef,
+    projection: Vec<usize>,
+    format: &CsvFormat,
+) -> Result<SendableRecordBatchStream> {
+    // The object's length bounds the byte range requested from the reader.
+    let object_meta = store.stat(path).await;
+    let object_meta = object_meta.context(error::ReadObjectSnafu { path })?;
+    let whole_object = 0..object_meta.content_length();
+
+    let object_reader = store.reader(path).await;
+    let raw_bytes = object_reader
+        .context(error::ReadObjectSnafu { path })?
+        .into_bytes_stream(whole_object)
+        .await
+        .context(error::ReadObjectSnafu { path })?;
+
+    // Convert the raw bytes according to `compression_type` before CSV decoding.
+    let csv_bytes = format.compression_type.convert_stream(raw_bytes).boxed();
+    tolerant_csv_stream_from_reader(
+        csv_bytes,
+        path,
+        schema,
+        projection,
+        format.has_header,
+        format.delimiter,
+    )
+}
+
+fn tolerant_csv_stream_from_reader(
+    reader: BoxStream<'static, io::Result<Bytes>>,
+    path: &str,
+    schema: SchemaRef,
+    projection: Vec<usize>,
+    has_header: bool,
+    delimiter: u8,
+) -> Result<SendableRecordBatchStream> {
+    let projected_schema = Arc::new(
+        schema
+            .project(&projection)
+            .context(error::InferSchemaSnafu)?,
+    );
+    let mut decoder = csv::ReaderBuilder::new(schema)
+        .with_header(has_header)
+        .with_delimiter(delimiter)
+        .with_batch_size(SKIP_BAD_RECORDS_BATCH_SIZE)
+        .with_projection(projection)
+        .build_decoder();
+    // State owned by the poll closure: log path, fused input, pending bytes, EOF flag.
+    let path = path.to_string();
+    let mut upstream = reader.fuse();
+    let mut buffered = Bytes::new();
+    let mut input_finished = false;
+    let stream = futures::stream::poll_fn(move |cx| {
+        loop {
+            while !input_finished {
+                if buffered.is_empty() {
+                    match futures::ready!(upstream.poll_next_unpin(cx)) {
+                        Some(Ok(bytes)) if bytes.is_empty() => continue,
+                        Some(Ok(bytes)) => buffered = bytes,
+                        Some(Err(error)) => return Poll::Ready(Some(Err(error.into()))),
+                        None => input_finished = true,
+                    }
+                }
+                // Feed pending bytes to the decoder; it reports how many it consumed.
+                let decoded = decoder.decode(buffered.as_ref())?;
+                if decoded > 0 {
+                    buffered.advance(decoded);
+                    continue;
+                }
+                // No bytes consumed: flush if the decoder has no capacity left or input ended.
+                if decoder.capacity() == 0 || input_finished {
+                    break;
+                }
+                // Pending bytes are exhausted; go back and poll upstream for more.
+                if buffered.is_empty() {
+                    continue;
+                }
+                // Bytes remain and capacity is left, yet nothing was consumed: fail.
+                return Poll::Ready(Some(Err(ArrowError::ParseError(
+                    "CSV decoder made no progress while input bytes remain".to_string(),
+                ))));
+            }
+            // Flush buffered rows; a skippable error is logged and decoding resumes.
+            match decoder.flush() {
+                Ok(Some(batch)) => return Poll::Ready(Some(Ok(batch))),
+                Ok(None) if input_finished => return Poll::Ready(None),
+                Ok(None) => continue,
+                Err(error) if is_skippable_arrow_error(&error) => {
+                    warn!(
+                        "Skipping bad CSV record while copying from {}: {}",
+                        path, error
+                    );
+                }
+                Err(error) => return Poll::Ready(Some(Err(error))),
+            }
+        }
+    })
+    .map(|result: std::result::Result<RecordBatch, ArrowError>| result.map_err(Into::into));
+
+    Ok(Box::pin(RecordBatchStreamAdapter::new(
+        projected_schema,
+        stream,
+    )))
+}
+
+/// Reports whether `error` is one of the Arrow error kinds the tolerant CSV
+/// path treats as a bad record rather than a fatal failure.
+pub fn is_skippable_arrow_error(error: &ArrowError) -> bool {
+    use ArrowError::{CastError, ComputeError, InvalidArgumentError, ParseError};
+    matches!(
+        error,
+        ParseError(_) | CastError(_) | ComputeError(_) | InvalidArgumentError(_)
+    )
+}
+
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
 
+    use arrow_schema::{DataType, Field};
     use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
     use common_recordbatch::{RecordBatch, RecordBatches};
     use common_test_util::find_workspace_path;
@@ -205,7 +348,7 @@ mod tests {
     use super::*;
     use crate::file_format::{
         FORMAT_COMPRESSION_TYPE, FORMAT_DELIMITER, FORMAT_HAS_HEADER,
-        FORMAT_SCHEMA_INFER_MAX_RECORD, FileFormat, file_to_stream,
+        FORMAT_SCHEMA_INFER_MAX_RECORD, FORMAT_SKIP_BAD_RECORDS, FileFormat, file_to_stream,
     };
     use crate::test_util::{format_schema, test_store};
 
@@ -331,11 +474,29 @@ mod tests {
                 schema_infer_max_record: Some(2000),
                 delimiter: b'\t',
                 has_header: false,
+                skip_bad_records: false,
                 timestamp_format: None,
                 time_format: None,
                 date_format: None
             }
         );
+
+        let map = HashMap::from([(FORMAT_SKIP_BAD_RECORDS.to_string(), "true".to_string())]);
+        let format = CsvFormat::try_from(&map).unwrap();
+
+        assert_eq!(
+            format,
+            CsvFormat {
+                skip_bad_records: true,
+                ..CsvFormat::default()
+            }
+        );
+    }
+
+    #[test]
+    fn test_try_from_rejects_invalid_bool_options() {
+        let bad = (FORMAT_SKIP_BAD_RECORDS.to_string(), "yes".to_string());
+        assert!(CsvFormat::try_from(&HashMap::from([bad])).is_err());
+    }
 
     #[tokio::test]
@@ -496,4 +657,63 @@ mod tests {
             assert_eq!(expected, pretty_print);
         }
     }
+
+    #[tokio::test]
+    async fn test_tolerant_csv_stream_continues_after_parse_error() {
+        let temp_dir = common_test_util::temp_dir::create_temp_dir("test_tolerant_csv_stream");
+        let csv_file_path = temp_dir.path().join("input.csv");
+        std::fs::write(
+            &csv_file_path,
+            "id,name,value\n1,Alice,10.5\nbad,Bad,20.0\nworse,Bad,21.0\n2,Bob,30.5",
+        )
+        .unwrap();
+        // The `bad` and `worse` rows cannot parse as UInt32 ids and should be skipped.
+        let store = test_store("/");
+        let schema = Arc::new(arrow_schema::Schema::new(vec![
+            Field::new("id", DataType::UInt32, false),
+            Field::new("name", DataType::Utf8, false),
+            Field::new("value", DataType::Float64, false),
+        ]));
+        let path = csv_file_path.to_str().unwrap();
+
+        let stream =
+            tolerant_csv_stream(&store, path, schema, vec![0, 1, 2], &CsvFormat::default())
+                .await
+                .unwrap();
+        let batches = stream.try_collect::<Vec<_>>().await.unwrap();
+        let pretty_print = arrow::util::pretty::pretty_format_batches(&batches)
+            .unwrap()
+            .to_string();
+        let expected = r#"+----+-------+-------+
+| id | name  | value |
++----+-------+-------+
+| 1  | Alice | 10.5  |
+| 2  | Bob   | 30.5  |
++----+-------+-------+"#;
+        assert_eq!(expected, pretty_print);
+    }
+
+    #[tokio::test]
+    async fn test_tolerant_csv_stream_fails_on_structural_csv_error() {
+        let temp_dir =
+            common_test_util::temp_dir::create_temp_dir("test_tolerant_csv_stream_csv_error");
+        let csv_file_path = temp_dir.path().join("input.csv");
+        std::fs::write(&csv_file_path, "id,name,value\n1,Alice,10.5\n2,Bob\n").unwrap();
+        // The `2,Bob` row is missing a field; this structural error must not be skipped.
+        let store = test_store("/");
+        let schema = Arc::new(arrow_schema::Schema::new(vec![
+            Field::new("id", DataType::UInt32, false),
+            Field::new("name", DataType::Utf8, false),
+            Field::new("value", DataType::Float64, false),
+        ]));
+        let path = csv_file_path.to_str().unwrap();
+
+        let stream =
+            tolerant_csv_stream(&store, path, schema, vec![0, 1, 2], &CsvFormat::default())
+                .await
+                .unwrap();
+        let error = stream.try_collect::<Vec<_>>().await.unwrap_err();
+
+        assert!(error.to_string().contains("incorrect number of fields"));
+    }
 }
diff --git a/src/common/meta/src/instruction.rs b/src/common/meta/src/instruction.rs
index 3fa6b1bad0..6872b9ad55 100644
--- a/src/common/meta/src/instruction.rs
+++ b/src/common/meta/src/instruction.rs
@@ -18,7 +18,7 @@ use std::time::Duration;
 
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use store_api::region_engine::SyncRegionFromRequest;
-use store_api::region_request::RegionFlushReason;
+use store_api::region_request::{RegionFlushReason, RegionRequirements};
 use store_api::storage::{FileRefsManifest, GcReport, RegionId, RegionNumber};
 use strum::Display;
 use table::metadata::TableId;
@@ -179,12 +179,24 @@ impl Display for OpenRegion {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
-            "OpenRegion(region_ident={}, region_storage_path={})",
-            self.region_ident, self.region_storage_path
+            "OpenRegion(region_ident={}, region_storage_path={}, reason={:?})",
+            self.region_ident, self.region_storage_path, self.reason
         )
     }
 }
 
+/// Why an [`OpenRegion`] instruction was issued; carried in its optional `reason` field.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
+pub enum OpenRegionReason {
+    /// The region is opened ahead of a region migration.
+    RegionMigration,
+    /// The region is opened as part of a region failover.
+    RegionFailover,
+    /// The region is opened to add a follower region (`enterprise` feature only).
+    #[cfg(feature = "enterprise")]
+    RegionFollower,
+}
+
 #[serde_with::serde_as]
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 pub struct OpenRegion {
@@ -196,6 +208,10 @@ pub struct OpenRegion {
     pub region_wal_options: HashMap<RegionNumber, String>,
     #[serde(default)]
     pub skip_wal_replay: bool,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub reason: Option<OpenRegionReason>,
+    #[serde(default)]
+    pub requirements: RegionRequirements,
 }
 
 impl OpenRegion {
@@ -205,6 +221,8 @@ impl OpenRegion {
         region_options: HashMap<String, String>,
         region_wal_options: HashMap<RegionNumber, String>,
         skip_wal_replay: bool,
+        reason: Option<OpenRegionReason>,
+        requirements: RegionRequirements,
     ) -> Self {
         Self {
             region_ident,
@@ -212,6 +230,8 @@ impl OpenRegion {
             region_options,
             region_wal_options,
             skip_wal_replay,
+            reason,
+            requirements,
         }
     }
 }
@@ -1126,11 +1146,13 @@ mod tests {
             HashMap::new(),
             HashMap::new(),
             false,
+            None,
+            RegionRequirements::empty(),
         )]);
 
         let serialized = serde_json::to_string(&open_region).unwrap();
         assert_eq!(
-            r#"{"OpenRegions":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#,
+            r#"{"OpenRegions":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false,"requirements":{"object_storage":false}}]}"#,
             serialized
         );
 
@@ -1213,6 +1235,8 @@ mod tests {
             HashMap::new(),
             HashMap::new(),
             false,
+            None,
+            RegionRequirements::empty(),
         )]);
         assert_eq!(open_region_instruction, open_region);
 
@@ -1368,10 +1392,41 @@ mod tests {
             region_options,
             region_wal_options: HashMap::new(),
             skip_wal_replay: false,
+            reason: None,
+            requirements: RegionRequirements::empty(),
         };
         assert_eq!(expected, deserialized);
     }
 
+    #[test]
+    fn test_serialize_open_region_with_reason_and_requirements() {
+        let open_region = OpenRegion::new(
+            RegionIdent {
+                datanode_id: 2,
+                table_id: 1024,
+                region_number: 1,
+                engine: "mito2".to_string(),
+            },
+            "test/foo",
+            HashMap::new(),
+            HashMap::new(),
+            false,
+            Some(OpenRegionReason::RegionMigration),
+            RegionRequirements::object_storage(),
+        );
+        // The reason and requirement must be serialized and survive a round trip.
+        let serialized = serde_json::to_string(&open_region).unwrap();
+        assert!(serialized.contains(r#""reason":"RegionMigration""#));
+        assert!(serialized.contains(r#""object_storage":true"#));
+
+        let deserialized: OpenRegion = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(Some(OpenRegionReason::RegionMigration), deserialized.reason);
+        assert_eq!(
+            RegionRequirements::object_storage(),
+            deserialized.requirements
+        );
+    }
+
     #[test]
     fn test_flush_regions_creation() {
         let region_id = RegionId::new(1024, 1);
diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs
index 2ce306006b..b757c95121 100644
--- a/src/datanode/src/config.rs
+++ b/src/datanode/src/config.rs
@@ -14,6 +14,8 @@
 
 //! Datanode configurations
 
+use std::time::Duration;
+
 use common_base::readable_size::ReadableSize;
 use common_config::{Configurable, DEFAULT_DATA_HOME};
 use common_options::memory::MemoryOptions;
@@ -75,6 +77,10 @@ pub struct DatanodeOptions {
     pub wal: DatanodeWalConfig,
     pub storage: StorageConfig,
     pub max_concurrent_queries: usize,
+    /// Timeout to acquire a permit from the concurrent query limiter when
+    /// `max_concurrent_queries` is reached. Only effective when the limiter is enabled.
+    #[serde(with = "humantime_serde")]
+    pub concurrent_query_limiter_timeout: Duration,
     /// Options for different store engines.
     pub region_engine: Vec<RegionEngineConfig>,
     pub logging: LoggingOptions,
@@ -131,6 +137,7 @@ impl Default for DatanodeOptions {
             wal: DatanodeWalConfig::default(),
             storage: StorageConfig::default(),
             max_concurrent_queries: 0,
+            concurrent_query_limiter_timeout: Duration::from_millis(100),
             region_engine: vec![
                 RegionEngineConfig::Mito(MitoConfig::default()),
                 RegionEngineConfig::File(FileEngineConfig::default()),
diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs
index 9a2fe3d982..12d7c5109c 100644
--- a/src/datanode/src/datanode.rs
+++ b/src/datanode/src/datanode.rs
@@ -445,8 +445,7 @@ impl DatanodeBuilder {
             event_listener,
             table_provider_factory,
             opts.max_concurrent_queries,
-            //TODO: revaluate the hardcoded timeout on the next version of datanode concurrency limiter.
-            Duration::from_millis(100),
+            opts.concurrent_query_limiter_timeout,
             opts.grpc.flight_compression,
         );
 
diff --git a/src/datanode/src/heartbeat/handler.rs b/src/datanode/src/heartbeat/handler.rs
index 10948a3e7c..79e0baaef3 100644
--- a/src/datanode/src/heartbeat/handler.rs
+++ b/src/datanode/src/heartbeat/handler.rs
@@ -313,7 +313,7 @@ mod tests {
     use mito2::test_util::{CreateRequestBuilder, TestEnv};
     use store_api::path_utils::table_dir;
     use store_api::region_engine::RegionRole;
-    use store_api::region_request::{RegionCloseRequest, RegionRequest};
+    use store_api::region_request::{RegionCloseRequest, RegionRequest, RegionRequirements};
     use store_api::storage::RegionId;
     use tokio::sync::mpsc::{self, Receiver};
 
@@ -442,6 +442,8 @@ mod tests {
             HashMap::new(),
             HashMap::new(),
             false,
+            None,
+            RegionRequirements::empty(),
         )])
     }
 
diff --git a/src/datanode/src/heartbeat/handler/open_region.rs b/src/datanode/src/heartbeat/handler/open_region.rs
index 56c07a3efe..9c483e588d 100644
--- a/src/datanode/src/heartbeat/handler/open_region.rs
+++ b/src/datanode/src/heartbeat/handler/open_region.rs
@@ -14,6 +14,7 @@
 
 use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
 use common_meta::wal_provider::prepare_wal_options;
+use common_telemetry::info;
 use store_api::path_utils::table_dir;
 use store_api::region_request::{PathType, RegionOpenRequest};
 use store_api::storage::RegionId;
@@ -41,8 +42,13 @@ impl InstructionHandler for OpenRegionsHandler {
                     mut region_options,
                     region_wal_options,
                     skip_wal_replay,
+                    reason,
+                    requirements,
                 } = open_region;
                 let region_id = RegionId::new(region_ident.table_id, region_ident.region_number);
+                info!(
+                    "Received open region instruction, region_id: {region_id}, reason: {reason:?}"
+                );
                 prepare_wal_options(&mut region_options, region_id, &region_wal_options);
                 let request = RegionOpenRequest {
                     engine: region_ident.engine,
@@ -51,6 +57,7 @@ impl InstructionHandler for OpenRegionsHandler {
                     options: region_options,
                     skip_wal_replay,
                     checkpoint: None,
+                    requirements,
                 };
                 (region_id, request)
             })
@@ -85,7 +92,7 @@ mod tests {
     use mito2::engine::MITO_ENGINE_NAME;
     use mito2::test_util::{CreateRequestBuilder, TestEnv};
     use store_api::path_utils::table_dir;
-    use store_api::region_request::{RegionCloseRequest, RegionRequest};
+    use store_api::region_request::{RegionCloseRequest, RegionRequest, RegionRequirements};
     use store_api::storage::RegionId;
 
     use crate::heartbeat::handler::RegionHeartbeatResponseHandler;
@@ -98,17 +105,21 @@ mod tests {
     ) -> Instruction {
         let region_idents = region_ids
             .into_iter()
-            .map(|region_id| OpenRegion {
-                region_ident: RegionIdent {
-                    datanode_id: 0,
-                    table_id: region_id.table_id(),
-                    region_number: region_id.region_number(),
-                    engine: MITO_ENGINE_NAME.to_string(),
-                },
-                region_storage_path: storage_path.to_string(),
-                region_options: HashMap::new(),
-                region_wal_options: HashMap::new(),
-                skip_wal_replay: false,
+            .map(|region_id| {
+                OpenRegion::new(
+                    RegionIdent {
+                        datanode_id: 0,
+                        table_id: region_id.table_id(),
+                        region_number: region_id.region_number(),
+                        engine: MITO_ENGINE_NAME.to_string(),
+                    },
+                    storage_path,
+                    HashMap::new(),
+                    HashMap::new(),
+                    false,
+                    None,
+                    RegionRequirements::empty(),
+                )
             })
             .collect();
 
diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs
index d5711e1761..ce831353d1 100644
--- a/src/datanode/src/region_server.rs
+++ b/src/datanode/src/region_server.rs
@@ -49,6 +49,7 @@ use common_telemetry::{debug, error, info, warn};
 use dashmap::DashMap;
 use datafusion::datasource::TableProvider;
 use datafusion_common::tree_node::TreeNode;
+use datatypes::schema::SchemaRef;
 use either::Either;
 use futures_util::Stream;
 use futures_util::future::try_join_all;
@@ -82,7 +83,7 @@ use store_api::region_request::{
     RegionOpenRequest, RegionRequest,
 };
 use store_api::storage::RegionId;
-use tokio::sync::{Semaphore, SemaphorePermit};
+use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio::time::timeout;
 use tonic::{Request, Response, Result as TonicResult};
 
@@ -257,7 +258,7 @@ impl RegionServer {
         request: api::v1::region::QueryRequest,
         query_ctx: QueryContextRef,
     ) -> Result<SendableRecordBatchStream> {
-        let _permit = if let Some(p) = &self.inner.parallelism {
+        let permit = if let Some(p) = &self.inner.parallelism {
             Some(p.acquire().await?)
         } else {
             None
@@ -298,14 +299,13 @@ impl RegionServer {
             )
             .await?;
 
-        Ok(wrap_flow_region_watermark_stream(
-            stream, region_id, &query_ctx,
-        ))
+        let stream = wrap_flow_region_watermark_stream(stream, region_id, &query_ctx);
+        Ok(maybe_guard_stream(stream, permit))
     }
 
     #[tracing::instrument(skip_all)]
     pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
-        let _permit = if let Some(p) = &self.inner.parallelism {
+        let permit = if let Some(p) = &self.inner.parallelism {
             Some(p.acquire().await?)
         } else {
             None
@@ -332,9 +332,8 @@ impl RegionServer {
             .handle_read(QueryRequest { plan, ..request }, query_ctx.clone())
             .await?;
 
-        Ok(wrap_flow_region_watermark_stream(
-            stream, region_id, &query_ctx,
-        ))
+        let stream = wrap_flow_region_watermark_stream(stream, region_id, &query_ctx);
+        Ok(maybe_guard_stream(stream, permit))
     }
 
     /// Returns all opened and reportable regions.
@@ -1058,7 +1057,7 @@ struct RegionServerInner {
 }
 
 struct RegionServerParallelism {
-    semaphore: Semaphore,
+    semaphore: Arc<Semaphore>,
     timeout: Duration,
 }
 
@@ -1071,19 +1070,68 @@ impl RegionServerParallelism {
             return None;
         }
         Some(RegionServerParallelism {
-            semaphore: Semaphore::new(max_concurrent_queries),
+            semaphore: Arc::new(Semaphore::new(max_concurrent_queries)),
             timeout: concurrent_query_limiter_timeout,
         })
     }
 
-    pub async fn acquire(&self) -> Result<SemaphorePermit<'_>> {
-        timeout(self.timeout, self.semaphore.acquire())
+    pub async fn acquire(&self) -> Result<OwnedSemaphorePermit> {
+        timeout(self.timeout, self.semaphore.clone().acquire_owned())
             .await
             .context(ConcurrentQueryLimiterTimeoutSnafu)?
             .context(ConcurrentQueryLimiterClosedSnafu)
     }
 }
 
+/// Record batch stream wrapper that owns a concurrency permit. The permit is
+/// released only when the wrapper is dropped, so `max_concurrent_queries`
+/// bounds the number of in-flight read streams, not just query planning.
+struct PermitGuardedStream {
+    inner: SendableRecordBatchStream,
+    _permit: OwnedSemaphorePermit,
+}
+
+impl Stream for PermitGuardedStream {
+    type Item = common_recordbatch::error::Result<RecordBatch>;
+
+    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        self.get_mut().inner.as_mut().poll_next(cx)
+    }
+}
+
+impl RecordBatchStream for PermitGuardedStream {
+    fn name(&self) -> &str {
+        self.inner.name()
+    }
+
+    fn schema(&self) -> SchemaRef {
+        self.inner.schema()
+    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        self.inner.output_ordering()
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        self.inner.metrics()
+    }
+}
+
+/// Ties `permit` to the lifetime of `stream`.
+///
+/// With a permit, the stream is boxed inside a [`PermitGuardedStream`] that owns
+/// it; without one (limiter disabled) `stream` is returned unchanged.
+fn maybe_guard_stream(
+    stream: SendableRecordBatchStream,
+    permit: Option<OwnedSemaphorePermit>,
+) -> SendableRecordBatchStream {
+    let Some(_permit) = permit else {
+        return stream;
+    };
+    let inner = stream;
+    Box::pin(PermitGuardedStream { inner, _permit })
+}
+
 enum CurrentEngine {
     Engine(RegionEngineRef),
     EarlyReturn(AffectedRows),
@@ -2057,6 +2105,7 @@ mod tests {
                     options: Default::default(),
                     skip_wal_replay: false,
                     checkpoint: None,
+                    requirements: Default::default(),
                 }),
             )
             .await
@@ -2235,6 +2284,7 @@ mod tests {
                             options: Default::default(),
                             skip_wal_replay: false,
                             checkpoint: None,
+                            requirements: Default::default(),
                         },
                     ),
                     (
@@ -2246,6 +2296,7 @@ mod tests {
                             options: Default::default(),
                             skip_wal_replay: false,
                             checkpoint: None,
+                            requirements: Default::default(),
                         },
                     ),
                 ],
@@ -2268,6 +2319,7 @@ mod tests {
                             options: Default::default(),
                             skip_wal_replay: false,
                             checkpoint: None,
+                            requirements: Default::default(),
                         },
                     ),
                     (
@@ -2279,6 +2331,7 @@ mod tests {
                             options: Default::default(),
                             skip_wal_replay: false,
                             checkpoint: None,
+                            requirements: Default::default(),
                         },
                     ),
                 ],
diff --git a/src/datanode/src/utils.rs b/src/datanode/src/utils.rs
index c5cd008c28..816ae021ba 100644
--- a/src/datanode/src/utils.rs
+++ b/src/datanode/src/utils.rs
@@ -175,6 +175,7 @@ pub async fn build_region_open_requests(
                 options,
                 skip_wal_replay: false,
                 checkpoint,
+                requirements: Default::default(),
             },
         ));
     }
@@ -193,6 +194,7 @@ pub async fn build_region_open_requests(
                     options,
                     skip_wal_replay: true,
                     checkpoint: None,
+                    requirements: Default::default(),
                 },
             ));
         }
diff --git a/src/file-engine/src/engine.rs b/src/file-engine/src/engine.rs
index 175ebef237..2ddbb6c414 100644
--- a/src/file-engine/src/engine.rs
+++ b/src/file-engine/src/engine.rs
@@ -32,7 +32,7 @@ use store_api::region_engine::{
 };
 use store_api::region_request::{
     AffectedRows, RegionCloseRequest, RegionCreateRequest, RegionDropRequest, RegionOpenRequest,
-    RegionRequest,
+    RegionRequest, RegionRequirements,
 };
 use store_api::storage::{RegionId, ScanRequest, SequenceNumber};
 use tokio::sync::Mutex;
@@ -186,6 +186,24 @@ struct EngineInner {
 
 type EngineInnerRef = Arc<EngineInner>;
 
+/// Verifies that `object_store` meets the open `requirements`, returning an
+/// `Unsupported` error when object storage is required but the store is not
+/// object storage.
+fn ensure_open_requirements(
+    requirements: RegionRequirements,
+    object_store: &ObjectStore,
+) -> EngineResult<()> {
+    let satisfied =
+        !requirements.object_storage || object_store::util::is_object_storage(object_store);
+    ensure!(
+        satisfied,
+        UnsupportedSnafu {
+            operation: "open region with object storage requirement on non-object storage"
+        }
+    );
+    Ok(())
+}
+
 impl EngineInner {
     fn new(object_store: ObjectStore) -> Self {
         Self {
@@ -289,6 +307,8 @@ impl EngineInner {
             return Ok(0);
         }
 
+        ensure_open_requirements(request.requirements, &self.object_store)?;
+
         let res = FileRegion::open(region_id, request, &self.object_store).await;
         let region = res.inspect_err(|err| {
             error!(
@@ -356,3 +376,53 @@ impl EngineInner {
         self.regions.read().unwrap().contains_key(&region_id)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use object_store::services::{Fs, S3};
+
+    use super::*;
+    use crate::error::Error;
+
+    fn build_fs_object_store() -> ObjectStore {
+        ObjectStore::new(Fs::default().root("/tmp"))
+            .unwrap()
+            .finish()
+    }
+
+    fn build_s3_object_store() -> ObjectStore {
+        ObjectStore::new(
+            S3::default()
+                .bucket("test-bucket")
+                .region("us-east-1")
+                .disable_ec2_metadata(),
+        )
+        .unwrap()
+        .finish()
+    }
+
+    #[test]
+    fn test_empty_open_requirements_are_supported() {
+        ensure_open_requirements(RegionRequirements::empty(), &build_fs_object_store()).unwrap();
+    }
+
+    #[test]
+    fn test_object_storage_open_requirement_rejects_fs_object_store() {
+        let err = ensure_open_requirements(
+            RegionRequirements::object_storage(),
+            &build_fs_object_store(),
+        )
+        .unwrap_err();
+        // A filesystem-backed store must be refused with the `Unsupported` error.
+        assert!(matches!(err, Error::Unsupported { .. }));
+    }
+
+    #[test]
+    fn test_object_storage_open_requirement_accepts_s3_object_store() {
+        ensure_open_requirements(
+            RegionRequirements::object_storage(),
+            &build_s3_object_store(),
+        )
+        .unwrap();
+    }
+}
diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs
index 3808b33a67..aceec21aa5 100644
--- a/src/file-engine/src/region.rs
+++ b/src/file-engine/src/region.rs
@@ -181,6 +181,7 @@ mod tests {
             options: HashMap::default(),
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
 
         let region = FileRegion::open(region_id, request, &object_store)
@@ -238,6 +239,7 @@ mod tests {
             options: HashMap::default(),
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
         let err = FileRegion::open(region_id, request, &object_store)
             .await
diff --git a/src/flow/src/batching_mode/engine.rs b/src/flow/src/batching_mode/engine.rs
index 68fb3793e4..319ddcf2e7 100644
--- a/src/flow/src/batching_mode/engine.rs
+++ b/src/flow/src/batching_mode/engine.rs
@@ -630,8 +630,11 @@ impl BatchingEngine {
         let engine = self.query_engine.clone();
         let frontend = self.frontend_client.clone();
 
-        // check execute once first to detect any error early
+        // Create sink table if needed, then validate an existing/created sink schema before
+        // spawning the background task. This catches user-created sink schema mismatches at
+        // CREATE FLOW time instead of surfacing them later in the execution loop.
         task.check_or_create_sink_table(&engine, &frontend).await?;
+        task.validate_sink_table_schema(&engine).await?;
 
         let (start_tx, start_rx) = oneshot::channel();
 
diff --git a/src/flow/src/batching_mode/task.rs b/src/flow/src/batching_mode/task.rs
index cbd6a05cc2..3cd96b7525 100644
--- a/src/flow/src/batching_mode/task.rs
+++ b/src/flow/src/batching_mode/task.rs
@@ -265,6 +265,36 @@ impl BatchingTask {
         Ok(None)
     }
 
+    /// Validates that the sink table schema can accept this flow's output.
+    ///
+    /// This is a dry-run of the same schema matching logic used by runtime insert-plan
+    /// generation, but without adding dirty-window filters or executing the query. It is used
+    /// during CREATE FLOW to catch existing sink table mismatches early.
+    pub async fn validate_sink_table_schema(&self, engine: &QueryEngineRef) -> Result<(), Error> {
+        let (table, _) = get_table_info_df_schema( // second tuple element is not needed here
+            self.config.catalog_manager.clone(),
+            self.config.sink_table_name.clone(),
+        )
+        .await?; // a failed sink table lookup is returned to the caller
+
+        let table_meta = &table.table_info().meta;
+        let merge_mode_last_non_null =
+            is_merge_mode_last_non_null(&table_meta.options.extra_options);
+        let primary_key_indices = table_meta.primary_key_indices.clone();
+        let query_ctx = self.state.read().unwrap().query_ctx.clone(); // read guard is a temporary, released before the await below
+
+        gen_plan_with_matching_schema(
+            &self.config.query,
+            query_ctx,
+            engine.clone(),
+            table_meta.schema.clone(),
+            &primary_key_indices,
+            merge_mode_last_non_null,
+        )
+        .await
+        .map(|_| ()) // only success or failure matters; the generated plan is dropped
+    }
+
     async fn is_table_exist(&self, table_name: &[String; 3]) -> Result<bool, Error> {
         self.config
             .catalog_manager
@@ -929,7 +959,7 @@ impl BatchingTask {
         let (expire_lower_bound, expire_upper_bound) =
             match (expire_time_window_bound, &self.config.query_type) {
                 (Some((Some(l), Some(u))), QueryType::Sql) => (l, u),
-                (None, QueryType::Sql) => {
+                (None, QueryType::Sql) if self.config.flow_eval_interval.is_none() => {
                     // if it's sql query and no time window lower/upper bound is found, just return the original query(with auto columns)
                     // use sink_table_meta to add to query the `update_at` and `__ts_placeholder` column's value too for compatibility reason
                     debug!(
@@ -950,7 +980,8 @@ impl BatchingTask {
                 }
                 _ => {
                     // Clean dirty windows for full-query/non-scoped paths,
-                    // such as TQL, that cannot use a time-window filter.
+                    // such as TQL or evaluation-interval SQL without a recognized
+                    // time-window expression, that cannot use a time-window filter.
                     let (_, dirty_windows_to_restore) = self.drain_dirty_windows_signal();
 
                     let plan_info = self
diff --git a/src/flow/src/batching_mode/task/test.rs b/src/flow/src/batching_mode/task/test.rs
index d64b4ef1b9..c42d564ce2 100644
--- a/src/flow/src/batching_mode/task/test.rs
+++ b/src/flow/src/batching_mode/task/test.rs
@@ -974,6 +974,38 @@ async fn test_non_scoped_path_generates_plan_with_empty_dirty_signal() {
     assert!(task.state.read().unwrap().dirty_time_windows.is_empty());
 }
 
+#[tokio::test]
+async fn test_no_time_window_sql_with_eval_interval_generates_plan_without_dirty_signal() {
+    let TestTaskParts {
+        mut task,
+        query_engine,
+        ..
+    } = new_test_task_engine_and_plan_with_query(
+        "SELECT number, ts FROM numbers_with_ts",
+        "missing_sink",
+    )
+    .await;
+    Arc::get_mut(&mut task.config) // in-place edit; only possible while no other Arc clone exists
+        .expect("test task config should be uniquely owned")
+        .flow_eval_interval = Some(Duration::from_secs(60));
+    task.state.write().unwrap().dirty_time_windows.clean(); // NOTE(review): presumably empties the dirty-window set - confirm
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", CDT::uint32_datatype(), false),
+        ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false).with_time_index(true),
+    ]));
+
+    let plan = task
+        .gen_query_with_time_window(query_engine, &sink_schema, &[], false, None)
+        .await
+        .unwrap() // Result layer: plan generation must not fail
+        .expect(
+            "eval-interval SQL without a time-window expr should run by interval, not dirty signal",
+        );
+
+    assert!(plan.can_advance_checkpoints);
+    assert!(task.state.read().unwrap().dirty_time_windows.is_empty()); // no dirty windows afterwards
+}
+
 #[tokio::test]
 async fn test_executed_query_failure_restores_scoped_dirty_windows_for_flush_path() {
     let (task, plan) = new_test_task_and_plan_with_missing_sink().await;
diff --git a/src/flow/src/batching_mode/utils.rs b/src/flow/src/batching_mode/utils.rs
index e86b1ee3be..5e033c6ae7 100644
--- a/src/flow/src/batching_mode/utils.rs
+++ b/src/flow/src/batching_mode/utils.rs
@@ -33,9 +33,10 @@ use datafusion_common::{
 };
 use datafusion_expr::logical_plan::{Aggregate, TableScan};
 use datafusion_expr::{
-    Distinct, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, and, binary_expr,
-    bitwise_and, bitwise_or, bitwise_xor, is_null, or, when,
+    Distinct, ExprSchemable, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, and,
+    binary_expr, bitwise_and, bitwise_or, bitwise_xor, is_null, or, when,
 };
+use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::{ColumnSchema, SchemaRef};
 use query::QueryEngineRef;
 use query::parser::{DEFAULT_LOOKBACK_STRING, PromQuery, QueryLanguageParser, QueryStatement};
@@ -955,7 +956,7 @@ pub(crate) async fn gen_plan_with_matching_schema(
         .clone()
         .rewrite(&mut add_auto_column)
         .with_context(|_| DatafusionSnafu {
-            context: format!("Failed to rewrite plan:\n {}\n", plan),
+            context: "Failed to rewrite plan".to_string(),
         })?
         .data;
     Ok(plan)
@@ -1090,33 +1091,23 @@ impl ColumnMatcherRewriter {
     }
 
     /// modify the exprs in place so that it matches the schema and some auto columns are added
-    fn modify_project_exprs(&mut self, mut exprs: Vec<Expr>) -> DfResult<Vec<Expr>> {
+    fn modify_project_exprs(
+        &mut self,
+        mut exprs: Vec<Expr>,
+        input_schema: &DFSchema,
+    ) -> DfResult<Vec<Expr>> {
         if self.allow_partial {
             return self.modify_project_exprs_with_partial(exprs);
         }
 
+        let original_exprs = exprs.clone();
+
         let all_names = self
             .schema
             .column_schemas()
             .iter()
             .map(|c| c.name.clone())
             .collect::<BTreeSet<_>>();
-        // first match by position
-        for (idx, expr) in exprs.iter_mut().enumerate() {
-            if !all_names.contains(&expr.qualified_name().1)
-                && let Some(col_name) = self
-                    .schema
-                    .column_schemas()
-                    .get(idx)
-                    .map(|c| c.name.clone())
-            {
-                // if the data type mismatched, later check_execute will error out
-                // hence no need to check it here, beside, optimize pass might be able to cast it
-                // so checking here is not necessary
-                *expr = expr.clone().alias(col_name);
-            }
-        }
-
         // add columns if have different column count
         let query_col_cnt = exprs.len();
         let table_col_cnt = self.schema.column_schemas().len();
@@ -1140,10 +1131,9 @@ impl ColumnMatcherRewriter {
                 // is the update at column
                 exprs.push(datafusion::prelude::now().alias(&last_col_schema.name));
             } else {
-                // helpful error message
-                return Err(DataFusionError::Plan(format!(
-                    "Expect the last column in table to be timestamp column, found column {} with type {:?}",
-                    last_col_schema.name, last_col_schema.data_type
+                return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch(
+                    &original_exprs,
+                    self.schema.as_ref(),
                 )));
             }
         } else if query_col_cnt + 2 == table_col_cnt {
@@ -1170,14 +1160,110 @@ impl ColumnMatcherRewriter {
                 )));
             }
         } else {
-            return Err(DataFusionError::Plan(format!(
-                "Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?}",
-                query_col_cnt,
-                exprs,
-                table_col_cnt,
-                self.schema.column_schemas()
+            return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch(
+                &original_exprs,
+                self.schema.as_ref(),
             )));
         }
+
+        self.match_extra_output_columns(exprs, input_schema, &original_exprs, &all_names)
+    }
+
+    /// Matches flow output columns whose names are not in the sink schema by the same position only.
+    ///
+    /// This keeps the legacy "omit output aliases and map by position" behavior, but only when the
+    /// sink column at the same index is actually missing from the flow output. If the extra output
+    /// would be aliased to a sink column that already exists elsewhere, report a schema mismatch
+    /// instead of guessing another sink column by type.
+    ///
+    /// In particular, this intentionally rejects cross-position remaps like
+    /// `record_time_window2 -> record_time_window`: they are easy to confuse with real schema
+    /// mismatches and should be fixed by giving the flow output the sink column name explicitly.
+    fn match_extra_output_columns(
+        &self,
+        mut exprs: Vec<Expr>, // projection exprs; matched entries are re-aliased in place and returned
+        input_schema: &DFSchema, // schema used to resolve the type of each expr
+        original_exprs: &[Expr], // only used when formatting mismatch errors
+        all_names: &BTreeSet<String>, // names tested against to decide whether an output column is "extra"
+    ) -> DfResult<Vec<Expr>> {
+        let mut output_names = exprs // current output name per expr; updated as aliases are applied
+            .iter()
+            .map(|expr| expr.qualified_name().1)
+            .collect::<Vec<_>>();
+        let output_name_set = output_names.iter().cloned().collect::<BTreeSet<_>>();
+        let extra_expr_indices = output_names // positions of exprs whose name is not in `all_names`
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, name)| (!all_names.contains(name)).then_some(idx))
+            .collect::<Vec<_>>();
+        let missing_sink_indices = self // positions of sink columns that no expr is named after
+            .schema
+            .column_schemas()
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, column)| (!output_name_set.contains(&column.name)).then_some(idx))
+            .collect::<Vec<_>>();
+
+        if extra_expr_indices.is_empty() && missing_sink_indices.is_empty() {
+            return Ok(exprs); // names already line up; nothing to alias
+        }
+
+        if extra_expr_indices.len() != missing_sink_indices.len() { // a one-to-one pairing is impossible
+            return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch(
+                original_exprs,
+                self.schema.as_ref(),
+            )));
+        }
+
+        let mut positional_matches = Vec::new(); // human-readable pairs, used for the debug log only
+        for expr_idx in extra_expr_indices {
+            if !missing_sink_indices.contains(&expr_idx) { // sink column at this index is already produced by name
+                return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch(
+                    original_exprs,
+                    self.schema.as_ref(),
+                )));
+            }
+
+            let target_col_schema = &self.schema.column_schemas()[expr_idx];
+            let expr_type = // resolved on the expr before it is aliased
+                ConcreteDataType::from_arrow_type(&exprs[expr_idx].get_type(input_schema)?);
+            if is_obviously_incompatible_positional_match(&expr_type, &target_col_schema.data_type)
+            {
+                return Err(DataFusionError::Plan(format!(
+                    "Cannot match flow output column '{}' to sink column '{}' by position: incompatible data types, flow output type is {:?}, sink column type is {:?}. {}",
+                    output_names[expr_idx],
+                    target_col_schema.name,
+                    expr_type,
+                    target_col_schema.data_type,
+                    format_flow_sink_schema_mismatch(original_exprs, self.schema.as_ref())
+                )));
+            }
+
+            let target_name = target_col_schema.name.clone();
+            positional_matches.push(format!(
+                "{} -> {} (flow output type: {:?}, sink column type: {:?})",
+                output_names[expr_idx], target_name, expr_type, target_col_schema.data_type
+            ));
+            exprs[expr_idx] = exprs[expr_idx].clone().alias(target_name.clone()); // rename to the sink column
+            output_names[expr_idx] = target_name; // keep the name list in sync for the duplicate check
+        }
+
+        if !positional_matches.is_empty() {
+            debug!(
+                "Matched flow output columns to sink columns by position: {:?}",
+                positional_matches
+            );
+        }
+
+        let duplicated_output_names = duplicate_names(&output_names); // checked on the post-alias names
+        if !duplicated_output_names.is_empty() {
+            return Err(DataFusionError::Plan(format!(
+                "Flow output schema contains duplicate column(s) after schema matching {:?}. {}",
+                duplicated_output_names,
+                format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref()) // aliased exprs, not the originals
+            )));
+        }
+
+        Ok(exprs)
+    }
 
@@ -1186,12 +1272,9 @@ impl ColumnMatcherRewriter {
         let query_col_cnt = exprs.len();
 
         if query_col_cnt > table_col_cnt {
-            return Err(DataFusionError::Plan(format!(
-                "Expect query column count <= table column count, found {} query columns {:?}, {} table columns {:?}",
-                query_col_cnt,
-                exprs,
-                table_col_cnt,
-                self.schema.column_schemas()
+            return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch(
+                &exprs,
+                self.schema.as_ref(),
             )));
         }
 
@@ -1209,8 +1292,9 @@ impl ColumnMatcherRewriter {
             .collect();
         if !missing.is_empty() {
             return Err(DataFusionError::Plan(format!(
-                "Column(s) {:?} required by sink table are missing from flow output when merge_mode=last_non_null",
-                missing
+                "Column(s) {:?} required by sink table are missing from flow output when merge_mode=last_non_null. {}",
+                missing,
+                format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref())
             )));
         }
 
@@ -1250,8 +1334,9 @@ impl ColumnMatcherRewriter {
         if !remap.is_empty() {
             let extra: Vec<_> = remap.keys().cloned().collect();
             return Err(DataFusionError::Plan(format!(
-                "Flow output has extra column(s) {:?} not found in sink schema when merge_mode=last_non_null",
-                extra
+                "Flow output has extra column(s) {:?} not found in sink schema when merge_mode=last_non_null. {}",
+                extra,
+                format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref())
             )));
         }
 
@@ -1281,6 +1366,80 @@ impl ColumnMatcherRewriter {
     }
 }
 
+fn is_obviously_incompatible_positional_match(
+    expr_type: &ConcreteDataType, // type of the flow output expression
+    sink_type: &ConcreteDataType, // type of the sink column it would be aliased to
+) -> bool {
+    // This is a coarse type-family guard for legacy positional aliasing, not a strict type equality
+    // check. For example, numeric width/sign differences are allowed here and left to downstream
+    // coercion, and untyped NULL can be coerced to any target type. Clearly different families such
+    // as timestamp vs string are rejected early.
+    if expr_type.is_null() || expr_type == sink_type {
+        return false;
+    }
+
+    expr_type.is_timestamp() != sink_type.is_timestamp() // `!=` on bools: true when only one side is in the family
+        || expr_type.is_string() != sink_type.is_string()
+        || expr_type.is_boolean() != sink_type.is_boolean()
+        || expr_type.is_json() != sink_type.is_json()
+        || expr_type.is_vector() != sink_type.is_vector() // families not listed here are never rejected
+}
+
+fn duplicate_names(names: &[String]) -> Vec<String> { // every name occurring more than once, listed once, sorted
+    let mut seen = HashSet::new();
+    let mut duplicated = BTreeSet::new(); // ordered set, so the result is sorted and free of repeats
+    for name in names {
+        if !seen.insert(name.as_str()) { // `insert` returns false when the name was already present
+            duplicated.insert(name.as_str());
+        }
+    }
+    duplicated.into_iter().map(str::to_string).collect()
+}
+
+fn format_flow_sink_schema_mismatch( // builds the shared "schema does not match" message; performs no validation itself
+    query_exprs: &[Expr],
+    sink_schema: &datatypes::schema::Schema,
+) -> String {
+    let flow_output_columns = query_exprs // unqualified output names, in query order
+        .iter()
+        .map(|expr| expr.qualified_name().1)
+        .collect::<Vec<_>>();
+    let sink_table_columns = sink_schema // sink column names, in schema order
+        .column_schemas()
+        .iter()
+        .map(|col| col.name.clone())
+        .collect::<Vec<_>>();
+
+    let flow_output_set = flow_output_columns.iter().cloned().collect::<HashSet<_>>();
+    let sink_table_set = sink_table_columns.iter().cloned().collect::<HashSet<_>>();
+
+    let mut extra_flow_columns = flow_output_columns // output names that are not sink columns
+        .iter()
+        .filter(|name| !sink_table_set.contains(*name))
+        .cloned()
+        .collect::<Vec<_>>();
+    extra_flow_columns.sort(); // sort + dedup gives a stable, repeat-free listing
+    extra_flow_columns.dedup();
+
+    let mut missing_sink_columns = sink_table_columns // sink columns that no output is named after
+        .iter()
+        .filter(|name| !flow_output_set.contains(*name))
+        .cloned()
+        .collect::<Vec<_>>();
+    missing_sink_columns.sort();
+    missing_sink_columns.dedup();
+
+    format!(
+        "Flow output schema does not match sink table schema: found {} flow output columns and {} sink table columns. flow output columns: {:?}, sink table columns: {:?}, extra flow columns not in sink: {:?}, missing sink columns from flow output: {:?}",
+        flow_output_columns.len(),
+        sink_table_columns.len(),
+        flow_output_columns, // full lists keep their original order; only the diff lists are sorted
+        sink_table_columns,
+        extra_flow_columns,
+        missing_sink_columns
+    )
+}
+
 impl TreeNodeRewriter for ColumnMatcherRewriter {
     type Node = LogicalPlan;
     fn f_down(&mut self, mut node: Self::Node) -> DfResult<Transformed<Self::Node>> {
@@ -1327,7 +1486,7 @@ impl TreeNodeRewriter for ColumnMatcherRewriter {
         // if not, wrap it in a projection
         if let LogicalPlan::Projection(project) = &node {
             let exprs = project.expr.clone();
-            let exprs = self.modify_project_exprs(exprs)?;
+            let exprs = self.modify_project_exprs(exprs, project.input.schema())?;
 
             self.is_rewritten = true;
             let new_plan =
@@ -1341,7 +1500,7 @@ impl TreeNodeRewriter for ColumnMatcherRewriter {
                     field.name(),
                 )));
             }
-            let exprs = self.modify_project_exprs(exprs)?;
+            let exprs = self.modify_project_exprs(exprs, node.schema())?;
             self.is_rewritten = true;
             let new_plan =
                 LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?);
diff --git a/src/flow/src/batching_mode/utils/test.rs b/src/flow/src/batching_mode/utils/test.rs
index 317b0a5475..9ca1186fb6 100644
--- a/src/flow/src/batching_mode/utils/test.rs
+++ b/src/flow/src/batching_mode/utils/test.rs
@@ -14,6 +14,7 @@
 
 use std::sync::Arc;
 
+use catalog::RegisterTableRequest;
 use common_recordbatch::RecordBatch;
 use common_time::Timestamp;
 use datafusion_common::tree_node::TreeNode as _;
@@ -29,7 +30,9 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
 use table::test_util::MemTable;
 
 use super::*;
+use crate::batching_mode::BatchingModeOptions;
 use crate::batching_mode::state::FilterExprInfo;
+use crate::batching_mode::task::{BatchingTask, TaskArgs};
 use crate::test_utils::create_test_query_engine;
 
 fn u32_table(table_name: &str, columns: Vec<&str>, rows: usize) -> TableRef {
@@ -432,9 +435,7 @@ async fn test_add_auto_column_rewriter() {
         // error datatype mismatch
         (
             "SELECT number, ts FROM numbers_with_ts",
-            Err(
-                "Expect the last column in table to be timestamp column, found column atat with type Int8",
-            ),
+            Err("missing sink columns from flow output: [\"atat\"]"),
             vec![
                 ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true),
                 ColumnSchema::new(
@@ -498,6 +499,383 @@ async fn test_add_auto_column_rewriter() {
     }
 }
 
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_reports_extra_flow_columns_before_positional_alias() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new(
+            "max(numbers_with_ts.number)",
+            ConcreteDataType::uint32_datatype(),
+            true,
+        ),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT number, number AS extra, ts, max(number) FROM numbers_with_ts GROUP BY number, ts",
+        ctx,
+        query_engine,
+        sink_schema, // three sink columns, while the query above yields four
+        &[],
+        false,
+    )
+    .await
+    .unwrap_err()
+    .to_string();
+
+    assert!(
+        err.contains("Flow output schema does not match sink table schema"),
+        "{err}"
+    );
+    assert!(err.contains("flow output columns"), "{err}");
+    assert!(err.contains("sink table columns"), "{err}");
+    assert!(err.contains("extra flow columns not in sink"), "{err}");
+    assert!(err.contains("extra"), "{err}"); // weak check: "extra" also occurs in the fixed message text
+    assert!(
+        !err.contains("extra AS ts"), // the error must not show a positional alias onto `ts`
+        "schema error should not primarily expose positional alias: {err}"
+    );
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_rejects_positional_alias_type_mismatch() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "event_time", // index 1: timestamp column with no same-named query output
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new(
+            "max(numbers_with_ts.number)",
+            ConcreteDataType::uint32_datatype(),
+            true,
+        ),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT number, number AS not_time, max(number) FROM numbers_with_ts GROUP BY number", // `not_time` is at index 1
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap_err()
+    .to_string();
+
+    assert!(
+        err.contains(
+            "Cannot match flow output column 'not_time' to sink column 'event_time' by position"
+        ),
+        "{err}"
+    );
+    assert!(err.contains("incompatible data types"), "{err}");
+    assert!(
+        !err.contains("not_time AS event_time"), // the rejected alias must not appear in the message
+        "schema error should not expose an incompatible positional alias: {err}"
+    );
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_rejects_cross_position_extra_column_match() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "time_window", // index 1: the only sink column without a same-named query output
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            true,
+        ),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT number, ts, date_bin('5 minutes', ts) AS time_window2 FROM numbers_with_ts GROUP BY number, ts, time_window2", // extra output is at index 2
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap_err()
+    .to_string();
+
+    assert!(
+        err.contains("Flow output schema does not match sink table schema"),
+        "{err}"
+    );
+    assert!(err.contains("time_window2"), "{err}");
+    assert!(err.contains("time_window"), "{err}"); // weak check: already implied by the `time_window2` assertion
+    assert!(!err.contains("DuplicateUnqualifiedField"), "{err}"); // must be the schema-mismatch error instead
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_accepts_out_of_order_matching_names() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "time_window",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            true,
+        ),
+    ]));
+
+    let plan = gen_plan_with_matching_schema(
+        "SELECT number, ts, date_bin('5 minutes', ts) AS time_window FROM numbers_with_ts GROUP BY number, ts, time_window", // sink names, different order
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap();
+    let output_names = plan
+        .schema()
+        .fields()
+        .iter()
+        .map(|field| field.name().clone())
+        .collect::<Vec<_>>();
+
+    assert_eq!(
+        output_names, // expected in query order, not sink order
+        vec![
+            "number".to_string(),
+            "ts".to_string(),
+            "time_window".to_string()
+        ]
+    );
+    assert!(duplicate_names(&output_names).is_empty());
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_allows_numeric_positional_alias() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("renamed_number", ConcreteDataType::int64_datatype(), true), // name differs from the query's `number`
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+    ]));
+
+    let plan = gen_plan_with_matching_schema(
+        "SELECT number, ts FROM numbers_with_ts",
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap();
+    let sql = df_plan_to_sql(&plan).unwrap();
+
+    assert_eq!(
+        "SELECT numbers_with_ts.number AS renamed_number, numbers_with_ts.ts FROM numbers_with_ts", // aliased by position
+        sql
+    );
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_allows_null_positional_alias() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new("label", ConcreteDataType::string_datatype(), true), // string column fed by a NULL literal
+    ]));
+
+    let plan = gen_plan_with_matching_schema(
+        "SELECT number, NULL AS label_placeholder FROM numbers_with_ts",
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap();
+    let output_names = plan
+        .schema()
+        .fields()
+        .iter()
+        .map(|field| field.name().clone())
+        .collect::<Vec<_>>();
+    let sql = df_plan_to_sql(&plan).unwrap();
+
+    assert_eq!(
+        output_names, // `label_placeholder` must have been renamed to the sink column `label`
+        vec!["number".to_string(), "label".to_string()]
+    );
+    assert!(sql.contains("NULL AS label"), "{sql}"); // weak check: also a prefix of `NULL AS label_placeholder`
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_accepts_matching_flow_schema() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![ // same four names, in the same order, as the query output below
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new("extra", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new(
+            "max(numbers_with_ts.number)",
+            ConcreteDataType::uint32_datatype(),
+            true,
+        ),
+    ]));
+
+    let plan = gen_plan_with_matching_schema(
+        "SELECT number, number AS extra, ts, max(number) FROM numbers_with_ts GROUP BY number, ts",
+        ctx,
+        query_engine,
+        sink_schema,
+        &[],
+        false,
+    )
+    .await
+    .unwrap();
+    let sql = df_plan_to_sql(&plan).unwrap();
+
+    assert_eq!(
+        "SELECT numbers_with_ts.number, numbers_with_ts.number AS extra, numbers_with_ts.ts, max(numbers_with_ts.number) FROM numbers_with_ts GROUP BY numbers_with_ts.number, numbers_with_ts.ts", // no alias added by schema matching
+        sql
+    );
+}
+
+#[tokio::test]
+async fn test_validate_sink_table_schema_rejects_existing_sink_missing_flow_column() {
+    let query_engine = create_test_query_engine();
+    let query_ctx = QueryContext::arc();
+    let sql = "SELECT number, number AS extra, max(number) FROM numbers_with_ts GROUP BY number"; // three output columns
+    let plan = sql_to_df_plan(query_ctx.clone(), query_engine.clone(), sql, true)
+        .await
+        .unwrap();
+
+    let catalog_manager = catalog::memory::new_memory_catalog_manager().unwrap();
+    let sink_table_name = [
+        "greptime".to_string(),
+        "public".to_string(),
+        "existing_sink".to_string(),
+    ];
+    let sink_table = u32_table(
+        "existing_sink",
+        vec!["number", "max(numbers_with_ts.number)"], // two columns only: no `extra`
+        0, // zero rows
+    );
+    catalog_manager // register the sink up front so validation finds an existing table
+        .register_table_sync(RegisterTableRequest {
+            catalog: sink_table_name[0].clone(),
+            schema: sink_table_name[1].clone(),
+            table_name: sink_table_name[2].clone(),
+            table_id: 4096,
+            table: sink_table,
+        })
+        .unwrap();
+
+    let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); // named binding keeps the sender alive for the test
+    let task = BatchingTask::try_new(TaskArgs {
+        flow_id: 1,
+        query: sql,
+        plan,
+        time_window_expr: None,
+        expire_after: None,
+        sink_table_name,
+        source_table_names: vec![[
+            "greptime".to_string(),
+            "public".to_string(),
+            "numbers_with_ts".to_string(),
+        ]],
+        query_ctx,
+        catalog_manager,
+        shutdown_rx,
+        batch_opts: Arc::new(BatchingModeOptions::default()),
+        flow_eval_interval: None,
+    })
+    .unwrap();
+
+    let err = task
+        .validate_sink_table_schema(&query_engine)
+        .await
+        .unwrap_err()
+        .to_string();
+
+    assert!(
+        err.contains("Flow output schema does not match sink table schema"),
+        "{err}"
+    );
+    assert!(err.contains("extra"), "{err}"); // weak check: "extra" also occurs in the fixed message text
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_allow_partial_fills_nullable_columns() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), false),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true), // nullable; not produced by the query
+    ]));
+
+    let plan = gen_plan_with_matching_schema(
+        "SELECT number, ts FROM numbers_with_ts",
+        ctx,
+        query_engine,
+        sink_schema,
+        &[0], // primary key indices: column 0 (`number`)
+        true, // the flag `validate_sink_table_schema` fills with `merge_mode_last_non_null`
+    )
+    .await
+    .unwrap();
+    let sql = df_plan_to_sql(&plan).unwrap();
+
+    assert_eq!(
+        "SELECT numbers_with_ts.number, numbers_with_ts.ts, NULL AS optional_value FROM numbers_with_ts", // missing column filled with NULL
+        sql
+    );
+}
+
 #[tokio::test]
 async fn test_find_group_by_exprs() {
     let testcases = vec![
@@ -1491,3 +1869,118 @@ async fn test_analyze_incremental_aggregate_plan_rejects_cast_wrapped_alias() {
         );
     }
 }
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_last_non_null_rejects_missing_primary_key_column() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    // Sink table with primary_key_indices=[0] ("number"), time_index="ts", and merge_mode=last_non_null.
+    // The flow query omits "number", which is a required primary-key column.
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT ts FROM numbers_with_ts", // "number" is not selected
+        ctx,
+        query_engine,
+        sink_schema,
+        &[0], // index 0 is the "number" column of `sink_schema`
+        true, // presumably the last_non_null / allow-partial flag - TODO confirm
+    )
+    .await
+    .unwrap_err() // plan generation is expected to fail
+    .to_string();
+
+    assert!(
+        err.contains(
+            "required by sink table are missing from flow output when merge_mode=last_non_null"
+        ),
+        "{err}"
+    );
+    assert!(err.contains("number"), "{err}"); // NOTE(review): also matches `numbers_with_ts`
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_last_non_null_rejects_missing_time_index_column() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    // Sink table with primary_key_indices=[0] ("number"), time_index="ts", and merge_mode=last_non_null.
+    // The flow query omits "ts", which is a required time-index column.
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT number FROM numbers_with_ts", // "ts" is not selected
+        ctx,
+        query_engine,
+        sink_schema,
+        &[0], // index 0 is the "number" column of `sink_schema`
+        true, // presumably the last_non_null / allow-partial flag - TODO confirm
+    )
+    .await
+    .unwrap_err() // plan generation is expected to fail
+    .to_string();
+
+    assert!(
+        err.contains(
+            "required by sink table are missing from flow output when merge_mode=last_non_null"
+        ),
+        "{err}"
+    );
+    assert!(err.contains("ts"), "{err}"); // NOTE(review): also matches `numbers_with_ts`
+}
+
+#[tokio::test]
+async fn test_gen_plan_with_matching_schema_last_non_null_rejects_extra_flow_column() {
+    let query_engine = create_test_query_engine();
+    let ctx = QueryContext::arc();
+    // Sink table with merge_mode=last_non_null.
+    // Sink has 3 columns: number (pk), ts (time_index), optional_value (nullable).
+    // Flow outputs: number, number AS extra, ts → "extra" is not in sink schema.
+    // query_col_cnt(3) <= table_col_cnt(3), so the extra branch is reached.
+    let sink_schema = Arc::new(Schema::new(vec![
+        ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true),
+        ColumnSchema::new(
+            "ts",
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            false,
+        )
+        .with_time_index(true),
+        ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true),
+    ]));
+
+    let err = gen_plan_with_matching_schema(
+        "SELECT number, number AS extra, ts FROM numbers_with_ts",
+        ctx,
+        query_engine,
+        sink_schema,
+        &[0], // index 0 is the "number" column of `sink_schema`
+        true, // presumably the last_non_null / allow-partial flag - TODO confirm
+    )
+    .await
+    .unwrap_err() // plan generation is expected to fail
+    .to_string();
+
+    assert!(err.contains("extra column(s)"), "{err}");
+    assert!(err.contains("extra"), "{err}"); // NOTE(review): implied by the check above
+    assert!(
+        err.contains("Flow output schema does not match sink table schema"),
+        "{err}"
+    );
+}
diff --git a/src/frontend/src/server.rs b/src/frontend/src/server.rs
index e66ae718ba..6b120ccba5 100644
--- a/src/frontend/src/server.rs
+++ b/src/frontend/src/server.rs
@@ -288,7 +288,6 @@ where
 
         let http_server = builder
             .with_metrics_handler(MetricsHandler)
-            .with_plugins(self.plugins.clone())
             .with_greptime_config_options(toml)
             .build();
         Ok(http_server)
diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs
index cbd9b43151..de41caf19e 100644
--- a/src/meta-client/src/client.rs
+++ b/src/meta-client/src/client.rs
@@ -1344,7 +1344,7 @@ mod tests {
 
         // Generates rough 10MB data, which is larger than the default grpc message size limit.
         for i in 0..10 {
-            let data: Vec<u8> = (0..1024 * 1024).map(|_| rng.random()).collect();
+            let data: Vec<u8> = (0..1024 * 1024).map(|_| rng.random::<u8>()).collect();
             in_memory
                 .put(
                     PutRequest::new()
diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs
index 0c0e5de5d7..792c66bdc9 100644
--- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs
+++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs
@@ -18,7 +18,9 @@ use std::ops::Div;
 use api::v1::meta::MailboxMessage;
 use common_meta::RegionIdent;
 use common_meta::distributed_time_constants::default_distributed_time_constants;
-use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply};
+use common_meta::instruction::{
+    Instruction, InstructionReply, OpenRegion, OpenRegionReason, SimpleReply,
+};
 use common_meta::key::datanode_table::RegionInfo;
 use common_procedure::{Context as ProcedureContext, Status};
 use common_telemetry::info;
@@ -26,12 +28,13 @@ use common_telemetry::tracing_context::TracingContext;
 use serde::{Deserialize, Serialize};
 use snafu::{OptionExt, ResultExt};
 use store_api::region_engine::RegionRole;
+use store_api::region_request::RegionRequirements;
 use tokio::time::Instant;
 
 use crate::error::{self, Result};
 use crate::handler::HeartbeatMailbox;
 use crate::procedure::region_migration::flush_leader_region::PreFlushRegion;
-use crate::procedure::region_migration::{Context, State};
+use crate::procedure::region_migration::{Context, RegionMigrationTriggerReason, State};
 use crate::service::mailbox::Channel;
 
 #[derive(Debug, Serialize, Deserialize)]
@@ -67,6 +70,10 @@ impl OpenCandidateRegion {
         let region_ids = ctx.persistent_ctx.region_ids.clone();
         let from_peer_id = ctx.persistent_ctx.from_peer.id;
         let to_peer_id = ctx.persistent_ctx.to_peer.id;
+        let reason = match ctx.persistent_ctx.trigger_reason {
+            RegionMigrationTriggerReason::Failover => OpenRegionReason::RegionFailover,
+            _ => OpenRegionReason::RegionMigration,
+        };
         let datanode_table_values = ctx.get_from_peer_datanode_table_values().await?;
         let mut open_regions = Vec::with_capacity(region_ids.len());
 
@@ -97,6 +104,8 @@ impl OpenCandidateRegion {
                 region_options,
                 region_wal_options,
                 true,
+                Some(reason),
+                RegionRequirements::object_storage(),
             ));
         }
 
@@ -233,18 +242,20 @@ mod tests {
     }
 
     fn new_mock_open_instruction(datanode_id: DatanodeId, region_id: RegionId) -> Instruction {
-        Instruction::OpenRegions(vec![OpenRegion {
-            region_ident: RegionIdent {
+        Instruction::OpenRegions(vec![OpenRegion::new(
+            RegionIdent {
                 datanode_id,
                 table_id: region_id.table_id(),
                 region_number: region_id.region_number(),
                 engine: MITO2_ENGINE.to_string(),
             },
-            region_storage_path: "/bar/foo/region/".to_string(),
-            region_options: Default::default(),
-            region_wal_options: Default::default(),
-            skip_wal_replay: true,
-        }])
+            "/bar/foo/region/", // region_storage_path
+            Default::default(), // region_options
+            Default::default(), // region_wal_options
+            true, // skip_wal_replay
+            Some(OpenRegionReason::RegionMigration), // reason
+            RegionRequirements::object_storage(), // requirements
+        )])
     }
 
     #[tokio::test]
@@ -263,6 +274,57 @@ mod tests {
         assert!(!err.is_retryable());
     }
 
+    #[tokio::test] // The open instruction's reason must follow the migration trigger reason.
+    async fn test_build_open_region_instruction_reason() {
+        let state = OpenCandidateRegion;
+        let mut persistent_context = new_persistent_context();
+        let from_peer_id = persistent_context.from_peer.id;
+        let region_id = persistent_context.region_ids[0];
+        let env = TestingEnv::new();
+
+        let table_info = new_test_table_info(1024);
+        let region_routes = vec![RegionRoute {
+            region: Region::new_test(region_id),
+            leader_peer: Some(Peer::empty(from_peer_id)), // leader is the source peer
+            ..Default::default()
+        }];
+        env.table_metadata_manager()
+            .create_table_metadata(
+                table_info,
+                TableRouteValue::physical(region_routes),
+                HashMap::default(),
+            )
+            .await
+            .unwrap();
+
+        let mut ctx = env
+            .context_factory()
+            .new_context(persistent_context.clone()); // clone: the context is mutated below
+        let instruction = state.build_open_region_instruction(&mut ctx).await.unwrap();
+        let open_regions = instruction.into_open_regions().unwrap();
+        assert_eq!(
+            Some(OpenRegionReason::RegionMigration),
+            open_regions[0].reason // before the trigger reason is overridden
+        );
+        assert_eq!(
+            RegionRequirements::object_storage(),
+            open_regions[0].requirements
+        );
+
+        persistent_context.trigger_reason = RegionMigrationTriggerReason::Failover;
+        let mut ctx = env.context_factory().new_context(persistent_context);
+        let instruction = state.build_open_region_instruction(&mut ctx).await.unwrap();
+        let open_regions = instruction.into_open_regions().unwrap();
+        assert_eq!(
+            Some(OpenRegionReason::RegionFailover),
+            open_regions[0].reason // after switching the trigger to Failover
+        );
+        assert_eq!(
+            RegionRequirements::object_storage(),
+            open_regions[0].requirements // same requirements for both trigger reasons
+        );
+    }
+
     #[tokio::test]
     async fn test_datanode_is_unreachable() {
         let state = OpenCandidateRegion;
diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs
index ef4d802cfc..fa9ef804cc 100644
--- a/src/metric-engine/src/engine.rs
+++ b/src/metric-engine/src/engine.rs
@@ -620,6 +620,7 @@ mod test {
             options: physical_region_option,
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
         engine
             .handle_request(physical_region_id, RegionRequest::Open(open_request))
@@ -644,6 +645,7 @@ mod test {
             options: HashMap::new(),
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
         engine
             .handle_request(
@@ -721,6 +723,7 @@ mod test {
             options: physical_region_option,
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
         // Opening an already opened region should succeed.
         // Since the region is already open, no metadata recovery operations will be performed.
@@ -749,6 +752,7 @@ mod test {
             options: physical_region_option,
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         };
         let err = metric_engine
             .handle_request(physical_region_id, RegionRequest::Open(open_request))
@@ -854,6 +858,7 @@ mod test {
                         options: options.clone(),
                         skip_wal_replay: true,
                         checkpoint: None,
+                        requirements: Default::default(),
                     },
                 )
             })
diff --git a/src/metric-engine/src/engine/open.rs b/src/metric-engine/src/engine/open.rs
index 59b1cfd928..8fcdfcd821 100644
--- a/src/metric-engine/src/engine/open.rs
+++ b/src/metric-engine/src/engine/open.rs
@@ -222,6 +222,7 @@ impl MetricEngineInner {
                 entry_id: checkpoint.metadata_entry_id.unwrap_or_default(),
                 metadata_entry_id: None,
             }),
+            requirements: request.requirements,
         };
 
         let mut data_region_options = request.options;
@@ -239,6 +240,7 @@ impl MetricEngineInner {
                 entry_id: checkpoint.entry_id,
                 metadata_entry_id: None,
             }),
+            requirements: request.requirements,
         };
 
         (open_metadata_region_request, open_data_region_request)
diff --git a/src/metric-engine/src/engine/sync/region.rs b/src/metric-engine/src/engine/sync/region.rs
index cbe6515a19..d1f92bef64 100644
--- a/src/metric-engine/src/engine/sync/region.rs
+++ b/src/metric-engine/src/engine/sync/region.rs
@@ -321,6 +321,7 @@ mod tests {
                     options: physical_region_option,
                     skip_wal_replay: false,
                     checkpoint: None,
+                    requirements: Default::default(),
                 }),
             )
             .await
diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs
index ec55a01903..8d4a822b6b 100644
--- a/src/metric-engine/src/test_util.rs
+++ b/src/metric-engine/src/test_util.rs
@@ -144,6 +144,7 @@ impl TestEnv {
                     options: physical_region_option,
                     skip_wal_replay: true,
                     checkpoint: None,
+                    requirements: Default::default(),
                 }),
             )
             .await
diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml
index 99e3439879..ea281f2c32 100644
--- a/src/mito2/Cargo.toml
+++ b/src/mito2/Cargo.toml
@@ -8,6 +8,7 @@ license.workspace = true
 default = []
 test = ["common-test-util", "rstest", "rstest_reuse", "rskafka"]
 testing = ["test"]
+test-shared-fs-region-migration = []
 enterprise = []
 vector_index = ["dep:roaring", "index/vector_index"]
 
diff --git a/src/mito2/src/engine/alter_test.rs b/src/mito2/src/engine/alter_test.rs
index a7798a7678..b43f057ea6 100644
--- a/src/mito2/src/engine/alter_test.rs
+++ b/src/mito2/src/engine/alter_test.rs
@@ -277,6 +277,7 @@ async fn test_alter_region_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -481,6 +482,7 @@ async fn test_put_after_alter_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -844,6 +846,7 @@ async fn test_alter_column_fulltext_options_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -979,6 +982,7 @@ async fn test_alter_column_set_inverted_index_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1248,6 +1252,7 @@ async fn test_alter_region_sst_format_with_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1366,6 +1371,7 @@ async fn test_alter_region_sst_format_without_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1492,6 +1498,7 @@ async fn test_alter_region_sst_format_flat_to_pk_with_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1610,6 +1617,7 @@ async fn test_alter_region_sst_format_flat_to_pk_without_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1725,6 +1733,7 @@ async fn test_alter_region_append_mode_with_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1843,6 +1852,7 @@ async fn test_alter_region_append_mode_without_flush() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/append_mode_test.rs b/src/mito2/src/engine/append_mode_test.rs
index de58e04e46..188e28ccf5 100644
--- a/src/mito2/src/engine/append_mode_test.rs
+++ b/src/mito2/src/engine/append_mode_test.rs
@@ -348,6 +348,7 @@ async fn test_alter_append_mode_clears_merge_mode_with_format(flat_format: bool)
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs
index e1e462f692..0cc122573e 100644
--- a/src/mito2/src/engine/basic_test.rs
+++ b/src/mito2/src/engine/basic_test.rs
@@ -196,6 +196,7 @@ async fn test_region_replay_with_format(factory: Option<LogStoreFactory>, flat_f
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/batch_catchup_test.rs b/src/mito2/src/engine/batch_catchup_test.rs
index dc0b552adc..a3808b1999 100644
--- a/src/mito2/src/engine/batch_catchup_test.rs
+++ b/src/mito2/src/engine/batch_catchup_test.rs
@@ -160,6 +160,7 @@ async fn test_batch_catchup_with_format(factory: Option<LogStoreFactory>, flat_f
                     skip_wal_replay: true,
                     path_type: PathType::Bare,
                     checkpoint: None,
+                    requirements: Default::default(),
                 },
             )
         })
diff --git a/src/mito2/src/engine/batch_open_test.rs b/src/mito2/src/engine/batch_open_test.rs
index 6b16b3c120..2522cf2f84 100644
--- a/src/mito2/src/engine/batch_open_test.rs
+++ b/src/mito2/src/engine/batch_open_test.rs
@@ -136,6 +136,7 @@ async fn test_batch_open_with_format(factory: Option<LogStoreFactory>, flat_form
                     skip_wal_replay: false,
                     path_type: PathType::Bare,
                     checkpoint: None,
+                    requirements: Default::default(),
                 },
             )
         })
@@ -149,6 +150,7 @@ async fn test_batch_open_with_format(factory: Option<LogStoreFactory>, flat_form
             skip_wal_replay: false,
             path_type: PathType::Bare,
             checkpoint: None,
+            requirements: Default::default(),
         },
     ));
 
@@ -221,6 +223,7 @@ async fn test_batch_open_err_with_format(factory: Option<LogStoreFactory>, flat_
                     skip_wal_replay: false,
                     path_type: PathType::Bare,
                     checkpoint: None,
+                    requirements: Default::default(),
                 },
             )
         })
diff --git a/src/mito2/src/engine/bump_committed_sequence_test.rs b/src/mito2/src/engine/bump_committed_sequence_test.rs
index 12db0044c5..23a5af8865 100644
--- a/src/mito2/src/engine/bump_committed_sequence_test.rs
+++ b/src/mito2/src/engine/bump_committed_sequence_test.rs
@@ -112,6 +112,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -151,6 +152,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/catchup_test.rs b/src/mito2/src/engine/catchup_test.rs
index e10e91b51b..b79a2b0625 100644
--- a/src/mito2/src/engine/catchup_test.rs
+++ b/src/mito2/src/engine/catchup_test.rs
@@ -97,6 +97,7 @@ async fn test_catchup_with_last_entry_id(factory: Option<LogStoreFactory>) {
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -218,6 +219,7 @@ async fn test_catchup_with_incorrect_last_entry_id(factory: Option<LogStoreFacto
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -321,6 +323,7 @@ async fn test_catchup_without_last_entry_id(factory: Option<LogStoreFactory>) {
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -423,6 +426,7 @@ async fn test_catchup_with_manifest_update(factory: Option<LogStoreFactory>) {
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -527,6 +531,7 @@ async fn open_region(
                 skip_wal_replay,
                 path_type: PathType::Bare,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -622,6 +627,7 @@ async fn test_local_catchup(factory: Option<LogStoreFactory>) {
                 skip_wal_replay: true,
                 path_type: PathType::Bare,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/compaction_test.rs b/src/mito2/src/engine/compaction_test.rs
index f76e9f8bf9..fd0982b7e5 100644
--- a/src/mito2/src/engine/compaction_test.rs
+++ b/src/mito2/src/engine/compaction_test.rs
@@ -1023,6 +1023,7 @@ async fn test_change_region_compaction_window_with_format(flat_format: bool) {
                 options: Default::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -1125,6 +1126,7 @@ async fn test_open_overwrite_compaction_window_with_format(flat_format: bool) {
                 options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs
index 28ad1de71e..11279954a9 100644
--- a/src/mito2/src/engine/open_test.rs
+++ b/src/mito2/src/engine/open_test.rs
@@ -64,6 +64,7 @@ async fn test_engine_open_empty_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -110,6 +111,7 @@ async fn test_engine_open_existing_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -237,6 +239,7 @@ async fn test_engine_region_open_with_options_with_format(flat_format: bool) {
                 options: HashMap::from([("ttl".to_string(), "4d".to_string())]),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -297,6 +300,7 @@ async fn test_engine_region_open_with_custom_store_with_format(flat_format: bool
                 options: HashMap::from([("storage".to_string(), "Gcs".to_string())]),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -392,6 +396,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) {
                 options: Default::default(),
                 skip_wal_replay: true,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -431,6 +436,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) {
                 options: Default::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -484,6 +490,7 @@ async fn test_open_region_wait_for_opening_region_ok_with_format(flat_format: bo
                     options: HashMap::default(),
                     skip_wal_replay: false,
                     checkpoint: None,
+                    requirements: Default::default(),
                 }),
             )
             .await
@@ -535,6 +542,7 @@ async fn test_open_region_wait_for_opening_region_err_with_format(flat_format: b
                     options: HashMap::default(),
                     skip_wal_replay: false,
                     checkpoint: None,
+                    requirements: Default::default(),
                 }),
             )
             .await
@@ -691,6 +699,7 @@ async fn test_open_backfills_partition_expr_with_fetcher() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -725,6 +734,7 @@ async fn test_open_backfills_partition_expr_with_fetcher() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -766,6 +776,7 @@ async fn test_open_keeps_none_without_fetcher() {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/parallel_test.rs b/src/mito2/src/engine/parallel_test.rs
index b88a60739b..5a1354ec15 100644
--- a/src/mito2/src/engine/parallel_test.rs
+++ b/src/mito2/src/engine/parallel_test.rs
@@ -52,6 +52,7 @@ async fn scan_in_parallel(
                 skip_wal_replay: false,
                 path_type: PathType::Bare,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/skip_wal_test.rs b/src/mito2/src/engine/skip_wal_test.rs
index 97f159b8ac..3b6cf89f07 100644
--- a/src/mito2/src/engine/skip_wal_test.rs
+++ b/src/mito2/src/engine/skip_wal_test.rs
@@ -87,6 +87,7 @@ async fn test_close_region_skip_wal(insert: bool) {
                 options: request.options.clone(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -154,6 +155,7 @@ async fn test_close_follower_region_skip_wal() {
                 options: request.options.clone(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -271,6 +273,7 @@ async fn test_close_region_after_truncate_skip_wal() {
                 options: request.options,
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/sync_test.rs b/src/mito2/src/engine/sync_test.rs
index 17d73b1848..657ee868ce 100644
--- a/src/mito2/src/engine/sync_test.rs
+++ b/src/mito2/src/engine/sync_test.rs
@@ -127,6 +127,7 @@ async fn test_sync_after_flush_region_with_format(flat_format: bool) {
                 // Ensure the region is not replayed from the WAL.
                 skip_wal_replay: true,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -239,6 +240,7 @@ async fn test_sync_after_alter_region_with_format(flat_format: bool) {
                 // Ensure the region is not replayed from the WAL.
                 skip_wal_replay: true,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/engine/truncate_test.rs b/src/mito2/src/engine/truncate_test.rs
index 8c3fdad75d..8c6dd023f0 100644
--- a/src/mito2/src/engine/truncate_test.rs
+++ b/src/mito2/src/engine/truncate_test.rs
@@ -323,6 +323,7 @@ async fn test_engine_truncate_reopen_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
@@ -447,6 +448,7 @@ async fn test_engine_truncate_during_flush_with_format(flat_format: bool) {
                 options: HashMap::default(),
                 skip_wal_replay: false,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs
index 3571f7c0c4..2278b61669 100644
--- a/src/mito2/src/error.rs
+++ b/src/mito2/src/error.rs
@@ -916,6 +916,20 @@ pub enum Error {
         source: Arc<Error>,
     },
 
+    #[snafu(display(
+        "Region {} does not satisfy open requirement '{}': {}",
+        region_id,
+        requirement,
+        reason
+    ))]
+    OpenRegionRequirement {
+        region_id: RegionId, // region named in the message
+        requirement: &'static str, // name of the unmet open requirement (quoted in the message)
+        reason: &'static str, // explanation appended after the colon in the message
+        #[snafu(implicit)]
+        location: Location,
+    },
+
     #[snafu(display("Failed to parse job id"))]
     ParseJobId {
         #[snafu(implicit)]
@@ -1376,6 +1390,7 @@ impl ErrorExt for Error {
             PrimaryKeyLengthMismatch { .. } => StatusCode::InvalidArguments,
             InvalidSender { .. } => StatusCode::InvalidArguments,
             InvalidSchedulerState { .. } => StatusCode::InvalidArguments,
+            OpenRegionRequirement { .. } => StatusCode::InvalidArguments,
             DeleteSsts { .. } | DeleteIndex { .. } | DeleteIndexes { .. } => {
                 StatusCode::StorageUnavailable
             }
diff --git a/src/mito2/src/region/opener.rs b/src/mito2/src/region/opener.rs
index 3142a87c38..412172aead 100644
--- a/src/mito2/src/region/opener.rs
+++ b/src/mito2/src/region/opener.rs
@@ -27,8 +27,9 @@ use futures::future::BoxFuture;
 use log_store::kafka::log_store::KafkaLogStore;
 use log_store::noop::log_store::NoopLogStore;
 use log_store::raft_engine::log_store::RaftEngineLogStore;
+use object_store::ObjectStore;
 use object_store::manager::ObjectStoreManagerRef;
-use object_store::util::normalize_dir;
+use object_store::util::{is_object_storage, normalize_dir};
 use snafu::{OptionExt, ResultExt, ensure};
 use store_api::logstore::LogStore;
 use store_api::logstore::provider::Provider;
@@ -36,7 +37,7 @@ use store_api::metadata::{
     ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
 };
 use store_api::region_engine::RegionRole;
-use store_api::region_request::PathType;
+use store_api::region_request::{PathType, RegionRequirements};
 use store_api::storage::{ColumnId, RegionId};
 use tokio::sync::Semaphore;
 
@@ -46,8 +47,8 @@ use crate::cache::file_cache::{FileCache, FileType, IndexKey};
 use crate::config::MitoConfig;
 use crate::error;
 use crate::error::{
-    EmptyRegionDirSnafu, InvalidMetadataSnafu, ObjectStoreNotFoundSnafu, RegionCorruptedSnafu,
-    Result, StaleLogEntrySnafu,
+    EmptyRegionDirSnafu, InvalidMetadataSnafu, InvalidRegionOptionsSnafu, ObjectStoreNotFoundSnafu,
+    RegionCorruptedSnafu, Result, StaleLogEntrySnafu,
 };
 use crate::manifest::action::RegionManifest;
 use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
@@ -206,6 +207,29 @@ impl RegionOpener {
         Ok(self)
     }
 
+    /// Ensures the region's object store satisfies `requirements`; errors if options are unset.
+    pub(crate) fn ensure_open_requirements(&self, requirements: RegionRequirements) -> Result<()> {
+        if !requirements.object_storage {
+            return Ok(());
+        }
+
+        let options = self.options.as_ref().context(InvalidRegionOptionsSnafu {
+            reason: "missing region options before requirement check",
+        })?;
+        let object_store = get_object_store(&options.storage, &self.object_store_manager)?;
+
+        ensure!(
+            supports_open_region_object_storage_requirement(&object_store),
+            error::OpenRegionRequirementSnafu {
+                region_id: self.region_id,
+                requirement: "object storage",
+                reason: "region data must be accessible from another datanode",
+            }
+        );
+
+        Ok(())
+    }
+
     /// Sets the cache manager for the region.
     pub(crate) fn cache(mut self, cache_manager: Option<CacheManagerRef>) -> Self {
         self.cache_manager = cache_manager;
@@ -597,6 +621,21 @@ impl RegionOpener {
     }
 }
 
+#[cfg(not(feature = "test-shared-fs-region-migration"))]
+fn supports_open_region_object_storage_requirement(object_store: &ObjectStore) -> bool {
+    is_object_storage(object_store)
+}
+
+#[cfg(feature = "test-shared-fs-region-migration")]
+fn supports_open_region_object_storage_requirement(object_store: &ObjectStore) -> bool {
+    // Integration tests can configure multiple datanodes to share the same
+    // temporary home dir. That makes file storage accessible to all test
+    // datanodes, but production file storage still does not satisfy this
+    // requirement.
+    is_object_storage(object_store)
+        || object_store.info().scheme() == object_store::services::FS_SCHEME
+}
+
 /// Creates a version builder from a region manifest.
 pub(crate) fn version_builder_from_manifest(
     manifest: &RegionManifest,
@@ -1172,14 +1211,17 @@ mod tests {
     use datatypes::arrow::array::{ArrayRef, BinaryArray, Int64Array};
     use datatypes::arrow::record_batch::RecordBatch;
     use object_store::ObjectStore;
-    use object_store::services::{Fs, Memory};
+    use object_store::services::{Fs, Memory, S3};
     use parquet::arrow::ArrowWriter;
     use parquet::file::metadata::KeyValue;
     use parquet::file::properties::WriterProperties;
     use store_api::region_request::PathType;
     use store_api::storage::{FileId, RegionId};
 
-    use super::{preload_parquet_meta_cache_for_files, sanitize_region_options};
+    use super::{
+        preload_parquet_meta_cache_for_files, sanitize_region_options,
+        supports_open_region_object_storage_requirement,
+    };
     use crate::cache::CacheManager;
     use crate::cache::file_cache::{FileType, IndexKey};
     use crate::manifest::action::{RegionManifest, RemovedFilesRecord};
@@ -1207,6 +1249,48 @@ mod tests {
         }
     }
 
+    fn build_fs_object_store() -> ObjectStore {
+        ObjectStore::new(Fs::default().root("/tmp"))
+            .unwrap()
+            .finish()
+    }
+
+    #[test]
+    #[cfg(not(feature = "test-shared-fs-region-migration"))]
+    fn test_open_requirement_rejects_fs_object_store() {
+        let object_store = build_fs_object_store();
+
+        assert!(!supports_open_region_object_storage_requirement(
+            &object_store
+        ));
+    }
+
+    #[test]
+    #[cfg(feature = "test-shared-fs-region-migration")]
+    fn test_open_requirement_accepts_shared_fs_object_store_for_tests() {
+        let object_store = build_fs_object_store();
+
+        assert!(supports_open_region_object_storage_requirement(
+            &object_store
+        ));
+    }
+
+    #[test]
+    fn test_open_requirement_accepts_s3_object_store() {
+        let object_store = ObjectStore::new(
+            S3::default()
+                .bucket("test-bucket")
+                .region("us-east-1")
+                .disable_ec2_metadata(),
+        )
+        .unwrap()
+        .finish();
+
+        assert!(supports_open_region_object_storage_requirement(
+            &object_store
+        ));
+    }
+
     #[test]
     fn test_sanitize_region_options_options_format_wins() {
         // Manifest persisted PrimaryKey, but the re-parsed options now request Flat
diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs
index 792b954a6b..65dfc8b9e1 100644
--- a/src/mito2/src/test_util.rs
+++ b/src/mito2/src/test_util.rs
@@ -1307,6 +1307,7 @@ pub async fn reopen_region(
                 skip_wal_replay: false,
                 path_type: PathType::Bare,
                 checkpoint: None,
+                requirements: Default::default(),
             }),
         )
         .await
diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs
index 73bdca775c..a154140d98 100644
--- a/src/mito2/src/worker/handle_open.rs
+++ b/src/mito2/src/worker/handle_open.rs
@@ -87,14 +87,11 @@ impl<S: LogStore> RegionWorkerLoop<S> {
         else {
             return;
         };
-        if let Err(err) = self.check_and_cleanup_region(region_id, &request).await {
-            sender.send(Err(err));
-            return;
-        }
         info!("Try to open region {}, worker: {}", region_id, self.id);
         sanitize_open_request_options(&mut request.options);
 
         // Open region from specific region dir.
+        let requirements = request.requirements;
         let opener = match RegionOpener::new(
             region_id,
             &request.table_dir,
@@ -112,7 +109,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
         .cache(Some(self.cache_manager.clone()))
         .wal_entry_reader(wal_entry_receiver.map(|receiver| Box::new(receiver) as _))
         .replay_checkpoint(request.checkpoint.map(|checkpoint| checkpoint.entry_id))
-        .parse_options(request.options)
+        .parse_options(request.options.clone())
         {
             Ok(opener) => opener,
             Err(err) => {
@@ -121,6 +118,16 @@ impl<S: LogStore> RegionWorkerLoop<S> {
             }
         };
 
+        if let Err(err) = opener.ensure_open_requirements(requirements) {
+            sender.send(Err(err));
+            return;
+        }
+
+        if let Err(err) = self.check_and_cleanup_region(region_id, &request).await {
+            sender.send(Err(err));
+            return;
+        }
+
         let now = Instant::now();
         let regions = self.regions.clone();
         let wal = self.wal.clone();
diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs
index 849f91b729..92f0bd7299 100644
--- a/src/object-store/src/util.rs
+++ b/src/object-store/src/util.rs
@@ -22,11 +22,17 @@ use opendal::layers::{
     LoggingInterceptor, LoggingLayer, RetryEvent, RetryInterceptor, RetryLayer, TracingLayer,
 };
 use opendal::raw::{AccessorInfo, HttpClient, Operation};
+use opendal::services::FS_SCHEME;
 use snafu::ResultExt;
 
 use crate::config::HttpClientConfig;
 use crate::{ObjectStore, error};
 
+/// Returns `true` when the store's scheme is anything other than `FS_SCHEME` (local filesystem).
+pub fn is_object_storage(object_store: &ObjectStore) -> bool {
+    object_store.info().scheme() != FS_SCHEME
+}
+
 /// Join two paths and normalize the output dir.
 ///
 /// The output dir is always ends with `/`. e.g.
@@ -249,7 +255,11 @@ impl RetryInterceptor for PrintDetailedError {
 
 #[cfg(test)]
 mod tests {
+    use opendal::services::Fs;
+
     use super::*;
+    use crate::ObjectStore;
+    use crate::util::is_object_storage;
 
     #[test]
     fn test_normalize_dir() {
@@ -289,4 +299,14 @@ mod tests {
         assert_eq!("/abc", join_path("//", "/abc"));
         assert_eq!("abc/def", join_path("abc/", "//def"));
     }
+
+    #[test]
+    fn test_fs_is_not_object_storage() {
+        let object_store = ObjectStore::new(Fs::default().root("/tmp"))
+            .unwrap()
+            .finish();
+
+        assert_eq!(FS_SCHEME, object_store.info().scheme());
+        assert!(!is_object_storage(&object_store));
+    }
 }
diff --git a/src/operator/src/statement/copy_table_from.rs b/src/operator/src/statement/copy_table_from.rs
index 6f58603247..cae2835242 100644
--- a/src/operator/src/statement/copy_table_from.rs
+++ b/src/operator/src/statement/copy_table_from.rs
@@ -15,11 +15,15 @@
 use std::collections::HashMap;
 use std::future::Future;
 use std::path::Path;
+use std::pin::Pin;
 use std::sync::Arc;
+use std::task::{Context, Poll};
 
 use client::{Output, OutputData, OutputMeta};
 use common_base::readable_size::ReadableSize;
-use common_datasource::file_format::csv::CsvFormat;
+use common_datasource::file_format::csv::{
+    CsvFormat, is_skippable_arrow_error, tolerant_csv_stream,
+};
 use common_datasource::file_format::json::JsonFormat;
 use common_datasource::file_format::orc::{ReaderAdapter, infer_orc_schema, new_orc_stream_reader};
 use common_datasource::file_format::{FileFormat, Format, file_to_stream};
@@ -33,10 +37,13 @@ use common_telemetry::{debug, tracing};
 use datafusion::datasource::physical_plan::{CsvSource, FileSource, JsonSource};
 use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
 use datafusion::parquet::arrow::arrow_reader::ArrowReaderMetadata;
+use datafusion_common::DataFusionError;
+use datafusion_common::arrow::error::ArrowError;
 use datafusion_common::config::CsvOptions;
 use datafusion_expr::Expr;
 use datatypes::arrow::compute::can_cast_types;
 use datatypes::arrow::datatypes::{DataType as ArrowDataType, Schema, SchemaRef};
+use datatypes::arrow::record_batch::RecordBatch;
 use datatypes::vectors::Helper;
 use futures_util::StreamExt;
 use object_store::{Entry, EntryMode, ObjectStore};
@@ -221,23 +228,42 @@ impl StatementExecutor {
                 let csv_source = CsvSource::new(schema.clone())
                     .with_csv_options(options)
                     .with_batch_size(DEFAULT_BATCH_SIZE);
-                let stream = file_to_stream(
-                    object_store,
-                    path,
-                    csv_source,
-                    Some(projection),
-                    format.compression_type,
-                )
-                .await
-                .context(error::BuildFileStreamSnafu)?;
+                let stream = if format.skip_bad_records {
+                    let reader_schema =
+                        csv_reader_schema_for_skip_bad_records(schema, &compat_schema);
+                    tolerant_csv_stream(
+                        object_store,
+                        path,
+                        Arc::new(reader_schema),
+                        projection.clone(),
+                        format,
+                    )
+                    .await
+                    .context(error::BuildFileStreamSnafu)?
+                } else {
+                    file_to_stream(
+                        object_store,
+                        path,
+                        csv_source,
+                        Some(projection),
+                        format.compression_type,
+                    )
+                    .await
+                    .context(error::BuildFileStreamSnafu)?
+                };
 
-                Ok(Box::pin(
+                let stream = Box::pin(
                     // The projection is already applied in the CSV reader when we created the stream,
                     // so we pass None here to avoid double projection which would cause schema mismatch errors.
                     RecordBatchStreamTypeAdapter::new(output_schema, stream, None)
                         .with_filter(filters)
                         .context(error::PhysicalExprSnafu)?,
-                ))
+                );
+                if format.skip_bad_records {
+                    Ok(Box::pin(SkipBadRecordsStream::new(stream, path)))
+                } else {
+                    Ok(stream)
+                }
             }
             FileMetadata::Json {
                 path,
@@ -469,6 +495,58 @@ fn gen_insert_output(rows_inserted: usize, insert_cost: usize) -> Output {
     )
 }
 
+/// Stream adapter that logs and drops items failing with a skippable record error.
+struct SkipBadRecordsStream {
+    inner: DfSendableRecordBatchStream,
+    path: String,
+}
+
+impl SkipBadRecordsStream {
+    fn new(inner: DfSendableRecordBatchStream, path: impl Into<String>) -> Self {
+        Self {
+            inner,
+            path: path.into(),
+        }
+    }
+}
+
+impl datafusion::physical_plan::RecordBatchStream for SkipBadRecordsStream {
+    fn schema(&self) -> SchemaRef {
+        self.inner.schema()
+    }
+}
+
+impl futures::Stream for SkipBadRecordsStream {
+    type Item = datafusion_common::Result<RecordBatch>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        loop {
+            match self.inner.as_mut().poll_next(cx) {
+                Poll::Ready(Some(Err(error))) if is_skippable_record_error(&error) => {
+                    common_telemetry::warn!(
+                        "Skipping bad record while copying from {}: {}",
+                        self.path,
+                        error
+                    );
+                    continue;
+                }
+                other => return other,
+            }
+        }
+    }
+}
+
+fn is_skippable_record_error(error: &DataFusionError) -> bool {
+    match error {
+        DataFusionError::ArrowError(error, _) => is_skippable_arrow_error(error),
+        DataFusionError::External(error) => error
+            .downcast_ref::<ArrowError>()
+            .is_some_and(is_skippable_arrow_error),
+        DataFusionError::Context(_, error) => is_skippable_record_error(error),
+        _ => false,
+    }
+}
+
 /// Executes all pending inserts all at once, drain pending requests and reset pending bytes.
 async fn batch_insert(
     pending: &mut Vec<impl Future<Output = Result<Output>>>,
@@ -498,6 +576,59 @@ fn can_cast_types_for_greptime(from: &ArrowDataType, to: &ArrowDataType) -> bool
     can_cast_types(from, to)
 }
 
+/// Per file column, uses the `compat` field if the CSV reader can parse its type, else the file's.
+fn csv_reader_schema_for_skip_bad_records(file: &SchemaRef, compat: &SchemaRef) -> Schema {
+    let fields = file
+        .fields()
+        .iter()
+        .map(|file_field| {
+            let compat_field = compat
+                .fields()
+                .find(file_field.name())
+                .map(|(_, field)| field);
+
+            match compat_field {
+                Some(compat_field) if can_csv_reader_parse_type(compat_field.data_type()) => {
+                    compat_field.clone()
+                }
+                _ => file_field.clone(),
+            }
+        })
+        .collect::<Vec<_>>();
+
+    Schema::new_with_metadata(fields, file.metadata().clone())
+}
+
+fn can_csv_reader_parse_type(data_type: &ArrowDataType) -> bool {
+    match data_type {
+        ArrowDataType::Boolean
+        | ArrowDataType::Decimal32(_, _)
+        | ArrowDataType::Decimal64(_, _)
+        | ArrowDataType::Decimal128(_, _)
+        | ArrowDataType::Decimal256(_, _)
+        | ArrowDataType::Int8
+        | ArrowDataType::Int16
+        | ArrowDataType::Int32
+        | ArrowDataType::Int64
+        | ArrowDataType::UInt8
+        | ArrowDataType::UInt16
+        | ArrowDataType::UInt32
+        | ArrowDataType::UInt64
+        | ArrowDataType::Float32
+        | ArrowDataType::Float64
+        | ArrowDataType::Date32
+        | ArrowDataType::Date64
+        | ArrowDataType::Time32(_)
+        | ArrowDataType::Time64(_)
+        | ArrowDataType::Timestamp(_, _)
+        | ArrowDataType::Null
+        | ArrowDataType::Utf8
+        | ArrowDataType::Utf8View => true,
+        ArrowDataType::Dictionary(_, value_type) => value_type.as_ref() == &ArrowDataType::Utf8,
+        _ => false,
+    }
+}
+
 fn ensure_schema_compatible(from: &SchemaRef, to: &SchemaRef) -> Result<()> {
     let not_match = from
         .fields
@@ -780,4 +911,31 @@ mod tests {
             assert_eq!(test.0.project(&fp).unwrap(), test.1.project(&tp).unwrap());
         }
     }
+
+    #[test]
+    fn test_csv_reader_schema_for_skip_bad_records() {
+        let file_schema = make_test_schema(&[
+            Field::new("id", DataType::Utf8, true),
+            Field::new("jsons", DataType::Utf8, true),
+            Field::new("ts", DataType::Utf8, true),
+        ]);
+        let compat_schema = make_test_schema(&[
+            Field::new("id", DataType::UInt32, true),
+            Field::new("jsons", DataType::Binary, true),
+            Field::new(
+                "ts",
+                DataType::Timestamp(datatypes::arrow::datatypes::TimeUnit::Millisecond, None),
+                true,
+            ),
+        ]);
+
+        let reader_schema = csv_reader_schema_for_skip_bad_records(&file_schema, &compat_schema);
+
+        assert_eq!(reader_schema.field(0).data_type(), &DataType::UInt32);
+        assert_eq!(reader_schema.field(1).data_type(), &DataType::Utf8);
+        assert_eq!(
+            reader_schema.field(2).data_type(),
+            compat_schema.field(2).data_type()
+        );
+    }
 }
diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs
index 83a8e53225..e088c89c6b 100644
--- a/src/pipeline/benches/processor.rs
+++ b/src/pipeline/benches/processor.rs
@@ -233,6 +233,36 @@ transform:
     parse(&Content::Yaml(pipeline_yaml)).unwrap()
 }
 
+fn prepare_vrl_pipeline() -> Pipeline {
+    let pipeline_yaml = r#"
+---
+description: Minimal VRL processor benchmark
+
+processors:
+  - vrl:
+      source: |
+        .service_alias = .service
+        .host_alias = .host
+        del(.unused)
+        .processed = true
+        .
+
+transform:
+  - field: service
+    type: string
+  - field: host
+    type: string
+  - field: service_alias
+    type: string
+  - field: host_alias
+    type: string
+  - field: processed
+    type: boolean
+"#;
+
+    parse(&Content::Yaml(pipeline_yaml)).unwrap()
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     let input_value_str = include_str!("./data.log");
     let input_value = Deserializer::from_str(input_value_str)
@@ -262,6 +292,41 @@ fn criterion_benchmark(c: &mut Criterion) {
         })
     });
     group.finish();
+
+    let vrl_input_value = (0..128)
+        .map(|i| {
+            serde_json::json!({
+                "service": "frontend",
+                "host": format!("host-{i}"),
+                "unused": "drop-me"
+            })
+            .into()
+        })
+        .collect::<Vec<VrlValue>>();
+    let vrl_pipeline = prepare_vrl_pipeline();
+
+    let (vrl_pipeline, mut vrl_schema_info, vrl_pipeline_def, vrl_pipeline_param) =
+        setup_pipeline!(vrl_pipeline);
+    let vrl_pipeline_ctx = PipelineContext::new(
+        &vrl_pipeline_def,
+        &vrl_pipeline_param,
+        session::context::Channel::Unknown,
+    );
+
+    let mut group = c.benchmark_group("vrl processor");
+    group.sample_size(50);
+    group.bench_function("processor mut", |b| {
+        b.iter(|| {
+            processor_mut(
+                black_box(vrl_pipeline.clone()),
+                black_box(&vrl_pipeline_ctx),
+                black_box(&mut vrl_schema_info),
+                black_box(vrl_input_value.clone()),
+            )
+            .unwrap();
+        })
+    });
+    group.finish();
 }
 
 // Testing the pipeline's performance in converting Json to Rows
diff --git a/src/pipeline/src/etl/processor/vrl_processor.rs b/src/pipeline/src/etl/processor/vrl_processor.rs
index 20258a0427..ee3452523d 100644
--- a/src/pipeline/src/etl/processor/vrl_processor.rs
+++ b/src/pipeline/src/etl/processor/vrl_processor.rs
@@ -12,9 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::cell::RefCell;
 use std::collections::BTreeMap;
 
 use chrono_tz::Tz;
+use once_cell::sync::Lazy;
 use snafu::{OptionExt, ensure};
 use vrl::compiler::runtime::Runtime;
 use vrl::compiler::{Program, TargetValue, compile};
@@ -31,6 +33,12 @@ use crate::etl::processor::yaml_string;
 pub(crate) const PROCESSOR_VRL: &str = "vrl";
 const SOURCE: &str = "source";
 
+static UTC_TIMEZONE: Lazy<TimeZone> = Lazy::new(|| TimeZone::Named(Tz::UTC));
+
+thread_local! {
+    static VRL_RUNTIME: RefCell<Runtime> = RefCell::new(Runtime::default());
+}
+
 #[derive(Debug)]
 pub struct VrlProcessor {
     source: String,
@@ -74,10 +82,14 @@ impl VrlProcessor {
             secrets: Secrets::default(),
         };
 
-        let timezone = TimeZone::Named(Tz::UTC);
-        let mut runtime = Runtime::default();
-        let re = runtime
-            .resolve(&mut target, &self.program, &timezone)
+        let re = VRL_RUNTIME
+            .with(|runtime| {
+                let mut runtime = runtime.borrow_mut();
+                runtime.clear();
+                let result = runtime.resolve(&mut target, &self.program, &UTC_TIMEZONE);
+                runtime.clear();
+                result
+            })
             .map_err(|e| {
                 ExecuteVrlSnafu {
                     msg: e.get_expression_error().to_string(),
diff --git a/src/servers/src/configurator.rs b/src/servers/src/configurator.rs
index e8ba8264bd..7116fe0ce8 100644
--- a/src/servers/src/configurator.rs
+++ b/src/servers/src/configurator.rs
@@ -14,25 +14,11 @@
 
 use std::sync::Arc;
 
-use axum::Router as HttpRouter;
 use common_error::ext::BoxedError;
 use tonic::transport::server::Router as GrpcRouter;
 
 use crate::grpc::builder::GrpcServerBuilder;
 
-/// A configurator that customizes or enhances an HTTP router.
-#[async_trait::async_trait]
-pub trait HttpConfigurator<C>: Send + Sync {
-    /// Configures the given HTTP router using the provided context.
-    async fn configure_http(
-        &self,
-        route: HttpRouter,
-        ctx: C,
-    ) -> std::result::Result<HttpRouter, BoxedError>;
-}
-
-pub type HttpConfiguratorRef<C> = Arc<dyn HttpConfigurator<C>>;
-
 /// A configurator that customizes or enhances a gRPC router.
 #[async_trait::async_trait]
 pub trait GrpcRouterConfigurator<C>: Send + Sync {
diff --git a/src/servers/src/grpc.rs b/src/servers/src/grpc.rs
index 3adfd24945..50dd0b69c4 100644
--- a/src/servers/src/grpc.rs
+++ b/src/servers/src/grpc.rs
@@ -24,7 +24,7 @@ pub mod prom_query_gateway;
 pub mod region_server;
 
 use std::any::Any;
-use std::net::SocketAddr;
+use std::net::{IpAddr, SocketAddr};
 use std::time::Duration;
 
 use api::v1::health_check_server::{HealthCheck, HealthCheckServer};
@@ -95,14 +95,8 @@ impl GrpcOptions {
         if self.server_addr.is_empty() {
             match local_ip_address::local_ip() {
                 Ok(ip) => {
-                    let detected_addr = format!(
-                        "{}:{}",
-                        ip,
-                        self.bind_addr
-                            .split(':')
-                            .nth(1)
-                            .unwrap_or(DEFAULT_GRPC_ADDR_PORT)
-                    );
+                    let port = port_from_bind_addr(&self.bind_addr);
+                    let detected_addr = format_server_addr(ip, port);
                     info!("Using detected: {} as server address", detected_addr);
                     self.server_addr = detected_addr;
                 }
@@ -131,7 +125,18 @@ impl GrpcOptions {
     }
 }
 
-const DEFAULT_GRPC_ADDR_PORT: &str = "4001";
+const DEFAULT_GRPC_ADDR_PORT: u16 = 4001;
+
+/// Returns the port following the last `:` in `bind_addr`, or the default gRPC port.
+fn port_from_bind_addr(bind_addr: &str) -> u16 {
+    let port = bind_addr.rsplit_once(':').and_then(|(_, p)| p.parse().ok());
+    port.unwrap_or(DEFAULT_GRPC_ADDR_PORT)
+}
+
+/// Formats `ip` and `port` as a socket address string (IPv6 addresses are bracketed).
+fn format_server_addr(ip: IpAddr, port: u16) -> String {
+    SocketAddr::new(ip, port).to_string()
+}
 
 const DEFAULT_INTERNAL_GRPC_ADDR_PORT: &str = "4010";
 
@@ -415,3 +420,36 @@ impl Server for GrpcServer {
         self
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+
+    use super::{DEFAULT_GRPC_ADDR_PORT, format_server_addr, port_from_bind_addr};
+
+    #[test]
+    fn test_port_from_bind_addr() {
+        assert_eq!(3002, port_from_bind_addr("127.0.0.1:3002"));
+        assert_eq!(3002, port_from_bind_addr("[::]:3002"));
+        assert_eq!(
+            3002,
+            port_from_bind_addr("greptimedb-metasrv.default.svc.cluster.local:3002")
+        );
+        assert_eq!(
+            DEFAULT_GRPC_ADDR_PORT,
+            port_from_bind_addr("invalid-bind-addr")
+        );
+    }
+
+    #[test]
+    fn test_format_server_addr() {
+        assert_eq!(
+            "127.0.0.1:3002",
+            format_server_addr(IpAddr::V4(Ipv4Addr::LOCALHOST), 3002)
+        );
+        assert_eq!(
+            "[::1]:3002",
+            format_server_addr(IpAddr::V6(Ipv6Addr::LOCALHOST), 3002)
+        );
+    }
+}
diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs
index e5dc5380d1..6d3ab76ec1 100644
--- a/src/servers/src/http.rs
+++ b/src/servers/src/http.rs
@@ -27,7 +27,6 @@ use axum::response::{IntoResponse, Response};
 use axum::routing::Route;
 use axum::serve::ListenerExt;
 use axum::{Router, middleware, routing};
-use common_base::Plugins;
 use common_base::readable_size::ReadableSize;
 use common_recordbatch::RecordBatch;
 use common_telemetry::{error, info};
@@ -52,11 +51,9 @@ use tower_http::trace::TraceLayer;
 
 use self::authorize::AuthState;
 use self::result::table_result::TableResponse;
-use crate::configurator::HttpConfiguratorRef;
 use crate::elasticsearch;
 use crate::error::{
-    AddressBindSnafu, AlreadyStartedSnafu, Error, InternalIoSnafu, InvalidHeaderValueSnafu,
-    OtherSnafu, Result,
+    AddressBindSnafu, AlreadyStartedSnafu, Error, InternalIoSnafu, InvalidHeaderValueSnafu, Result,
 };
 use crate::http::influxdb::{influxdb_health, influxdb_ping, influxdb_write_v1, influxdb_write_v2};
 use crate::http::otlp::OtlpState;
@@ -139,9 +136,6 @@ pub struct HttpServer {
     user_provider: Option<UserProviderRef>,
     memory_limiter: ServerMemoryLimiter,
 
-    // plugins
-    plugins: Plugins,
-
     // server configs
     options: HttpOptions,
     bind_addr: Option<SocketAddr>,
@@ -516,7 +510,6 @@ pub struct DashboardState {
 
 pub struct HttpServerBuilder {
     options: HttpOptions,
-    plugins: Plugins,
     user_provider: Option<UserProviderRef>,
     router: Router,
     memory_limiter: ServerMemoryLimiter,
@@ -526,7 +519,6 @@ impl HttpServerBuilder {
     pub fn new(options: HttpOptions) -> Self {
         Self {
             options,
-            plugins: Plugins::default(),
             user_provider: None,
             router: Router::new(),
             memory_limiter: ServerMemoryLimiter::default(),
@@ -687,10 +679,6 @@ impl HttpServerBuilder {
         Self { router, ..self }
     }
 
-    pub fn with_plugins(self, plugins: Plugins) -> Self {
-        Self { plugins, ..self }
-    }
-
     pub fn with_greptime_config_options(self, opts: String) -> Self {
         let config_router = HttpServer::route_config(GreptimeOptionsConfigState {
             greptime_config_options: opts,
@@ -748,7 +736,6 @@ impl HttpServerBuilder {
             options: self.options,
             user_provider: self.user_provider,
             shutdown_tx: Mutex::new(None),
-            plugins: self.plugins,
             router: StdMutex::new(self.router),
             bind_addr: None,
             memory_limiter: self.memory_limiter,
@@ -1237,14 +1224,7 @@ impl Server for HttpServer {
                 AlreadyStartedSnafu { server: "HTTP" }
             );
 
-            let mut app = self.make_app();
-            if let Some(configurator) = self.plugins.get::<HttpConfiguratorRef<()>>() {
-                app = configurator
-                    .configure_http(app, ())
-                    .await
-                    .context(OtherSnafu)?;
-            }
-            let app = self.build(app)?;
+            let app = self.build(self.make_app())?;
             let listener = tokio::net::TcpListener::bind(listening)
                 .await
                 .context(AddressBindSnafu { addr: listening })?
diff --git a/src/sql/src/parsers/copy_parser.rs b/src/sql/src/parsers/copy_parser.rs
index 9a2eddcc78..491912c82e 100644
--- a/src/sql/src/parsers/copy_parser.rs
+++ b/src/sql/src/parsers/copy_parser.rs
@@ -401,6 +401,28 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_parse_copy_table_from_csv_options() {
+        let sql =
+            "COPY my_table FROM '/tmp/test.csv' WITH (FORMAT = 'CSV', SKIP_BAD_RECORDS = 'false')";
+        let mut result =
+            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
+                .unwrap();
+        assert_eq!(1, result.len());
+
+        let statement = result.remove(0);
+        assert_matches!(statement, Statement::Copy { .. });
+        match statement {
+            Statement::Copy(crate::statements::copy::Copy::CopyTable(CopyTable::From(
+                copy_table,
+            ))) => {
+                assert_eq!(copy_table.with.get("format"), Some("CSV"));
+                assert_eq!(copy_table.with.get("skip_bad_records"), Some("false"));
+            }
+            _ => unreachable!(),
+        }
+    }
+
     #[test]
     fn test_parse_copy_table_to() {
         struct Test<'a> {
diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs
index bbf5ce3277..f627c43e48 100644
--- a/src/sql/src/util.rs
+++ b/src/sql/src/util.rs
@@ -27,7 +27,7 @@ use serde::Serialize;
 use snafu::ensure;
 use sqlparser::ast::{
     Array, Expr, Ident, ObjectName, ObjectNamePart, SetExpr, SqlOption, StructField, TableFactor,
-    Value, ValueWithSpan,
+    TableWithJoins, Value, ValueWithSpan,
 };
 use sqlparser_derive::{Visit, VisitMut};
 
@@ -195,7 +195,7 @@ pub fn extract_tables_from_query(query: &SqlOrTql) -> impl Iterator<Item = Objec
 
     match query {
         SqlOrTql::Sql(query, _) => {
-            extract_tables_from_set_expr(&query.inner.body, &mut names);
+            extract_tables_from_sql_query(&query.inner, &mut names);
             extract_tables_from_hybrid_cte_query(query, &mut names);
         }
         SqlOrTql::Tql(tql, _) => extract_tables_from_tql(tql, &mut names),
@@ -205,26 +205,34 @@ pub fn extract_tables_from_query(query: &SqlOrTql) -> impl Iterator<Item = Objec
 }
 
 fn extract_tables_from_hybrid_cte_query(query: &Query, sql_names: &mut HashSet<ObjectName>) {
-    let mut tql_names = HashSet::new();
-    let mut cte_names: HashSet<String> = HashSet::new();
     if let Some(hybrid_cte) = &query.hybrid_cte {
+        let mut cte_names: HashSet<String> = hybrid_cte
+            .cte_tables
+            .iter()
+            .map(|cte| ParserContext::canonicalize_identifier(cte.name.clone()).value)
+            .collect(); // canonicalized names of every hybrid CTE
+        remove_cte_names(sql_names, &cte_names);
+
+        cte_names.clear(); // start over: the scope is rebuilt one CTE at a time below
         for cte in &hybrid_cte.cte_tables {
-            cte_names.insert(ParserContext::canonicalize_identifier(cte.name.clone()).value);
-            if let CteContent::Tql(tql) = &cte.content {
-                extract_tables_from_tql(tql, &mut tql_names);
+            let cte_name = ParserContext::canonicalize_identifier(cte.name.clone()).value;
+            let mut cte_query_names = HashSet::new(); // tables referenced by this CTE only
+            match &cte.content {
+                CteContent::Sql(cte_query) => {
+                    extract_tables_from_sql_query(cte_query, &mut cte_query_names)
+                }
+                CteContent::Tql(tql) => extract_tables_from_tql(tql, &mut cte_query_names),
+            }
+            if hybrid_cte.recursive {
+                cte_names.insert(cte_name.clone()); // recursive: in scope for its own content
+            }
+            remove_cte_names(&mut cte_query_names, &cte_names);
+            sql_names.extend(cte_query_names);
+            if !hybrid_cte.recursive {
+                cte_names.insert(cte_name); // non-recursive: in scope only for later CTEs
             }
         }
     }
-
-    if let Some(with) = &query.inner.with {
-        for cte in &with.cte_tables {
-            cte_names.insert(ParserContext::canonicalize_identifier(cte.alias.name.clone()).value);
-        }
-    }
-
-    remove_cte_names(sql_names, &cte_names);
-
-    sql_names.extend(tql_names);
 }
 
 fn remove_cte_names(names: &mut HashSet<ObjectName>, cte_names: &HashSet<String>) {
@@ -339,6 +347,33 @@ pub fn location_to_index(sql: &str, location: &sqlparser::tokenizer::Location) -
     index - 1
 }
 
+/// Helper function for [extract_tables_from_query]: collects into `names` the tables
+/// referenced by a [sqlparser::ast::Query], i.e. by its `WITH` CTE queries and its body,
+/// after filtering each of them against the CTE names that are in scope at that point.
+fn extract_tables_from_sql_query(query: &sqlparser::ast::Query, names: &mut HashSet<ObjectName>) {
+    let mut cte_names = HashSet::new(); // canonicalized CTE names in scope so far
+    if let Some(with) = &query.with {
+        for cte in &with.cte_tables {
+            let cte_name = ParserContext::canonicalize_identifier(cte.alias.name.clone()).value;
+            let mut cte_query_names = HashSet::new(); // tables referenced by this CTE only
+            extract_tables_from_sql_query(&cte.query, &mut cte_query_names);
+            if with.recursive {
+                cte_names.insert(cte_name.clone()); // recursive: in scope inside its own query
+            }
+            remove_cte_names(&mut cte_query_names, &cte_names);
+            names.extend(cte_query_names);
+            if !with.recursive {
+                cte_names.insert(cte_name); // non-recursive: in scope only after its own query
+            }
+        }
+    }
+
+    let mut body_names = HashSet::new(); // filtered separately from what `names` already holds
+    extract_tables_from_set_expr(&query.body, &mut body_names);
+    remove_cte_names(&mut body_names, &cte_names);
+    names.extend(body_names);
+}
+
 /// Helper function for [extract_tables_from_query].
 ///
 /// Handle [SetExpr].
@@ -346,14 +381,11 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet<ObjectNa
     match set_expr {
         SetExpr::Select(select) => {
             for from in &select.from {
-                table_factor_to_object_name(&from.relation, names);
-                for join in &from.joins {
-                    table_factor_to_object_name(&join.relation, names);
-                }
+                extract_tables_from_table_with_joins(from, names);
             }
         }
         SetExpr::Query(query) => {
-            extract_tables_from_set_expr(&query.body, names);
+            extract_tables_from_sql_query(query, names);
         }
         SetExpr::SetOperation { left, right, .. } => {
             extract_tables_from_set_expr(left, names);
@@ -363,12 +395,47 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet<ObjectNa
     };
 }
 
+/// Helper function for [extract_tables_from_query]: handles a [TableWithJoins] by
+/// passing its base relation and the relation of every join to
+/// [table_factor_to_object_name].
+fn extract_tables_from_table_with_joins(
+    table_with_joins: &TableWithJoins,
+    names: &mut HashSet<ObjectName>,
+) {
+    table_factor_to_object_name(&table_with_joins.relation, names);
+    for join in &table_with_joins.joins {
+        table_factor_to_object_name(&join.relation, names);
+    }
+}
+
 /// Helper function for [extract_tables_from_query].
 ///
 /// Handle [TableFactor].
 fn table_factor_to_object_name(table_factor: &TableFactor, names: &mut HashSet<ObjectName>) {
-    if let TableFactor::Table { name, .. } = table_factor {
-        names.insert(name.to_owned());
+    match table_factor {
+        TableFactor::Table { name, .. } => {
+            names.insert(name.to_owned());
+        }
+        TableFactor::Derived { subquery, .. } => {
+            extract_tables_from_sql_query(subquery, names); // recurse into the subquery
+        }
+        TableFactor::NestedJoin {
+            table_with_joins, ..
+        } => {
+            extract_tables_from_table_with_joins(table_with_joins, names);
+        }
+        TableFactor::Pivot { table, .. }
+        | TableFactor::Unpivot { table, .. }
+        | TableFactor::MatchRecognize { table, .. } => {
+            table_factor_to_object_name(table, names); // these wrap another table factor
+        }
+        TableFactor::TableFunction { .. }
+        | TableFactor::Function { .. }
+        | TableFactor::UNNEST { .. }
+        | TableFactor::JsonTable { .. }
+        | TableFactor::OpenJsonTable { .. }
+        | TableFactor::XmlTable { .. }
+        | TableFactor::SemanticView { .. } => {} // nothing is collected from these variants
     }
 }
 
@@ -458,6 +525,91 @@ TQL EVAL (now() - '15s'::interval, now(), '5s') count_values("status_code", {__n
         }
     }
 
+    #[test]
+    fn test_extract_tables_from_sql_query_with_derived_join() {
+        let sql = r#"
+CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink
+EVAL INTERVAL '1m' AS
+SELECT a.symbol, b.mark_price
+FROM (
+    SELECT inst_id AS symbol, max(ts) AS mark_iv_ts
+    FROM flow_batch_join_opt_summary
+    GROUP BY inst_id
+) a
+LEFT JOIN (
+    SELECT symbol, max(mark_price) AS mark_price
+    FROM flow_batch_join_market_v5
+    WHERE "type" = 'OPTION_MARK'
+    GROUP BY symbol
+) b ON a.symbol = b.symbol;
+"#;
+        let mut stmts =
+            ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
+                .unwrap();
+        let Statement::CreateFlow(create_flow) = stmts.pop().unwrap() else {
+            unreachable!()
+        };
+
+        let mut tables = extract_tables_from_query(&create_flow.query)
+            .map(|table| format_raw_object_name(&table))
+            .collect_vec();
+        tables.sort(); // sort so the comparison below does not depend on iteration order
+        assert_eq!(
+            vec![
+                "flow_batch_join_market_v5".to_string(), // only inside the LEFT JOIN subquery
+                "flow_batch_join_opt_summary".to_string(), // only inside the FROM subquery
+            ],
+            tables
+        );
+    }
+
+    #[test]
+    fn test_extract_tables_from_sql_query_with_cte_scopes() {
+        let testcases = vec![
+            (
+                r#"
+WITH source AS (
+    SELECT * FROM source
+)
+SELECT * FROM source;
+"#,
+                vec!["source".to_string()], // the `source` read inside the CTE query is reported
+            ),
+            (
+                r#"
+WITH first_cte AS (
+    SELECT * FROM physical_source
+), second_cte AS (
+    SELECT * FROM first_cte
+)
+SELECT * FROM second_cte;
+"#,
+                vec!["physical_source".to_string()], // neither CTE name is reported
+            ),
+        ];
+
+        for (sql, expected_tables) in testcases {
+            let mut stmts = ParserContext::create_with_dialect(
+                sql,
+                &GreptimeDbDialect {},
+                ParseOptions::default(),
+            )
+            .unwrap();
+            let Statement::Query(query) = stmts.pop().unwrap() else {
+                unreachable!()
+            };
+
+            let mut tables = HashSet::new();
+            extract_tables_from_sql_query(&query.inner, &mut tables);
+            let mut tables = tables
+                .into_iter()
+                .map(|table| format_raw_object_name(&table))
+                .collect_vec();
+            tables.sort(); // HashSet iteration order is unspecified; sort before comparing
+            assert_eq!(expected_tables, tables);
+        }
+    }
+
     #[test]
     fn test_extract_tables_from_tql_query_with_schema_matcher() {
         let sql = r#"
diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs
index 951abca1be..abf96736ec 100644
--- a/src/store-api/src/region_request.rs
+++ b/src/store-api/src/region_request.rs
@@ -315,6 +315,7 @@ fn make_region_open(open: OpenRequest) -> Result<Vec<(RegionId, RegionRequest)>>
             options: open.options,
             skip_wal_replay: false,
             checkpoint: None,
+            requirements: Default::default(),
         }),
     )])
 }
@@ -566,6 +567,28 @@ pub struct RegionDropRequest {
     pub partial_drop: bool,
 }
 
+/// Requirements for a region request; the derived default requires nothing.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(default)] // fields missing from the input take their `Default` value
+pub struct RegionRequirements {
+    /// Whether the region data must be backed by object storage (`false` by default).
+    pub object_storage: bool,
+}
+
+impl RegionRequirements {
+    /// Returns requirements with no flag set; identical to `Self::default()`.
+    pub fn empty() -> Self {
+        Self::default()
+    }
+
+    /// Returns requirements with `object_storage` set to `true`.
+    pub fn object_storage() -> Self {
+        Self {
+            object_storage: true,
+        }
+    }
+}
+
 /// Open region request.
 #[derive(Debug, Clone)]
 pub struct RegionOpenRequest {
@@ -581,6 +604,8 @@ pub struct RegionOpenRequest {
     pub skip_wal_replay: bool,
     /// Replay checkpoint.
     pub checkpoint: Option<ReplayCheckpoint>,
+    /// Requirements for opening the region.
+    pub requirements: RegionRequirements,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
diff --git a/tests-integration/Cargo.toml b/tests-integration/Cargo.toml
index 43850e4ed3..f51badc8d6 100644
--- a/tests-integration/Cargo.toml
+++ b/tests-integration/Cargo.toml
@@ -63,7 +63,7 @@ log-query = { workspace = true }
 loki-proto.workspace = true
 meta-client.workspace = true
 meta-srv = { workspace = true, features = ["mock"] }
-mito2.workspace = true
+mito2 = { workspace = true, features = ["test-shared-fs-region-migration"] }
 object-store.workspace = true
 operator = { workspace = true, features = ["testing"] }
 plugins.workspace = true
diff --git a/tests/cases/standalone/common/copy/copy_from_fs_csv.result b/tests/cases/standalone/common/copy/copy_from_fs_csv.result
index 2e3373af15..ad81cf1f77 100644
--- a/tests/cases/standalone/common/copy/copy_from_fs_csv.result
+++ b/tests/cases/standalone/common/copy/copy_from_fs_csv.result
@@ -183,6 +183,24 @@ select * from csv_null_prefix_import;
 | final | 2023-11-14T22:13:23 |
 +-------+---------------------+
 
+CREATE TABLE csv_skip_bad_records(host_id int, host_name string, reading_value double, ts timestamp time index);
+
+Affected Rows: 0
+
+-- SQLNESS ENV PWD
+Copy csv_skip_bad_records FROM '$PWD/tests/data/csv/skip_bad_records.csv' WITH (format='csv', skip_bad_records='true');
+
+Affected Rows: 2
+
+select * from csv_skip_bad_records order by ts;
+
++---------+-----------+---------------+---------------------+
+| host_id | host_name | reading_value | ts                  |
++---------+-----------+---------------+---------------------+
+| 1       | Alice     | 10.5          | 2024-01-01T00:00:00 |
+| 2       | Bob       | 30.5          | 2024-01-01T00:00:02 |
++---------+-----------+---------------+---------------------+
+
 drop table demo;
 
 Affected Rows: 0
@@ -219,3 +237,7 @@ drop table csv_null_prefix_import;
 
 Affected Rows: 0
 
+drop table csv_skip_bad_records;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/copy/copy_from_fs_csv.sql b/tests/cases/standalone/common/copy/copy_from_fs_csv.sql
index 0901048177..cd6cfcbd23 100644
--- a/tests/cases/standalone/common/copy/copy_from_fs_csv.sql
+++ b/tests/cases/standalone/common/copy/copy_from_fs_csv.sql
@@ -73,6 +73,13 @@ Copy csv_null_prefix_import FROM '${SQLNESS_HOME}/demo/export/csv_null_prefix.cs
 
 select * from csv_null_prefix_import;
 
+CREATE TABLE csv_skip_bad_records(host_id int, host_name string, reading_value double, ts timestamp time index);
+
+-- SQLNESS ENV PWD
+Copy csv_skip_bad_records FROM '$PWD/tests/data/csv/skip_bad_records.csv' WITH (format='csv', skip_bad_records='true');
+
+select * from csv_skip_bad_records order by ts;
+
 drop table demo;
 
 drop table with_filename;
@@ -90,3 +97,5 @@ drop table demo_with_less_columns;
 drop table csv_null_prefix;
 
 drop table csv_null_prefix_import;
+
+drop table csv_skip_bad_records;
diff --git a/tests/cases/standalone/common/flow/flow_batch_join_subquery.result b/tests/cases/standalone/common/flow/flow_batch_join_subquery.result
new file mode 100644
index 0000000000..0d590cbdbc
--- /dev/null
+++ b/tests/cases/standalone/common/flow/flow_batch_join_subquery.result
@@ -0,0 +1,130 @@
+CREATE DATABASE flow_join_fixture;
+
+Affected Rows: 1
+
+CREATE TABLE flow_join_fixture."left_samples" (
+    source_id STRING,
+    left_value DOUBLE,
+    event_ts TIMESTAMP,
+    observed_at TIMESTAMP TIME INDEX
+);
+
+Affected Rows: 0
+
+CREATE TABLE flow_join_fixture."right_samples" (
+    source_id STRING,
+    right_value DOUBLE,
+    sample_kind STRING,
+    event_ts TIMESTAMP,
+    observed_at TIMESTAMP TIME INDEX
+);
+
+Affected Rows: 0
+
+-- Verify batching flow creation accepts aggregate subqueries joined by LEFT JOIN.
+CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink
+EVAL INTERVAL '5m' AS
+SELECT
+    l.source_id,
+    l.measure_name,
+    l.bucket_time,
+    l.left_event_ts,
+    l.left_value,
+    r.right_event_ts,
+    r.right_value
+FROM (
+    SELECT
+        source_id,
+        'sample' AS measure_name,
+        date_trunc('minute', now()) AS bucket_time,
+        max(event_ts) AS left_event_ts,
+        last_value(left_value ORDER BY observed_at) AS left_value
+    FROM
+        flow_join_fixture."left_samples"
+    WHERE
+        observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes'
+            AND date_trunc('minute', now())
+    GROUP BY
+        source_id
+) l
+LEFT JOIN (
+    SELECT
+        source_id,
+        'sample' AS measure_name,
+        date_trunc('minute', now()) AS bucket_time,
+        max(event_ts) AS right_event_ts,
+        last_value(right_value ORDER BY observed_at) AS right_value
+    FROM
+        flow_join_fixture."right_samples"
+    WHERE
+        observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes'
+            AND date_trunc('minute', now())
+        AND sample_kind = 'primary'
+    GROUP BY
+        source_id
+) r ON l.source_id = r.source_id AND l.bucket_time = r.bucket_time;
+
+Affected Rows: 0
+
+SELECT
+    source_table_names LIKE '%left_samples%' AS has_left_source,
+    source_table_names LIKE '%right_samples%' AS has_right_source,
+    options LIKE '%"flow_type":"batching"%' AS is_batching_flow
+FROM
+    INFORMATION_SCHEMA.FLOWS
+WHERE
+    flow_name = 'flow_batch_join_subquery';
+
++-----------------+------------------+------------------+
+| has_left_source | has_right_source | is_batching_flow |
++-----------------+------------------+------------------+
+| true            | true             | true             |
++-----------------+------------------+------------------+
+
+INSERT INTO flow_join_fixture."left_samples" VALUES
+    ('source-a', 0.12, date_trunc('minute', now()), date_trunc('minute', now()));
+
+Affected Rows: 1
+
+INSERT INTO flow_join_fixture."right_samples" VALUES
+    ('source-a', 100.5, 'primary', date_trunc('minute', now()), date_trunc('minute', now()));
+
+Affected Rows: 1
+
+-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED  |
+ADMIN FLUSH_FLOW('flow_batch_join_subquery');
+
++----------------------------------------------+
+| ADMIN FLUSH_FLOW('flow_batch_join_subquery') |
++----------------------------------------------+
+|  FLOW_FLUSHED  |
++----------------------------------------------+
+
+SELECT source_id, measure_name, left_value, right_value FROM flow_batch_join_sink ORDER BY source_id;
+
++-----------+--------------+------------+-------------+
+| source_id | measure_name | left_value | right_value |
++-----------+--------------+------------+-------------+
+| source-a  | sample       | 0.12       | 100.5       |
++-----------+--------------+------------+-------------+
+
+DROP FLOW flow_batch_join_subquery;
+
+Affected Rows: 0
+
+DROP TABLE flow_batch_join_sink;
+
+Affected Rows: 0
+
+DROP TABLE flow_join_fixture."left_samples";
+
+Affected Rows: 0
+
+DROP TABLE flow_join_fixture."right_samples";
+
+Affected Rows: 0
+
+DROP DATABASE flow_join_fixture;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql b/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql
new file mode 100644
index 0000000000..f37aafdf4f
--- /dev/null
+++ b/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql
@@ -0,0 +1,85 @@
+CREATE DATABASE flow_join_fixture;
+
+CREATE TABLE flow_join_fixture."left_samples" (
+    source_id STRING,
+    left_value DOUBLE,
+    event_ts TIMESTAMP,
+    observed_at TIMESTAMP TIME INDEX
+);
+
+CREATE TABLE flow_join_fixture."right_samples" (
+    source_id STRING,
+    right_value DOUBLE,
+    sample_kind STRING,
+    event_ts TIMESTAMP,
+    observed_at TIMESTAMP TIME INDEX
+);
+
+-- Verify batching flow creation accepts aggregate subqueries joined by LEFT JOIN.
+CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink
+EVAL INTERVAL '5m' AS
+SELECT
+    l.source_id,
+    l.measure_name,
+    l.bucket_time,
+    l.left_event_ts,
+    l.left_value,
+    r.right_event_ts,
+    r.right_value
+FROM (
+    SELECT
+        source_id,
+        'sample' AS measure_name,
+        date_trunc('minute', now()) AS bucket_time,
+        max(event_ts) AS left_event_ts,
+        last_value(left_value ORDER BY observed_at) AS left_value
+    FROM
+        flow_join_fixture."left_samples"
+    WHERE
+        observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes'
+            AND date_trunc('minute', now())
+    GROUP BY
+        source_id
+) l
+LEFT JOIN (
+    SELECT
+        source_id,
+        'sample' AS measure_name,
+        date_trunc('minute', now()) AS bucket_time,
+        max(event_ts) AS right_event_ts,
+        last_value(right_value ORDER BY observed_at) AS right_value
+    FROM
+        flow_join_fixture."right_samples"
+    WHERE
+        observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes'
+            AND date_trunc('minute', now())
+        AND sample_kind = 'primary'
+    GROUP BY
+        source_id
+) r ON l.source_id = r.source_id AND l.bucket_time = r.bucket_time;
+
+SELECT
+    source_table_names LIKE '%left_samples%' AS has_left_source,
+    source_table_names LIKE '%right_samples%' AS has_right_source,
+    options LIKE '%"flow_type":"batching"%' AS is_batching_flow
+FROM
+    INFORMATION_SCHEMA.FLOWS
+WHERE
+    flow_name = 'flow_batch_join_subquery';
+
+INSERT INTO flow_join_fixture."left_samples" VALUES
+    ('source-a', 0.12, date_trunc('minute', now()), date_trunc('minute', now()));
+
+INSERT INTO flow_join_fixture."right_samples" VALUES
+    ('source-a', 100.5, 'primary', date_trunc('minute', now()), date_trunc('minute', now()));
+
+-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED  |
+ADMIN FLUSH_FLOW('flow_batch_join_subquery');
+
+SELECT source_id, measure_name, left_value, right_value FROM flow_batch_join_sink ORDER BY source_id;
+
+DROP FLOW flow_batch_join_subquery;
+DROP TABLE flow_batch_join_sink;
+DROP TABLE flow_join_fixture."left_samples";
+DROP TABLE flow_join_fixture."right_samples";
+DROP DATABASE flow_join_fixture;
diff --git a/tests/cases/standalone/common/flow/flow_last_non_null.result b/tests/cases/standalone/common/flow/flow_last_non_null.result
index 50cb46faa3..0c03c19399 100644
--- a/tests/cases/standalone/common/flow/flow_last_non_null.result
+++ b/tests/cases/standalone/common/flow/flow_last_non_null.result
@@ -162,6 +162,8 @@ CREATE TABLE approx_rate (
 
 Affected Rows: 0
 
+-- Without merge_mode=last_non_null, this partial output is rejected at CREATE FLOW time.
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
 CREATE FLOW find_approx_rate SINK TO approx_rate AS
 SELECT
     (max(byte) - min(byte)) / 30.0 as rate,
@@ -172,24 +174,7 @@ from
 GROUP BY
     time_window;
 
-Affected Rows: 0
-
-INSERT INTO
-    bytes_log
-VALUES
-    (NULL, '2023-01-01 00:00:01'),
-    (300, '2023-01-01 00:00:31');
-
-Affected Rows: 2
-
--- should return error
-ADMIN FLUSH_FLOW('find_approx_rate');
-
-Error: 1002(Unexpected), Failed to execute admin function flush_flow: Execution error: Internal error: 1003
-
-DROP FLOW find_approx_rate;
-
-Affected Rows: 0
+Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 3 flow output columns and 4 sink table columns. flow output columns: [\"rate\", \"time_window\", \"update_at\"], sink table columns: [\"rate\", \"time_window\", \"update_at\", \"bb\"], extra flow columns not in sink: [], missing sink columns from flow output: [\"bb\"]") in context: Failed to rewrite plan
 
 DROP TABLE bytes_log;
 
diff --git a/tests/cases/standalone/common/flow/flow_last_non_null.sql b/tests/cases/standalone/common/flow/flow_last_non_null.sql
index 95ebe4aaa6..29c5444f95 100644
--- a/tests/cases/standalone/common/flow/flow_last_non_null.sql
+++ b/tests/cases/standalone/common/flow/flow_last_non_null.sql
@@ -84,6 +84,8 @@ CREATE TABLE approx_rate (
     TIME INDEX(time_window)
 );
 
+-- Without merge_mode=last_non_null, this partial output is rejected at CREATE FLOW time.
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
 CREATE FLOW find_approx_rate SINK TO approx_rate AS
 SELECT
     (max(byte) - min(byte)) / 30.0 as rate,
@@ -93,16 +95,5 @@ from
     bytes_log
 GROUP BY
     time_window;
-
-INSERT INTO
-    bytes_log
-VALUES
-    (NULL, '2023-01-01 00:00:01'),
-    (300, '2023-01-01 00:00:31');
-
--- should return error
-ADMIN FLUSH_FLOW('find_approx_rate');
-
-DROP FLOW find_approx_rate;
 DROP TABLE bytes_log;
 DROP TABLE approx_rate;
diff --git a/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result
new file mode 100644
index 0000000000..54fcba2285
--- /dev/null
+++ b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result
@@ -0,0 +1,123 @@
+-- Verify that batching flow rejects CREATE FLOW when the pre-existing sink
+-- table schema does not match the flow output (create-time validation, not runtime).
+CREATE TABLE source_mm (
+    "number" INT,
+    extra STRING,
+    ts TIMESTAMP TIME INDEX
+);
+
+Affected Rows: 0
+
+-- Pre-create a sink table that is intentionally missing the "extra" column.
+-- This case validates batching mode at CREATE FLOW time, before any INSERT/FLUSH.
+CREATE TABLE sink_mm (
+    "number" INT,
+    time_window TIMESTAMP TIME INDEX,
+    cnt BIGINT
+);
+
+Affected Rows: 0
+
+-- This CREATE FLOW should fail immediately: the flow outputs (number, extra, time_window, cnt)
+-- but sink_mm has only (number, time_window, cnt).
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW mismatch_flow SINK TO sink_mm AS
+SELECT
+    "number",
+    extra,
+    date_bin(INTERVAL '1 second', ts) as time_window,
+    count(*) as cnt
+FROM
+    source_mm
+GROUP BY
+    "number", extra, time_window;
+
+Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 4 flow output columns and 3 sink table columns. flow output columns: [\"number\", \"extra\", \"time_window\", \"cnt\"], sink table columns: [\"number\", \"time_window\", \"cnt\"], extra flow columns not in sink: [\"extra\"], missing sink columns from flow output: []") in context: Failed to rewrite plan
+
+DROP TABLE source_mm;
+
+Affected Rows: 0
+
+DROP TABLE sink_mm;
+
+Affected Rows: 0
+
+-- TQL/PromQL flows use the same create-time sink schema validation path.
+CREATE TABLE tql_source_mm (
+    `value` DOUBLE,
+    ts TIMESTAMP TIME INDEX,
+    sensor STRING,
+    loc STRING,
+    PRIMARY KEY (sensor, loc)
+);
+
+Affected Rows: 0
+
+-- Pre-create a TQL sink table that is intentionally missing the "sensor" tag column.
+CREATE TABLE tql_sink_mm (
+    `value` DOUBLE,
+    ts TIMESTAMP TIME INDEX
+);
+
+Affected Rows: 0
+
+-- This CREATE FLOW should fail immediately: the TQL output has (value, sensor, ts),
+-- but tql_sink_mm has only (value, ts).
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW tql_mismatch_flow
+SINK TO tql_sink_mm
+EVAL INTERVAL '1m' AS
+TQL EVAL (now() - '1m'::interval, now(), '1m')
+avg by(sensor) (tql_source_mm) AS value;
+
+Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 3 flow output columns and 2 sink table columns. flow output columns: [\"value\", \"sensor\", \"ts\"], sink table columns: [\"value\", \"ts\"], extra flow columns not in sink: [\"sensor\"], missing sink columns from flow output: []") in context: Failed to rewrite plan
+
+DROP TABLE tql_source_mm;
+
+Affected Rows: 0
+
+DROP TABLE tql_sink_mm;
+
+Affected Rows: 0
+
+-- Real merge_mode=last_non_null sink options should enable partial schema validation.
+CREATE TABLE lnn_source_mm (
+    device STRING,
+    val DOUBLE,
+    ts TIMESTAMP TIME INDEX
+);
+
+Affected Rows: 0
+
+CREATE TABLE lnn_sink_mm (
+    device STRING,
+    time_window TIMESTAMP TIME INDEX,
+    cnt BIGINT,
+    PRIMARY KEY (device)
+) WITH('merge_mode'='last_non_null');
+
+Affected Rows: 0
+
+-- This CREATE FLOW should fail through the last_non_null partial validator: the
+-- sink primary key "device" is required but absent from the flow output.
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW lnn_missing_pk_flow
+SINK TO lnn_sink_mm AS
+SELECT
+    date_bin(INTERVAL '1 second', ts) as time_window,
+    count(*) as cnt
+FROM
+    lnn_source_mm
+GROUP BY
+    time_window;
+
+Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Column(s) [\"device\"] required by sink table are missing from flow output when merge_mode=last_non_null. Flow output schema does not match sink table schema: found 2 flow output columns and 3 sink table columns. flow output columns: [\"time_window\", \"cnt\"], sink table columns: [\"device\", \"time_window\", \"cnt\"], extra flow columns not in sink: [], missing sink columns from flow output: [\"device\"]") in context: Failed to rewrite plan
+
+DROP TABLE lnn_source_mm;
+
+Affected Rows: 0
+
+DROP TABLE lnn_sink_mm;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql
new file mode 100644
index 0000000000..2d00799817
--- /dev/null
+++ b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql
@@ -0,0 +1,89 @@
+-- Verify that batching flow rejects CREATE FLOW when the pre-existing sink
+-- table schema does not match the flow output (create-time validation, not runtime).
+CREATE TABLE source_mm (
+    "number" INT,
+    extra STRING,
+    ts TIMESTAMP TIME INDEX
+);
+
+-- Pre-create a sink table that is intentionally missing the "extra" column.
+-- This case validates batching mode at CREATE FLOW time, before any INSERT/FLUSH.
+CREATE TABLE sink_mm (
+    "number" INT,
+    time_window TIMESTAMP TIME INDEX,
+    cnt BIGINT
+);
+
+-- This CREATE FLOW should fail immediately: the flow outputs (number, extra, time_window, cnt)
+-- but sink_mm has only (number, time_window, cnt).
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW mismatch_flow SINK TO sink_mm AS
+SELECT
+    "number",
+    extra,
+    date_bin(INTERVAL '1 second', ts) as time_window,
+    count(*) as cnt
+FROM
+    source_mm
+GROUP BY
+    "number", extra, time_window;
+
+DROP TABLE source_mm;
+DROP TABLE sink_mm;
+
+-- TQL/PromQL flows use the same create-time sink schema validation path.
+CREATE TABLE tql_source_mm (
+    `value` DOUBLE,
+    ts TIMESTAMP TIME INDEX,
+    sensor STRING,
+    loc STRING,
+    PRIMARY KEY (sensor, loc)
+);
+
+-- Pre-create a TQL sink table that is intentionally missing the "sensor" tag column.
+CREATE TABLE tql_sink_mm (
+    `value` DOUBLE,
+    ts TIMESTAMP TIME INDEX
+);
+
+-- This CREATE FLOW should fail immediately: the TQL output has (value, sensor, ts),
+-- but tql_sink_mm has only (value, ts).
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW tql_mismatch_flow
+SINK TO tql_sink_mm
+EVAL INTERVAL '1m' AS
+TQL EVAL (now() - '1m'::interval, now(), '1m')
+avg by(sensor) (tql_source_mm) AS value;
+
+DROP TABLE tql_source_mm;
+DROP TABLE tql_sink_mm;
+
+-- Real merge_mode=last_non_null sink options should enable partial schema validation.
+CREATE TABLE lnn_source_mm (
+    device STRING,
+    val DOUBLE,
+    ts TIMESTAMP TIME INDEX
+);
+
+CREATE TABLE lnn_sink_mm (
+    device STRING,
+    time_window TIMESTAMP TIME INDEX,
+    cnt BIGINT,
+    PRIMARY KEY (device)
+) WITH('merge_mode'='last_non_null');
+
+-- This CREATE FLOW should fail through the last_non_null partial validator: the
+-- sink primary key "device" is required but absent from the flow output.
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW lnn_missing_pk_flow
+SINK TO lnn_sink_mm AS
+SELECT
+    date_bin(INTERVAL '1 second', ts) as time_window,
+    count(*) as cnt
+FROM
+    lnn_source_mm
+GROUP BY
+    time_window;
+
+DROP TABLE lnn_source_mm;
+DROP TABLE lnn_sink_mm;
diff --git a/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result
new file mode 100644
index 0000000000..53df353078
--- /dev/null
+++ b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result
@@ -0,0 +1,90 @@
+-- Regression for a TQL flow whose pre-created sink table is missing the value
+-- output column. The labels are intentionally minimal and anonymous.
+CREATE DATABASE source_schema;
+
+Affected Rows: 1
+
+CREATE DATABASE sink_schema;
+
+Affected Rows: 1
+
+USE source_schema;
+
+Affected Rows: 0
+
+CREATE TABLE metric_input (
+  namespace STRING NULL,
+  app STRING NULL,
+  greptime_timestamp TIMESTAMP(3) NOT NULL,
+  greptime_value DOUBLE NULL,
+  TIME INDEX (greptime_timestamp),
+  PRIMARY KEY (namespace, app)
+);
+
+Affected Rows: 0
+
+INSERT INTO metric_input VALUES
+  ('ns', 'app-a', '2026-01-23T03:40:00Z', 10.0),
+  ('ns', 'app-a', '2026-01-23T03:50:00Z', 20.0);
+
+Affected Rows: 2
+
+USE sink_schema;
+
+Affected Rows: 0
+
+-- Intentionally omit greptime_value DOUBLE from the pre-created sink table.
+CREATE TABLE missing_value_sink (
+  namespace STRING NULL,
+  app STRING NULL,
+  greptime_timestamp TIMESTAMP(3) NOT NULL,
+  TIME INDEX (greptime_timestamp),
+  PRIMARY KEY (namespace, app)
+)
+ENGINE=mito;
+
+Affected Rows: 0
+
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW missing_value_flow
+SINK TO sink_schema.missing_value_sink
+EVAL INTERVAL '3600 s'
+AS TQL EVAL (
+  date_bin('2m'::interval, now() - '2m'::interval),
+  date_bin('2m'::interval, now() - '2m'::interval),
+  '1h'
+)
+  avg by (namespace, app) (
+    avg_over_time(metric_input{__schema__="source_schema"}[1h])
+  );
+
+Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 4 flow output columns and 3 sink table columns. flow output columns: [\"namespace\", \"app\", \"greptime_timestamp\", \"avg(prom_avg_over_time(greptime_timestamp_range,greptime_value))\"], sink table columns: [\"namespace\", \"app\", \"greptime_timestamp\"], extra flow columns not in sink: [\"avg(prom_avg_over_time(greptime_timestamp_range,greptime_value))\"], missing sink columns from flow output: []") in context: Failed to rewrite plan
+
+DROP FLOW IF EXISTS missing_value_flow;
+
+Affected Rows: 0
+
+DROP TABLE missing_value_sink;
+
+Affected Rows: 0
+
+USE source_schema;
+
+Affected Rows: 0
+
+DROP TABLE metric_input;
+
+Affected Rows: 0
+
+USE public;
+
+Affected Rows: 0
+
+DROP DATABASE sink_schema;
+
+Affected Rows: 0
+
+DROP DATABASE source_schema;
+
+Affected Rows: 0
+
diff --git a/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql
new file mode 100644
index 0000000000..3693775800
--- /dev/null
+++ b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql
@@ -0,0 +1,55 @@
+-- Regression for a TQL flow whose pre-created sink table is missing the value
+-- output column. The labels are intentionally minimal and anonymous.
+
+CREATE DATABASE source_schema;
+CREATE DATABASE sink_schema;
+
+USE source_schema;
+
+CREATE TABLE metric_input (
+  namespace STRING NULL,
+  app STRING NULL,
+  greptime_timestamp TIMESTAMP(3) NOT NULL,
+  greptime_value DOUBLE NULL,
+  TIME INDEX (greptime_timestamp),
+  PRIMARY KEY (namespace, app)
+);
+
+INSERT INTO metric_input VALUES
+  ('ns', 'app-a', '2026-01-23T03:40:00Z', 10.0),
+  ('ns', 'app-a', '2026-01-23T03:50:00Z', 20.0);
+
+USE sink_schema;
+
+-- Intentionally omit greptime_value DOUBLE from the pre-created sink table.
+CREATE TABLE missing_value_sink (
+  namespace STRING NULL,
+  app STRING NULL,
+  greptime_timestamp TIMESTAMP(3) NOT NULL,
+  TIME INDEX (greptime_timestamp),
+  PRIMARY KEY (namespace, app)
+)
+ENGINE=mito;
+
+-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan
+CREATE FLOW missing_value_flow
+SINK TO sink_schema.missing_value_sink
+EVAL INTERVAL '3600 s'
+AS TQL EVAL (
+  date_bin('2m'::interval, now() - '2m'::interval),
+  date_bin('2m'::interval, now() - '2m'::interval),
+  '1h'
+)
+  avg by (namespace, app) (
+    avg_over_time(metric_input{__schema__="source_schema"}[1h])
+  );
+
+DROP FLOW IF EXISTS missing_value_flow;
+DROP TABLE missing_value_sink;
+
+USE source_schema;
+DROP TABLE metric_input;
+
+USE public;
+DROP DATABASE sink_schema;
+DROP DATABASE source_schema;
diff --git a/tests/data/csv/skip_bad_records.csv b/tests/data/csv/skip_bad_records.csv
new file mode 100644
index 0000000000..f4c40d5d6e
--- /dev/null
+++ b/tests/data/csv/skip_bad_records.csv
@@ -0,0 +1,4 @@
+host_id,host_name,reading_value,ts
+1,Alice,10.5,2024-01-01T00:00:00
+bad,Bad,20.0,2024-01-01T00:00:01
+2,Bob,30.5,2024-01-01T00:00:02