diff --git a/.github/workflows/dev-build.yml b/.github/workflows/dev-build.yml index d03fbeff14..c3af006f54 100644 --- a/.github/workflows/dev-build.yml +++ b/.github/workflows/dev-build.yml @@ -30,7 +30,7 @@ on: linux_arm64_runner: type: choice description: The runner uses to build linux-arm64 artifacts - default: ec2-c6g.4xlarge-arm64 + default: ec2-c6g.8xlarge-arm64 options: - ec2-c6g.xlarge-arm64 # 4C8G - ec2-c6g.2xlarge-arm64 # 8C16G diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 14ebb6e715..54af32a94b 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -27,7 +27,7 @@ on: linux_arm64_runner: type: choice description: The runner uses to build linux-arm64 artifacts - default: ec2-c6g.4xlarge-arm64 + default: ec2-c6g.8xlarge-arm64 options: - ec2-c6g.xlarge-arm64 # 4C8G - ec2-c6g.2xlarge-arm64 # 8C16G diff --git a/.github/workflows/nightly-jsonbench.yaml b/.github/workflows/nightly-jsonbench.yaml index 3667ee26a6..a9ce4dd363 100644 --- a/.github/workflows/nightly-jsonbench.yaml +++ b/.github/workflows/nightly-jsonbench.yaml @@ -1,19 +1,81 @@ name: Nightly JSONBench on: - schedule: - # Trigger at 00:00(Asia/Shanghai) on every weekday. - - cron: "0 16 * * 0-4" + workflow_run: + workflows: [ "GreptimeDB Nightly Build" ] + types: [ completed ] workflow_dispatch: + inputs: + run_id: + description: The nightly build workflow run id to download GreptimeDB artifacts from + required: true + type: string + +permissions: + actions: read + contents: read concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: + resolve-artifact: + name: Resolve GreptimeDB nightly artifact + if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }} + runs-on: ubuntu-latest + outputs: + artifact-name: ${{ steps.find-artifact.outputs.artifact-name }} + run-id: ${{ steps.resolve-run-id.outputs.run-id }} + steps: + - name: Resolve nightly build run id + id: resolve-run-id + shell: bash + env: + EVENT_NAME: ${{ github.event_name }} + WORKFLOW_RUN_ID: ${{ github.event.workflow_run.id }} + INPUT_RUN_ID: ${{ inputs.run_id }} + run: | + set -euo pipefail + + if [[ "${EVENT_NAME}" == "workflow_dispatch" ]]; then + run_id="${INPUT_RUN_ID}" + else + run_id="${WORKFLOW_RUN_ID}" + fi + + if [[ ! "${run_id}" =~ ^[0-9]+$ ]]; then + echo "Invalid workflow run id: ${run_id}" + exit 1 + fi + + echo "run-id=${run_id}" >> "${GITHUB_OUTPUT}" + + - name: Find GreptimeDB nightly artifact + id: find-artifact + shell: bash + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RUN_ID: ${{ steps.resolve-run-id.outputs.run-id }} + run: | + set -euo pipefail + + artifact_name=$(gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}/artifacts" --paginate \ + --jq '.artifacts[] | select(.name | test("^greptime-linux-arm64-nightly-[0-9]{8}-[0-9a-f]+$")) | .name' \ + | head -n 1) + + if [[ -z "${artifact_name}" ]]; then + echo "Cannot find linux arm64 nightly artifact in workflow run ${RUN_ID}." + exit 1 + fi + + echo "Download GreptimeDB artifact: ${artifact_name}" + echo "artifact-name=${artifact_name}" >> "${GITHUB_OUTPUT}" + allocate-runner: name: Allocate runner - if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} + if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }} + needs: [ resolve-artifact ] runs-on: ubuntu-latest outputs: linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} @@ -43,55 +105,50 @@ jobs: jsonbench: name: Run JSONBench - if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} - needs: [ allocate-runner ] + if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success') }} + needs: [ resolve-artifact, allocate-runner ] runs-on: ${{ needs.allocate-runner.outputs.linux-arm64-runner }} timeout-minutes: 120 env: - JSONBENCH_DATA_DIR: /home/runner/data/bluesky - JSONBENCH_OUTPUT_PREFIX: _ubuntu-latest + JSONBENCH_OUTPUT_PREFIX: _linux-arm64 steps: - - name: Checkout - uses: actions/checkout@v4 + - name: Download GreptimeDB nightly artifact + uses: actions/download-artifact@v4 with: - fetch-depth: 0 - persist-credentials: false + name: ${{ needs.resolve-artifact.outputs.artifact-name }} + path: greptimedb-artifact + github-token: ${{ secrets.GITHUB_TOKEN }} + run-id: ${{ needs.resolve-artifact.outputs.run-id }} - - uses: arduino/setup-protoc@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - uses: actions-rust-lang/setup-rust-toolchain@v1 - - - name: Rust Cache - uses: Swatinem/rust-cache@v2 - with: - shared-key: "nightly-jsonbench" - cache-all-crates: "true" - save-if: ${{ github.ref == 'refs/heads/main' }} - - - name: Build GreptimeDB - run: cargo build --profile nightly --bin greptime - - - name: Reclaim disk space + - name: Prepare GreptimeDB binary shell: bash run: | set -euo pipefail - mkdir -p "${RUNNER_TEMP}/greptimedb-bin" - cp ./target/nightly/greptime "${RUNNER_TEMP}/greptimedb-bin/greptime" - chmod +x "${RUNNER_TEMP}/greptimedb-bin/greptime" - - rm -rf ./target + tar -xzf "greptimedb-artifact/${{ needs.resolve-artifact.outputs.artifact-name }}.tar.gz" + cp "${{ needs.resolve-artifact.outputs.artifact-name }}/greptime" ./greptime + chmod +x ./greptime + rm -rf greptimedb-artifact "${{ needs.resolve-artifact.outputs.artifact-name }}" - name: Run JSONBench + env: + # TODO(LFC): Change to "3" (100m) when JSON2 ingestion performance is optimized. + JSONBENCH_DATASET: 2 shell: bash run: | set -euo pipefail - cd "${RUNNER_TEMP}" - cp "${RUNNER_TEMP}/greptimedb-bin/greptime" ./greptime - chmod +x ./greptime + export JSONBENCH_DATA_DIR="/root/data/bluesky" + echo "Use JSONBench data directory ${JSONBENCH_DATA_DIR}" + + echo "Cloning JSONBench" + git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench + + echo "Downloading JSONBench dataset choice ${JSONBENCH_DATASET} to ${JSONBENCH_DATA_DIR}" + mkdir -p "${JSONBENCH_DATA_DIR}" + printf "${JSONBENCH_DATASET}\n" | ./JSONBench/download_data.sh + downloaded_files=$(find "${JSONBENCH_DATA_DIR}" -type f | wc -l) + echo "Downloaded JSONBench dataset files: ${downloaded_files}" export GREPTIMEDB_STANDALONE__WAL__DIR=greptimedb_data/wal export GREPTIMEDB_STANDALONE__STORAGE__DATA_HOME=greptimedb_data @@ -100,10 +157,12 @@ jobs: export GREPTIMEDB_STANDALONE__HTTP__BODY_LIMIT=1GB export GREPTIMEDB_STANDALONE__HTTP__TIMEOUT=500s + echo "Starting GreptimeDB standalone" ./greptime standalone start > greptimedb.log 2>&1 & greptime_pid=$! trap 'kill "${greptime_pid}" 2>/dev/null || true' EXIT + echo "Waiting for GreptimeDB health check" until curl -s --fail -o /dev/null http://localhost:4000/health; do if ! kill -0 "${greptime_pid}" 2>/dev/null; then cat greptimedb.log @@ -111,12 +170,14 @@ jobs: fi sleep 1 done + echo "GreptimeDB is ready" - git clone --branch greptimedb-new-json --depth 1 https://github.com/GreptimeTeam/JSONBench.git JSONBench cp ./greptime JSONBench/greptimedb/greptime cd JSONBench/greptimedb - ./main.sh 3 "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false + echo "Running JSONBench main.sh with dataset choice ${JSONBENCH_DATASET} and install=false" + ./main.sh ${JSONBENCH_DATASET} "${JSONBENCH_DATA_DIR}" success.log error.log "${JSONBENCH_OUTPUT_PREFIX}" false + echo "JSONBench finished" - name: Upload JSONBench results if: always() @@ -124,21 +185,21 @@ jobs: with: name: jsonbench-results path: | - ${{ runner.temp }}/greptimedb.log - ${{ runner.temp }}/JSONBench/greptimedb/*.log - ${{ runner.temp }}/JSONBench/greptimedb/*.total_size - ${{ runner.temp }}/JSONBench/greptimedb/*.data_size - ${{ runner.temp }}/JSONBench/greptimedb/*.index_size - ${{ runner.temp }}/JSONBench/greptimedb/*.count - ${{ runner.temp }}/JSONBench/greptimedb/*.results_runtime - ${{ runner.temp }}/JSONBench/greptimedb/*.query_results + ./greptimedb.log + ./JSONBench/greptimedb/*.log + ./JSONBench/greptimedb/*.total_size + ./JSONBench/greptimedb/*.data_size + ./JSONBench/greptimedb/*.index_size + ./JSONBench/greptimedb/*.count + ./JSONBench/greptimedb/*.results_runtime + ./JSONBench/greptimedb/*.query_results if-no-files-found: ignore retention-days: 7 stop-linux-arm64-runner: name: Stop Linux ARM64 runner # It's always run as the last job in the workflow to make sure that the runner is released. - if: ${{ always() }} + if: ${{ always() && needs.allocate-runner.outputs.linux-arm64-ec2-runner-instance-id != '' }} runs-on: ubuntu-latest needs: [ allocate-runner, diff --git a/Cargo.lock b/Cargo.lock index a65159d26a..2485a5ceec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,8 +79,9 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -771,7 +772,7 @@ version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891" dependencies = [ - "term", + "term 1.0.2", ] [[package]] @@ -1427,6 +1428,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "borrow-or-share" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c" + [[package]] name = "borsh" version = "1.5.7" @@ -1525,6 +1532,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "bytemuck" version = "1.23.1" @@ -1635,7 +1648,7 @@ dependencies = [ "paste", "prometheus 0.14.0", "promql-parser", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "session", @@ -1973,7 +1986,7 @@ dependencies = [ "partition", "paste", "query", - "rand 0.9.1", + "rand 0.9.4", "reqwest 0.13.2", "serde", "serde_json", @@ -2020,7 +2033,7 @@ dependencies = [ "prometheus 0.14.0", "prost 0.14.1", "query", - "rand 0.9.1", + "rand 0.9.4", "serde_json", "snafu 0.8.6", "store-api", @@ -2031,6 +2044,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "clipboard-win" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" +dependencies = [ + "error-code", +] + [[package]] name = "clocksource" version = "0.8.1" @@ -2123,7 +2145,7 @@ dependencies = [ "prometheus 0.14.0", "prost 0.14.1", "query", - "rand 0.9.1", + "rand 0.9.4", "regex", "reqwest 0.13.2", "serde", @@ -2462,7 +2484,7 @@ dependencies = [ "hyper-util", "lazy_static", "prost 0.14.1", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "snafu 0.8.6", @@ -2581,7 +2603,7 @@ dependencies = [ "prometheus 0.14.0", "prost 0.14.1", "prost-types 0.14.1", - "rand 0.9.1", + "rand 0.9.4", "regex", "rskafka", "rustls", @@ -2649,7 +2671,7 @@ dependencies = [ "futures-util", "humantime-serde", "object-store", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "smallvec", @@ -2833,7 +2855,7 @@ dependencies = [ "common-query", "common-recordbatch", "once_cell", - "rand 0.9.1", + "rand 0.9.4", "tempfile", ] @@ -2849,7 +2871,7 @@ dependencies = [ "humantime", "humantime-serde", "once_cell", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "snafu 0.8.6", @@ -3594,6 +3616,12 @@ dependencies = [ "parking_lot_core 0.9.11", ] +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + [[package]] name = "datafusion" version = "53.1.0" @@ -3638,7 +3666,7 @@ dependencies = [ "object_store", "parking_lot 0.12.4", "parquet", - "rand 0.9.1", + "rand 0.9.4", "regex", "sqlparser", "tempfile", @@ -3755,7 +3783,7 @@ dependencies = [ "liblzma", "log", "object_store", - "rand 0.9.1", + "rand 0.9.4", "tokio", "tokio-util", "url", @@ -3881,7 +3909,7 @@ dependencies = [ "log", "object_store", "parking_lot 0.12.4", - "rand 0.9.1", + "rand 0.9.4", "tempfile", "url", ] @@ -3944,7 +3972,7 @@ dependencies = [ "md-5 0.10.6", "memchr", "num-traits", - "rand 0.9.1", + "rand 0.9.4", "regex", "sha2 0.10.9", "unicode-segmentation", @@ -4234,7 +4262,7 @@ dependencies = [ "datafusion-proto-common", "object_store", "prost 0.14.1", - "rand 0.9.1", + "rand 0.9.4", ] [[package]] @@ -4685,6 +4713,27 @@ dependencies = [ "crypto-common 0.2.1", ] +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -4713,14 +4762,14 @@ dependencies = [ [[package]] name = "dns-lookup" -version = "2.0.4" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc" +checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ "cfg-if", "libc", - "socket2 0.5.10", - "windows-sys 0.48.0", + "socket2 0.6.0", + "windows-sys 0.60.2", ] [[package]] @@ -4740,31 +4789,30 @@ dependencies = [ [[package]] name = "domain" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9" +checksum = "8c469892dddfeff64ecfdbc64cf059c77fb0decaeccd4d5d484394bdd6312bac" dependencies = [ "bumpalo", "bytes", "domain-macros", "futures-util", - "hashbrown 0.14.5", + "hashbrown 0.17.1", + "jiff", "log", - "moka", "octseq", - "rand 0.8.5", + "rand 0.10.1", "serde", "smallvec", - "time", "tokio", "tracing", ] [[package]] name = "domain-macros" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70" +checksum = "6fef7ef74e413e36d5364db163ca577ccb56f2f74377705d5f920ee3e1544127" dependencies = [ "proc-macro2", "quote", @@ -4849,6 +4897,15 @@ dependencies = [ "serde", ] +[[package]] +name = "email_address" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e079f19b08ca6239f47f8ba8509c11cf3ea30095831f7fed61441475edd8c449" +dependencies = [ + "serde", +] + [[package]] name = "ena" version = "0.14.3" @@ -4962,6 +5019,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "error-code" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" + [[package]] name = "etcd-client" version = "0.17.0" @@ -5018,6 +5081,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "exitcode" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" + [[package]] name = "fail" version = "0.5.1" @@ -5043,9 +5112,9 @@ checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fancy-regex" -version = "0.14.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" dependencies = [ "bit-set", "regex-automata", @@ -5274,7 +5343,7 @@ dependencies = [ "prometheus 0.14.0", "prost 0.14.1", "query", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "servers", @@ -5299,6 +5368,17 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "fluent-uri" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc74ac4d8359ae70623506d512209619e5cf8f347124910440dbc221714b328e" +dependencies = [ + "borrow-or-share", + "ref-cast", + "serde", +] + [[package]] name = "flume" version = "0.11.1" @@ -5337,6 +5417,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.15.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e076045bb43dac435333ed5f04caf35c7463631d0dae2deb2638d94dd0a5b872" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "fragile" version = "2.0.1" @@ -5405,7 +5495,7 @@ dependencies = [ "promql-parser", "prost 0.14.1", "query", - "rand 0.9.1", + "rand 0.9.4", "reqwest 0.13.2", "serde", "serde_json", @@ -5756,21 +5846,21 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasip2", "wasm-bindgen", ] @@ -5842,9 +5932,9 @@ dependencies = [ [[package]] name = "grok" -version = "2.1.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c52724b609896f661a3f4641dd3a44dc602958ef615857c12d00756b4e9355b" +checksum = "6ddab6a9c8bb998cb2fc3101fde8ef561b7c4970db3957be7a8eee1e168f666b" dependencies = [ "glob", "onig", @@ -5984,6 +6074,9 @@ name = "hashbrown" version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", +] [[package]] name = "hashlink" @@ -6650,7 +6743,7 @@ dependencies = [ "pin-project", "prost 0.14.1", "puffin", - "rand 0.9.1", + "rand 0.9.4", "rand_chacha 0.9.0", "regex", "regex-automata", @@ -6841,6 +6934,15 @@ dependencies = [ "derive_utils", ] +[[package]] +name = "ipcrypt-rs" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96e4f67dbfc0f75d7b65953ecf0be3fd84ee0cb1ae72a00a4aa9a2f5518a2c80" +dependencies = [ + "aes", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -7016,6 +7118,36 @@ dependencies = [ "windows-sys 0.45.0", ] +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "jni-macros", + "jni-sys 0.4.1", + "log", + "simd_cesu8", + "thiserror 2.0.17", + "walkdir", + "windows-link 0.2.1", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn 2.0.117", +] + [[package]] name = "jni-sys" version = "0.3.1" @@ -7050,7 +7182,7 @@ version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] @@ -7139,11 +7271,38 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c6e529149475ca0b2820835d3dce8fcc41c6b943ca608d32f35b449255e4627" dependencies = [ - "fluent-uri", + "fluent-uri 0.1.4", "serde", "serde_json", ] +[[package]] +name = "jsonschema" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89f50532ce4a0ba3ae930212908d8ec50e7806065c059fe9c75da2ece6132294" +dependencies = [ + "ahash 0.8.12", + "bytecount", + "data-encoding", + "email_address", + "fancy-regex", + "fraction", + "getrandom 0.3.4", + "idna", + "itoa", + "num-cmp", + "num-traits", + "percent-encoding", + "referencing", + "regex", + "regex-syntax", + "serde", + "serde_json", + "unicode-general-category", + "uuid-simd", +] + [[package]] name = "jsonwebtoken" version = "10.3.0" @@ -7337,7 +7496,7 @@ dependencies = [ "regex-syntax", "sha3", "string_cache", - "term", + "term 1.0.2", "unicode-xid", "walkdir", ] @@ -7736,7 +7895,7 @@ dependencies = [ "protobuf 2.28.0", "protobuf-build", "raft-engine", - "rand 0.9.1", + "rand 0.9.4", "rskafka", "serde", "serde_json", @@ -8043,7 +8202,7 @@ dependencies = [ "futures-util", "humantime-serde", "meta-srv", - "rand 0.9.1", + "rand 0.9.4", "serde", "serde_json", "session", @@ -8111,7 +8270,7 @@ dependencies = [ "partition", "prometheus 0.14.0", "prost 0.14.1", - "rand 0.9.1", + "rand 0.9.4", "regex", "rskafka", "serde", @@ -8253,7 +8412,7 @@ checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", "log", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "windows-sys 0.59.0", ] @@ -8343,7 +8502,7 @@ dependencies = [ "prometheus 0.14.0", "prost 0.14.1", "puffin", - "rand 0.9.1", + "rand 0.9.4", "rayon", "regex", "roaring", @@ -8656,6 +8815,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "ndk-context" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" + [[package]] name = "neli" version = "0.6.5" @@ -8758,6 +8923,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "nom-language" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2de2bc5b451bfedaef92c90b8939a8fff5770bdcc1fafd6239d086aab8fa6b29" +dependencies = [ + "nom 8.0.0", +] + [[package]] name = "notify" version = "8.0.0" @@ -8841,6 +9015,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + [[package]] name = "num-complex" version = "0.4.6" @@ -9001,6 +9181,31 @@ dependencies = [ "libc", ] +[[package]] +name = "objc2" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" +dependencies = [ + "objc2-encode", +] + +[[package]] +name = "objc2-encode" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33" + +[[package]] +name = "objc2-foundation" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" +dependencies = [ + "bitflags 2.11.1", + "objc2", +] + [[package]] name = "object" version = "0.36.7" @@ -9040,7 +9245,7 @@ dependencies = [ "object_store_opendal", "opendal", "prometheus 0.14.0", - "rand 0.9.1", + "rand 0.9.4", "reqwest 0.13.2", "serde", "snafu 0.8.6", @@ -9094,9 +9299,9 @@ dependencies = [ [[package]] name = "octseq" -version = "0.5.2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "126c3ca37c9c44cec575247f43a3e4374d8927684f129d2beeb0d2cef262fe12" +checksum = "182eab3e1cd9cdc0ecf1ce3342d9844f3dc7d098f0694569bfdf327b612d69fd" dependencies = [ "bytes", "serde", @@ -9552,7 +9757,7 @@ dependencies = [ "futures-util", "opentelemetry 0.30.0", "percent-encoding", - "rand 0.9.1", + "rand 0.9.4", "serde_json", "thiserror 2.0.17", "tokio", @@ -9570,7 +9775,7 @@ dependencies = [ "futures-util", "opentelemetry 0.31.0", "percent-encoding", - "rand 0.9.1", + "rand 0.9.4", "thiserror 2.0.17", ] @@ -9748,7 +9953,7 @@ dependencies = [ "paste", "prost 0.14.1", "prost-build 0.14.1", - "rand 0.9.1", + "rand 0.9.4", "replace_with", "serde", "smallvec", @@ -10154,7 +10359,7 @@ dependencies = [ "md5", "pg_interval_2", "postgres-types", - "rand 0.10.0", + "rand 0.10.1", "rust_decimal", "rustls-pki-types", "ryu", @@ -10531,7 +10736,7 @@ dependencies = [ "hmac", "md-5 0.10.6", "memchr", - "rand 0.9.1", + "rand 0.9.4", "sha2 0.10.9", "stringprep", ] @@ -10680,6 +10885,19 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "prettytable-rs" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" +dependencies = [ + "encode_unicode", + "is-terminal", + "lazy_static", + "term 0.7.0", + "unicode-width 0.1.14", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -10845,7 +11063,7 @@ checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" dependencies = [ "bitflags 2.11.1", "num-traits", - "rand 0.9.1", + "rand 0.9.4", "rand_chacha 0.9.0", "rand_xorshift", "regex-syntax", @@ -11272,7 +11490,7 @@ dependencies = [ "promql", "promql-parser", "prost 0.14.1", - "rand 0.9.1", + "rand 0.9.4", "regex", "serde", "serde_json", @@ -11345,9 +11563,9 @@ checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "aws-lc-rs", "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", - "rand 0.9.1", + "rand 0.9.4", "ring", "rustc-hash 2.1.1", "rustls", @@ -11453,9 +11671,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.1" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", @@ -11463,9 +11681,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20 0.10.0", "getrandom 0.4.1", @@ -11508,7 +11726,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -11601,6 +11819,17 @@ dependencies = [ "bitflags 2.11.1", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.16", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "ref-cast" version = "1.0.24" @@ -11621,6 +11850,21 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "referencing" +version = "0.38.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15a8af0c6bb8eaf8b07cb06fc31ff30ca6fe19fb99afa476c276d8b24f365b0b" +dependencies = [ + "ahash 0.8.12", + "fluent-uri 0.4.1", + "getrandom 0.3.4", + "hashbrown 0.16.1", + "parking_lot 0.12.4", + "percent-encoding", + "serde_json", +] + [[package]] name = "regex" version = "1.12.2" @@ -11686,6 +11930,15 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "relative-path" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bca40a312222d8ba74837cb474edef44b37f561da5f773981007a10bbaa992b0" +dependencies = [ + "serde", +] + [[package]] name = "rend" version = "0.4.2" @@ -11825,6 +12078,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", + "h2 0.4.11", "http 1.3.1", "http-body 1.0.1", "http-body-util", @@ -11837,6 +12091,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls", + "rustls-native-certs 0.8.1", "rustls-pki-types", "serde", "serde_json", @@ -11898,6 +12153,50 @@ dependencies = [ "web-sys", ] +[[package]] +name = "reqwest-middleware" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57f17d28a6e6acfe1733fe24bcd30774d13bffa4b8a22535b4c8c98423088d4e" +dependencies = [ + "anyhow", + "async-trait", + "http 1.3.1", + "reqwest 0.12.28", + "serde", + "thiserror 1.0.69", + "tower-service", +] + +[[package]] +name = "reqwest-retry" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "105747e3a037fe5bf17458d794de91149e575b6183fc72c85623a44abb9683f5" +dependencies = [ + "anyhow", + "async-trait", + "futures", + "getrandom 0.2.16", + "http 1.3.1", + "hyper 1.6.0", + "reqwest 0.12.28", + "reqwest-middleware", + "retry-policies", + "thiserror 2.0.17", + "tokio", + "wasmtimer", +] + +[[package]] +name = "retry-policies" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc05fbf560421a0357a750cbe78c7ca19d4923918490daabba313d5dbc871e47" +dependencies = [ + "rand 0.10.1", +] + [[package]] name = "rgb" version = "0.8.50" @@ -11985,9 +12284,12 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb" +dependencies = [ + "memchr", +] [[package]] name = "rsa" @@ -12042,7 +12344,7 @@ dependencies = [ "integer-encoding 4.0.2", "lz4", "parking_lot 0.12.4", - "rand 0.9.1", + "rand 0.9.4", "rsasl", "rustls", "snap", @@ -12088,7 +12390,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "relative-path", + "relative-path 1.9.3", "rustc_version", "syn 2.0.117", "unicode-ident", @@ -12309,7 +12611,7 @@ checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni", + "jni 0.21.1", "log", "once_cell", "rustls", @@ -12346,6 +12648,25 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" +[[package]] +name = "rustyline" +version = "17.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" +dependencies = [ + "bitflags 2.11.1", + "cfg-if", + "clipboard-win", + "libc", + "log", + "memchr", + "nix 0.30.1", + "unicode-segmentation", + "unicode-width 0.2.1", + "utf8parse", + "windows-sys 0.60.2", +] + [[package]] name = "ryu" version = "1.0.20" @@ -12724,6 +13045,19 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "serde_yaml_ng" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4db627b98b36d4203a7b458cf3573730f2bb591b28871d916dfa9efabfd41f" +dependencies = [ + "indexmap 2.13.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "servers" version = "1.1.0" @@ -12820,7 +13154,7 @@ dependencies = [ "prost 0.14.1", "query", "quoted-string", - "rand 0.9.1", + "rand 0.9.4", "regex", "reqwest 0.13.2", "rust-embed", @@ -12998,7 +13332,7 @@ version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c962f626b54771990066e5435ec8331d1462576cd2d1e62f24076ae014f92112" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "halfbrown", "ref-cast", "serde", @@ -13007,6 +13341,16 @@ dependencies = [ "value-trait", ] +[[package]] +name = "simd_cesu8" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" +dependencies = [ + "rustc_version", + "simdutf8", +] + [[package]] name = "simdutf8" version = "0.1.5" @@ -13898,12 +14242,12 @@ dependencies = [ [[package]] name = "syslog_loose" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "161028c00842709450114c39db3b29f44c898055ed8833bb9b535aba7facf30e" +checksum = "d6ec4df26907adce53e94eac201a9ba38744baea3bc97f34ffd591d5646231a6" dependencies = [ "chrono", - "nom 7.1.3", + "nom 8.0.0", ] [[package]] @@ -14147,12 +14491,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix 1.0.7", "windows-sys 0.61.2", ] +[[package]] +name = "term" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" +dependencies = [ + "dirs-next", + "rustversion", + "winapi", +] + [[package]] name = "term" version = "1.0.2" @@ -14205,7 +14560,7 @@ dependencies = [ "nix 0.28.0", "partition", "paste", - "rand 0.9.1", + "rand 0.9.4", "rand_chacha 0.9.0", "reqwest 0.13.2", "rustls", @@ -14292,7 +14647,7 @@ dependencies = [ "plugins", "prost 0.14.1", "query", - "rand 0.9.1", + "rand 0.9.4", "rstest", "rstest_reuse", "sea-query", @@ -14582,7 +14937,7 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "postgres-types", - "rand 0.9.1", + "rand 0.9.4", "socket2 0.5.10", "tokio", "tokio-util", @@ -15233,6 +15588,12 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" +[[package]] +name = "unicode-general-category" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b993bddc193ae5bd0d623b49ec06ac3e9312875fdae725a975c51db1cc1677f" + [[package]] name = "unicode-ident" version = "1.0.22" @@ -15366,11 +15727,21 @@ checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ "getrandom 0.4.1", "js-sys", - "rand 0.9.1", + "rand 0.9.4", "serde_core", "wasm-bindgen", ] +[[package]] +name = "uuid-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "valuable" version = "0.1.1" @@ -15453,9 +15824,9 @@ dependencies = [ [[package]] name = "vrl" -version = "0.25.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419" +checksum = "925a4d3321b18a200c82c3ec02ee2be2b4bf16db07a5ce7e2a9a888b795ea862" dependencies = [ "aes", "aes-siv", @@ -15485,8 +15856,10 @@ dependencies = [ "domain", "dyn-clone", "encoding_rs", + "exitcode", "fancy-regex", "flate2", + "getrandom 0.3.4", "grok", "hex", "hmac", @@ -15496,12 +15869,15 @@ dependencies = [ "indexmap 2.13.0", "indoc", "influxdb-line-protocol", + "ipcrypt-rs", "itertools 0.14.0", + "jsonschema", "lalrpop", "lalrpop-util", "lz4_flex 0.11.6", "md-5 0.10.6", - "nom 7.1.3", + "nom 8.0.0", + "nom-language", "ofb", "onig", "ordered-float 4.6.0", @@ -15510,20 +15886,27 @@ dependencies = [ "percent-encoding", "pest", "pest_derive", + "prettytable-rs", "prost 0.13.5", "prost-reflect", "psl", "psl-types", "publicsuffix", "quoted_printable", - "rand 0.8.5", + "rand 0.9.4", "regex", + "relative-path 2.0.1", + "reqwest 0.12.28", + "reqwest-middleware", + "reqwest-retry", "roxmltree", "rust_decimal", + "rustyline", "seahash", "serde", "serde_json", "serde_yaml", + "serde_yaml_ng", "sha-1", "sha2 0.10.9", "sha3", @@ -15531,6 +15914,8 @@ dependencies = [ "snafu 0.8.6", "snap", "strip-ansi-escapes", + "strum 0.26.3", + "strum_macros 0.26.4", "syslog_loose", "termcolor", "thiserror 2.0.17", @@ -15541,7 +15926,9 @@ dependencies = [ "url", "utf8-width", "uuid", + "webbrowser", "woothee", + "xxhash-rust", "zstd", ] @@ -15585,15 +15972,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - [[package]] name = "wasip2" version = "1.0.2+wasi-0.2.9" @@ -15734,6 +16112,20 @@ dependencies = [ "semver", ] +[[package]] +name = "wasmtimer" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c598d6b99ea013e35844697fc4670d08339d5cda15588f193c6beedd12f644b" +dependencies = [ + "futures", + "js-sys", + "parking_lot 0.12.4", + "pin-utils", + "slab", + "wasm-bindgen", +] + [[package]] name = "web-sys" version = "0.3.95" @@ -15754,6 +16146,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webbrowser" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc95580916af1e68ff6a7be07446fc5db73ebf71cf092de939bbf5f7e189f72" +dependencies = [ + "core-foundation 0.10.1", + "jni 0.22.4", + "log", + "ndk-context", + "objc2", + "objc2-foundation", + "url", + "web-sys", +] + [[package]] name = "webpki" version = "0.22.4" @@ -16040,6 +16448,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -16088,13 +16505,30 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + [[package]] name = "windows-threading" version = "0.1.0" @@ -16122,6 +16556,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -16140,6 +16580,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -16158,12 +16604,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -16182,6 +16640,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -16200,6 +16664,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -16218,6 +16688,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -16236,6 +16712,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.5.40" @@ -16283,15 +16765,6 @@ dependencies = [ "wit-parser", ] -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags 2.11.1", -] - [[package]] name = "wit-bindgen-rust" version = "0.51.0" @@ -16445,6 +16918,12 @@ dependencies = [ "rustix 1.0.7", ] +[[package]] +name = "xxhash-rust" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/Cargo.toml b/Cargo.toml index 56200a24d6..ee8d4dcf11 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -259,7 +259,7 @@ tracing-opentelemetry = "0.31.0" tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] } typetag = "0.2" uuid = { version = "1.17", features = ["serde", "v4", "v7", "fast-rng"] } -vrl = "0.25" +vrl = "0.33" zstd = "0.13" # DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES diff --git a/config/config.md b/config/config.md index d9cffaf122..df06d2153c 100644 --- a/config/config.md +++ b/config/config.md @@ -451,6 +451,7 @@ | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | | `max_concurrent_queries` | Integer | `0` | The maximum concurrent queries allowed to be executed. Zero means unlimited. | +| `concurrent_query_limiter_timeout` | String | `100ms` | Timeout to acquire a permit from the concurrent query limiter when `max_concurrent_queries` is reached. | | `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `http` | -- | -- | The HTTP server options. | | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index d558918daf..9351c4e85d 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -20,6 +20,9 @@ init_regions_parallelism = 16 ## The maximum concurrent queries allowed to be executed. Zero means unlimited. max_concurrent_queries = 0 +## Timeout to acquire a permit from the concurrent query limiter when `max_concurrent_queries` is reached. +concurrent_query_limiter_timeout = "100ms" + ## Enable telemetry to collect anonymous usage data. Enabled by default. #+ enable_telemetry = true diff --git a/src/cli/src/data/export_v2/command.rs b/src/cli/src/data/export_v2/command.rs index db0f576a4e..bb027bbef1 100644 --- a/src/cli/src/data/export_v2/command.rs +++ b/src/cli/src/data/export_v2/command.rs @@ -1077,7 +1077,9 @@ async fn verify_snapshot(storage: &OpenDalStorage) -> Result { )); } let data_files = storage.list_files_recursive("data/").await?; - if let Some(path) = data_files.first() { + // Report the lexicographically smallest path so the message is stable + // regardless of listing order across backends. + if let Some(path) = data_files.iter().min() { report.push_error(format!( "Schema-only snapshot should not contain data files (found '{}')", path @@ -1103,75 +1105,113 @@ fn summarize_chunks(manifest: &Manifest) -> VerifyChunkSummary { } } +/// A data file declared by a completed chunk that is expected to exist in storage. +#[derive(Debug)] +struct ChunkFile { + chunk_id: u32, + path: String, +} + +/// Expected snapshot contents derived purely from the manifest (no object-store IO). +/// +/// Separating planning from scanning makes it obvious which problems come from +/// the manifest alone and which require comparing against actual storage. +#[derive(Debug, Default)] +struct VerifyPlan { + /// Valid data files declared by completed chunks; each must exist in storage. + files_to_check: Vec, + /// All syntactically-safe data paths declared by any chunk, regardless of + /// status. Used as the orphan-detection baseline so a listed-but-invalid + /// file is not also reported as unexpected. + claimed_data_files: HashSet, + /// Total data-file references in completed chunks (valid + invalid). + data_files_total: usize, + /// Problems detectable from the manifest alone. + problems: Vec, +} + +/// Actual data files discovered under `data/` (the only object-store IO in +/// chunk/data-file verification). +#[derive(Debug)] +struct VerifyDataScan { + existing_data_files: HashSet, +} + +/// Result of reconciling the manifest plan against the storage scan. +#[derive(Debug, Default)] +struct VerifyOutcome { + data_files_total: usize, + data_files_verified: usize, + problems: Vec, +} + async fn verify_chunks_and_data_files( storage: &OpenDalStorage, report: &mut VerifyReport, ) -> Result<()> { - let existing_files: HashSet<_> = storage - .list_files_recursive("data/") - .await? - .into_iter() - .collect(); - let mut data_files_total = 0; - let mut data_files_verified = 0; - let mut problems = Vec::new(); - let mut seen_chunk_ids = HashSet::new(); - let mut claimed_data_files = HashSet::new(); + let plan = build_verify_plan(&report.manifest); + let scan = scan_data_files(storage).await?; + let outcome = reconcile_plan_with_scan(plan, &scan); - for chunk in &report.manifest.chunks { + report.data_files_total = outcome.data_files_total; + report.data_files_verified = outcome.data_files_verified; + report.problems.extend(outcome.problems); + + Ok(()) +} + +/// Builds the expected-state plan from the manifest. Pure; performs no IO. +fn build_verify_plan(manifest: &Manifest) -> VerifyPlan { + let mut plan = VerifyPlan::default(); + let mut seen_chunk_ids = HashSet::new(); + + for chunk in &manifest.chunks { if !seen_chunk_ids.insert(chunk.id) { - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!("Chunk {}: duplicate chunk id", chunk.id), }); } for file in &chunk.files { if let Some(path) = safe_manifest_data_file_path(file) { - claimed_data_files.insert(path.to_string()); + plan.claimed_data_files.insert(path.to_string()); } } match chunk.status { ChunkStatus::Completed => { if chunk.files.is_empty() { - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!("Chunk {}: completed chunk has no data files", chunk.id), }); continue; } - let allowed_prefixes = report - .manifest + let allowed_prefixes = manifest .schemas .iter() .map(|schema| data_dir_for_schema_chunk(schema, chunk.id)) .collect::>(); for file in &chunk.files { - data_files_total += 1; - let Some(path) = valid_manifest_data_file_path(file, &allowed_prefixes) else { - problems.push(VerifyProblem { + plan.data_files_total += 1; + match valid_manifest_data_file_path(file, &allowed_prefixes) { + Some(path) => plan.files_to_check.push(ChunkFile { + chunk_id: chunk.id, + path: path.to_string(), + }), + None => plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!( "Chunk {}: invalid data file path '{}'", chunk.id, file ), - }); - continue; - }; - - if existing_files.contains(path) { - data_files_verified += 1; - } else { - problems.push(VerifyProblem { - severity: VerifySeverity::Error, - message: format!("Chunk {}: missing file '{}'", chunk.id, path), - }); + }), } } } ChunkStatus::Skipped => { if !chunk.files.is_empty() { - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!( "Chunk {}: skipped chunk should not list data files", @@ -1181,20 +1221,20 @@ async fn verify_chunks_and_data_files( } } ChunkStatus::Pending => { - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!("Chunk {}: status is 'pending'", chunk.id), }); } ChunkStatus::InProgress => { - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!("Chunk {}: status is 'in_progress'", chunk.id), }); } ChunkStatus::Failed => { let reason = chunk.error.as_deref().unwrap_or("unknown error"); - problems.push(VerifyProblem { + plan.problems.push(VerifyProblem { severity: VerifySeverity::Error, message: format!("Chunk {}: status is 'failed' (error: {})", chunk.id, reason), }); @@ -1202,20 +1242,60 @@ async fn verify_chunks_and_data_files( } } - for path in &existing_files { - if !claimed_data_files.contains(path) { + plan +} + +/// Lists all data files under `data/`. This is the only object-store IO in +/// chunk/data-file verification. +async fn scan_data_files(storage: &OpenDalStorage) -> Result { + let existing_data_files = storage + .list_files_recursive("data/") + .await? + .into_iter() + .collect(); + Ok(VerifyDataScan { + existing_data_files, + }) +} + +/// Reconciles the manifest plan against the storage scan. Pure; performs no IO. +/// +/// Emits missing-file problems for expected files absent from storage and +/// unexpected-file problems for storage files no chunk claims. Unexpected files +/// are sorted by path so output is deterministic regardless of listing order. +fn reconcile_plan_with_scan(plan: VerifyPlan, scan: &VerifyDataScan) -> VerifyOutcome { + let mut problems = plan.problems; + let mut data_files_verified = 0; + + for file in &plan.files_to_check { + if scan.existing_data_files.contains(&file.path) { + data_files_verified += 1; + } else { problems.push(VerifyProblem { severity: VerifySeverity::Error, - message: format!("Unexpected data file '{}' is not listed in manifest", path), + message: format!("Chunk {}: missing file '{}'", file.chunk_id, file.path), }); } } - report.data_files_total = data_files_total; - report.data_files_verified = data_files_verified; - report.problems.extend(problems); + let mut orphans: Vec<&String> = scan + .existing_data_files + .iter() + .filter(|path| !plan.claimed_data_files.contains(*path)) + .collect(); + orphans.sort(); + for path in orphans { + problems.push(VerifyProblem { + severity: VerifySeverity::Error, + message: format!("Unexpected data file '{}' is not listed in manifest", path), + }); + } - Ok(()) + VerifyOutcome { + data_files_total: plan.data_files_total, + data_files_verified, + problems, + } } fn valid_manifest_data_file_path<'a>( @@ -2294,6 +2374,90 @@ mod tests { ); } + #[test] + fn test_build_verify_plan_classifies_chunks_without_io() { + let mut manifest = test_manifest( + chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(), + false, + true, + ); + // test_manifest(complete) gives: chunk 1 completed (1 file), chunk 2 skipped. + let mut failed = ChunkMeta::new(3, TimeRange::unbounded()); + failed.mark_failed("boom".to_string()); + manifest.chunks.push(failed); + manifest + .chunks + .push(ChunkMeta::new(4, TimeRange::unbounded())); + + let plan = build_verify_plan(&manifest); + + assert_eq!(plan.files_to_check.len(), 1); + assert_eq!(plan.files_to_check[0].chunk_id, 1); + assert_eq!(plan.files_to_check[0].path, "data/public/1/file.parquet"); + assert_eq!(plan.data_files_total, 1); + assert!( + plan.claimed_data_files + .contains("data/public/1/file.parquet") + ); + assert_eq!(plan.problems.len(), 2); + assert!( + plan.problems + .iter() + .any(|problem| problem.message.contains("status is 'failed'")) + ); + assert!( + plan.problems + .iter() + .any(|problem| problem.message.contains("status is 'pending'")) + ); + } + + #[tokio::test] + async fn test_verify_snapshot_produces_deterministic_problem_output() { + let dir = tempdir().unwrap(); + let manifest = test_manifest( + chrono::Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(), + false, + true, + ); + write_root_manifest(dir.path(), manifest); + write_snapshot_file(dir.path(), "schema/schemas.json", b"[]"); + write_default_ddl_files(dir.path()); + write_snapshot_file(dir.path(), "data/public/1/file.parquet", b"data"); + // Many orphan files under a known chunk prefix to stress ordering. + for i in 0..50 { + write_snapshot_file( + dir.path(), + &format!("data/public/1/orphan_{:02}.parquet", i), + b"x", + ); + } + + let storage = file_storage_for_dir(dir.path()); + let messages = |report: &VerifyReport| { + report + .problems + .iter() + .map(|problem| problem.message.clone()) + .collect::>() + }; + let first = messages(&verify_snapshot(&storage).await.unwrap()); + let second = messages(&verify_snapshot(&storage).await.unwrap()); + + // Output is identical across runs despite HashSet-based scanning. + assert_eq!(first, second); + + let orphans = first + .iter() + .filter(|message| message.contains("Unexpected data file")) + .cloned() + .collect::>(); + assert_eq!(orphans.len(), 50); + let mut sorted = orphans.clone(); + sorted.sort(); + assert_eq!(orphans, sorted); + } + fn write_test_manifest(root: &std::path::Path, dir: &str, manifest: Manifest) { let snapshot_dir = root.join(dir); std::fs::create_dir_all(&snapshot_dir).unwrap(); diff --git a/src/cmd/src/datanode/scanbench.rs b/src/cmd/src/datanode/scanbench.rs index b26705991c..b2a715ad31 100644 --- a/src/cmd/src/datanode/scanbench.rs +++ b/src/cmd/src/datanode/scanbench.rs @@ -524,6 +524,7 @@ impl ScanbenchCommand { options: HashMap::default(), skip_wal_replay: !self.enable_wal, checkpoint: None, + requirements: Default::default(), }; engine diff --git a/src/common/datasource/src/file_format.rs b/src/common/datasource/src/file_format.rs index e36f94c0d2..d9d7b8b648 100644 --- a/src/common/datasource/src/file_format.rs +++ b/src/common/datasource/src/file_format.rs @@ -61,6 +61,7 @@ pub const FORMAT_COMPRESSION_TYPE: &str = "compression_type"; pub const FORMAT_DELIMITER: &str = "delimiter"; pub const FORMAT_SCHEMA_INFER_MAX_RECORD: &str = "schema_infer_max_record"; pub const FORMAT_HAS_HEADER: &str = "has_header"; +pub const FORMAT_SKIP_BAD_RECORDS: &str = "skip_bad_records"; pub const FORMAT_TYPE: &str = "format"; pub const FILE_PATTERN: &str = "pattern"; pub const TIMESTAMP_FORMAT: &str = "timestamp_format"; diff --git a/src/common/datasource/src/file_format/csv.rs b/src/common/datasource/src/file_format/csv.rs index 77ea553f35..2b39051b48 100644 --- a/src/common/datasource/src/file_format/csv.rs +++ b/src/common/datasource/src/file_format/csv.rs @@ -13,15 +13,24 @@ // limitations under the License. use std::collections::HashMap; +use std::io; use std::str::FromStr; +use std::sync::Arc; +use std::task::Poll; use arrow::csv::reader::Format; use arrow::csv::{self, WriterBuilder}; +use arrow::error::ArrowError; use arrow::record_batch::RecordBatch; -use arrow_schema::Schema; +use arrow_schema::{Schema, SchemaRef}; use async_trait::async_trait; +use bytes::{Buf, Bytes}; use common_runtime; +use common_telemetry::warn; use datafusion::physical_plan::SendableRecordBatchStream; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use futures::StreamExt; +use futures::stream::BoxStream; use object_store::ObjectStore; use snafu::ResultExt; use tokio_util::compat::FuturesAsyncReadCompatExt; @@ -34,9 +43,12 @@ use crate::file_format::{self, FileFormat, stream_to_file}; use crate::share_buffer::SharedBuffer; use crate::util::normalize_infer_schema; +const SKIP_BAD_RECORDS_BATCH_SIZE: usize = 1; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct CsvFormat { pub has_header: bool, + pub skip_bad_records: bool, pub delimiter: u8, pub schema_infer_max_record: Option, pub compression_type: CompressionType, @@ -76,13 +88,11 @@ impl TryFrom<&HashMap> for CsvFormat { })?); }; if let Some(has_header) = value.get(file_format::FORMAT_HAS_HEADER) { - format.has_header = has_header.parse().map_err(|_| { - error::ParseFormatSnafu { - key: file_format::FORMAT_HAS_HEADER, - value: has_header, - } - .build() - })?; + format.has_header = parse_bool(file_format::FORMAT_HAS_HEADER, has_header)?; + }; + if let Some(skip_bad_records) = value.get(file_format::FORMAT_SKIP_BAD_RECORDS) { + format.skip_bad_records = + parse_bool(file_format::FORMAT_SKIP_BAD_RECORDS, skip_bad_records)?; }; if let Some(timestamp_format) = value.get(file_format::TIMESTAMP_FORMAT) { format.timestamp_format = Some(timestamp_format.clone()); @@ -97,10 +107,17 @@ impl TryFrom<&HashMap> for CsvFormat { } } +fn parse_bool(key: &'static str, value: &str) -> Result { + value + .parse() + .map_err(|_| error::ParseFormatSnafu { key, value }.build()) +} + impl Default for CsvFormat { fn default() -> Self { Self { has_header: true, + skip_bad_records: false, delimiter: b',', schema_infer_max_record: Some(file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD), compression_type: CompressionType::Uncompressed, @@ -189,10 +206,136 @@ impl DfRecordBatchEncoder for csv::Writer { } } +/// Builds a CSV stream that can skip selected record-level parse/cast errors. +/// +/// This recovery path intentionally uses one-record batches. It is slower than +/// normal CSV scanning, but keeps each parse/cast failure isolated to a single +/// record. Arrow's CSV decoder clears buffered rows before type parsing, so a +/// failed multi-row flush cannot be safely retried row by row without replaying +/// input bytes. +pub async fn tolerant_csv_stream( + store: &ObjectStore, + path: &str, + schema: SchemaRef, + projection: Vec, + format: &CsvFormat, +) -> Result { + let meta = store + .stat(path) + .await + .context(error::ReadObjectSnafu { path })?; + + let reader = store + .reader(path) + .await + .context(error::ReadObjectSnafu { path })? + .into_bytes_stream(0..meta.content_length()) + .await + .context(error::ReadObjectSnafu { path })?; + + let reader = format.compression_type.convert_stream(reader).boxed(); + tolerant_csv_stream_from_reader( + reader, + path, + schema, + projection, + format.has_header, + format.delimiter, + ) +} + +fn tolerant_csv_stream_from_reader( + reader: BoxStream<'static, io::Result>, + path: &str, + schema: SchemaRef, + projection: Vec, + has_header: bool, + delimiter: u8, +) -> Result { + let projected_schema = Arc::new( + schema + .project(&projection) + .context(error::InferSchemaSnafu)?, + ); + let mut decoder = csv::ReaderBuilder::new(schema) + .with_header(has_header) + .with_delimiter(delimiter) + .with_batch_size(SKIP_BAD_RECORDS_BATCH_SIZE) + .with_projection(projection) + .build_decoder(); + + let path = path.to_string(); + let mut upstream = reader.fuse(); + let mut buffered = Bytes::new(); + let mut input_finished = false; + let stream = futures::stream::poll_fn(move |cx| { + loop { + while !input_finished { + if buffered.is_empty() { + match futures::ready!(upstream.poll_next_unpin(cx)) { + Some(Ok(bytes)) if bytes.is_empty() => continue, + Some(Ok(bytes)) => buffered = bytes, + Some(Err(error)) => return Poll::Ready(Some(Err(error.into()))), + None => input_finished = true, + } + } + + let decoded = decoder.decode(buffered.as_ref())?; + if decoded > 0 { + buffered.advance(decoded); + continue; + } + + if decoder.capacity() == 0 || input_finished { + break; + } + + if buffered.is_empty() { + continue; + } + + return Poll::Ready(Some(Err(ArrowError::ParseError( + "CSV decoder made no progress while input bytes remain".to_string(), + )))); + } + + match decoder.flush() { + Ok(Some(batch)) => return Poll::Ready(Some(Ok(batch))), + Ok(None) if input_finished => return Poll::Ready(None), + Ok(None) => continue, + Err(error) if is_skippable_arrow_error(&error) => { + warn!( + "Skipping bad CSV record while copying from {}: {}", + path, error + ); + } + Err(error) => return Poll::Ready(Some(Err(error))), + } + } + }) + .map(|result: std::result::Result| result.map_err(Into::into)); + + Ok(Box::pin(RecordBatchStreamAdapter::new( + projected_schema, + stream, + ))) +} + +pub fn is_skippable_arrow_error(error: &ArrowError) -> bool { + matches!( + error, + ArrowError::ParseError(_) + | ArrowError::CastError(_) + | ArrowError::ComputeError(_) + | ArrowError::InvalidArgumentError(_) + ) +} + #[cfg(test)] mod tests { use std::sync::Arc; + use arrow_schema::{DataType, Field}; use common_recordbatch::adapter::DfRecordBatchStreamAdapter; use common_recordbatch::{RecordBatch, RecordBatches}; use common_test_util::find_workspace_path; @@ -205,7 +348,7 @@ mod tests { use super::*; use crate::file_format::{ FORMAT_COMPRESSION_TYPE, FORMAT_DELIMITER, FORMAT_HAS_HEADER, - FORMAT_SCHEMA_INFER_MAX_RECORD, FileFormat, file_to_stream, + FORMAT_SCHEMA_INFER_MAX_RECORD, FORMAT_SKIP_BAD_RECORDS, FileFormat, file_to_stream, }; use crate::test_util::{format_schema, test_store}; @@ -331,11 +474,29 @@ mod tests { schema_infer_max_record: Some(2000), delimiter: b'\t', has_header: false, + skip_bad_records: false, timestamp_format: None, time_format: None, date_format: None } ); + + let map = HashMap::from([(FORMAT_SKIP_BAD_RECORDS.to_string(), "true".to_string())]); + let format = CsvFormat::try_from(&map).unwrap(); + + assert_eq!( + format, + CsvFormat { + skip_bad_records: true, + ..CsvFormat::default() + } + ); + } + + #[test] + fn test_try_from_rejects_invalid_bool_options() { + let map = HashMap::from([(FORMAT_SKIP_BAD_RECORDS.to_string(), "yes".to_string())]); + assert!(CsvFormat::try_from(&map).is_err()); } #[tokio::test] @@ -496,4 +657,63 @@ mod tests { assert_eq!(expected, pretty_print); } } + + #[tokio::test] + async fn test_tolerant_csv_stream_continues_after_parse_error() { + let temp_dir = common_test_util::temp_dir::create_temp_dir("test_tolerant_csv_stream"); + let csv_file_path = temp_dir.path().join("input.csv"); + std::fs::write( + &csv_file_path, + "id,name,value\n1,Alice,10.5\nbad,Bad,20.0\nworse,Bad,21.0\n2,Bob,30.5", + ) + .unwrap(); + + let store = test_store("/"); + let schema = Arc::new(arrow_schema::Schema::new(vec![ + Field::new("id", DataType::UInt32, false), + Field::new("name", DataType::Utf8, false), + Field::new("value", DataType::Float64, false), + ])); + let path = csv_file_path.to_str().unwrap(); + + let stream = + tolerant_csv_stream(&store, path, schema, vec![0, 1, 2], &CsvFormat::default()) + .await + .unwrap(); + let batches = stream.try_collect::>().await.unwrap(); + let pretty_print = arrow::util::pretty::pretty_format_batches(&batches) + .unwrap() + .to_string(); + let expected = r#"+----+-------+-------+ +| id | name | value | ++----+-------+-------+ +| 1 | Alice | 10.5 | +| 2 | Bob | 30.5 | ++----+-------+-------+"#; + assert_eq!(expected, pretty_print); + } + + #[tokio::test] + async fn test_tolerant_csv_stream_fails_on_structural_csv_error() { + let temp_dir = + common_test_util::temp_dir::create_temp_dir("test_tolerant_csv_stream_csv_error"); + let csv_file_path = temp_dir.path().join("input.csv"); + std::fs::write(&csv_file_path, "id,name,value\n1,Alice,10.5\n2,Bob\n").unwrap(); + + let store = test_store("/"); + let schema = Arc::new(arrow_schema::Schema::new(vec![ + Field::new("id", DataType::UInt32, false), + Field::new("name", DataType::Utf8, false), + Field::new("value", DataType::Float64, false), + ])); + let path = csv_file_path.to_str().unwrap(); + + let stream = + tolerant_csv_stream(&store, path, schema, vec![0, 1, 2], &CsvFormat::default()) + .await + .unwrap(); + let error = stream.try_collect::>().await.unwrap_err(); + + assert!(error.to_string().contains("incorrect number of fields")); + } } diff --git a/src/common/meta/src/instruction.rs b/src/common/meta/src/instruction.rs index 3fa6b1bad0..6872b9ad55 100644 --- a/src/common/meta/src/instruction.rs +++ b/src/common/meta/src/instruction.rs @@ -18,7 +18,7 @@ use std::time::Duration; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use store_api::region_engine::SyncRegionFromRequest; -use store_api::region_request::RegionFlushReason; +use store_api::region_request::{RegionFlushReason, RegionRequirements}; use store_api::storage::{FileRefsManifest, GcReport, RegionId, RegionNumber}; use strum::Display; use table::metadata::TableId; @@ -179,12 +179,24 @@ impl Display for OpenRegion { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, - "OpenRegion(region_ident={}, region_storage_path={})", - self.region_ident, self.region_storage_path + "OpenRegion(region_ident={}, region_storage_path={}, reason={:?})", + self.region_ident, self.region_storage_path, self.reason ) } } +/// The reason why an open region instruction is triggered. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub enum OpenRegionReason { + /// Open triggered before region migration. + RegionMigration, + /// Open triggered by region failover. + RegionFailover, + /// Open triggered when adding a follower region. + #[cfg(feature = "enterprise")] + RegionFollower, +} + #[serde_with::serde_as] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct OpenRegion { @@ -196,6 +208,10 @@ pub struct OpenRegion { pub region_wal_options: HashMap, #[serde(default)] pub skip_wal_replay: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub reason: Option, + #[serde(default)] + pub requirements: RegionRequirements, } impl OpenRegion { @@ -205,6 +221,8 @@ impl OpenRegion { region_options: HashMap, region_wal_options: HashMap, skip_wal_replay: bool, + reason: Option, + requirements: RegionRequirements, ) -> Self { Self { region_ident, @@ -212,6 +230,8 @@ impl OpenRegion { region_options, region_wal_options, skip_wal_replay, + reason, + requirements, } } } @@ -1126,11 +1146,13 @@ mod tests { HashMap::new(), HashMap::new(), false, + None, + RegionRequirements::empty(), )]); let serialized = serde_json::to_string(&open_region).unwrap(); assert_eq!( - r#"{"OpenRegions":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#, + r#"{"OpenRegions":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false,"requirements":{"object_storage":false}}]}"#, serialized ); @@ -1213,6 +1235,8 @@ mod tests { HashMap::new(), HashMap::new(), false, + None, + RegionRequirements::empty(), )]); assert_eq!(open_region_instruction, open_region); @@ -1368,10 +1392,41 @@ mod tests { region_options, region_wal_options: HashMap::new(), skip_wal_replay: false, + reason: None, + requirements: RegionRequirements::empty(), }; assert_eq!(expected, deserialized); } + #[test] + fn test_serialize_open_region_with_reason_and_requirements() { + let open_region = OpenRegion::new( + RegionIdent { + datanode_id: 2, + table_id: 1024, + region_number: 1, + engine: "mito2".to_string(), + }, + "test/foo", + HashMap::new(), + HashMap::new(), + false, + Some(OpenRegionReason::RegionMigration), + RegionRequirements::object_storage(), + ); + + let serialized = serde_json::to_string(&open_region).unwrap(); + assert!(serialized.contains(r#""reason":"RegionMigration""#)); + assert!(serialized.contains(r#""object_storage":true"#)); + + let deserialized: OpenRegion = serde_json::from_str(&serialized).unwrap(); + assert_eq!(Some(OpenRegionReason::RegionMigration), deserialized.reason); + assert_eq!( + RegionRequirements::object_storage(), + deserialized.requirements + ); + } + #[test] fn test_flush_regions_creation() { let region_id = RegionId::new(1024, 1); diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs index 2ce306006b..b757c95121 100644 --- a/src/datanode/src/config.rs +++ b/src/datanode/src/config.rs @@ -14,6 +14,8 @@ //! Datanode configurations +use std::time::Duration; + use common_base::readable_size::ReadableSize; use common_config::{Configurable, DEFAULT_DATA_HOME}; use common_options::memory::MemoryOptions; @@ -75,6 +77,10 @@ pub struct DatanodeOptions { pub wal: DatanodeWalConfig, pub storage: StorageConfig, pub max_concurrent_queries: usize, + /// Timeout to acquire a permit from the concurrent query limiter when + /// `max_concurrent_queries` is reached. Only effective when the limiter is enabled. + #[serde(with = "humantime_serde")] + pub concurrent_query_limiter_timeout: Duration, /// Options for different store engines. pub region_engine: Vec, pub logging: LoggingOptions, @@ -131,6 +137,7 @@ impl Default for DatanodeOptions { wal: DatanodeWalConfig::default(), storage: StorageConfig::default(), max_concurrent_queries: 0, + concurrent_query_limiter_timeout: Duration::from_millis(100), region_engine: vec![ RegionEngineConfig::Mito(MitoConfig::default()), RegionEngineConfig::File(FileEngineConfig::default()), diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs index 9a2fe3d982..12d7c5109c 100644 --- a/src/datanode/src/datanode.rs +++ b/src/datanode/src/datanode.rs @@ -445,8 +445,7 @@ impl DatanodeBuilder { event_listener, table_provider_factory, opts.max_concurrent_queries, - //TODO: revaluate the hardcoded timeout on the next version of datanode concurrency limiter. - Duration::from_millis(100), + opts.concurrent_query_limiter_timeout, opts.grpc.flight_compression, ); diff --git a/src/datanode/src/heartbeat/handler.rs b/src/datanode/src/heartbeat/handler.rs index 10948a3e7c..79e0baaef3 100644 --- a/src/datanode/src/heartbeat/handler.rs +++ b/src/datanode/src/heartbeat/handler.rs @@ -313,7 +313,7 @@ mod tests { use mito2::test_util::{CreateRequestBuilder, TestEnv}; use store_api::path_utils::table_dir; use store_api::region_engine::RegionRole; - use store_api::region_request::{RegionCloseRequest, RegionRequest}; + use store_api::region_request::{RegionCloseRequest, RegionRequest, RegionRequirements}; use store_api::storage::RegionId; use tokio::sync::mpsc::{self, Receiver}; @@ -442,6 +442,8 @@ mod tests { HashMap::new(), HashMap::new(), false, + None, + RegionRequirements::empty(), )]) } diff --git a/src/datanode/src/heartbeat/handler/open_region.rs b/src/datanode/src/heartbeat/handler/open_region.rs index 56c07a3efe..9c483e588d 100644 --- a/src/datanode/src/heartbeat/handler/open_region.rs +++ b/src/datanode/src/heartbeat/handler/open_region.rs @@ -14,6 +14,7 @@ use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply}; use common_meta::wal_provider::prepare_wal_options; +use common_telemetry::info; use store_api::path_utils::table_dir; use store_api::region_request::{PathType, RegionOpenRequest}; use store_api::storage::RegionId; @@ -41,8 +42,13 @@ impl InstructionHandler for OpenRegionsHandler { mut region_options, region_wal_options, skip_wal_replay, + reason, + requirements, } = open_region; let region_id = RegionId::new(region_ident.table_id, region_ident.region_number); + info!( + "Received open region instruction, region_id: {region_id}, reason: {reason:?}" + ); prepare_wal_options(&mut region_options, region_id, ®ion_wal_options); let request = RegionOpenRequest { engine: region_ident.engine, @@ -51,6 +57,7 @@ impl InstructionHandler for OpenRegionsHandler { options: region_options, skip_wal_replay, checkpoint: None, + requirements, }; (region_id, request) }) @@ -85,7 +92,7 @@ mod tests { use mito2::engine::MITO_ENGINE_NAME; use mito2::test_util::{CreateRequestBuilder, TestEnv}; use store_api::path_utils::table_dir; - use store_api::region_request::{RegionCloseRequest, RegionRequest}; + use store_api::region_request::{RegionCloseRequest, RegionRequest, RegionRequirements}; use store_api::storage::RegionId; use crate::heartbeat::handler::RegionHeartbeatResponseHandler; @@ -98,17 +105,21 @@ mod tests { ) -> Instruction { let region_idents = region_ids .into_iter() - .map(|region_id| OpenRegion { - region_ident: RegionIdent { - datanode_id: 0, - table_id: region_id.table_id(), - region_number: region_id.region_number(), - engine: MITO_ENGINE_NAME.to_string(), - }, - region_storage_path: storage_path.to_string(), - region_options: HashMap::new(), - region_wal_options: HashMap::new(), - skip_wal_replay: false, + .map(|region_id| { + OpenRegion::new( + RegionIdent { + datanode_id: 0, + table_id: region_id.table_id(), + region_number: region_id.region_number(), + engine: MITO_ENGINE_NAME.to_string(), + }, + storage_path, + HashMap::new(), + HashMap::new(), + false, + None, + RegionRequirements::empty(), + ) }) .collect(); diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs index d5711e1761..ce831353d1 100644 --- a/src/datanode/src/region_server.rs +++ b/src/datanode/src/region_server.rs @@ -49,6 +49,7 @@ use common_telemetry::{debug, error, info, warn}; use dashmap::DashMap; use datafusion::datasource::TableProvider; use datafusion_common::tree_node::TreeNode; +use datatypes::schema::SchemaRef; use either::Either; use futures_util::Stream; use futures_util::future::try_join_all; @@ -82,7 +83,7 @@ use store_api::region_request::{ RegionOpenRequest, RegionRequest, }; use store_api::storage::RegionId; -use tokio::sync::{Semaphore, SemaphorePermit}; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio::time::timeout; use tonic::{Request, Response, Result as TonicResult}; @@ -257,7 +258,7 @@ impl RegionServer { request: api::v1::region::QueryRequest, query_ctx: QueryContextRef, ) -> Result { - let _permit = if let Some(p) = &self.inner.parallelism { + let permit = if let Some(p) = &self.inner.parallelism { Some(p.acquire().await?) } else { None @@ -298,14 +299,13 @@ impl RegionServer { ) .await?; - Ok(wrap_flow_region_watermark_stream( - stream, region_id, &query_ctx, - )) + let stream = wrap_flow_region_watermark_stream(stream, region_id, &query_ctx); + Ok(maybe_guard_stream(stream, permit)) } #[tracing::instrument(skip_all)] pub async fn handle_read(&self, request: QueryRequest) -> Result { - let _permit = if let Some(p) = &self.inner.parallelism { + let permit = if let Some(p) = &self.inner.parallelism { Some(p.acquire().await?) } else { None @@ -332,9 +332,8 @@ impl RegionServer { .handle_read(QueryRequest { plan, ..request }, query_ctx.clone()) .await?; - Ok(wrap_flow_region_watermark_stream( - stream, region_id, &query_ctx, - )) + let stream = wrap_flow_region_watermark_stream(stream, region_id, &query_ctx); + Ok(maybe_guard_stream(stream, permit)) } /// Returns all opened and reportable regions. @@ -1058,7 +1057,7 @@ struct RegionServerInner { } struct RegionServerParallelism { - semaphore: Semaphore, + semaphore: Arc, timeout: Duration, } @@ -1071,19 +1070,68 @@ impl RegionServerParallelism { return None; } Some(RegionServerParallelism { - semaphore: Semaphore::new(max_concurrent_queries), + semaphore: Arc::new(Semaphore::new(max_concurrent_queries)), timeout: concurrent_query_limiter_timeout, }) } - pub async fn acquire(&self) -> Result> { - timeout(self.timeout, self.semaphore.acquire()) + pub async fn acquire(&self) -> Result { + timeout(self.timeout, self.semaphore.clone().acquire_owned()) .await .context(ConcurrentQueryLimiterTimeoutSnafu)? .context(ConcurrentQueryLimiterClosedSnafu) } } +/// Wraps a record batch stream and holds a concurrency permit until the stream is +/// fully consumed (dropped), so `max_concurrent_queries` bounds the number of +/// in-flight read streams, not just query planning. +struct PermitGuardedStream { + inner: SendableRecordBatchStream, + _permit: OwnedSemaphorePermit, +} + +impl RecordBatchStream for PermitGuardedStream { + fn name(&self) -> &str { + self.inner.name() + } + + fn schema(&self) -> SchemaRef { + self.inner.schema() + } + + fn output_ordering(&self) -> Option<&[OrderOption]> { + self.inner.output_ordering() + } + + fn metrics(&self) -> Option { + self.inner.metrics() + } +} + +impl Stream for PermitGuardedStream { + type Item = common_recordbatch::error::Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.inner.as_mut().poll_next(cx) + } +} + +/// Wraps `stream` so it holds `permit` until fully consumed. Returns `stream` +/// unchanged when no permit was acquired (limiter disabled). +fn maybe_guard_stream( + stream: SendableRecordBatchStream, + permit: Option, +) -> SendableRecordBatchStream { + match permit { + Some(permit) => Box::pin(PermitGuardedStream { + inner: stream, + _permit: permit, + }), + None => stream, + } +} + enum CurrentEngine { Engine(RegionEngineRef), EarlyReturn(AffectedRows), @@ -2057,6 +2105,7 @@ mod tests { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -2235,6 +2284,7 @@ mod tests { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }, ), ( @@ -2246,6 +2296,7 @@ mod tests { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }, ), ], @@ -2268,6 +2319,7 @@ mod tests { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }, ), ( @@ -2279,6 +2331,7 @@ mod tests { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }, ), ], diff --git a/src/datanode/src/utils.rs b/src/datanode/src/utils.rs index c5cd008c28..816ae021ba 100644 --- a/src/datanode/src/utils.rs +++ b/src/datanode/src/utils.rs @@ -175,6 +175,7 @@ pub async fn build_region_open_requests( options, skip_wal_replay: false, checkpoint, + requirements: Default::default(), }, )); } @@ -193,6 +194,7 @@ pub async fn build_region_open_requests( options, skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }, )); } diff --git a/src/file-engine/src/engine.rs b/src/file-engine/src/engine.rs index 175ebef237..2ddbb6c414 100644 --- a/src/file-engine/src/engine.rs +++ b/src/file-engine/src/engine.rs @@ -32,7 +32,7 @@ use store_api::region_engine::{ }; use store_api::region_request::{ AffectedRows, RegionCloseRequest, RegionCreateRequest, RegionDropRequest, RegionOpenRequest, - RegionRequest, + RegionRequest, RegionRequirements, }; use store_api::storage::{RegionId, ScanRequest, SequenceNumber}; use tokio::sync::Mutex; @@ -186,6 +186,24 @@ struct EngineInner { type EngineInnerRef = Arc; +fn ensure_open_requirements( + requirements: RegionRequirements, + object_store: &ObjectStore, +) -> EngineResult<()> { + if !requirements.object_storage { + return Ok(()); + } + + ensure!( + object_store::util::is_object_storage(object_store), + UnsupportedSnafu { + operation: "open region with object storage requirement on non-object storage" + } + ); + + Ok(()) +} + impl EngineInner { fn new(object_store: ObjectStore) -> Self { Self { @@ -289,6 +307,8 @@ impl EngineInner { return Ok(0); } + ensure_open_requirements(request.requirements, &self.object_store)?; + let res = FileRegion::open(region_id, request, &self.object_store).await; let region = res.inspect_err(|err| { error!( @@ -356,3 +376,53 @@ impl EngineInner { self.regions.read().unwrap().contains_key(®ion_id) } } + +#[cfg(test)] +mod tests { + use object_store::services::{Fs, S3}; + + use super::*; + use crate::error::Error; + + fn build_fs_object_store() -> ObjectStore { + ObjectStore::new(Fs::default().root("/tmp")) + .unwrap() + .finish() + } + + fn build_s3_object_store() -> ObjectStore { + ObjectStore::new( + S3::default() + .bucket("test-bucket") + .region("us-east-1") + .disable_ec2_metadata(), + ) + .unwrap() + .finish() + } + + #[test] + fn test_empty_open_requirements_are_supported() { + ensure_open_requirements(RegionRequirements::empty(), &build_fs_object_store()).unwrap(); + } + + #[test] + fn test_object_storage_open_requirement_rejects_fs_object_store() { + let err = ensure_open_requirements( + RegionRequirements::object_storage(), + &build_fs_object_store(), + ) + .unwrap_err(); + + assert!(matches!(err, Error::Unsupported { .. })); + } + + #[test] + fn test_object_storage_open_requirement_accepts_s3_object_store() { + ensure_open_requirements( + RegionRequirements::object_storage(), + &build_s3_object_store(), + ) + .unwrap(); + } +} diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs index 3808b33a67..aceec21aa5 100644 --- a/src/file-engine/src/region.rs +++ b/src/file-engine/src/region.rs @@ -181,6 +181,7 @@ mod tests { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; let region = FileRegion::open(region_id, request, &object_store) @@ -238,6 +239,7 @@ mod tests { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; let err = FileRegion::open(region_id, request, &object_store) .await diff --git a/src/flow/src/batching_mode/engine.rs b/src/flow/src/batching_mode/engine.rs index 68fb3793e4..319ddcf2e7 100644 --- a/src/flow/src/batching_mode/engine.rs +++ b/src/flow/src/batching_mode/engine.rs @@ -630,8 +630,11 @@ impl BatchingEngine { let engine = self.query_engine.clone(); let frontend = self.frontend_client.clone(); - // check execute once first to detect any error early + // Create sink table if needed, then validate an existing/created sink schema before + // spawning the background task. This catches user-created sink schema mismatches at + // CREATE FLOW time instead of surfacing them later in the execution loop. task.check_or_create_sink_table(&engine, &frontend).await?; + task.validate_sink_table_schema(&engine).await?; let (start_tx, start_rx) = oneshot::channel(); diff --git a/src/flow/src/batching_mode/task.rs b/src/flow/src/batching_mode/task.rs index cbd6a05cc2..3cd96b7525 100644 --- a/src/flow/src/batching_mode/task.rs +++ b/src/flow/src/batching_mode/task.rs @@ -265,6 +265,36 @@ impl BatchingTask { Ok(None) } + /// Validates that the sink table schema can accept this flow's output. + /// + /// This is a dry-run of the same schema matching logic used by runtime insert-plan + /// generation, but without adding dirty-window filters or executing the query. It is used + /// during CREATE FLOW to catch existing sink table mismatches early. + pub async fn validate_sink_table_schema(&self, engine: &QueryEngineRef) -> Result<(), Error> { + let (table, _) = get_table_info_df_schema( + self.config.catalog_manager.clone(), + self.config.sink_table_name.clone(), + ) + .await?; + + let table_meta = &table.table_info().meta; + let merge_mode_last_non_null = + is_merge_mode_last_non_null(&table_meta.options.extra_options); + let primary_key_indices = table_meta.primary_key_indices.clone(); + let query_ctx = self.state.read().unwrap().query_ctx.clone(); + + gen_plan_with_matching_schema( + &self.config.query, + query_ctx, + engine.clone(), + table_meta.schema.clone(), + &primary_key_indices, + merge_mode_last_non_null, + ) + .await + .map(|_| ()) + } + async fn is_table_exist(&self, table_name: &[String; 3]) -> Result { self.config .catalog_manager @@ -929,7 +959,7 @@ impl BatchingTask { let (expire_lower_bound, expire_upper_bound) = match (expire_time_window_bound, &self.config.query_type) { (Some((Some(l), Some(u))), QueryType::Sql) => (l, u), - (None, QueryType::Sql) => { + (None, QueryType::Sql) if self.config.flow_eval_interval.is_none() => { // if it's sql query and no time window lower/upper bound is found, just return the original query(with auto columns) // use sink_table_meta to add to query the `update_at` and `__ts_placeholder` column's value too for compatibility reason debug!( @@ -950,7 +980,8 @@ impl BatchingTask { } _ => { // Clean dirty windows for full-query/non-scoped paths, - // such as TQL, that cannot use a time-window filter. + // such as TQL or evaluation-interval SQL without a recognized + // time-window expression, that cannot use a time-window filter. let (_, dirty_windows_to_restore) = self.drain_dirty_windows_signal(); let plan_info = self diff --git a/src/flow/src/batching_mode/task/test.rs b/src/flow/src/batching_mode/task/test.rs index d64b4ef1b9..c42d564ce2 100644 --- a/src/flow/src/batching_mode/task/test.rs +++ b/src/flow/src/batching_mode/task/test.rs @@ -974,6 +974,38 @@ async fn test_non_scoped_path_generates_plan_with_empty_dirty_signal() { assert!(task.state.read().unwrap().dirty_time_windows.is_empty()); } +#[tokio::test] +async fn test_no_time_window_sql_with_eval_interval_generates_plan_without_dirty_signal() { + let TestTaskParts { + mut task, + query_engine, + .. + } = new_test_task_engine_and_plan_with_query( + "SELECT number, ts FROM numbers_with_ts", + "missing_sink", + ) + .await; + Arc::get_mut(&mut task.config) + .expect("test task config should be uniquely owned") + .flow_eval_interval = Some(Duration::from_secs(60)); + task.state.write().unwrap().dirty_time_windows.clean(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", CDT::uint32_datatype(), false), + ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false).with_time_index(true), + ])); + + let plan = task + .gen_query_with_time_window(query_engine, &sink_schema, &[], false, None) + .await + .unwrap() + .expect( + "eval-interval SQL without a time-window expr should run by interval, not dirty signal", + ); + + assert!(plan.can_advance_checkpoints); + assert!(task.state.read().unwrap().dirty_time_windows.is_empty()); +} + #[tokio::test] async fn test_executed_query_failure_restores_scoped_dirty_windows_for_flush_path() { let (task, plan) = new_test_task_and_plan_with_missing_sink().await; diff --git a/src/flow/src/batching_mode/utils.rs b/src/flow/src/batching_mode/utils.rs index e86b1ee3be..5e033c6ae7 100644 --- a/src/flow/src/batching_mode/utils.rs +++ b/src/flow/src/batching_mode/utils.rs @@ -33,9 +33,10 @@ use datafusion_common::{ }; use datafusion_expr::logical_plan::{Aggregate, TableScan}; use datafusion_expr::{ - Distinct, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, and, binary_expr, - bitwise_and, bitwise_or, bitwise_xor, is_null, or, when, + Distinct, ExprSchemable, JoinType, LogicalPlan, LogicalPlanBuilder, Operator, Projection, and, + binary_expr, bitwise_and, bitwise_or, bitwise_xor, is_null, or, when, }; +use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnSchema, SchemaRef}; use query::QueryEngineRef; use query::parser::{DEFAULT_LOOKBACK_STRING, PromQuery, QueryLanguageParser, QueryStatement}; @@ -955,7 +956,7 @@ pub(crate) async fn gen_plan_with_matching_schema( .clone() .rewrite(&mut add_auto_column) .with_context(|_| DatafusionSnafu { - context: format!("Failed to rewrite plan:\n {}\n", plan), + context: "Failed to rewrite plan".to_string(), })? .data; Ok(plan) @@ -1090,33 +1091,23 @@ impl ColumnMatcherRewriter { } /// modify the exprs in place so that it matches the schema and some auto columns are added - fn modify_project_exprs(&mut self, mut exprs: Vec) -> DfResult> { + fn modify_project_exprs( + &mut self, + mut exprs: Vec, + input_schema: &DFSchema, + ) -> DfResult> { if self.allow_partial { return self.modify_project_exprs_with_partial(exprs); } + let original_exprs = exprs.clone(); + let all_names = self .schema .column_schemas() .iter() .map(|c| c.name.clone()) .collect::>(); - // first match by position - for (idx, expr) in exprs.iter_mut().enumerate() { - if !all_names.contains(&expr.qualified_name().1) - && let Some(col_name) = self - .schema - .column_schemas() - .get(idx) - .map(|c| c.name.clone()) - { - // if the data type mismatched, later check_execute will error out - // hence no need to check it here, beside, optimize pass might be able to cast it - // so checking here is not necessary - *expr = expr.clone().alias(col_name); - } - } - // add columns if have different column count let query_col_cnt = exprs.len(); let table_col_cnt = self.schema.column_schemas().len(); @@ -1140,10 +1131,9 @@ impl ColumnMatcherRewriter { // is the update at column exprs.push(datafusion::prelude::now().alias(&last_col_schema.name)); } else { - // helpful error message - return Err(DataFusionError::Plan(format!( - "Expect the last column in table to be timestamp column, found column {} with type {:?}", - last_col_schema.name, last_col_schema.data_type + return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch( + &original_exprs, + self.schema.as_ref(), ))); } } else if query_col_cnt + 2 == table_col_cnt { @@ -1170,14 +1160,110 @@ impl ColumnMatcherRewriter { ))); } } else { - return Err(DataFusionError::Plan(format!( - "Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?}", - query_col_cnt, - exprs, - table_col_cnt, - self.schema.column_schemas() + return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch( + &original_exprs, + self.schema.as_ref(), ))); } + + self.match_extra_output_columns(exprs, input_schema, &original_exprs, &all_names) + } + + /// Match flow output columns whose names are not in the sink schema by the same position only. + /// + /// This keeps the legacy "omit output aliases and map by position" behavior, but only when the + /// sink column at the same index is actually missing from the flow output. If the extra output + /// would be aliased to a sink column that already exists elsewhere, report a schema mismatch + /// instead of guessing another sink column by type. + /// + /// In particular, this intentionally rejects cross-position remaps like + /// `record_time_window2 -> record_time_window`: they are easy to confuse with real schema + /// mismatches and should be fixed by giving the flow output the sink column name explicitly. + fn match_extra_output_columns( + &self, + mut exprs: Vec, + input_schema: &DFSchema, + original_exprs: &[Expr], + all_names: &BTreeSet, + ) -> DfResult> { + let mut output_names = exprs + .iter() + .map(|expr| expr.qualified_name().1) + .collect::>(); + let output_name_set = output_names.iter().cloned().collect::>(); + let extra_expr_indices = output_names + .iter() + .enumerate() + .filter_map(|(idx, name)| (!all_names.contains(name)).then_some(idx)) + .collect::>(); + let missing_sink_indices = self + .schema + .column_schemas() + .iter() + .enumerate() + .filter_map(|(idx, column)| (!output_name_set.contains(&column.name)).then_some(idx)) + .collect::>(); + + if extra_expr_indices.is_empty() && missing_sink_indices.is_empty() { + return Ok(exprs); + } + + if extra_expr_indices.len() != missing_sink_indices.len() { + return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch( + original_exprs, + self.schema.as_ref(), + ))); + } + + let mut positional_matches = Vec::new(); + for expr_idx in extra_expr_indices { + if !missing_sink_indices.contains(&expr_idx) { + return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch( + original_exprs, + self.schema.as_ref(), + ))); + } + + let target_col_schema = &self.schema.column_schemas()[expr_idx]; + let expr_type = + ConcreteDataType::from_arrow_type(&exprs[expr_idx].get_type(input_schema)?); + if is_obviously_incompatible_positional_match(&expr_type, &target_col_schema.data_type) + { + return Err(DataFusionError::Plan(format!( + "Cannot match flow output column '{}' to sink column '{}' by position: incompatible data types, flow output type is {:?}, sink column type is {:?}. {}", + output_names[expr_idx], + target_col_schema.name, + expr_type, + target_col_schema.data_type, + format_flow_sink_schema_mismatch(original_exprs, self.schema.as_ref()) + ))); + } + + let target_name = target_col_schema.name.clone(); + positional_matches.push(format!( + "{} -> {} (flow output type: {:?}, sink column type: {:?})", + output_names[expr_idx], target_name, expr_type, target_col_schema.data_type + )); + exprs[expr_idx] = exprs[expr_idx].clone().alias(target_name.clone()); + output_names[expr_idx] = target_name; + } + + if !positional_matches.is_empty() { + debug!( + "Matched flow output columns to sink columns by position: {:?}", + positional_matches + ); + } + + let duplicated_output_names = duplicate_names(&output_names); + if !duplicated_output_names.is_empty() { + return Err(DataFusionError::Plan(format!( + "Flow output schema contains duplicate column(s) after schema matching {:?}. {}", + duplicated_output_names, + format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref()) + ))); + } + Ok(exprs) } @@ -1186,12 +1272,9 @@ impl ColumnMatcherRewriter { let query_col_cnt = exprs.len(); if query_col_cnt > table_col_cnt { - return Err(DataFusionError::Plan(format!( - "Expect query column count <= table column count, found {} query columns {:?}, {} table columns {:?}", - query_col_cnt, - exprs, - table_col_cnt, - self.schema.column_schemas() + return Err(DataFusionError::Plan(format_flow_sink_schema_mismatch( + &exprs, + self.schema.as_ref(), ))); } @@ -1209,8 +1292,9 @@ impl ColumnMatcherRewriter { .collect(); if !missing.is_empty() { return Err(DataFusionError::Plan(format!( - "Column(s) {:?} required by sink table are missing from flow output when merge_mode=last_non_null", - missing + "Column(s) {:?} required by sink table are missing from flow output when merge_mode=last_non_null. {}", + missing, + format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref()) ))); } @@ -1250,8 +1334,9 @@ impl ColumnMatcherRewriter { if !remap.is_empty() { let extra: Vec<_> = remap.keys().cloned().collect(); return Err(DataFusionError::Plan(format!( - "Flow output has extra column(s) {:?} not found in sink schema when merge_mode=last_non_null", - extra + "Flow output has extra column(s) {:?} not found in sink schema when merge_mode=last_non_null. {}", + extra, + format_flow_sink_schema_mismatch(&exprs, self.schema.as_ref()) ))); } @@ -1281,6 +1366,80 @@ impl ColumnMatcherRewriter { } } +fn is_obviously_incompatible_positional_match( + expr_type: &ConcreteDataType, + sink_type: &ConcreteDataType, +) -> bool { + // This is a coarse type-family guard for legacy positional aliasing, not a strict type equality + // check. For example, numeric width/sign differences are allowed here and left to downstream + // coercion, and untyped NULL can be coerced to any target type. Clearly different families such + // as timestamp vs string are rejected early. + if expr_type.is_null() || expr_type == sink_type { + return false; + } + + expr_type.is_timestamp() != sink_type.is_timestamp() + || expr_type.is_string() != sink_type.is_string() + || expr_type.is_boolean() != sink_type.is_boolean() + || expr_type.is_json() != sink_type.is_json() + || expr_type.is_vector() != sink_type.is_vector() +} + +fn duplicate_names(names: &[String]) -> Vec { + let mut seen = HashSet::new(); + let mut duplicated = BTreeSet::new(); + for name in names { + if !seen.insert(name.as_str()) { + duplicated.insert(name.as_str()); + } + } + duplicated.into_iter().map(str::to_string).collect() +} + +fn format_flow_sink_schema_mismatch( + query_exprs: &[Expr], + sink_schema: &datatypes::schema::Schema, +) -> String { + let flow_output_columns = query_exprs + .iter() + .map(|expr| expr.qualified_name().1) + .collect::>(); + let sink_table_columns = sink_schema + .column_schemas() + .iter() + .map(|col| col.name.clone()) + .collect::>(); + + let flow_output_set = flow_output_columns.iter().cloned().collect::>(); + let sink_table_set = sink_table_columns.iter().cloned().collect::>(); + + let mut extra_flow_columns = flow_output_columns + .iter() + .filter(|name| !sink_table_set.contains(*name)) + .cloned() + .collect::>(); + extra_flow_columns.sort(); + extra_flow_columns.dedup(); + + let mut missing_sink_columns = sink_table_columns + .iter() + .filter(|name| !flow_output_set.contains(*name)) + .cloned() + .collect::>(); + missing_sink_columns.sort(); + missing_sink_columns.dedup(); + + format!( + "Flow output schema does not match sink table schema: found {} flow output columns and {} sink table columns. flow output columns: {:?}, sink table columns: {:?}, extra flow columns not in sink: {:?}, missing sink columns from flow output: {:?}", + flow_output_columns.len(), + sink_table_columns.len(), + flow_output_columns, + sink_table_columns, + extra_flow_columns, + missing_sink_columns + ) +} + impl TreeNodeRewriter for ColumnMatcherRewriter { type Node = LogicalPlan; fn f_down(&mut self, mut node: Self::Node) -> DfResult> { @@ -1327,7 +1486,7 @@ impl TreeNodeRewriter for ColumnMatcherRewriter { // if not, wrap it in a projection if let LogicalPlan::Projection(project) = &node { let exprs = project.expr.clone(); - let exprs = self.modify_project_exprs(exprs)?; + let exprs = self.modify_project_exprs(exprs, project.input.schema())?; self.is_rewritten = true; let new_plan = @@ -1341,7 +1500,7 @@ impl TreeNodeRewriter for ColumnMatcherRewriter { field.name(), ))); } - let exprs = self.modify_project_exprs(exprs)?; + let exprs = self.modify_project_exprs(exprs, node.schema())?; self.is_rewritten = true; let new_plan = LogicalPlan::Projection(Projection::try_new(exprs, Arc::new(node.clone()))?); diff --git a/src/flow/src/batching_mode/utils/test.rs b/src/flow/src/batching_mode/utils/test.rs index 317b0a5475..9ca1186fb6 100644 --- a/src/flow/src/batching_mode/utils/test.rs +++ b/src/flow/src/batching_mode/utils/test.rs @@ -14,6 +14,7 @@ use std::sync::Arc; +use catalog::RegisterTableRequest; use common_recordbatch::RecordBatch; use common_time::Timestamp; use datafusion_common::tree_node::TreeNode as _; @@ -29,7 +30,9 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use table::test_util::MemTable; use super::*; +use crate::batching_mode::BatchingModeOptions; use crate::batching_mode::state::FilterExprInfo; +use crate::batching_mode::task::{BatchingTask, TaskArgs}; use crate::test_utils::create_test_query_engine; fn u32_table(table_name: &str, columns: Vec<&str>, rows: usize) -> TableRef { @@ -432,9 +435,7 @@ async fn test_add_auto_column_rewriter() { // error datatype mismatch ( "SELECT number, ts FROM numbers_with_ts", - Err( - "Expect the last column in table to be timestamp column, found column atat with type Int8", - ), + Err("missing sink columns from flow output: [\"atat\"]"), vec![ ColumnSchema::new("number", ConcreteDataType::int32_datatype(), true), ColumnSchema::new( @@ -498,6 +499,383 @@ async fn test_add_auto_column_rewriter() { } } +#[tokio::test] +async fn test_gen_plan_with_matching_schema_reports_extra_flow_columns_before_positional_alias() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new( + "max(numbers_with_ts.number)", + ConcreteDataType::uint32_datatype(), + true, + ), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT number, number AS extra, ts, max(number) FROM numbers_with_ts GROUP BY number, ts", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains("Flow output schema does not match sink table schema"), + "{err}" + ); + assert!(err.contains("flow output columns"), "{err}"); + assert!(err.contains("sink table columns"), "{err}"); + assert!(err.contains("extra flow columns not in sink"), "{err}"); + assert!(err.contains("extra"), "{err}"); + assert!( + !err.contains("extra AS ts"), + "schema error should not primarily expose positional alias: {err}" + ); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_rejects_positional_alias_type_mismatch() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "event_time", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new( + "max(numbers_with_ts.number)", + ConcreteDataType::uint32_datatype(), + true, + ), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT number, number AS not_time, max(number) FROM numbers_with_ts GROUP BY number", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains( + "Cannot match flow output column 'not_time' to sink column 'event_time' by position" + ), + "{err}" + ); + assert!(err.contains("incompatible data types"), "{err}"); + assert!( + !err.contains("not_time AS event_time"), + "schema error should not expose an incompatible positional alias: {err}" + ); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_rejects_cross_position_extra_column_match() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "time_window", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + true, + ), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT number, ts, date_bin('5 minutes', ts) AS time_window2 FROM numbers_with_ts GROUP BY number, ts, time_window2", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains("Flow output schema does not match sink table schema"), + "{err}" + ); + assert!(err.contains("time_window2"), "{err}"); + assert!(err.contains("time_window"), "{err}"); + assert!(!err.contains("DuplicateUnqualifiedField"), "{err}"); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_accepts_out_of_order_matching_names() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "time_window", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + true, + ), + ])); + + let plan = gen_plan_with_matching_schema( + "SELECT number, ts, date_bin('5 minutes', ts) AS time_window FROM numbers_with_ts GROUP BY number, ts, time_window", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap(); + let output_names = plan + .schema() + .fields() + .iter() + .map(|field| field.name().clone()) + .collect::>(); + + assert_eq!( + output_names, + vec![ + "number".to_string(), + "ts".to_string(), + "time_window".to_string() + ] + ); + assert!(duplicate_names(&output_names).is_empty()); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_allows_numeric_positional_alias() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("renamed_number", ConcreteDataType::int64_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ])); + + let plan = gen_plan_with_matching_schema( + "SELECT number, ts FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap(); + let sql = df_plan_to_sql(&plan).unwrap(); + + assert_eq!( + "SELECT numbers_with_ts.number AS renamed_number, numbers_with_ts.ts FROM numbers_with_ts", + sql + ); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_allows_null_positional_alias() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new("label", ConcreteDataType::string_datatype(), true), + ])); + + let plan = gen_plan_with_matching_schema( + "SELECT number, NULL AS label_placeholder FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap(); + let output_names = plan + .schema() + .fields() + .iter() + .map(|field| field.name().clone()) + .collect::>(); + let sql = df_plan_to_sql(&plan).unwrap(); + + assert_eq!( + output_names, + vec!["number".to_string(), "label".to_string()] + ); + assert!(sql.contains("NULL AS label"), "{sql}"); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_accepts_matching_flow_schema() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new("extra", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new( + "max(numbers_with_ts.number)", + ConcreteDataType::uint32_datatype(), + true, + ), + ])); + + let plan = gen_plan_with_matching_schema( + "SELECT number, number AS extra, ts, max(number) FROM numbers_with_ts GROUP BY number, ts", + ctx, + query_engine, + sink_schema, + &[], + false, + ) + .await + .unwrap(); + let sql = df_plan_to_sql(&plan).unwrap(); + + assert_eq!( + "SELECT numbers_with_ts.number, numbers_with_ts.number AS extra, numbers_with_ts.ts, max(numbers_with_ts.number) FROM numbers_with_ts GROUP BY numbers_with_ts.number, numbers_with_ts.ts", + sql + ); +} + +#[tokio::test] +async fn test_validate_sink_table_schema_rejects_existing_sink_missing_flow_column() { + let query_engine = create_test_query_engine(); + let query_ctx = QueryContext::arc(); + let sql = "SELECT number, number AS extra, max(number) FROM numbers_with_ts GROUP BY number"; + let plan = sql_to_df_plan(query_ctx.clone(), query_engine.clone(), sql, true) + .await + .unwrap(); + + let catalog_manager = catalog::memory::new_memory_catalog_manager().unwrap(); + let sink_table_name = [ + "greptime".to_string(), + "public".to_string(), + "existing_sink".to_string(), + ]; + let sink_table = u32_table( + "existing_sink", + vec!["number", "max(numbers_with_ts.number)"], + 0, + ); + catalog_manager + .register_table_sync(RegisterTableRequest { + catalog: sink_table_name[0].clone(), + schema: sink_table_name[1].clone(), + table_name: sink_table_name[2].clone(), + table_id: 4096, + table: sink_table, + }) + .unwrap(); + + let (_shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel(); + let task = BatchingTask::try_new(TaskArgs { + flow_id: 1, + query: sql, + plan, + time_window_expr: None, + expire_after: None, + sink_table_name, + source_table_names: vec![[ + "greptime".to_string(), + "public".to_string(), + "numbers_with_ts".to_string(), + ]], + query_ctx, + catalog_manager, + shutdown_rx, + batch_opts: Arc::new(BatchingModeOptions::default()), + flow_eval_interval: None, + }) + .unwrap(); + + let err = task + .validate_sink_table_schema(&query_engine) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains("Flow output schema does not match sink table schema"), + "{err}" + ); + assert!(err.contains("extra"), "{err}"); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_allow_partial_fills_nullable_columns() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), false), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true), + ])); + + let plan = gen_plan_with_matching_schema( + "SELECT number, ts FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[0], + true, + ) + .await + .unwrap(); + let sql = df_plan_to_sql(&plan).unwrap(); + + assert_eq!( + "SELECT numbers_with_ts.number, numbers_with_ts.ts, NULL AS optional_value FROM numbers_with_ts", + sql + ); +} + #[tokio::test] async fn test_find_group_by_exprs() { let testcases = vec![ @@ -1491,3 +1869,118 @@ async fn test_analyze_incremental_aggregate_plan_rejects_cast_wrapped_alias() { ); } } + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_last_non_null_rejects_missing_primary_key_column() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + // Sink table with primary_key_indices=[0] ("number"), time_index="ts", and merge_mode=last_non_null. + // The flow query omits "number", which is a required primary-key column. + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT ts FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[0], + true, + ) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains( + "required by sink table are missing from flow output when merge_mode=last_non_null" + ), + "{err}" + ); + assert!(err.contains("number"), "{err}"); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_last_non_null_rejects_missing_time_index_column() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + // Sink table with primary_key_indices=[0] ("number"), time_index="ts", and merge_mode=last_non_null. + // The flow query omits "ts", which is a required time-index column. + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT number FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[0], + true, + ) + .await + .unwrap_err() + .to_string(); + + assert!( + err.contains( + "required by sink table are missing from flow output when merge_mode=last_non_null" + ), + "{err}" + ); + assert!(err.contains("ts"), "{err}"); +} + +#[tokio::test] +async fn test_gen_plan_with_matching_schema_last_non_null_rejects_extra_flow_column() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + // Sink table with merge_mode=last_non_null. + // Sink has 3 columns: number (pk), ts (time_index), optional_value (nullable). + // Flow outputs: number, number AS extra, ts → "extra" is not in sink schema. + // query_col_cnt(3) <= table_col_cnt(3), so the extra branch is reached. + let sink_schema = Arc::new(Schema::new(vec![ + ColumnSchema::new("number", ConcreteDataType::uint32_datatype(), true), + ColumnSchema::new( + "ts", + ConcreteDataType::timestamp_millisecond_datatype(), + false, + ) + .with_time_index(true), + ColumnSchema::new("optional_value", ConcreteDataType::uint32_datatype(), true), + ])); + + let err = gen_plan_with_matching_schema( + "SELECT number, number AS extra, ts FROM numbers_with_ts", + ctx, + query_engine, + sink_schema, + &[0], + true, + ) + .await + .unwrap_err() + .to_string(); + + assert!(err.contains("extra column(s)"), "{err}"); + assert!(err.contains("extra"), "{err}"); + assert!( + err.contains("Flow output schema does not match sink table schema"), + "{err}" + ); +} diff --git a/src/frontend/src/server.rs b/src/frontend/src/server.rs index e66ae718ba..6b120ccba5 100644 --- a/src/frontend/src/server.rs +++ b/src/frontend/src/server.rs @@ -288,7 +288,6 @@ where let http_server = builder .with_metrics_handler(MetricsHandler) - .with_plugins(self.plugins.clone()) .with_greptime_config_options(toml) .build(); Ok(http_server) diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index cbd9b43151..de41caf19e 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -1344,7 +1344,7 @@ mod tests { // Generates rough 10MB data, which is larger than the default grpc message size limit. for i in 0..10 { - let data: Vec = (0..1024 * 1024).map(|_| rng.random()).collect(); + let data: Vec = (0..1024 * 1024).map(|_| rng.random::()).collect(); in_memory .put( PutRequest::new() diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 0c0e5de5d7..792c66bdc9 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -18,7 +18,9 @@ use std::ops::Div; use api::v1::meta::MailboxMessage; use common_meta::RegionIdent; use common_meta::distributed_time_constants::default_distributed_time_constants; -use common_meta::instruction::{Instruction, InstructionReply, OpenRegion, SimpleReply}; +use common_meta::instruction::{ + Instruction, InstructionReply, OpenRegion, OpenRegionReason, SimpleReply, +}; use common_meta::key::datanode_table::RegionInfo; use common_procedure::{Context as ProcedureContext, Status}; use common_telemetry::info; @@ -26,12 +28,13 @@ use common_telemetry::tracing_context::TracingContext; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::region_engine::RegionRole; +use store_api::region_request::RegionRequirements; use tokio::time::Instant; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; use crate::procedure::region_migration::flush_leader_region::PreFlushRegion; -use crate::procedure::region_migration::{Context, State}; +use crate::procedure::region_migration::{Context, RegionMigrationTriggerReason, State}; use crate::service::mailbox::Channel; #[derive(Debug, Serialize, Deserialize)] @@ -67,6 +70,10 @@ impl OpenCandidateRegion { let region_ids = ctx.persistent_ctx.region_ids.clone(); let from_peer_id = ctx.persistent_ctx.from_peer.id; let to_peer_id = ctx.persistent_ctx.to_peer.id; + let reason = match ctx.persistent_ctx.trigger_reason { + RegionMigrationTriggerReason::Failover => OpenRegionReason::RegionFailover, + _ => OpenRegionReason::RegionMigration, + }; let datanode_table_values = ctx.get_from_peer_datanode_table_values().await?; let mut open_regions = Vec::with_capacity(region_ids.len()); @@ -97,6 +104,8 @@ impl OpenCandidateRegion { region_options, region_wal_options, true, + Some(reason), + RegionRequirements::object_storage(), )); } @@ -233,18 +242,20 @@ mod tests { } fn new_mock_open_instruction(datanode_id: DatanodeId, region_id: RegionId) -> Instruction { - Instruction::OpenRegions(vec![OpenRegion { - region_ident: RegionIdent { + Instruction::OpenRegions(vec![OpenRegion::new( + RegionIdent { datanode_id, table_id: region_id.table_id(), region_number: region_id.region_number(), engine: MITO2_ENGINE.to_string(), }, - region_storage_path: "/bar/foo/region/".to_string(), - region_options: Default::default(), - region_wal_options: Default::default(), - skip_wal_replay: true, - }]) + "/bar/foo/region/", + Default::default(), + Default::default(), + true, + Some(OpenRegionReason::RegionMigration), + RegionRequirements::object_storage(), + )]) } #[tokio::test] @@ -263,6 +274,57 @@ mod tests { assert!(!err.is_retryable()); } + #[tokio::test] + async fn test_build_open_region_instruction_reason() { + let state = OpenCandidateRegion; + let mut persistent_context = new_persistent_context(); + let from_peer_id = persistent_context.from_peer.id; + let region_id = persistent_context.region_ids[0]; + let env = TestingEnv::new(); + + let table_info = new_test_table_info(1024); + let region_routes = vec![RegionRoute { + region: Region::new_test(region_id), + leader_peer: Some(Peer::empty(from_peer_id)), + ..Default::default() + }]; + env.table_metadata_manager() + .create_table_metadata( + table_info, + TableRouteValue::physical(region_routes), + HashMap::default(), + ) + .await + .unwrap(); + + let mut ctx = env + .context_factory() + .new_context(persistent_context.clone()); + let instruction = state.build_open_region_instruction(&mut ctx).await.unwrap(); + let open_regions = instruction.into_open_regions().unwrap(); + assert_eq!( + Some(OpenRegionReason::RegionMigration), + open_regions[0].reason + ); + assert_eq!( + RegionRequirements::object_storage(), + open_regions[0].requirements + ); + + persistent_context.trigger_reason = RegionMigrationTriggerReason::Failover; + let mut ctx = env.context_factory().new_context(persistent_context); + let instruction = state.build_open_region_instruction(&mut ctx).await.unwrap(); + let open_regions = instruction.into_open_regions().unwrap(); + assert_eq!( + Some(OpenRegionReason::RegionFailover), + open_regions[0].reason + ); + assert_eq!( + RegionRequirements::object_storage(), + open_regions[0].requirements + ); + } + #[tokio::test] async fn test_datanode_is_unreachable() { let state = OpenCandidateRegion; diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index ef4d802cfc..fa9ef804cc 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -620,6 +620,7 @@ mod test { options: physical_region_option, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; engine .handle_request(physical_region_id, RegionRequest::Open(open_request)) @@ -644,6 +645,7 @@ mod test { options: HashMap::new(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; engine .handle_request( @@ -721,6 +723,7 @@ mod test { options: physical_region_option, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; // Opening an already opened region should succeed. // Since the region is already open, no metadata recovery operations will be performed. @@ -749,6 +752,7 @@ mod test { options: physical_region_option, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }; let err = metric_engine .handle_request(physical_region_id, RegionRequest::Open(open_request)) @@ -854,6 +858,7 @@ mod test { options: options.clone(), skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }, ) }) diff --git a/src/metric-engine/src/engine/open.rs b/src/metric-engine/src/engine/open.rs index 59b1cfd928..8fcdfcd821 100644 --- a/src/metric-engine/src/engine/open.rs +++ b/src/metric-engine/src/engine/open.rs @@ -222,6 +222,7 @@ impl MetricEngineInner { entry_id: checkpoint.metadata_entry_id.unwrap_or_default(), metadata_entry_id: None, }), + requirements: request.requirements, }; let mut data_region_options = request.options; @@ -239,6 +240,7 @@ impl MetricEngineInner { entry_id: checkpoint.entry_id, metadata_entry_id: None, }), + requirements: request.requirements, }; (open_metadata_region_request, open_data_region_request) diff --git a/src/metric-engine/src/engine/sync/region.rs b/src/metric-engine/src/engine/sync/region.rs index cbe6515a19..d1f92bef64 100644 --- a/src/metric-engine/src/engine/sync/region.rs +++ b/src/metric-engine/src/engine/sync/region.rs @@ -321,6 +321,7 @@ mod tests { options: physical_region_option, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index ec55a01903..8d4a822b6b 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -144,6 +144,7 @@ impl TestEnv { options: physical_region_option, skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 99e3439879..ea281f2c32 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -8,6 +8,7 @@ license.workspace = true default = [] test = ["common-test-util", "rstest", "rstest_reuse", "rskafka"] testing = ["test"] +test-shared-fs-region-migration = [] enterprise = [] vector_index = ["dep:roaring", "index/vector_index"] diff --git a/src/mito2/src/engine/alter_test.rs b/src/mito2/src/engine/alter_test.rs index a7798a7678..b43f057ea6 100644 --- a/src/mito2/src/engine/alter_test.rs +++ b/src/mito2/src/engine/alter_test.rs @@ -277,6 +277,7 @@ async fn test_alter_region_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -481,6 +482,7 @@ async fn test_put_after_alter_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -844,6 +846,7 @@ async fn test_alter_column_fulltext_options_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -979,6 +982,7 @@ async fn test_alter_column_set_inverted_index_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1248,6 +1252,7 @@ async fn test_alter_region_sst_format_with_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1366,6 +1371,7 @@ async fn test_alter_region_sst_format_without_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1492,6 +1498,7 @@ async fn test_alter_region_sst_format_flat_to_pk_with_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1610,6 +1617,7 @@ async fn test_alter_region_sst_format_flat_to_pk_without_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1725,6 +1733,7 @@ async fn test_alter_region_append_mode_with_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1843,6 +1852,7 @@ async fn test_alter_region_append_mode_without_flush() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/append_mode_test.rs b/src/mito2/src/engine/append_mode_test.rs index de58e04e46..188e28ccf5 100644 --- a/src/mito2/src/engine/append_mode_test.rs +++ b/src/mito2/src/engine/append_mode_test.rs @@ -348,6 +348,7 @@ async fn test_alter_append_mode_clears_merge_mode_with_format(flat_format: bool) options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/basic_test.rs b/src/mito2/src/engine/basic_test.rs index e1e462f692..0cc122573e 100644 --- a/src/mito2/src/engine/basic_test.rs +++ b/src/mito2/src/engine/basic_test.rs @@ -196,6 +196,7 @@ async fn test_region_replay_with_format(factory: Option, flat_f options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/batch_catchup_test.rs b/src/mito2/src/engine/batch_catchup_test.rs index dc0b552adc..a3808b1999 100644 --- a/src/mito2/src/engine/batch_catchup_test.rs +++ b/src/mito2/src/engine/batch_catchup_test.rs @@ -160,6 +160,7 @@ async fn test_batch_catchup_with_format(factory: Option, flat_f skip_wal_replay: true, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }, ) }) diff --git a/src/mito2/src/engine/batch_open_test.rs b/src/mito2/src/engine/batch_open_test.rs index 6b16b3c120..2522cf2f84 100644 --- a/src/mito2/src/engine/batch_open_test.rs +++ b/src/mito2/src/engine/batch_open_test.rs @@ -136,6 +136,7 @@ async fn test_batch_open_with_format(factory: Option, flat_form skip_wal_replay: false, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }, ) }) @@ -149,6 +150,7 @@ async fn test_batch_open_with_format(factory: Option, flat_form skip_wal_replay: false, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }, )); @@ -221,6 +223,7 @@ async fn test_batch_open_err_with_format(factory: Option, flat_ skip_wal_replay: false, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }, ) }) diff --git a/src/mito2/src/engine/bump_committed_sequence_test.rs b/src/mito2/src/engine/bump_committed_sequence_test.rs index 12db0044c5..23a5af8865 100644 --- a/src/mito2/src/engine/bump_committed_sequence_test.rs +++ b/src/mito2/src/engine/bump_committed_sequence_test.rs @@ -112,6 +112,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -151,6 +152,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/catchup_test.rs b/src/mito2/src/engine/catchup_test.rs index e10e91b51b..b79a2b0625 100644 --- a/src/mito2/src/engine/catchup_test.rs +++ b/src/mito2/src/engine/catchup_test.rs @@ -97,6 +97,7 @@ async fn test_catchup_with_last_entry_id(factory: Option) { options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -218,6 +219,7 @@ async fn test_catchup_with_incorrect_last_entry_id(factory: Option) { options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -423,6 +426,7 @@ async fn test_catchup_with_manifest_update(factory: Option) { options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -527,6 +531,7 @@ async fn open_region( skip_wal_replay, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -622,6 +627,7 @@ async fn test_local_catchup(factory: Option) { skip_wal_replay: true, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/compaction_test.rs b/src/mito2/src/engine/compaction_test.rs index f76e9f8bf9..fd0982b7e5 100644 --- a/src/mito2/src/engine/compaction_test.rs +++ b/src/mito2/src/engine/compaction_test.rs @@ -1023,6 +1023,7 @@ async fn test_change_region_compaction_window_with_format(flat_format: bool) { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -1125,6 +1126,7 @@ async fn test_open_overwrite_compaction_window_with_format(flat_format: bool) { options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs index 28ad1de71e..11279954a9 100644 --- a/src/mito2/src/engine/open_test.rs +++ b/src/mito2/src/engine/open_test.rs @@ -64,6 +64,7 @@ async fn test_engine_open_empty_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -110,6 +111,7 @@ async fn test_engine_open_existing_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -237,6 +239,7 @@ async fn test_engine_region_open_with_options_with_format(flat_format: bool) { options: HashMap::from([("ttl".to_string(), "4d".to_string())]), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -297,6 +300,7 @@ async fn test_engine_region_open_with_custom_store_with_format(flat_format: bool options: HashMap::from([("storage".to_string(), "Gcs".to_string())]), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -392,6 +396,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) { options: Default::default(), skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -431,6 +436,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) { options: Default::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -484,6 +490,7 @@ async fn test_open_region_wait_for_opening_region_ok_with_format(flat_format: bo options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -535,6 +542,7 @@ async fn test_open_region_wait_for_opening_region_err_with_format(flat_format: b options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -691,6 +699,7 @@ async fn test_open_backfills_partition_expr_with_fetcher() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -725,6 +734,7 @@ async fn test_open_backfills_partition_expr_with_fetcher() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -766,6 +776,7 @@ async fn test_open_keeps_none_without_fetcher() { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/parallel_test.rs b/src/mito2/src/engine/parallel_test.rs index b88a60739b..5a1354ec15 100644 --- a/src/mito2/src/engine/parallel_test.rs +++ b/src/mito2/src/engine/parallel_test.rs @@ -52,6 +52,7 @@ async fn scan_in_parallel( skip_wal_replay: false, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/skip_wal_test.rs b/src/mito2/src/engine/skip_wal_test.rs index 97f159b8ac..3b6cf89f07 100644 --- a/src/mito2/src/engine/skip_wal_test.rs +++ b/src/mito2/src/engine/skip_wal_test.rs @@ -87,6 +87,7 @@ async fn test_close_region_skip_wal(insert: bool) { options: request.options.clone(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -154,6 +155,7 @@ async fn test_close_follower_region_skip_wal() { options: request.options.clone(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -271,6 +273,7 @@ async fn test_close_region_after_truncate_skip_wal() { options: request.options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/sync_test.rs b/src/mito2/src/engine/sync_test.rs index 17d73b1848..657ee868ce 100644 --- a/src/mito2/src/engine/sync_test.rs +++ b/src/mito2/src/engine/sync_test.rs @@ -127,6 +127,7 @@ async fn test_sync_after_flush_region_with_format(flat_format: bool) { // Ensure the region is not replayed from the WAL. skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -239,6 +240,7 @@ async fn test_sync_after_alter_region_with_format(flat_format: bool) { // Ensure the region is not replayed from the WAL. skip_wal_replay: true, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/engine/truncate_test.rs b/src/mito2/src/engine/truncate_test.rs index 8c3fdad75d..8c6dd023f0 100644 --- a/src/mito2/src/engine/truncate_test.rs +++ b/src/mito2/src/engine/truncate_test.rs @@ -323,6 +323,7 @@ async fn test_engine_truncate_reopen_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await @@ -447,6 +448,7 @@ async fn test_engine_truncate_during_flush_with_format(flat_format: bool) { options: HashMap::default(), skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index 3571f7c0c4..2278b61669 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -916,6 +916,20 @@ pub enum Error { source: Arc, }, + #[snafu(display( + "Region {} does not satisfy open requirement '{}': {}", + region_id, + requirement, + reason + ))] + OpenRegionRequirement { + region_id: RegionId, + requirement: &'static str, + reason: &'static str, + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Failed to parse job id"))] ParseJobId { #[snafu(implicit)] @@ -1376,6 +1390,7 @@ impl ErrorExt for Error { PrimaryKeyLengthMismatch { .. } => StatusCode::InvalidArguments, InvalidSender { .. } => StatusCode::InvalidArguments, InvalidSchedulerState { .. } => StatusCode::InvalidArguments, + OpenRegionRequirement { .. } => StatusCode::InvalidArguments, DeleteSsts { .. } | DeleteIndex { .. } | DeleteIndexes { .. } => { StatusCode::StorageUnavailable } diff --git a/src/mito2/src/region/opener.rs b/src/mito2/src/region/opener.rs index 3142a87c38..412172aead 100644 --- a/src/mito2/src/region/opener.rs +++ b/src/mito2/src/region/opener.rs @@ -27,8 +27,9 @@ use futures::future::BoxFuture; use log_store::kafka::log_store::KafkaLogStore; use log_store::noop::log_store::NoopLogStore; use log_store::raft_engine::log_store::RaftEngineLogStore; +use object_store::ObjectStore; use object_store::manager::ObjectStoreManagerRef; -use object_store::util::normalize_dir; +use object_store::util::{is_object_storage, normalize_dir}; use snafu::{OptionExt, ResultExt, ensure}; use store_api::logstore::LogStore; use store_api::logstore::provider::Provider; @@ -36,7 +37,7 @@ use store_api::metadata::{ ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef, }; use store_api::region_engine::RegionRole; -use store_api::region_request::PathType; +use store_api::region_request::{PathType, RegionRequirements}; use store_api::storage::{ColumnId, RegionId}; use tokio::sync::Semaphore; @@ -46,8 +47,8 @@ use crate::cache::file_cache::{FileCache, FileType, IndexKey}; use crate::config::MitoConfig; use crate::error; use crate::error::{ - EmptyRegionDirSnafu, InvalidMetadataSnafu, ObjectStoreNotFoundSnafu, RegionCorruptedSnafu, - Result, StaleLogEntrySnafu, + EmptyRegionDirSnafu, InvalidMetadataSnafu, InvalidRegionOptionsSnafu, ObjectStoreNotFoundSnafu, + RegionCorruptedSnafu, Result, StaleLogEntrySnafu, }; use crate::manifest::action::RegionManifest; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; @@ -206,6 +207,29 @@ impl RegionOpener { Ok(self) } + /// Ensures the current region open request satisfies its requirements. + pub(crate) fn ensure_open_requirements(&self, requirements: RegionRequirements) -> Result<()> { + if !requirements.object_storage { + return Ok(()); + } + + let options = self.options.as_ref().context(InvalidRegionOptionsSnafu { + reason: "missing region options before requirement check".to_string(), + })?; + let object_store = get_object_store(&options.storage, &self.object_store_manager)?; + + ensure!( + supports_open_region_object_storage_requirement(&object_store), + error::OpenRegionRequirementSnafu { + region_id: self.region_id, + requirement: "object storage", + reason: "region data must be accessible from another datanode", + } + ); + + Ok(()) + } + /// Sets the cache manager for the region. pub(crate) fn cache(mut self, cache_manager: Option) -> Self { self.cache_manager = cache_manager; @@ -597,6 +621,21 @@ impl RegionOpener { } } +#[cfg(not(feature = "test-shared-fs-region-migration"))] +fn supports_open_region_object_storage_requirement(object_store: &ObjectStore) -> bool { + is_object_storage(object_store) +} + +#[cfg(feature = "test-shared-fs-region-migration")] +fn supports_open_region_object_storage_requirement(object_store: &ObjectStore) -> bool { + // Integration tests can configure multiple datanodes to share the same + // temporary home dir. That makes file storage accessible to all test + // datanodes, but production file storage still does not satisfy this + // requirement. + is_object_storage(object_store) + || object_store.info().scheme() == object_store::services::FS_SCHEME +} + /// Creates a version builder from a region manifest. pub(crate) fn version_builder_from_manifest( manifest: &RegionManifest, @@ -1172,14 +1211,17 @@ mod tests { use datatypes::arrow::array::{ArrayRef, BinaryArray, Int64Array}; use datatypes::arrow::record_batch::RecordBatch; use object_store::ObjectStore; - use object_store::services::{Fs, Memory}; + use object_store::services::{Fs, Memory, S3}; use parquet::arrow::ArrowWriter; use parquet::file::metadata::KeyValue; use parquet::file::properties::WriterProperties; use store_api::region_request::PathType; use store_api::storage::{FileId, RegionId}; - use super::{preload_parquet_meta_cache_for_files, sanitize_region_options}; + use super::{ + preload_parquet_meta_cache_for_files, sanitize_region_options, + supports_open_region_object_storage_requirement, + }; use crate::cache::CacheManager; use crate::cache::file_cache::{FileType, IndexKey}; use crate::manifest::action::{RegionManifest, RemovedFilesRecord}; @@ -1207,6 +1249,48 @@ mod tests { } } + fn build_fs_object_store() -> ObjectStore { + ObjectStore::new(Fs::default().root("/tmp")) + .unwrap() + .finish() + } + + #[test] + #[cfg(not(feature = "test-shared-fs-region-migration"))] + fn test_open_requirement_rejects_fs_object_store() { + let object_store = build_fs_object_store(); + + assert!(!supports_open_region_object_storage_requirement( + &object_store + )); + } + + #[test] + #[cfg(feature = "test-shared-fs-region-migration")] + fn test_open_requirement_accepts_shared_fs_object_store_for_tests() { + let object_store = build_fs_object_store(); + + assert!(supports_open_region_object_storage_requirement( + &object_store + )); + } + + #[test] + fn test_open_requirement_accepts_s3_object_store() { + let object_store = ObjectStore::new( + S3::default() + .bucket("test-bucket") + .region("us-east-1") + .disable_ec2_metadata(), + ) + .unwrap() + .finish(); + + assert!(supports_open_region_object_storage_requirement( + &object_store + )); + } + #[test] fn test_sanitize_region_options_options_format_wins() { // Manifest persisted PrimaryKey, but the re-parsed options now request Flat diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index 792b954a6b..65dfc8b9e1 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -1307,6 +1307,7 @@ pub async fn reopen_region( skip_wal_replay: false, path_type: PathType::Bare, checkpoint: None, + requirements: Default::default(), }), ) .await diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index 73bdca775c..a154140d98 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -87,14 +87,11 @@ impl RegionWorkerLoop { else { return; }; - if let Err(err) = self.check_and_cleanup_region(region_id, &request).await { - sender.send(Err(err)); - return; - } info!("Try to open region {}, worker: {}", region_id, self.id); sanitize_open_request_options(&mut request.options); // Open region from specific region dir. + let requirements = request.requirements; let opener = match RegionOpener::new( region_id, &request.table_dir, @@ -112,7 +109,7 @@ impl RegionWorkerLoop { .cache(Some(self.cache_manager.clone())) .wal_entry_reader(wal_entry_receiver.map(|receiver| Box::new(receiver) as _)) .replay_checkpoint(request.checkpoint.map(|checkpoint| checkpoint.entry_id)) - .parse_options(request.options) + .parse_options(request.options.clone()) { Ok(opener) => opener, Err(err) => { @@ -121,6 +118,16 @@ impl RegionWorkerLoop { } }; + if let Err(err) = opener.ensure_open_requirements(requirements) { + sender.send(Err(err)); + return; + } + + if let Err(err) = self.check_and_cleanup_region(region_id, &request).await { + sender.send(Err(err)); + return; + } + let now = Instant::now(); let regions = self.regions.clone(); let wal = self.wal.clone(); diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs index 849f91b729..92f0bd7299 100644 --- a/src/object-store/src/util.rs +++ b/src/object-store/src/util.rs @@ -22,11 +22,17 @@ use opendal::layers::{ LoggingInterceptor, LoggingLayer, RetryEvent, RetryInterceptor, RetryLayer, TracingLayer, }; use opendal::raw::{AccessorInfo, HttpClient, Operation}; +use opendal::services::FS_SCHEME; use snafu::ResultExt; use crate::config::HttpClientConfig; use crate::{ObjectStore, error}; +/// Returns true if the object store is not backed by local filesystem. +pub fn is_object_storage(object_store: &ObjectStore) -> bool { + object_store.info().scheme() != FS_SCHEME +} + /// Join two paths and normalize the output dir. /// /// The output dir is always ends with `/`. e.g. @@ -249,7 +255,11 @@ impl RetryInterceptor for PrintDetailedError { #[cfg(test)] mod tests { + use opendal::services::Fs; + use super::*; + use crate::ObjectStore; + use crate::util::is_object_storage; #[test] fn test_normalize_dir() { @@ -289,4 +299,14 @@ mod tests { assert_eq!("/abc", join_path("//", "/abc")); assert_eq!("abc/def", join_path("abc/", "//def")); } + + #[test] + fn test_fs_is_not_object_storage() { + let object_store = ObjectStore::new(Fs::default().root("/tmp")) + .unwrap() + .finish(); + + assert_eq!(FS_SCHEME, object_store.info().scheme()); + assert!(!is_object_storage(&object_store)); + } } diff --git a/src/operator/src/statement/copy_table_from.rs b/src/operator/src/statement/copy_table_from.rs index 6f58603247..cae2835242 100644 --- a/src/operator/src/statement/copy_table_from.rs +++ b/src/operator/src/statement/copy_table_from.rs @@ -15,11 +15,15 @@ use std::collections::HashMap; use std::future::Future; use std::path::Path; +use std::pin::Pin; use std::sync::Arc; +use std::task::{Context, Poll}; use client::{Output, OutputData, OutputMeta}; use common_base::readable_size::ReadableSize; -use common_datasource::file_format::csv::CsvFormat; +use common_datasource::file_format::csv::{ + CsvFormat, is_skippable_arrow_error, tolerant_csv_stream, +}; use common_datasource::file_format::json::JsonFormat; use common_datasource::file_format::orc::{ReaderAdapter, infer_orc_schema, new_orc_stream_reader}; use common_datasource::file_format::{FileFormat, Format, file_to_stream}; @@ -33,10 +37,13 @@ use common_telemetry::{debug, tracing}; use datafusion::datasource::physical_plan::{CsvSource, FileSource, JsonSource}; use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder; use datafusion::parquet::arrow::arrow_reader::ArrowReaderMetadata; +use datafusion_common::DataFusionError; +use datafusion_common::arrow::error::ArrowError; use datafusion_common::config::CsvOptions; use datafusion_expr::Expr; use datatypes::arrow::compute::can_cast_types; use datatypes::arrow::datatypes::{DataType as ArrowDataType, Schema, SchemaRef}; +use datatypes::arrow::record_batch::RecordBatch; use datatypes::vectors::Helper; use futures_util::StreamExt; use object_store::{Entry, EntryMode, ObjectStore}; @@ -221,23 +228,42 @@ impl StatementExecutor { let csv_source = CsvSource::new(schema.clone()) .with_csv_options(options) .with_batch_size(DEFAULT_BATCH_SIZE); - let stream = file_to_stream( - object_store, - path, - csv_source, - Some(projection), - format.compression_type, - ) - .await - .context(error::BuildFileStreamSnafu)?; + let stream = if format.skip_bad_records { + let reader_schema = + csv_reader_schema_for_skip_bad_records(schema, &compat_schema); + tolerant_csv_stream( + object_store, + path, + Arc::new(reader_schema), + projection.clone(), + format, + ) + .await + .context(error::BuildFileStreamSnafu)? + } else { + file_to_stream( + object_store, + path, + csv_source, + Some(projection), + format.compression_type, + ) + .await + .context(error::BuildFileStreamSnafu)? + }; - Ok(Box::pin( + let stream = Box::pin( // The projection is already applied in the CSV reader when we created the stream, // so we pass None here to avoid double projection which would cause schema mismatch errors. RecordBatchStreamTypeAdapter::new(output_schema, stream, None) .with_filter(filters) .context(error::PhysicalExprSnafu)?, - )) + ); + if format.skip_bad_records { + Ok(Box::pin(SkipBadRecordsStream::new(stream, path))) + } else { + Ok(stream) + } } FileMetadata::Json { path, @@ -469,6 +495,58 @@ fn gen_insert_output(rows_inserted: usize, insert_cost: usize) -> Output { ) } +struct SkipBadRecordsStream { + inner: DfSendableRecordBatchStream, + path: String, +} + +impl SkipBadRecordsStream { + fn new(inner: DfSendableRecordBatchStream, path: impl Into) -> Self { + Self { + inner, + path: path.into(), + } + } +} + +impl datafusion::physical_plan::RecordBatchStream for SkipBadRecordsStream { + fn schema(&self) -> SchemaRef { + self.inner.schema() + } +} + +impl futures::Stream for SkipBadRecordsStream { + type Item = datafusion_common::Result; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.get_mut(); + loop { + match this.inner.as_mut().poll_next(cx) { + Poll::Ready(Some(Err(error))) if is_skippable_record_error(&error) => { + common_telemetry::warn!( + "Skipping bad record while copying from {}: {}", + this.path, + error + ); + continue; + } + other => return other, + } + } + } +} + +fn is_skippable_record_error(error: &DataFusionError) -> bool { + match error { + DataFusionError::ArrowError(error, _) => is_skippable_arrow_error(error), + DataFusionError::External(error) => error + .downcast_ref::() + .is_some_and(is_skippable_arrow_error), + DataFusionError::Context(_, error) => is_skippable_record_error(error), + _ => false, + } +} + /// Executes all pending inserts all at once, drain pending requests and reset pending bytes. async fn batch_insert( pending: &mut Vec>>, @@ -498,6 +576,59 @@ fn can_cast_types_for_greptime(from: &ArrowDataType, to: &ArrowDataType) -> bool can_cast_types(from, to) } +fn csv_reader_schema_for_skip_bad_records(file: &SchemaRef, compat: &SchemaRef) -> Schema { + let fields = file + .fields() + .iter() + .enumerate() + .map(|(idx, file_field)| { + let compat_field = compat + .fields() + .find(file_field.name()) + .map(|(_, field)| field); + + match compat_field { + Some(compat_field) if can_csv_reader_parse_type(compat_field.data_type()) => { + compat_field.clone() + } + _ => file.fields()[idx].clone(), + } + }) + .collect::>(); + + Schema::new_with_metadata(fields, file.metadata().clone()) +} + +fn can_csv_reader_parse_type(data_type: &ArrowDataType) -> bool { + match data_type { + ArrowDataType::Boolean + | ArrowDataType::Decimal32(_, _) + | ArrowDataType::Decimal64(_, _) + | ArrowDataType::Decimal128(_, _) + | ArrowDataType::Decimal256(_, _) + | ArrowDataType::Int8 + | ArrowDataType::Int16 + | ArrowDataType::Int32 + | ArrowDataType::Int64 + | ArrowDataType::UInt8 + | ArrowDataType::UInt16 + | ArrowDataType::UInt32 + | ArrowDataType::UInt64 + | ArrowDataType::Float32 + | ArrowDataType::Float64 + | ArrowDataType::Date32 + | ArrowDataType::Date64 + | ArrowDataType::Time32(_) + | ArrowDataType::Time64(_) + | ArrowDataType::Timestamp(_, _) + | ArrowDataType::Null + | ArrowDataType::Utf8 + | ArrowDataType::Utf8View => true, + ArrowDataType::Dictionary(_, value_type) => value_type.as_ref() == &ArrowDataType::Utf8, + _ => false, + } +} + fn ensure_schema_compatible(from: &SchemaRef, to: &SchemaRef) -> Result<()> { let not_match = from .fields @@ -780,4 +911,31 @@ mod tests { assert_eq!(test.0.project(&fp).unwrap(), test.1.project(&tp).unwrap()); } } + + #[test] + fn test_csv_reader_schema_for_skip_bad_records() { + let file_schema = make_test_schema(&[ + Field::new("id", DataType::Utf8, true), + Field::new("jsons", DataType::Utf8, true), + Field::new("ts", DataType::Utf8, true), + ]); + let compat_schema = make_test_schema(&[ + Field::new("id", DataType::UInt32, true), + Field::new("jsons", DataType::Binary, true), + Field::new( + "ts", + DataType::Timestamp(datatypes::arrow::datatypes::TimeUnit::Millisecond, None), + true, + ), + ]); + + let reader_schema = csv_reader_schema_for_skip_bad_records(&file_schema, &compat_schema); + + assert_eq!(reader_schema.field(0).data_type(), &DataType::UInt32); + assert_eq!(reader_schema.field(1).data_type(), &DataType::Utf8); + assert_eq!( + reader_schema.field(2).data_type(), + compat_schema.field(2).data_type() + ); + } } diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs index 83a8e53225..e088c89c6b 100644 --- a/src/pipeline/benches/processor.rs +++ b/src/pipeline/benches/processor.rs @@ -233,6 +233,36 @@ transform: parse(&Content::Yaml(pipeline_yaml)).unwrap() } +fn prepare_vrl_pipeline() -> Pipeline { + let pipeline_yaml = r#" +--- +description: Minimal VRL processor benchmark + +processors: + - vrl: + source: | + .service_alias = .service + .host_alias = .host + del(.unused) + .processed = true + . + +transform: + - field: service + type: string + - field: host + type: string + - field: service_alias + type: string + - field: host_alias + type: string + - field: processed + type: boolean +"#; + + parse(&Content::Yaml(pipeline_yaml)).unwrap() +} + fn criterion_benchmark(c: &mut Criterion) { let input_value_str = include_str!("./data.log"); let input_value = Deserializer::from_str(input_value_str) @@ -262,6 +292,41 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); group.finish(); + + let vrl_input_value = (0..128) + .map(|i| { + serde_json::json!({ + "service": "frontend", + "host": format!("host-{i}"), + "unused": "drop-me" + }) + .into() + }) + .collect::>(); + let vrl_pipeline = prepare_vrl_pipeline(); + + let (vrl_pipeline, mut vrl_schema_info, vrl_pipeline_def, vrl_pipeline_param) = + setup_pipeline!(vrl_pipeline); + let vrl_pipeline_ctx = PipelineContext::new( + &vrl_pipeline_def, + &vrl_pipeline_param, + session::context::Channel::Unknown, + ); + + let mut group = c.benchmark_group("vrl processor"); + group.sample_size(50); + group.bench_function("processor mut", |b| { + b.iter(|| { + processor_mut( + black_box(vrl_pipeline.clone()), + black_box(&vrl_pipeline_ctx), + black_box(&mut vrl_schema_info), + black_box(vrl_input_value.clone()), + ) + .unwrap(); + }) + }); + group.finish(); } // Testing the pipeline's performance in converting Json to Rows diff --git a/src/pipeline/src/etl/processor/vrl_processor.rs b/src/pipeline/src/etl/processor/vrl_processor.rs index 20258a0427..ee3452523d 100644 --- a/src/pipeline/src/etl/processor/vrl_processor.rs +++ b/src/pipeline/src/etl/processor/vrl_processor.rs @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cell::RefCell; use std::collections::BTreeMap; use chrono_tz::Tz; +use once_cell::sync::Lazy; use snafu::{OptionExt, ensure}; use vrl::compiler::runtime::Runtime; use vrl::compiler::{Program, TargetValue, compile}; @@ -31,6 +33,12 @@ use crate::etl::processor::yaml_string; pub(crate) const PROCESSOR_VRL: &str = "vrl"; const SOURCE: &str = "source"; +static UTC_TIMEZONE: Lazy = Lazy::new(|| TimeZone::Named(Tz::UTC)); + +thread_local! { + static VRL_RUNTIME: RefCell = RefCell::new(Runtime::default()); +} + #[derive(Debug)] pub struct VrlProcessor { source: String, @@ -74,10 +82,14 @@ impl VrlProcessor { secrets: Secrets::default(), }; - let timezone = TimeZone::Named(Tz::UTC); - let mut runtime = Runtime::default(); - let re = runtime - .resolve(&mut target, &self.program, &timezone) + let re = VRL_RUNTIME + .with(|runtime| { + let mut runtime = runtime.borrow_mut(); + runtime.clear(); + let result = runtime.resolve(&mut target, &self.program, &UTC_TIMEZONE); + runtime.clear(); + result + }) .map_err(|e| { ExecuteVrlSnafu { msg: e.get_expression_error().to_string(), diff --git a/src/servers/src/configurator.rs b/src/servers/src/configurator.rs index e8ba8264bd..7116fe0ce8 100644 --- a/src/servers/src/configurator.rs +++ b/src/servers/src/configurator.rs @@ -14,25 +14,11 @@ use std::sync::Arc; -use axum::Router as HttpRouter; use common_error::ext::BoxedError; use tonic::transport::server::Router as GrpcRouter; use crate::grpc::builder::GrpcServerBuilder; -/// A configurator that customizes or enhances an HTTP router. -#[async_trait::async_trait] -pub trait HttpConfigurator: Send + Sync { - /// Configures the given HTTP router using the provided context. - async fn configure_http( - &self, - route: HttpRouter, - ctx: C, - ) -> std::result::Result; -} - -pub type HttpConfiguratorRef = Arc>; - /// A configurator that customizes or enhances a gRPC router. #[async_trait::async_trait] pub trait GrpcRouterConfigurator: Send + Sync { diff --git a/src/servers/src/grpc.rs b/src/servers/src/grpc.rs index 3adfd24945..50dd0b69c4 100644 --- a/src/servers/src/grpc.rs +++ b/src/servers/src/grpc.rs @@ -24,7 +24,7 @@ pub mod prom_query_gateway; pub mod region_server; use std::any::Any; -use std::net::SocketAddr; +use std::net::{IpAddr, SocketAddr}; use std::time::Duration; use api::v1::health_check_server::{HealthCheck, HealthCheckServer}; @@ -95,14 +95,8 @@ impl GrpcOptions { if self.server_addr.is_empty() { match local_ip_address::local_ip() { Ok(ip) => { - let detected_addr = format!( - "{}:{}", - ip, - self.bind_addr - .split(':') - .nth(1) - .unwrap_or(DEFAULT_GRPC_ADDR_PORT) - ); + let port = port_from_bind_addr(&self.bind_addr); + let detected_addr = format_server_addr(ip, port); info!("Using detected: {} as server address", detected_addr); self.server_addr = detected_addr; } @@ -131,7 +125,18 @@ impl GrpcOptions { } } -const DEFAULT_GRPC_ADDR_PORT: &str = "4001"; +const DEFAULT_GRPC_ADDR_PORT: u16 = 4001; + +fn port_from_bind_addr(bind_addr: &str) -> u16 { + bind_addr + .rsplit_once(':') + .and_then(|(_, port)| port.parse().ok()) + .unwrap_or(DEFAULT_GRPC_ADDR_PORT) +} + +fn format_server_addr(ip: IpAddr, port: u16) -> String { + SocketAddr::new(ip, port).to_string() +} const DEFAULT_INTERNAL_GRPC_ADDR_PORT: &str = "4010"; @@ -415,3 +420,36 @@ impl Server for GrpcServer { self } } + +#[cfg(test)] +mod tests { + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; + + use super::{DEFAULT_GRPC_ADDR_PORT, format_server_addr, port_from_bind_addr}; + + #[test] + fn test_port_from_bind_addr() { + assert_eq!(3002, port_from_bind_addr("127.0.0.1:3002")); + assert_eq!(3002, port_from_bind_addr("[::]:3002")); + assert_eq!( + 3002, + port_from_bind_addr("greptimedb-metasrv.default.svc.cluster.local:3002") + ); + assert_eq!( + DEFAULT_GRPC_ADDR_PORT, + port_from_bind_addr("invalid-bind-addr") + ); + } + + #[test] + fn test_format_server_addr() { + assert_eq!( + "127.0.0.1:3002", + format_server_addr(IpAddr::V4(Ipv4Addr::LOCALHOST), 3002) + ); + assert_eq!( + "[::1]:3002", + format_server_addr(IpAddr::V6(Ipv6Addr::LOCALHOST), 3002) + ); + } +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index e5dc5380d1..6d3ab76ec1 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -27,7 +27,6 @@ use axum::response::{IntoResponse, Response}; use axum::routing::Route; use axum::serve::ListenerExt; use axum::{Router, middleware, routing}; -use common_base::Plugins; use common_base::readable_size::ReadableSize; use common_recordbatch::RecordBatch; use common_telemetry::{error, info}; @@ -52,11 +51,9 @@ use tower_http::trace::TraceLayer; use self::authorize::AuthState; use self::result::table_result::TableResponse; -use crate::configurator::HttpConfiguratorRef; use crate::elasticsearch; use crate::error::{ - AddressBindSnafu, AlreadyStartedSnafu, Error, InternalIoSnafu, InvalidHeaderValueSnafu, - OtherSnafu, Result, + AddressBindSnafu, AlreadyStartedSnafu, Error, InternalIoSnafu, InvalidHeaderValueSnafu, Result, }; use crate::http::influxdb::{influxdb_health, influxdb_ping, influxdb_write_v1, influxdb_write_v2}; use crate::http::otlp::OtlpState; @@ -139,9 +136,6 @@ pub struct HttpServer { user_provider: Option, memory_limiter: ServerMemoryLimiter, - // plugins - plugins: Plugins, - // server configs options: HttpOptions, bind_addr: Option, @@ -516,7 +510,6 @@ pub struct DashboardState { pub struct HttpServerBuilder { options: HttpOptions, - plugins: Plugins, user_provider: Option, router: Router, memory_limiter: ServerMemoryLimiter, @@ -526,7 +519,6 @@ impl HttpServerBuilder { pub fn new(options: HttpOptions) -> Self { Self { options, - plugins: Plugins::default(), user_provider: None, router: Router::new(), memory_limiter: ServerMemoryLimiter::default(), @@ -687,10 +679,6 @@ impl HttpServerBuilder { Self { router, ..self } } - pub fn with_plugins(self, plugins: Plugins) -> Self { - Self { plugins, ..self } - } - pub fn with_greptime_config_options(self, opts: String) -> Self { let config_router = HttpServer::route_config(GreptimeOptionsConfigState { greptime_config_options: opts, @@ -748,7 +736,6 @@ impl HttpServerBuilder { options: self.options, user_provider: self.user_provider, shutdown_tx: Mutex::new(None), - plugins: self.plugins, router: StdMutex::new(self.router), bind_addr: None, memory_limiter: self.memory_limiter, @@ -1237,14 +1224,7 @@ impl Server for HttpServer { AlreadyStartedSnafu { server: "HTTP" } ); - let mut app = self.make_app(); - if let Some(configurator) = self.plugins.get::>() { - app = configurator - .configure_http(app, ()) - .await - .context(OtherSnafu)?; - } - let app = self.build(app)?; + let app = self.build(self.make_app())?; let listener = tokio::net::TcpListener::bind(listening) .await .context(AddressBindSnafu { addr: listening })? diff --git a/src/sql/src/parsers/copy_parser.rs b/src/sql/src/parsers/copy_parser.rs index 9a2eddcc78..491912c82e 100644 --- a/src/sql/src/parsers/copy_parser.rs +++ b/src/sql/src/parsers/copy_parser.rs @@ -401,6 +401,28 @@ mod tests { } } + #[test] + fn test_parse_copy_table_from_csv_options() { + let sql = + "COPY my_table FROM '/tmp/test.csv' WITH (FORMAT = 'CSV', SKIP_BAD_RECORDS = 'false')"; + let mut result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, result.len()); + + let statement = result.remove(0); + assert_matches!(statement, Statement::Copy { .. }); + match statement { + Statement::Copy(crate::statements::copy::Copy::CopyTable(CopyTable::From( + copy_table, + ))) => { + assert_eq!(copy_table.with.get("format"), Some("CSV")); + assert_eq!(copy_table.with.get("skip_bad_records"), Some("false")); + } + _ => unreachable!(), + } + } + #[test] fn test_parse_copy_table_to() { struct Test<'a> { diff --git a/src/sql/src/util.rs b/src/sql/src/util.rs index bbf5ce3277..f627c43e48 100644 --- a/src/sql/src/util.rs +++ b/src/sql/src/util.rs @@ -27,7 +27,7 @@ use serde::Serialize; use snafu::ensure; use sqlparser::ast::{ Array, Expr, Ident, ObjectName, ObjectNamePart, SetExpr, SqlOption, StructField, TableFactor, - Value, ValueWithSpan, + TableWithJoins, Value, ValueWithSpan, }; use sqlparser_derive::{Visit, VisitMut}; @@ -195,7 +195,7 @@ pub fn extract_tables_from_query(query: &SqlOrTql) -> impl Iterator { - extract_tables_from_set_expr(&query.inner.body, &mut names); + extract_tables_from_sql_query(&query.inner, &mut names); extract_tables_from_hybrid_cte_query(query, &mut names); } SqlOrTql::Tql(tql, _) => extract_tables_from_tql(tql, &mut names), @@ -205,26 +205,34 @@ pub fn extract_tables_from_query(query: &SqlOrTql) -> impl Iterator) { - let mut tql_names = HashSet::new(); - let mut cte_names: HashSet = HashSet::new(); if let Some(hybrid_cte) = &query.hybrid_cte { + let mut cte_names: HashSet = hybrid_cte + .cte_tables + .iter() + .map(|cte| ParserContext::canonicalize_identifier(cte.name.clone()).value) + .collect(); + remove_cte_names(sql_names, &cte_names); + + cte_names.clear(); for cte in &hybrid_cte.cte_tables { - cte_names.insert(ParserContext::canonicalize_identifier(cte.name.clone()).value); - if let CteContent::Tql(tql) = &cte.content { - extract_tables_from_tql(tql, &mut tql_names); + let cte_name = ParserContext::canonicalize_identifier(cte.name.clone()).value; + let mut cte_query_names = HashSet::new(); + match &cte.content { + CteContent::Sql(cte_query) => { + extract_tables_from_sql_query(cte_query, &mut cte_query_names) + } + CteContent::Tql(tql) => extract_tables_from_tql(tql, &mut cte_query_names), + } + if hybrid_cte.recursive { + cte_names.insert(cte_name.clone()); + } + remove_cte_names(&mut cte_query_names, &cte_names); + sql_names.extend(cte_query_names); + if !hybrid_cte.recursive { + cte_names.insert(cte_name); } } } - - if let Some(with) = &query.inner.with { - for cte in &with.cte_tables { - cte_names.insert(ParserContext::canonicalize_identifier(cte.alias.name.clone()).value); - } - } - - remove_cte_names(sql_names, &cte_names); - - sql_names.extend(tql_names); } fn remove_cte_names(names: &mut HashSet, cte_names: &HashSet) { @@ -339,6 +347,33 @@ pub fn location_to_index(sql: &str, location: &sqlparser::tokenizer::Location) - index - 1 } +/// Helper function for [extract_tables_from_query]. +/// +/// Handle [sqlparser::ast::Query]. +fn extract_tables_from_sql_query(query: &sqlparser::ast::Query, names: &mut HashSet) { + let mut cte_names = HashSet::new(); + if let Some(with) = &query.with { + for cte in &with.cte_tables { + let cte_name = ParserContext::canonicalize_identifier(cte.alias.name.clone()).value; + let mut cte_query_names = HashSet::new(); + extract_tables_from_sql_query(&cte.query, &mut cte_query_names); + if with.recursive { + cte_names.insert(cte_name.clone()); + } + remove_cte_names(&mut cte_query_names, &cte_names); + names.extend(cte_query_names); + if !with.recursive { + cte_names.insert(cte_name); + } + } + } + + let mut body_names = HashSet::new(); + extract_tables_from_set_expr(&query.body, &mut body_names); + remove_cte_names(&mut body_names, &cte_names); + names.extend(body_names); +} + /// Helper function for [extract_tables_from_query]. /// /// Handle [SetExpr]. @@ -346,14 +381,11 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet { for from in &select.from { - table_factor_to_object_name(&from.relation, names); - for join in &from.joins { - table_factor_to_object_name(&join.relation, names); - } + extract_tables_from_table_with_joins(from, names); } } SetExpr::Query(query) => { - extract_tables_from_set_expr(&query.body, names); + extract_tables_from_sql_query(query, names); } SetExpr::SetOperation { left, right, .. } => { extract_tables_from_set_expr(left, names); @@ -363,12 +395,47 @@ fn extract_tables_from_set_expr(set_expr: &SetExpr, names: &mut HashSet, +) { + table_factor_to_object_name(&table_with_joins.relation, names); + for join in &table_with_joins.joins { + table_factor_to_object_name(&join.relation, names); + } +} + /// Helper function for [extract_tables_from_query]. /// /// Handle [TableFactor]. fn table_factor_to_object_name(table_factor: &TableFactor, names: &mut HashSet) { - if let TableFactor::Table { name, .. } = table_factor { - names.insert(name.to_owned()); + match table_factor { + TableFactor::Table { name, .. } => { + names.insert(name.to_owned()); + } + TableFactor::Derived { subquery, .. } => { + extract_tables_from_sql_query(subquery, names); + } + TableFactor::NestedJoin { + table_with_joins, .. + } => { + extract_tables_from_table_with_joins(table_with_joins, names); + } + TableFactor::Pivot { table, .. } + | TableFactor::Unpivot { table, .. } + | TableFactor::MatchRecognize { table, .. } => { + table_factor_to_object_name(table, names); + } + TableFactor::TableFunction { .. } + | TableFactor::Function { .. } + | TableFactor::UNNEST { .. } + | TableFactor::JsonTable { .. } + | TableFactor::OpenJsonTable { .. } + | TableFactor::XmlTable { .. } + | TableFactor::SemanticView { .. } => {} } } @@ -458,6 +525,91 @@ TQL EVAL (now() - '15s'::interval, now(), '5s') count_values("status_code", {__n } } + #[test] + fn test_extract_tables_from_sql_query_with_derived_join() { + let sql = r#" +CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink +EVAL INTERVAL '1m' AS +SELECT a.symbol, b.mark_price +FROM ( + SELECT inst_id AS symbol, max(ts) AS mark_iv_ts + FROM flow_batch_join_opt_summary + GROUP BY inst_id +) a +LEFT JOIN ( + SELECT symbol, max(mark_price) AS mark_price + FROM flow_batch_join_market_v5 + WHERE "type" = 'OPTION_MARK' + GROUP BY symbol +) b ON a.symbol = b.symbol; +"#; + let mut stmts = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + let Statement::CreateFlow(create_flow) = stmts.pop().unwrap() else { + unreachable!() + }; + + let mut tables = extract_tables_from_query(&create_flow.query) + .map(|table| format_raw_object_name(&table)) + .collect_vec(); + tables.sort(); + assert_eq!( + vec![ + "flow_batch_join_market_v5".to_string(), + "flow_batch_join_opt_summary".to_string(), + ], + tables + ); + } + + #[test] + fn test_extract_tables_from_sql_query_with_cte_scopes() { + let testcases = vec![ + ( + r#" +WITH source AS ( + SELECT * FROM source +) +SELECT * FROM source; +"#, + vec!["source".to_string()], + ), + ( + r#" +WITH first_cte AS ( + SELECT * FROM physical_source +), second_cte AS ( + SELECT * FROM first_cte +) +SELECT * FROM second_cte; +"#, + vec!["physical_source".to_string()], + ), + ]; + + for (sql, expected_tables) in testcases { + let mut stmts = ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + let Statement::Query(query) = stmts.pop().unwrap() else { + unreachable!() + }; + + let mut tables = HashSet::new(); + extract_tables_from_sql_query(&query.inner, &mut tables); + let mut tables = tables + .into_iter() + .map(|table| format_raw_object_name(&table)) + .collect_vec(); + tables.sort(); + assert_eq!(expected_tables, tables); + } + } + #[test] fn test_extract_tables_from_tql_query_with_schema_matcher() { let sql = r#" diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index 951abca1be..abf96736ec 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -315,6 +315,7 @@ fn make_region_open(open: OpenRequest) -> Result> options: open.options, skip_wal_replay: false, checkpoint: None, + requirements: Default::default(), }), )]) } @@ -566,6 +567,28 @@ pub struct RegionDropRequest { pub partial_drop: bool, } +/// Requirements for a region request. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default)] +pub struct RegionRequirements { + /// Whether the region data must be backed by object storage. + pub object_storage: bool, +} + +impl RegionRequirements { + /// Returns empty requirements. + pub fn empty() -> Self { + Self::default() + } + + /// Returns requirements for object storage. + pub fn object_storage() -> Self { + Self { + object_storage: true, + } + } +} + /// Open region request. #[derive(Debug, Clone)] pub struct RegionOpenRequest { @@ -581,6 +604,8 @@ pub struct RegionOpenRequest { pub skip_wal_replay: bool, /// Replay checkpoint. pub checkpoint: Option, + /// Requirements for opening the region. + pub requirements: RegionRequirements, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/tests-integration/Cargo.toml b/tests-integration/Cargo.toml index 43850e4ed3..f51badc8d6 100644 --- a/tests-integration/Cargo.toml +++ b/tests-integration/Cargo.toml @@ -63,7 +63,7 @@ log-query = { workspace = true } loki-proto.workspace = true meta-client.workspace = true meta-srv = { workspace = true, features = ["mock"] } -mito2.workspace = true +mito2 = { workspace = true, features = ["test-shared-fs-region-migration"] } object-store.workspace = true operator = { workspace = true, features = ["testing"] } plugins.workspace = true diff --git a/tests/cases/standalone/common/copy/copy_from_fs_csv.result b/tests/cases/standalone/common/copy/copy_from_fs_csv.result index 2e3373af15..ad81cf1f77 100644 --- a/tests/cases/standalone/common/copy/copy_from_fs_csv.result +++ b/tests/cases/standalone/common/copy/copy_from_fs_csv.result @@ -183,6 +183,24 @@ select * from csv_null_prefix_import; | final | 2023-11-14T22:13:23 | +-------+---------------------+ +CREATE TABLE csv_skip_bad_records(host_id int, host_name string, reading_value double, ts timestamp time index); + +Affected Rows: 0 + +-- SQLNESS ENV PWD +Copy csv_skip_bad_records FROM '$PWD/tests/data/csv/skip_bad_records.csv' WITH (format='csv', skip_bad_records='true'); + +Affected Rows: 2 + +select * from csv_skip_bad_records order by ts; + ++---------+-----------+---------------+---------------------+ +| host_id | host_name | reading_value | ts | ++---------+-----------+---------------+---------------------+ +| 1 | Alice | 10.5 | 2024-01-01T00:00:00 | +| 2 | Bob | 30.5 | 2024-01-01T00:00:02 | ++---------+-----------+---------------+---------------------+ + drop table demo; Affected Rows: 0 @@ -219,3 +237,7 @@ drop table csv_null_prefix_import; Affected Rows: 0 +drop table csv_skip_bad_records; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/copy/copy_from_fs_csv.sql b/tests/cases/standalone/common/copy/copy_from_fs_csv.sql index 0901048177..cd6cfcbd23 100644 --- a/tests/cases/standalone/common/copy/copy_from_fs_csv.sql +++ b/tests/cases/standalone/common/copy/copy_from_fs_csv.sql @@ -73,6 +73,13 @@ Copy csv_null_prefix_import FROM '${SQLNESS_HOME}/demo/export/csv_null_prefix.cs select * from csv_null_prefix_import; +CREATE TABLE csv_skip_bad_records(host_id int, host_name string, reading_value double, ts timestamp time index); + +-- SQLNESS ENV PWD +Copy csv_skip_bad_records FROM '$PWD/tests/data/csv/skip_bad_records.csv' WITH (format='csv', skip_bad_records='true'); + +select * from csv_skip_bad_records order by ts; + drop table demo; drop table with_filename; @@ -90,3 +97,5 @@ drop table demo_with_less_columns; drop table csv_null_prefix; drop table csv_null_prefix_import; + +drop table csv_skip_bad_records; diff --git a/tests/cases/standalone/common/flow/flow_batch_join_subquery.result b/tests/cases/standalone/common/flow/flow_batch_join_subquery.result new file mode 100644 index 0000000000..0d590cbdbc --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_batch_join_subquery.result @@ -0,0 +1,130 @@ +CREATE DATABASE flow_join_fixture; + +Affected Rows: 1 + +CREATE TABLE flow_join_fixture."left_samples" ( + source_id STRING, + left_value DOUBLE, + event_ts TIMESTAMP, + observed_at TIMESTAMP TIME INDEX +); + +Affected Rows: 0 + +CREATE TABLE flow_join_fixture."right_samples" ( + source_id STRING, + right_value DOUBLE, + sample_kind STRING, + event_ts TIMESTAMP, + observed_at TIMESTAMP TIME INDEX +); + +Affected Rows: 0 + +-- Verify batching flow creation accepts aggregate subqueries joined by LEFT JOIN. +CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink +EVAL INTERVAL '5m' AS +SELECT + l.source_id, + l.measure_name, + l.bucket_time, + l.left_event_ts, + l.left_value, + r.right_event_ts, + r.right_value +FROM ( + SELECT + source_id, + 'sample' AS measure_name, + date_trunc('minute', now()) AS bucket_time, + max(event_ts) AS left_event_ts, + last_value(left_value ORDER BY observed_at) AS left_value + FROM + flow_join_fixture."left_samples" + WHERE + observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes' + AND date_trunc('minute', now()) + GROUP BY + source_id +) l +LEFT JOIN ( + SELECT + source_id, + 'sample' AS measure_name, + date_trunc('minute', now()) AS bucket_time, + max(event_ts) AS right_event_ts, + last_value(right_value ORDER BY observed_at) AS right_value + FROM + flow_join_fixture."right_samples" + WHERE + observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes' + AND date_trunc('minute', now()) + AND sample_kind = 'primary' + GROUP BY + source_id +) r ON l.source_id = r.source_id AND l.bucket_time = r.bucket_time; + +Affected Rows: 0 + +SELECT + source_table_names LIKE '%left_samples%' AS has_left_source, + source_table_names LIKE '%right_samples%' AS has_right_source, + options LIKE '%"flow_type":"batching"%' AS is_batching_flow +FROM + INFORMATION_SCHEMA.FLOWS +WHERE + flow_name = 'flow_batch_join_subquery'; + ++-----------------+------------------+------------------+ +| has_left_source | has_right_source | is_batching_flow | ++-----------------+------------------+------------------+ +| true | true | true | ++-----------------+------------------+------------------+ + +INSERT INTO flow_join_fixture."left_samples" VALUES + ('source-a', 0.12, date_trunc('minute', now()), date_trunc('minute', now())); + +Affected Rows: 1 + +INSERT INTO flow_join_fixture."right_samples" VALUES + ('source-a', 100.5, 'primary', date_trunc('minute', now()), date_trunc('minute', now())); + +Affected Rows: 1 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('flow_batch_join_subquery'); + ++----------------------------------------------+ +| ADMIN FLUSH_FLOW('flow_batch_join_subquery') | ++----------------------------------------------+ +| FLOW_FLUSHED | ++----------------------------------------------+ + +SELECT source_id, measure_name, left_value, right_value FROM flow_batch_join_sink ORDER BY source_id; + ++-----------+--------------+------------+-------------+ +| source_id | measure_name | left_value | right_value | ++-----------+--------------+------------+-------------+ +| source-a | sample | 0.12 | 100.5 | ++-----------+--------------+------------+-------------+ + +DROP FLOW flow_batch_join_subquery; + +Affected Rows: 0 + +DROP TABLE flow_batch_join_sink; + +Affected Rows: 0 + +DROP TABLE flow_join_fixture."left_samples"; + +Affected Rows: 0 + +DROP TABLE flow_join_fixture."right_samples"; + +Affected Rows: 0 + +DROP DATABASE flow_join_fixture; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql b/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql new file mode 100644 index 0000000000..f37aafdf4f --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_batch_join_subquery.sql @@ -0,0 +1,85 @@ +CREATE DATABASE flow_join_fixture; + +CREATE TABLE flow_join_fixture."left_samples" ( + source_id STRING, + left_value DOUBLE, + event_ts TIMESTAMP, + observed_at TIMESTAMP TIME INDEX +); + +CREATE TABLE flow_join_fixture."right_samples" ( + source_id STRING, + right_value DOUBLE, + sample_kind STRING, + event_ts TIMESTAMP, + observed_at TIMESTAMP TIME INDEX +); + +-- Verify batching flow creation accepts aggregate subqueries joined by LEFT JOIN. +CREATE FLOW flow_batch_join_subquery SINK TO flow_batch_join_sink +EVAL INTERVAL '5m' AS +SELECT + l.source_id, + l.measure_name, + l.bucket_time, + l.left_event_ts, + l.left_value, + r.right_event_ts, + r.right_value +FROM ( + SELECT + source_id, + 'sample' AS measure_name, + date_trunc('minute', now()) AS bucket_time, + max(event_ts) AS left_event_ts, + last_value(left_value ORDER BY observed_at) AS left_value + FROM + flow_join_fixture."left_samples" + WHERE + observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes' + AND date_trunc('minute', now()) + GROUP BY + source_id +) l +LEFT JOIN ( + SELECT + source_id, + 'sample' AS measure_name, + date_trunc('minute', now()) AS bucket_time, + max(event_ts) AS right_event_ts, + last_value(right_value ORDER BY observed_at) AS right_value + FROM + flow_join_fixture."right_samples" + WHERE + observed_at BETWEEN date_trunc('minute', now()) - INTERVAL '5 minutes' + AND date_trunc('minute', now()) + AND sample_kind = 'primary' + GROUP BY + source_id +) r ON l.source_id = r.source_id AND l.bucket_time = r.bucket_time; + +SELECT + source_table_names LIKE '%left_samples%' AS has_left_source, + source_table_names LIKE '%right_samples%' AS has_right_source, + options LIKE '%"flow_type":"batching"%' AS is_batching_flow +FROM + INFORMATION_SCHEMA.FLOWS +WHERE + flow_name = 'flow_batch_join_subquery'; + +INSERT INTO flow_join_fixture."left_samples" VALUES + ('source-a', 0.12, date_trunc('minute', now()), date_trunc('minute', now())); + +INSERT INTO flow_join_fixture."right_samples" VALUES + ('source-a', 100.5, 'primary', date_trunc('minute', now()), date_trunc('minute', now())); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('flow_batch_join_subquery'); + +SELECT source_id, measure_name, left_value, right_value FROM flow_batch_join_sink ORDER BY source_id; + +DROP FLOW flow_batch_join_subquery; +DROP TABLE flow_batch_join_sink; +DROP TABLE flow_join_fixture."left_samples"; +DROP TABLE flow_join_fixture."right_samples"; +DROP DATABASE flow_join_fixture; diff --git a/tests/cases/standalone/common/flow/flow_last_non_null.result b/tests/cases/standalone/common/flow/flow_last_non_null.result index 50cb46faa3..0c03c19399 100644 --- a/tests/cases/standalone/common/flow/flow_last_non_null.result +++ b/tests/cases/standalone/common/flow/flow_last_non_null.result @@ -162,6 +162,8 @@ CREATE TABLE approx_rate ( Affected Rows: 0 +-- Without merge_mode=last_non_null, this partial output is rejected at CREATE FLOW time. +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan CREATE FLOW find_approx_rate SINK TO approx_rate AS SELECT (max(byte) - min(byte)) / 30.0 as rate, @@ -172,24 +174,7 @@ from GROUP BY time_window; -Affected Rows: 0 - -INSERT INTO - bytes_log -VALUES - (NULL, '2023-01-01 00:00:01'), - (300, '2023-01-01 00:00:31'); - -Affected Rows: 2 - --- should return error -ADMIN FLUSH_FLOW('find_approx_rate'); - -Error: 1002(Unexpected), Failed to execute admin function flush_flow: Execution error: Internal error: 1003 - -DROP FLOW find_approx_rate; - -Affected Rows: 0 +Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 3 flow output columns and 4 sink table columns. flow output columns: [\"rate\", \"time_window\", \"update_at\"], sink table columns: [\"rate\", \"time_window\", \"update_at\", \"bb\"], extra flow columns not in sink: [], missing sink columns from flow output: [\"bb\"]") in context: Failed to rewrite plan DROP TABLE bytes_log; diff --git a/tests/cases/standalone/common/flow/flow_last_non_null.sql b/tests/cases/standalone/common/flow/flow_last_non_null.sql index 95ebe4aaa6..29c5444f95 100644 --- a/tests/cases/standalone/common/flow/flow_last_non_null.sql +++ b/tests/cases/standalone/common/flow/flow_last_non_null.sql @@ -84,6 +84,8 @@ CREATE TABLE approx_rate ( TIME INDEX(time_window) ); +-- Without merge_mode=last_non_null, this partial output is rejected at CREATE FLOW time. +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan CREATE FLOW find_approx_rate SINK TO approx_rate AS SELECT (max(byte) - min(byte)) / 30.0 as rate, @@ -93,16 +95,5 @@ from bytes_log GROUP BY time_window; - -INSERT INTO - bytes_log -VALUES - (NULL, '2023-01-01 00:00:01'), - (300, '2023-01-01 00:00:31'); - --- should return error -ADMIN FLUSH_FLOW('find_approx_rate'); - -DROP FLOW find_approx_rate; DROP TABLE bytes_log; DROP TABLE approx_rate; diff --git a/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result new file mode 100644 index 0000000000..54fcba2285 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.result @@ -0,0 +1,123 @@ +-- Verify that batching flow rejects CREATE FLOW when the pre-existing sink +-- table schema does not match the flow output (create-time validation, not runtime). +CREATE TABLE source_mm ( + "number" INT, + extra STRING, + ts TIMESTAMP TIME INDEX +); + +Affected Rows: 0 + +-- Pre-create a sink table that is intentionally missing the "extra" column. +-- This case validates batching mode at CREATE FLOW time, before any INSERT/FLUSH. +CREATE TABLE sink_mm ( + "number" INT, + time_window TIMESTAMP TIME INDEX, + cnt BIGINT +); + +Affected Rows: 0 + +-- This CREATE FLOW should fail immediately: the flow outputs (number, extra, time_window, cnt) +-- but sink_mm has only (number, time_window, cnt). +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW mismatch_flow SINK TO sink_mm AS +SELECT + "number", + extra, + date_bin(INTERVAL '1 second', ts) as time_window, + count(*) as cnt +FROM + source_mm +GROUP BY + "number", extra, time_window; + +Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 4 flow output columns and 3 sink table columns. flow output columns: [\"number\", \"extra\", \"time_window\", \"cnt\"], sink table columns: [\"number\", \"time_window\", \"cnt\"], extra flow columns not in sink: [\"extra\"], missing sink columns from flow output: []") in context: Failed to rewrite plan + +DROP TABLE source_mm; + +Affected Rows: 0 + +DROP TABLE sink_mm; + +Affected Rows: 0 + +-- TQL/PromQL flows use the same create-time sink schema validation path. +CREATE TABLE tql_source_mm ( + `value` DOUBLE, + ts TIMESTAMP TIME INDEX, + sensor STRING, + loc STRING, + PRIMARY KEY (sensor, loc) +); + +Affected Rows: 0 + +-- Pre-create a TQL sink table that is intentionally missing the "sensor" tag column. +CREATE TABLE tql_sink_mm ( + `value` DOUBLE, + ts TIMESTAMP TIME INDEX +); + +Affected Rows: 0 + +-- This CREATE FLOW should fail immediately: the TQL output has (value, sensor, ts), +-- but tql_sink_mm has only (value, ts). +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW tql_mismatch_flow +SINK TO tql_sink_mm +EVAL INTERVAL '1m' AS +TQL EVAL (now() - '1m'::interval, now(), '1m') +avg by(sensor) (tql_source_mm) AS value; + +Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 3 flow output columns and 2 sink table columns. flow output columns: [\"value\", \"sensor\", \"ts\"], sink table columns: [\"value\", \"ts\"], extra flow columns not in sink: [\"sensor\"], missing sink columns from flow output: []") in context: Failed to rewrite plan + +DROP TABLE tql_source_mm; + +Affected Rows: 0 + +DROP TABLE tql_sink_mm; + +Affected Rows: 0 + +-- Real merge_mode=last_non_null sink options should enable partial schema validation. +CREATE TABLE lnn_source_mm ( + device STRING, + val DOUBLE, + ts TIMESTAMP TIME INDEX +); + +Affected Rows: 0 + +CREATE TABLE lnn_sink_mm ( + device STRING, + time_window TIMESTAMP TIME INDEX, + cnt BIGINT, + PRIMARY KEY (device) +) WITH('merge_mode'='last_non_null'); + +Affected Rows: 0 + +-- This CREATE FLOW should fail through the last_non_null partial validator: the +-- sink primary key "device" is required but absent from the flow output. +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW lnn_missing_pk_flow +SINK TO lnn_sink_mm AS +SELECT + date_bin(INTERVAL '1 second', ts) as time_window, + count(*) as cnt +FROM + lnn_source_mm +GROUP BY + time_window; + +Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Column(s) [\"device\"] required by sink table are missing from flow output when merge_mode=last_non_null. Flow output schema does not match sink table schema: found 2 flow output columns and 3 sink table columns. flow output columns: [\"time_window\", \"cnt\"], sink table columns: [\"device\", \"time_window\", \"cnt\"], extra flow columns not in sink: [], missing sink columns from flow output: [\"device\"]") in context: Failed to rewrite plan + +DROP TABLE lnn_source_mm; + +Affected Rows: 0 + +DROP TABLE lnn_sink_mm; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql new file mode 100644 index 0000000000..2d00799817 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_sink_schema_mismatch.sql @@ -0,0 +1,89 @@ +-- Verify that batching flow rejects CREATE FLOW when the pre-existing sink +-- table schema does not match the flow output (create-time validation, not runtime). +CREATE TABLE source_mm ( + "number" INT, + extra STRING, + ts TIMESTAMP TIME INDEX +); + +-- Pre-create a sink table that is intentionally missing the "extra" column. +-- This case validates batching mode at CREATE FLOW time, before any INSERT/FLUSH. +CREATE TABLE sink_mm ( + "number" INT, + time_window TIMESTAMP TIME INDEX, + cnt BIGINT +); + +-- This CREATE FLOW should fail immediately: the flow outputs (number, extra, time_window, cnt) +-- but sink_mm has only (number, time_window, cnt). +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW mismatch_flow SINK TO sink_mm AS +SELECT + "number", + extra, + date_bin(INTERVAL '1 second', ts) as time_window, + count(*) as cnt +FROM + source_mm +GROUP BY + "number", extra, time_window; + +DROP TABLE source_mm; +DROP TABLE sink_mm; + +-- TQL/PromQL flows use the same create-time sink schema validation path. +CREATE TABLE tql_source_mm ( + `value` DOUBLE, + ts TIMESTAMP TIME INDEX, + sensor STRING, + loc STRING, + PRIMARY KEY (sensor, loc) +); + +-- Pre-create a TQL sink table that is intentionally missing the "sensor" tag column. +CREATE TABLE tql_sink_mm ( + `value` DOUBLE, + ts TIMESTAMP TIME INDEX +); + +-- This CREATE FLOW should fail immediately: the TQL output has (value, sensor, ts), +-- but tql_sink_mm has only (value, ts). +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW tql_mismatch_flow +SINK TO tql_sink_mm +EVAL INTERVAL '1m' AS +TQL EVAL (now() - '1m'::interval, now(), '1m') +avg by(sensor) (tql_source_mm) AS value; + +DROP TABLE tql_source_mm; +DROP TABLE tql_sink_mm; + +-- Real merge_mode=last_non_null sink options should enable partial schema validation. +CREATE TABLE lnn_source_mm ( + device STRING, + val DOUBLE, + ts TIMESTAMP TIME INDEX +); + +CREATE TABLE lnn_sink_mm ( + device STRING, + time_window TIMESTAMP TIME INDEX, + cnt BIGINT, + PRIMARY KEY (device) +) WITH('merge_mode'='last_non_null'); + +-- This CREATE FLOW should fail through the last_non_null partial validator: the +-- sink primary key "device" is required but absent from the flow output. +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW lnn_missing_pk_flow +SINK TO lnn_sink_mm AS +SELECT + date_bin(INTERVAL '1 second', ts) as time_window, + count(*) as cnt +FROM + lnn_source_mm +GROUP BY + time_window; + +DROP TABLE lnn_source_mm; +DROP TABLE lnn_sink_mm; diff --git a/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result new file mode 100644 index 0000000000..53df353078 --- /dev/null +++ b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.result @@ -0,0 +1,90 @@ +-- Regression for a TQL flow whose pre-created sink table is missing the value +-- output column. The labels are intentionally minimal and anonymous. +CREATE DATABASE source_schema; + +Affected Rows: 1 + +CREATE DATABASE sink_schema; + +Affected Rows: 1 + +USE source_schema; + +Affected Rows: 0 + +CREATE TABLE metric_input ( + namespace STRING NULL, + app STRING NULL, + greptime_timestamp TIMESTAMP(3) NOT NULL, + greptime_value DOUBLE NULL, + TIME INDEX (greptime_timestamp), + PRIMARY KEY (namespace, app) +); + +Affected Rows: 0 + +INSERT INTO metric_input VALUES + ('ns', 'app-a', '2026-01-23T03:40:00Z', 10.0), + ('ns', 'app-a', '2026-01-23T03:50:00Z', 20.0); + +Affected Rows: 2 + +USE sink_schema; + +Affected Rows: 0 + +-- Intentionally omit greptime_value DOUBLE from the pre-created sink table. +CREATE TABLE missing_value_sink ( + namespace STRING NULL, + app STRING NULL, + greptime_timestamp TIMESTAMP(3) NOT NULL, + TIME INDEX (greptime_timestamp), + PRIMARY KEY (namespace, app) +) +ENGINE=mito; + +Affected Rows: 0 + +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW missing_value_flow +SINK TO sink_schema.missing_value_sink +EVAL INTERVAL '3600 s' +AS TQL EVAL ( + date_bin('2m'::interval, now() - '2m'::interval), + date_bin('2m'::interval, now() - '2m'::interval), + '1h' +) + avg by (namespace, app) ( + avg_over_time(metric_input{__schema__="source_schema"}[1h]) + ); + +Error: 3001(EngineExecuteQuery), Datafusion error: Plan("Flow output schema does not match sink table schema: found 4 flow output columns and 3 sink table columns. flow output columns: [\"namespace\", \"app\", \"greptime_timestamp\", \"avg(prom_avg_over_time(greptime_timestamp_range,greptime_value))\"], sink table columns: [\"namespace\", \"app\", \"greptime_timestamp\"], extra flow columns not in sink: [\"avg(prom_avg_over_time(greptime_timestamp_range,greptime_value))\"], missing sink columns from flow output: []") in context: Failed to rewrite plan + +DROP FLOW IF EXISTS missing_value_flow; + +Affected Rows: 0 + +DROP TABLE missing_value_sink; + +Affected Rows: 0 + +USE source_schema; + +Affected Rows: 0 + +DROP TABLE metric_input; + +Affected Rows: 0 + +USE public; + +Affected Rows: 0 + +DROP DATABASE sink_schema; + +Affected Rows: 0 + +DROP DATABASE source_schema; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql new file mode 100644 index 0000000000..3693775800 --- /dev/null +++ b/tests/cases/standalone/flow-tql/flow_tql_missing_value_sink_schema.sql @@ -0,0 +1,55 @@ +-- Regression for a TQL flow whose pre-created sink table is missing the value +-- output column. The labels are intentionally minimal and anonymous. + +CREATE DATABASE source_schema; +CREATE DATABASE sink_schema; + +USE source_schema; + +CREATE TABLE metric_input ( + namespace STRING NULL, + app STRING NULL, + greptime_timestamp TIMESTAMP(3) NOT NULL, + greptime_value DOUBLE NULL, + TIME INDEX (greptime_timestamp), + PRIMARY KEY (namespace, app) +); + +INSERT INTO metric_input VALUES + ('ns', 'app-a', '2026-01-23T03:40:00Z', 10.0), + ('ns', 'app-a', '2026-01-23T03:50:00Z', 20.0); + +USE sink_schema; + +-- Intentionally omit greptime_value DOUBLE from the pre-created sink table. +CREATE TABLE missing_value_sink ( + namespace STRING NULL, + app STRING NULL, + greptime_timestamp TIMESTAMP(3) NOT NULL, + TIME INDEX (greptime_timestamp), + PRIMARY KEY (namespace, app) +) +ENGINE=mito; + +-- SQLNESS REPLACE (in\scontext:\sFailed\sto\srewrite\splan:\sError\sduring\splanning:.*) in context: Failed to rewrite plan +CREATE FLOW missing_value_flow +SINK TO sink_schema.missing_value_sink +EVAL INTERVAL '3600 s' +AS TQL EVAL ( + date_bin('2m'::interval, now() - '2m'::interval), + date_bin('2m'::interval, now() - '2m'::interval), + '1h' +) + avg by (namespace, app) ( + avg_over_time(metric_input{__schema__="source_schema"}[1h]) + ); + +DROP FLOW IF EXISTS missing_value_flow; +DROP TABLE missing_value_sink; + +USE source_schema; +DROP TABLE metric_input; + +USE public; +DROP DATABASE sink_schema; +DROP DATABASE source_schema; diff --git a/tests/data/csv/skip_bad_records.csv b/tests/data/csv/skip_bad_records.csv new file mode 100644 index 0000000000..f4c40d5d6e --- /dev/null +++ b/tests/data/csv/skip_bad_records.csv @@ -0,0 +1,4 @@ +host_id,host_name,reading_value,ts +1,Alice,10.5,2024-01-01T00:00:00 +bad,Bad,20.0,2024-01-01T00:00:01 +2,Bob,30.5,2024-01-01T00:00:02