diff --git a/.coderabbit.yaml b/.coderabbit.yaml deleted file mode 100644 index 01bc346444..0000000000 --- a/.coderabbit.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json -language: "en-US" -early_access: false -reviews: - profile: "chill" - request_changes_workflow: false - high_level_summary: true - poem: true - review_status: true - collapse_walkthrough: false - auto_review: - enabled: false - drafts: false -chat: - auto_reply: true diff --git a/.github/scripts/create-version.sh b/.github/scripts/create-version.sh index 1de37df190..0e8218ba01 100755 --- a/.github/scripts/create-version.sh +++ b/.github/scripts/create-version.sh @@ -10,17 +10,17 @@ set -e function create_version() { # Read from envrionment variables. if [ -z "$GITHUB_EVENT_NAME" ]; then - echo "GITHUB_EVENT_NAME is empty" + echo "GITHUB_EVENT_NAME is empty" >&2 exit 1 fi if [ -z "$NEXT_RELEASE_VERSION" ]; then - echo "NEXT_RELEASE_VERSION is empty" - exit 1 + echo "NEXT_RELEASE_VERSION is empty, use version from Cargo.toml" >&2 + export NEXT_RELEASE_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1) fi if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then - echo "NIGHTLY_RELEASE_PREFIX is empty" + echo "NIGHTLY_RELEASE_PREFIX is empty" >&2 exit 1 fi @@ -35,7 +35,7 @@ function create_version() { # It will be like 'dev-2023080819-f0e7216c'. if [ "$NEXT_RELEASE_VERSION" = dev ]; then if [ -z "$COMMIT_SHA" ]; then - echo "COMMIT_SHA is empty in dev build" + echo "COMMIT_SHA is empty in dev build" >&2 exit 1 fi echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)" @@ -45,7 +45,7 @@ function create_version() { # Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs. if [ "$GITHUB_EVENT_NAME" = push ]; then if [ -z "$GITHUB_REF_NAME" ]; then - echo "GITHUB_REF_NAME is empty in push event" + echo "GITHUB_REF_NAME is empty in push event" >&2 exit 1 fi echo "$GITHUB_REF_NAME" @@ -54,7 +54,7 @@ function create_version() { elif [ "$GITHUB_EVENT_NAME" = schedule ]; then echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")" else - echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" + echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2 exit 1 fi } diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fe85a6f2c8..b3c7ee4cdd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -90,8 +90,6 @@ env: # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313; NIGHTLY_RELEASE_PREFIX: nightly - # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release. - NEXT_RELEASE_VERSION: v0.14.0 jobs: allocate-runners: @@ -135,7 +133,6 @@ jobs: env: GITHUB_EVENT_NAME: ${{ github.event_name }} GITHUB_REF_NAME: ${{ github.ref_name }} - NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }} NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }} - name: Allocate linux-amd64 runner diff --git a/Cargo.lock b/Cargo.lock index 839ceafddc..1603528b22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -173,9 +173,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "anymap2" @@ -185,7 +185,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "api" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-base", "common-decimal", @@ -915,7 +915,7 @@ dependencies = [ [[package]] name = "auth" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -1537,7 +1537,7 @@ dependencies = [ [[package]] name = "cache" -version = "0.14.0" +version = "0.15.0" dependencies = [ "catalog", "common-error", @@ -1561,7 +1561,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "catalog" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arrow 54.2.1", @@ -1597,7 +1597,7 @@ dependencies = [ "partition", "paste", "prometheus", - "rustc-hash 2.0.0", + "rustc-hash 2.1.1", "serde_json", "session", "snafu 0.8.5", @@ -1619,9 +1619,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.24" +version = "1.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938" +checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" dependencies = [ "jobserver", "libc", @@ -1874,7 +1874,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cli" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "auth", @@ -1917,7 +1917,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tempfile", "tokio", @@ -1926,7 +1926,7 @@ dependencies = [ [[package]] name = "client" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arc-swap", @@ -1955,7 +1955,7 @@ dependencies = [ "rand 0.9.0", "serde_json", "snafu 0.8.5", - "substrait 0.14.0", + "substrait 0.15.0", "substrait 0.37.3", "tokio", "tokio-stream", @@ -1996,7 +1996,7 @@ dependencies = [ [[package]] name = "cmd" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "auth", @@ -2056,7 +2056,7 @@ dependencies = [ "similar-asserts", "snafu 0.8.5", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "temp-env", "tempfile", @@ -2102,7 +2102,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" [[package]] name = "common-base" -version = "0.14.0" +version = "0.15.0" dependencies = [ "anymap2", "async-trait", @@ -2124,11 +2124,11 @@ dependencies = [ [[package]] name = "common-catalog" -version = "0.14.0" +version = "0.15.0" [[package]] name = "common-config" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-base", "common-error", @@ -2153,7 +2153,7 @@ dependencies = [ [[package]] name = "common-datasource" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arrow 54.2.1", "arrow-schema 54.3.1", @@ -2190,7 +2190,7 @@ dependencies = [ [[package]] name = "common-decimal" -version = "0.14.0" +version = "0.15.0" dependencies = [ "bigdecimal 0.4.8", "common-error", @@ -2203,7 +2203,7 @@ dependencies = [ [[package]] name = "common-error" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-macro", "http 1.1.0", @@ -2214,7 +2214,7 @@ dependencies = [ [[package]] name = "common-frontend" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "common-error", @@ -2224,7 +2224,7 @@ dependencies = [ [[package]] name = "common-function" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "api", @@ -2277,7 +2277,7 @@ dependencies = [ [[package]] name = "common-greptimedb-telemetry" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "common-runtime", @@ -2294,7 +2294,7 @@ dependencies = [ [[package]] name = "common-grpc" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arrow-flight", @@ -2325,7 +2325,7 @@ dependencies = [ [[package]] name = "common-grpc-expr" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "common-base", @@ -2344,7 +2344,7 @@ dependencies = [ [[package]] name = "common-macro" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arc-swap", "common-query", @@ -2358,7 +2358,7 @@ dependencies = [ [[package]] name = "common-mem-prof" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-error", "common-macro", @@ -2371,7 +2371,7 @@ dependencies = [ [[package]] name = "common-meta" -version = "0.14.0" +version = "0.15.0" dependencies = [ "anymap2", "api", @@ -2432,7 +2432,7 @@ dependencies = [ [[package]] name = "common-options" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-grpc", "humantime-serde", @@ -2441,11 +2441,11 @@ dependencies = [ [[package]] name = "common-plugins" -version = "0.14.0" +version = "0.15.0" [[package]] name = "common-pprof" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-error", "common-macro", @@ -2457,7 +2457,7 @@ dependencies = [ [[package]] name = "common-procedure" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-stream", "async-trait", @@ -2484,7 +2484,7 @@ dependencies = [ [[package]] name = "common-procedure-test" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "common-procedure", @@ -2493,7 +2493,7 @@ dependencies = [ [[package]] name = "common-query" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -2510,7 +2510,7 @@ dependencies = [ "futures-util", "serde", "snafu 0.8.5", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "sqlparser_derive 0.1.1", "statrs", "store-api", @@ -2519,7 +2519,7 @@ dependencies = [ [[package]] name = "common-recordbatch" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arc-swap", "common-error", @@ -2539,7 +2539,7 @@ dependencies = [ [[package]] name = "common-runtime" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "clap 4.5.19", @@ -2569,14 +2569,14 @@ dependencies = [ [[package]] name = "common-session" -version = "0.14.0" +version = "0.15.0" dependencies = [ "strum 0.27.1", ] [[package]] name = "common-telemetry" -version = "0.14.0" +version = "0.15.0" dependencies = [ "atty", "backtrace", @@ -2604,7 +2604,7 @@ dependencies = [ [[package]] name = "common-test-util" -version = "0.14.0" +version = "0.15.0" dependencies = [ "client", "common-query", @@ -2616,7 +2616,7 @@ dependencies = [ [[package]] name = "common-time" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arrow 54.2.1", "chrono", @@ -2634,7 +2634,7 @@ dependencies = [ [[package]] name = "common-version" -version = "0.14.0" +version = "0.15.0" dependencies = [ "build-data", "const_format", @@ -2644,7 +2644,7 @@ dependencies = [ [[package]] name = "common-wal" -version = "0.14.0" +version = "0.15.0" dependencies = [ "common-base", "common-error", @@ -2946,9 +2946,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] @@ -3110,14 +3110,14 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.6.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "datafusion" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-array 54.2.1", @@ -3168,7 +3168,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "async-trait", @@ -3188,7 +3188,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-schema 54.3.1", @@ -3211,7 +3211,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3236,7 +3236,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "log", "tokio", @@ -3245,12 +3245,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "dashmap", @@ -3268,7 +3268,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "chrono", @@ -3288,7 +3288,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "datafusion-common", @@ -3299,7 +3299,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-buffer 54.3.1", @@ -3328,7 +3328,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3349,7 +3349,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3361,7 +3361,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-array 54.2.1", @@ -3383,7 +3383,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "async-trait", @@ -3398,7 +3398,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "datafusion-common", "datafusion-doc", @@ -3414,7 +3414,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -3423,7 +3423,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "datafusion-expr", "quote", @@ -3433,7 +3433,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "chrono", @@ -3451,7 +3451,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3474,7 +3474,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3487,7 +3487,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-schema 54.3.1", @@ -3508,7 +3508,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "ahash 0.8.11", "arrow 54.2.1", @@ -3538,7 +3538,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "arrow 54.2.1", "arrow-array 54.2.1", @@ -3556,7 +3556,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" version = "45.0.0" -source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220" +source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4" dependencies = [ "async-recursion", "async-trait", @@ -3572,7 +3572,7 @@ dependencies = [ [[package]] name = "datanode" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arrow-flight", @@ -3624,7 +3624,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tokio", "toml 0.8.19", @@ -3633,7 +3633,7 @@ dependencies = [ [[package]] name = "datatypes" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arrow 54.2.1", "arrow-array 54.2.1", @@ -3656,7 +3656,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.5", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "sqlparser_derive 0.1.1", ] @@ -4259,7 +4259,7 @@ dependencies = [ [[package]] name = "file-engine" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -4382,7 +4382,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" [[package]] name = "flow" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arrow 54.2.1", @@ -4444,7 +4444,7 @@ dependencies = [ "snafu 0.8.5", "store-api", "strum 0.27.1", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tokio", "tonic 0.12.3", @@ -4499,7 +4499,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "frontend" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arc-swap", @@ -4553,10 +4553,10 @@ dependencies = [ "session", "snafu 0.8.5", "sql", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "store-api", "strfmt", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tokio", "toml 0.8.19", @@ -5795,7 +5795,7 @@ dependencies = [ [[package]] name = "index" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "asynchronous-codec", @@ -6599,13 +6599,13 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "log-query" -version = "0.14.0" +version = "0.15.0" dependencies = [ "chrono", "common-error", @@ -6617,7 +6617,7 @@ dependencies = [ [[package]] name = "log-store" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-stream", "async-trait", @@ -6911,7 +6911,7 @@ dependencies = [ [[package]] name = "meta-client" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -6939,7 +6939,7 @@ dependencies = [ [[package]] name = "meta-srv" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -7029,7 +7029,7 @@ dependencies = [ [[package]] name = "metric-engine" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "aquamarine", @@ -7118,7 +7118,7 @@ dependencies = [ [[package]] name = "mito2" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "aquamarine", @@ -7780,7 +7780,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ - "proc-macro-crate 1.3.1", + "proc-macro-crate 3.2.0", "proc-macro2", "quote", "syn 2.0.100", @@ -7824,7 +7824,7 @@ dependencies = [ [[package]] name = "object-store" -version = "0.14.0" +version = "0.15.0" dependencies = [ "anyhow", "bytes", @@ -8119,7 +8119,7 @@ dependencies = [ [[package]] name = "operator" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "api", @@ -8166,9 +8166,9 @@ dependencies = [ "session", "snafu 0.8.5", "sql", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tokio", "tokio-util", @@ -8423,7 +8423,7 @@ dependencies = [ [[package]] name = "partition" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -8443,7 +8443,7 @@ dependencies = [ "session", "snafu 0.8.5", "sql", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "store-api", "table", ] @@ -8705,7 +8705,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pipeline" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "api", @@ -8847,7 +8847,7 @@ dependencies = [ [[package]] name = "plugins" -version = "0.14.0" +version = "0.15.0" dependencies = [ "auth", "clap 4.5.19", @@ -9127,7 +9127,7 @@ dependencies = [ [[package]] name = "promql" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "async-trait", @@ -9373,7 +9373,7 @@ dependencies = [ [[package]] name = "puffin" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-compression 0.4.13", "async-trait", @@ -9414,7 +9414,7 @@ dependencies = [ [[package]] name = "query" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "api", @@ -9477,10 +9477,10 @@ dependencies = [ "session", "snafu 0.8.5", "sql", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "statrs", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tokio", "tokio-stream", @@ -9527,7 +9527,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", + "rustc-hash 2.1.1", "rustls", "socket2", "thiserror 1.0.64", @@ -9544,7 +9544,7 @@ dependencies = [ "bytes", "rand 0.8.5", "ring", - "rustc-hash 2.0.0", + "rustc-hash 2.1.1", "rustls", "slab", "thiserror 1.0.64", @@ -9821,9 +9821,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", @@ -10005,15 +10005,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -10334,9 +10333,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustc_version" @@ -10831,7 +10830,7 @@ dependencies = [ [[package]] name = "servers" -version = "0.14.0" +version = "0.15.0" dependencies = [ "ahash 0.8.11", "api", @@ -10951,7 +10950,7 @@ dependencies = [ [[package]] name = "session" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arc-swap", @@ -11159,9 +11158,9 @@ dependencies = [ [[package]] name = "smallbitvec" -version = "2.5.3" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3fc564a4b53fd1e8589628efafe57602d91bde78be18186b5f61e8faea470" +checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f" [[package]] name = "smallvec" @@ -11276,7 +11275,7 @@ dependencies = [ [[package]] name = "sql" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "chrono", @@ -11304,7 +11303,7 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.5", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "sqlparser_derive 0.1.1", "store-api", "table", @@ -11331,7 +11330,7 @@ dependencies = [ [[package]] name = "sqlness-runner" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "clap 4.5.19", @@ -11373,7 +11372,7 @@ dependencies = [ [[package]] name = "sqlparser" version = "0.54.0" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089#e98e6b322426a9d397a71efef17075966223c089" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e#0cf6c04490d59435ee965edd2078e8855bd8471e" dependencies = [ "lazy_static", "log", @@ -11381,7 +11380,7 @@ dependencies = [ "regex", "serde", "sqlparser 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)", - "sqlparser_derive 0.3.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser_derive 0.3.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", ] [[package]] @@ -11409,7 +11408,7 @@ dependencies = [ [[package]] name = "sqlparser_derive" version = "0.3.0" -source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089#e98e6b322426a9d397a71efef17075966223c089" +source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e#0cf6c04490d59435ee965edd2078e8855bd8471e" dependencies = [ "proc-macro2", "quote", @@ -11650,7 +11649,7 @@ dependencies = [ [[package]] name = "store-api" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "aquamarine", @@ -11799,7 +11798,7 @@ dependencies = [ [[package]] name = "substrait" -version = "0.14.0" +version = "0.15.0" dependencies = [ "async-trait", "bytes", @@ -11979,7 +11978,7 @@ dependencies = [ [[package]] name = "table" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "async-trait", @@ -12230,7 +12229,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "tests-fuzz" -version = "0.14.0" +version = "0.15.0" dependencies = [ "arbitrary", "async-trait", @@ -12264,7 +12263,7 @@ dependencies = [ "serde_yaml", "snafu 0.8.5", "sql", - "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", "sqlx", "store-api", "strum 0.27.1", @@ -12274,7 +12273,7 @@ dependencies = [ [[package]] name = "tests-integration" -version = "0.14.0" +version = "0.15.0" dependencies = [ "api", "arrow-flight", @@ -12341,7 +12340,7 @@ dependencies = [ "sql", "sqlx", "store-api", - "substrait 0.14.0", + "substrait 0.15.0", "table", "tempfile", "time", diff --git a/Cargo.toml b/Cargo.toml index 92dba96d00..b0a049bbd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,15 +68,16 @@ members = [ resolver = "2" [workspace.package] -version = "0.14.0" +version = "0.15.0" edition = "2021" license = "Apache-2.0" [workspace.lints] -clippy.print_stdout = "warn" -clippy.print_stderr = "warn" clippy.dbg_macro = "warn" clippy.implicit_clone = "warn" +clippy.result_large_err = "allow" +clippy.large_enum_variant = "allow" +clippy.doc_overindented_list_items = "allow" rust.unknown_lints = "deny" rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } @@ -112,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] } config = "0.13.0" crossbeam-utils = "0.8" dashmap = "6.1" -datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } -datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" } +datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } +datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" } deadpool = "0.12" deadpool-postgres = "0.14" derive_builder = "0.20" @@ -191,7 +192,7 @@ simd-json = "0.15" similar-asserts = "1.6.0" smallvec = { version = "1", features = ["serde"] } snafu = "0.8" -sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e98e6b322426a9d397a71efef17075966223c089", features = [ +sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [ "visitor", "serde", ] } # branch = "v0.54.x" diff --git a/config/config.md b/config/config.md index f34a41d861..f3230190c9 100644 --- a/config/config.md +++ b/config/config.md @@ -319,6 +319,7 @@ | `selector` | String | `round_robin` | Datanode selector type.
- `round_robin` (default value)
- `lease_based`
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `use_memory_store` | Bool | `false` | Store data in memory. | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.
This feature is only available on GreptimeDB running on cluster mode and
- Using Remote WAL
- Using shared storage (e.g., s3). | +| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.
**This option is not recommended to be set to true, because it may lead to data loss during failover.** | | `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. | | `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `runtime` | -- | -- | The runtime options. | diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index 89c92352b2..0e7f9b74f0 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -50,6 +50,10 @@ use_memory_store = false ## - Using shared storage (e.g., s3). enable_region_failover = false +## Whether to allow region failover on local WAL. +## **This option is not recommended to be set to true, because it may lead to data loss during failover.** +allow_region_failover_on_local_wal = false + ## Max allowed idle time before removing node info from metasrv memory. node_max_idle_time = "24hours" diff --git a/flake.lock b/flake.lock index cfea27d34b..f2b2521130 100644 --- a/flake.lock +++ b/flake.lock @@ -8,11 +8,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1737613896, - "narHash": "sha256-ldqXIglq74C7yKMFUzrS9xMT/EVs26vZpOD68Sh7OcU=", + "lastModified": 1742452566, + "narHash": "sha256-sVuLDQ2UIWfXUBbctzrZrXM2X05YjX08K7XHMztt36E=", "owner": "nix-community", "repo": "fenix", - "rev": "303a062fdd8e89f233db05868468975d17855d80", + "rev": "7d9ba794daf5e8cc7ee728859bc688d8e26d5f06", "type": "github" }, "original": { @@ -41,11 +41,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1737569578, - "narHash": "sha256-6qY0pk2QmUtBT9Mywdvif0i/CLVgpCjMUn6g9vB+f3M=", + "lastModified": 1743576891, + "narHash": "sha256-vXiKURtntURybE6FMNFAVpRPr8+e8KoLPrYs9TGuAKc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "47addd76727f42d351590c905d9d1905ca895b82", + "rev": "44a69ed688786e98a101f02b712c313f1ade37ab", "type": "github" }, "original": { @@ -65,11 +65,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1737581772, - "narHash": "sha256-t1P2Pe3FAX9TlJsCZbmJ3wn+C4qr6aSMypAOu8WNsN0=", + "lastModified": 1742296961, + "narHash": "sha256-gCpvEQOrugHWLimD1wTFOJHagnSEP6VYBDspq96Idu0=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "582af7ee9c8d84f5d534272fc7de9f292bd849be", + "rev": "15d87419f1a123d8f888d608129c3ce3ff8f13d4", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index a6d9fbc0df..225f631721 100644 --- a/flake.nix +++ b/flake.nix @@ -21,7 +21,7 @@ lib = nixpkgs.lib; rustToolchain = fenix.packages.${system}.fromToolchainName { name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel; - sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4="; + sha256 = "sha256-i0Sh/ZFFsHlZ3oFZFc24qdk6Cd8Do8OPU4HJQsrKOeM="; }; in { diff --git a/rust-toolchain.toml b/rust-toolchain.toml index eb2546003b..5d547223f2 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2024-12-25" +channel = "nightly-2025-04-15" diff --git a/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs b/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs index edbdac25c7..a2165d731c 100644 --- a/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs +++ b/src/catalog/src/system_schema/pg_catalog/pg_namespace/oid_map.rs @@ -84,12 +84,6 @@ mod tests { let key1 = "3178510"; let key2 = "4215648"; - // have collision - assert_eq!( - oid_map.hasher.hash_one(key1) as u32, - oid_map.hasher.hash_one(key2) as u32 - ); - // insert them into oid_map let oid1 = oid_map.get_oid(key1); let oid2 = oid_map.get_oid(key2); diff --git a/src/common/function/src/scalars/matches_term.rs b/src/common/function/src/scalars/matches_term.rs index c99c5ca572..54cf556e85 100644 --- a/src/common/function/src/scalars/matches_term.rs +++ b/src/common/function/src/scalars/matches_term.rs @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt; +use std::iter::repeat_n; use std::sync::Arc; -use std::{fmt, iter}; use common_query::error::{InvalidFuncArgsSnafu, Result}; use common_query::prelude::Volatility; @@ -126,9 +127,10 @@ impl Function for MatchesTermFunction { let term = term_column.get_ref(0).as_string().unwrap(); match term { None => { - return Ok(Arc::new(BooleanVector::from_iter( - iter::repeat(None).take(text_column.len()), - ))); + return Ok(Arc::new(BooleanVector::from_iter(repeat_n( + None, + text_column.len(), + )))); } Some(term) => Some(MatchesTermFinder::new(term)), } @@ -217,7 +219,7 @@ impl MatchesTermFinder { } let mut pos = 0; - while let Some(found_pos) = self.finder.find(text[pos..].as_bytes()) { + while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) { let actual_pos = pos + found_pos; let prev_ok = self.starts_with_non_alnum diff --git a/src/common/function/src/scalars/math/rate.rs b/src/common/function/src/scalars/math/rate.rs index e296fb9496..cbe4c92550 100644 --- a/src/common/function/src/scalars/math/rate.rs +++ b/src/common/function/src/scalars/math/rate.rs @@ -37,7 +37,7 @@ impl fmt::Display for RateFunction { impl Function for RateFunction { fn name(&self) -> &str { - "prom_rate" + "rate" } fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { @@ -82,7 +82,7 @@ mod tests { #[test] fn test_rate_function() { let rate = RateFunction; - assert_eq!("prom_rate", rate.name()); + assert_eq!("rate", rate.name()); assert_eq!( ConcreteDataType::float64_datatype(), rate.return_type(&[]).unwrap() diff --git a/src/common/function/src/scalars/uddsketch_calc.rs b/src/common/function/src/scalars/uddsketch_calc.rs index 5c0beb4fec..f429766eb7 100644 --- a/src/common/function/src/scalars/uddsketch_calc.rs +++ b/src/common/function/src/scalars/uddsketch_calc.rs @@ -115,6 +115,13 @@ impl Function for UddSketchCalcFunction { } }; + // Check if the sketch is empty, if so, return null + // This is important to avoid panics when calling estimate_quantile on an empty sketch + // In practice, this will happen if input is all null + if sketch.bucket_iter().count() == 0 { + builder.push_null(); + continue; + } // Compute the estimated quantile from the sketch let result = sketch.estimate_quantile(perc); builder.push(Some(result)); diff --git a/src/common/meta/src/lib.rs b/src/common/meta/src/lib.rs index b1cc18d5e4..7bfbd78f9c 100644 --- a/src/common/meta/src/lib.rs +++ b/src/common/meta/src/lib.rs @@ -15,8 +15,6 @@ #![feature(assert_matches)] #![feature(btree_extract_if)] #![feature(let_chains)] -#![feature(extract_if)] -#![feature(hash_extract_if)] pub mod cache; pub mod cache_invalidator; diff --git a/src/common/meta/src/rpc/router.rs b/src/common/meta/src/rpc/router.rs index 7ddd104c61..2386ca73a7 100644 --- a/src/common/meta/src/rpc/router.rs +++ b/src/common/meta/src/rpc/router.rs @@ -176,15 +176,12 @@ impl TableRoute { })? .into(); - let leader_peer = peers - .get(region_route.leader_peer_index as usize) - .cloned() - .map(Into::into); + let leader_peer = peers.get(region_route.leader_peer_index as usize).cloned(); let follower_peers = region_route .follower_peer_indexes .into_iter() - .filter_map(|x| peers.get(x as usize).cloned().map(Into::into)) + .filter_map(|x| peers.get(x as usize).cloned()) .collect::>(); region_routes.push(RegionRoute { diff --git a/src/common/query/src/logical_plan/accumulator.rs b/src/common/query/src/logical_plan/accumulator.rs index 32f1b4587c..a9c499d323 100644 --- a/src/common/query/src/logical_plan/accumulator.rs +++ b/src/common/query/src/logical_plan/accumulator.rs @@ -24,7 +24,7 @@ use datatypes::prelude::*; use datatypes::vectors::{Helper as VectorHelper, VectorRef}; use snafu::ResultExt; -use crate::error::{self, Error, FromScalarValueSnafu, IntoVectorSnafu, Result}; +use crate::error::{self, FromScalarValueSnafu, IntoVectorSnafu, Result}; use crate::prelude::*; pub type AggregateFunctionCreatorRef = Arc; @@ -166,8 +166,7 @@ impl DfAccumulator for DfAccumulatorAdaptor { let output_type = self.creator.output_type()?; let scalar_value = value .try_to_scalar_value(&output_type) - .context(error::ToScalarValueSnafu) - .map_err(Error::from)?; + .context(error::ToScalarValueSnafu)?; Ok(scalar_value) } diff --git a/src/datatypes/src/schema/constraint.rs b/src/datatypes/src/schema/constraint.rs index 1a2128c200..560500810f 100644 --- a/src/datatypes/src/schema/constraint.rs +++ b/src/datatypes/src/schema/constraint.rs @@ -253,9 +253,10 @@ fn create_current_timestamp_vector( data_type: &ConcreteDataType, num_rows: usize, ) -> Result { - let current_timestamp_vector = TimestampMillisecondVector::from_values( - std::iter::repeat(util::current_time_millis()).take(num_rows), - ); + let current_timestamp_vector = TimestampMillisecondVector::from_values(std::iter::repeat_n( + util::current_time_millis(), + num_rows, + )); if data_type.is_timestamp() { current_timestamp_vector.cast(data_type) } else { diff --git a/src/datatypes/src/vectors/constant.rs b/src/datatypes/src/vectors/constant.rs index 66587cf1d7..3ccade1392 100644 --- a/src/datatypes/src/vectors/constant.rs +++ b/src/datatypes/src/vectors/constant.rs @@ -198,8 +198,7 @@ impl fmt::Debug for ConstantVector { impl Serializable for ConstantVector { fn serialize_to_json(&self) -> Result> { - std::iter::repeat(self.get(0)) - .take(self.len()) + std::iter::repeat_n(self.get(0), self.len()) .map(serde_json::Value::try_from) .collect::>() .context(SerializeSnafu) diff --git a/src/datatypes/src/vectors/decimal.rs b/src/datatypes/src/vectors/decimal.rs index cce26e3e3e..e446b36de3 100644 --- a/src/datatypes/src/vectors/decimal.rs +++ b/src/datatypes/src/vectors/decimal.rs @@ -412,7 +412,7 @@ pub(crate) fn replicate_decimal128( // Safety: std::iter::Repeat and std::iter::Take implement TrustedLen. builder .mutable_array - .append_trusted_len_iter(std::iter::repeat(data).take(repeat_times)); + .append_trusted_len_iter(std::iter::repeat_n(data, repeat_times)); } } None => { diff --git a/src/datatypes/src/vectors/dictionary.rs b/src/datatypes/src/vectors/dictionary.rs index e6831d2ed7..07994d13bd 100644 --- a/src/datatypes/src/vectors/dictionary.rs +++ b/src/datatypes/src/vectors/dictionary.rs @@ -16,8 +16,8 @@ use std::any::Any; use std::sync::Arc; use arrow::array::Array; -use arrow::datatypes::Int32Type; -use arrow_array::{ArrayRef, DictionaryArray, Int32Array}; +use arrow::datatypes::Int64Type; +use arrow_array::{ArrayRef, DictionaryArray, Int64Array}; use serde_json::Value as JsonValue; use snafu::ResultExt; @@ -32,7 +32,7 @@ use crate::vectors::{self, Helper, Validity, Vector, VectorRef}; /// Vector of dictionaries, basically backed by Arrow's `DictionaryArray`. #[derive(Debug, PartialEq)] pub struct DictionaryVector { - array: DictionaryArray, + array: DictionaryArray, /// The datatype of the items in the dictionary. item_type: ConcreteDataType, /// The vector of items in the dictionary. @@ -41,7 +41,7 @@ pub struct DictionaryVector { impl DictionaryVector { /// Create a new instance of `DictionaryVector` from a dictionary array and item type - pub fn new(array: DictionaryArray, item_type: ConcreteDataType) -> Result { + pub fn new(array: DictionaryArray, item_type: ConcreteDataType) -> Result { let item_vector = Helper::try_into_vector(array.values())?; Ok(Self { @@ -52,12 +52,12 @@ impl DictionaryVector { } /// Returns the underlying Arrow dictionary array - pub fn array(&self) -> &DictionaryArray { + pub fn array(&self) -> &DictionaryArray { &self.array } /// Returns the keys array of this dictionary - pub fn keys(&self) -> &arrow_array::PrimitiveArray { + pub fn keys(&self) -> &arrow_array::PrimitiveArray { self.array.keys() } @@ -74,7 +74,7 @@ impl DictionaryVector { impl Vector for DictionaryVector { fn data_type(&self) -> ConcreteDataType { ConcreteDataType::Dictionary(DictionaryType::new( - ConcreteDataType::int32_datatype(), + ConcreteDataType::int64_datatype(), self.item_type.clone(), )) } @@ -163,10 +163,10 @@ impl Serializable for DictionaryVector { } } -impl TryFrom> for DictionaryVector { +impl TryFrom> for DictionaryVector { type Error = crate::error::Error; - fn try_from(array: DictionaryArray) -> Result { + fn try_from(array: DictionaryArray) -> Result { let item_type = ConcreteDataType::from_arrow_type(array.values().data_type()); let item_vector = Helper::try_into_vector(array.values())?; @@ -243,7 +243,7 @@ impl VectorOp for DictionaryVector { previous_offset = offset; } - let new_keys = Int32Array::from(replicated_keys); + let new_keys = Int64Array::from(replicated_keys); let new_array = DictionaryArray::try_new(new_keys, self.values().clone()) .expect("Failed to create replicated dictionary array"); @@ -261,7 +261,7 @@ impl VectorOp for DictionaryVector { let filtered_key_array = filtered_key_vector.to_arrow_array(); let filtered_key_array = filtered_key_array .as_any() - .downcast_ref::() + .downcast_ref::() .unwrap(); let new_array = DictionaryArray::try_new(filtered_key_array.clone(), self.values().clone()) @@ -291,7 +291,7 @@ impl VectorOp for DictionaryVector { let key_vector = Helper::try_into_vector(&key_array)?; let new_key_vector = key_vector.take(indices)?; let new_key_array = new_key_vector.to_arrow_array(); - let new_key_array = new_key_array.as_any().downcast_ref::().unwrap(); + let new_key_array = new_key_array.as_any().downcast_ref::().unwrap(); let new_array = DictionaryArray::try_new(new_key_array.clone(), self.values().clone()) .expect("Failed to create filtered dictionary array"); @@ -318,7 +318,7 @@ mod tests { // Keys: [0, 1, 2, null, 1, 3] // Resulting in: ["a", "b", "c", null, "b", "d"] let values = StringArray::from(vec!["a", "b", "c", "d"]); - let keys = Int32Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]); + let keys = Int64Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]); let dict_array = DictionaryArray::new(keys, Arc::new(values)); DictionaryVector::try_from(dict_array).unwrap() } @@ -404,7 +404,7 @@ mod tests { assert_eq!( casted.data_type(), ConcreteDataType::Dictionary(DictionaryType::new( - ConcreteDataType::int32_datatype(), + ConcreteDataType::int64_datatype(), ConcreteDataType::string_datatype(), )) ); diff --git a/src/datatypes/src/vectors/helper.rs b/src/datatypes/src/vectors/helper.rs index cb8c8972a7..4e23d56809 100644 --- a/src/datatypes/src/vectors/helper.rs +++ b/src/datatypes/src/vectors/helper.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute; use arrow::compute::kernels::comparison; -use arrow::datatypes::{DataType as ArrowDataType, Int32Type, TimeUnit}; +use arrow::datatypes::{DataType as ArrowDataType, Int64Type, TimeUnit}; use arrow_array::DictionaryArray; use arrow_schema::IntervalUnit; use datafusion_common::ScalarValue; @@ -348,11 +348,11 @@ impl Helper { ArrowDataType::Decimal128(_, _) => { Arc::new(Decimal128Vector::try_from_arrow_array(array)?) } - ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int32) => { + ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => { let array = array .as_ref() .as_any() - .downcast_ref::>() + .downcast_ref::>() .unwrap(); // Safety: the type is guarded by match arm condition Arc::new(DictionaryVector::new( array.clone(), diff --git a/src/datatypes/src/vectors/null.rs b/src/datatypes/src/vectors/null.rs index 292e2c5e33..e745ee13d6 100644 --- a/src/datatypes/src/vectors/null.rs +++ b/src/datatypes/src/vectors/null.rs @@ -120,9 +120,7 @@ impl fmt::Debug for NullVector { impl Serializable for NullVector { fn serialize_to_json(&self) -> Result> { - Ok(std::iter::repeat(serde_json::Value::Null) - .take(self.len()) - .collect()) + Ok(std::iter::repeat_n(serde_json::Value::Null, self.len()).collect()) } } diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index 7b059e0d07..f3e49183f5 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -388,7 +388,7 @@ pub(crate) fn replicate_primitive( // Safety: std::iter::Repeat and std::iter::Take implement TrustedLen. builder .mutable_array - .append_trusted_len_iter(std::iter::repeat(data).take(repeat_times)); + .append_trusted_len_iter(std::iter::repeat_n(data, repeat_times)); } } None => { diff --git a/src/flow/src/batching_mode.rs b/src/flow/src/batching_mode.rs index 152ad5781c..031c7aad4b 100644 --- a/src/flow/src/batching_mode.rs +++ b/src/flow/src/batching_mode.rs @@ -32,3 +32,9 @@ pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60); /// The minimum duration between two queries execution by batching mode task const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0); + +/// Grpc connection timeout +const GRPC_CONN_TIMEOUT: Duration = Duration::from_secs(5); + +/// Grpc max retry number +const GRPC_MAX_RETRIES: u32 = 3; diff --git a/src/flow/src/batching_mode/frontend_client.rs b/src/flow/src/batching_mode/frontend_client.rs index 2454e86251..9f16ea07fa 100644 --- a/src/flow/src/batching_mode/frontend_client.rs +++ b/src/flow/src/batching_mode/frontend_client.rs @@ -25,12 +25,15 @@ use common_meta::cluster::{NodeInfo, NodeInfoKey, Role}; use common_meta::peer::Peer; use common_meta::rpc::store::RangeRequest; use common_query::Output; +use common_telemetry::warn; use meta_client::client::MetaClient; use servers::query_handler::grpc::GrpcQueryHandler; use session::context::{QueryContextBuilder, QueryContextRef}; use snafu::{OptionExt, ResultExt}; -use crate::batching_mode::DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT; +use crate::batching_mode::{ + DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, GRPC_CONN_TIMEOUT, GRPC_MAX_RETRIES, +}; use crate::error::{ExternalSnafu, InvalidRequestSnafu, UnexpectedSnafu}; use crate::Error; @@ -99,7 +102,9 @@ impl FrontendClient { Self::Distributed { meta_client, chnl_mgr: { - let cfg = ChannelConfig::new().timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT); + let cfg = ChannelConfig::new() + .connect_timeout(GRPC_CONN_TIMEOUT) + .timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT); ChannelManager::with_config(cfg) }, } @@ -223,12 +228,32 @@ impl FrontendClient { peer: db.peer.clone(), }); - db.database - .handle(req.clone()) - .await - .with_context(|_| InvalidRequestSnafu { - context: format!("Failed to handle request: {:?}", req), - }) + let mut retry = 0; + + loop { + let ret = db.database.handle(req.clone()).await.with_context(|_| { + InvalidRequestSnafu { + context: format!("Failed to handle request: {:?}", req), + } + }); + if let Err(err) = ret { + if retry < GRPC_MAX_RETRIES { + retry += 1; + warn!( + "Failed to send request to grpc handle at Peer={:?}, retry = {}, error = {:?}", + db.peer, retry, err + ); + continue; + } else { + common_telemetry::error!( + "Failed to send request to grpc handle at Peer={:?} after {} retries, error = {:?}", + db.peer, retry, err + ); + return Err(err); + } + } + return ret; + } } FrontendClient::Standalone { database_client } => { let ctx = QueryContextBuilder::default() diff --git a/src/flow/src/batching_mode/task.rs b/src/flow/src/batching_mode/task.rs index 1547faae11..bb1f296c90 100644 --- a/src/flow/src/batching_mode/task.rs +++ b/src/flow/src/batching_mode/task.rs @@ -53,6 +53,7 @@ use crate::batching_mode::utils::{ use crate::batching_mode::{ DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, MIN_REFRESH_DURATION, SLOW_QUERY_THRESHOLD, }; +use crate::df_optimizer::apply_df_optimizer; use crate::error::{ ConvertColumnSchemaSnafu, DatafusionSnafu, ExternalSnafu, InvalidQuerySnafu, SubstraitEncodeLogicalPlanSnafu, UnexpectedSnafu, @@ -541,7 +542,10 @@ impl BatchingTask { .clone() .rewrite(&mut add_auto_column) .with_context(|_| DatafusionSnafu { - context: format!("Failed to rewrite plan {:?}", self.config.plan), + context: format!( + "Failed to rewrite plan:\n {}\n", + self.config.plan + ), })? .data; let schema_len = plan.schema().fields().len(); @@ -573,16 +577,19 @@ impl BatchingTask { let mut add_filter = AddFilterRewriter::new(expr); let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone()); - // make a not optimized plan for clearer unparse + let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false) .await?; - plan.clone() + let rewrite = plan + .clone() .rewrite(&mut add_filter) .and_then(|p| p.data.rewrite(&mut add_auto_column)) .with_context(|_| DatafusionSnafu { - context: format!("Failed to rewrite plan {plan:?}"), + context: format!("Failed to rewrite plan:\n {}\n", plan), })? - .data + .data; + // only apply optimize after complex rewrite is done + apply_df_optimizer(rewrite).await? }; Ok(Some((new_plan, schema_len))) diff --git a/src/flow/src/batching_mode/time_window.rs b/src/flow/src/batching_mode/time_window.rs index e6a0d6ad8c..398250fc8b 100644 --- a/src/flow/src/batching_mode/time_window.rs +++ b/src/flow/src/batching_mode/time_window.rs @@ -704,6 +704,28 @@ mod test { ), "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')" ), + // complex time window index with where + ( + "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE number in (2, 3, 4) GROUP BY time_window;", + Timestamp::new(1740394109, TimeUnit::Second), + ( + "ts".to_string(), + Some(Timestamp::new(1740394080, TimeUnit::Second)), + Some(Timestamp::new(1740394140, TimeUnit::Second)), + ), + "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE numbers_with_ts.number IN (2, 3, 4) AND ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')" + ), + // complex time window index with between and + ( + "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE number BETWEEN 2 AND 4 GROUP BY time_window;", + Timestamp::new(1740394109, TimeUnit::Second), + ( + "ts".to_string(), + Some(Timestamp::new(1740394080, TimeUnit::Second)), + Some(Timestamp::new(1740394140, TimeUnit::Second)), + ), + "SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE (numbers_with_ts.number BETWEEN 2 AND 4) AND ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')" + ), // no time index ( "SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;", diff --git a/src/flow/src/batching_mode/utils.rs b/src/flow/src/batching_mode/utils.rs index 7aa6a8b12f..117db03665 100644 --- a/src/flow/src/batching_mode/utils.rs +++ b/src/flow/src/batching_mode/utils.rs @@ -342,8 +342,8 @@ impl TreeNodeRewriter for AddAutoColumnRewriter { } } else { return Err(DataFusionError::Plan(format!( - "Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?} at node {:?}", - query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas(), node + "Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?}", + query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas() ))); } @@ -406,7 +406,9 @@ mod test { use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnSchema, Schema}; use pretty_assertions::assert_eq; + use query::query_engine::DefaultSerializer; use session::context::QueryContext; + use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use super::*; use crate::test_utils::create_test_query_engine; @@ -701,4 +703,18 @@ mod test { ); } } + + #[tokio::test] + async fn test_null_cast() { + let query_engine = create_test_query_engine(); + let ctx = QueryContext::arc(); + let sql = "SELECT NULL::DOUBLE FROM numbers_with_ts"; + let plan = sql_to_df_plan(ctx, query_engine.clone(), sql, false) + .await + .unwrap(); + + let _sub_plan = DFLogicalSubstraitConvertor {} + .encode(&plan, DefaultSerializer) + .unwrap(); + } } diff --git a/src/flow/src/df_optimizer.rs b/src/flow/src/df_optimizer.rs index d83bb77718..bef5b3ed79 100644 --- a/src/flow/src/df_optimizer.rs +++ b/src/flow/src/df_optimizer.rs @@ -25,7 +25,6 @@ use datafusion::config::ConfigOptions; use datafusion::error::DataFusionError; use datafusion::functions_aggregate::count::count_udaf; use datafusion::functions_aggregate::sum::sum_udaf; -use datafusion::optimizer::analyzer::count_wildcard_rule::CountWildcardRule; use datafusion::optimizer::analyzer::type_coercion::TypeCoercion; use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate; use datafusion::optimizer::optimize_projections::OptimizeProjections; @@ -42,6 +41,7 @@ use datafusion_expr::{ BinaryExpr, ColumnarValue, Expr, Operator, Projection, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; +use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule; use query::parser::QueryLanguageParser; use query::query_engine::DefaultSerializer; use query::QueryEngine; @@ -61,9 +61,9 @@ pub async fn apply_df_optimizer( ) -> Result { let cfg = ConfigOptions::new(); let analyzer = Analyzer::with_rules(vec![ - Arc::new(CountWildcardRule::new()), - Arc::new(AvgExpandRule::new()), - Arc::new(TumbleExpandRule::new()), + Arc::new(CountWildcardToTimeIndexRule), + Arc::new(AvgExpandRule), + Arc::new(TumbleExpandRule), Arc::new(CheckGroupByRule::new()), Arc::new(TypeCoercion::new()), ]); @@ -128,13 +128,7 @@ pub async fn sql_to_flow_plan( } #[derive(Debug)] -struct AvgExpandRule {} - -impl AvgExpandRule { - pub fn new() -> Self { - Self {} - } -} +struct AvgExpandRule; impl AnalyzerRule for AvgExpandRule { fn analyze( @@ -331,13 +325,7 @@ impl TreeNodeRewriter for ExpandAvgRewriter<'_> { /// expand tumble in aggr expr to tumble_start and tumble_end with column name like `window_start` #[derive(Debug)] -struct TumbleExpandRule {} - -impl TumbleExpandRule { - pub fn new() -> Self { - Self {} - } -} +struct TumbleExpandRule; impl AnalyzerRule for TumbleExpandRule { fn analyze( diff --git a/src/index/src/fulltext_index/tokenizer.rs b/src/index/src/fulltext_index/tokenizer.rs index b00e7fda9c..54aa33edc8 100644 --- a/src/index/src/fulltext_index/tokenizer.rs +++ b/src/index/src/fulltext_index/tokenizer.rs @@ -46,7 +46,11 @@ pub struct ChineseTokenizer; impl Tokenizer for ChineseTokenizer { fn tokenize<'a>(&self, text: &'a str) -> Vec<&'a str> { - JIEBA.cut(text, false) + if text.is_ascii() { + EnglishTokenizer {}.tokenize(text) + } else { + JIEBA.cut(text, false) + } } } diff --git a/src/index/src/inverted_index/create/sort/external_sort.rs b/src/index/src/inverted_index/create/sort/external_sort.rs index e8f67b7b7b..3b4eaebc5c 100644 --- a/src/index/src/inverted_index/create/sort/external_sort.rs +++ b/src/index/src/inverted_index/create/sort/external_sort.rs @@ -481,7 +481,7 @@ mod tests { let mock_values = dic_values .iter() - .flat_map(|(value, size)| iter::repeat(value.clone()).take(*size)) + .flat_map(|(value, size)| std::iter::repeat_n(value.clone(), *size)) .collect::>(); let sorted_result = sorted_result(&mock_values, segment_row_count); diff --git a/src/meta-srv/src/bootstrap.rs b/src/meta-srv/src/bootstrap.rs index 3b27295301..40e41bb815 100644 --- a/src/meta-srv/src/bootstrap.rs +++ b/src/meta-srv/src/bootstrap.rs @@ -66,10 +66,12 @@ use crate::election::postgres::PgElection; #[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))] use crate::election::CANDIDATE_LEASE_SECS; use crate::metasrv::builder::MetasrvBuilder; -use crate::metasrv::{BackendImpl, Metasrv, MetasrvOptions, SelectorRef}; +use crate::metasrv::{BackendImpl, Metasrv, MetasrvOptions, SelectTarget, SelectorRef}; +use crate::node_excluder::NodeExcluderRef; use crate::selector::lease_based::LeaseBasedSelector; use crate::selector::load_based::LoadBasedSelector; use crate::selector::round_robin::RoundRobinSelector; +use crate::selector::weight_compute::RegionNumsBasedWeightCompute; use crate::selector::SelectorType; use crate::service::admin; use crate::{error, Result}; @@ -294,14 +296,25 @@ pub async fn metasrv_builder( let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef; + let node_excluder = plugins + .get::() + .unwrap_or_else(|| Arc::new(Vec::new()) as NodeExcluderRef); let selector = if let Some(selector) = plugins.get::() { info!("Using selector from plugins"); selector } else { let selector = match opts.selector { - SelectorType::LoadBased => Arc::new(LoadBasedSelector::default()) as SelectorRef, - SelectorType::LeaseBased => Arc::new(LeaseBasedSelector) as SelectorRef, - SelectorType::RoundRobin => Arc::new(RoundRobinSelector::default()) as SelectorRef, + SelectorType::LoadBased => Arc::new(LoadBasedSelector::new( + RegionNumsBasedWeightCompute, + node_excluder, + )) as SelectorRef, + SelectorType::LeaseBased => { + Arc::new(LeaseBasedSelector::new(node_excluder)) as SelectorRef + } + SelectorType::RoundRobin => Arc::new(RoundRobinSelector::new( + SelectTarget::Datanode, + node_excluder, + )) as SelectorRef, }; info!( "Using selector from options, selector type: {}", diff --git a/src/meta-srv/src/lib.rs b/src/meta-srv/src/lib.rs index ebd3b7b54f..20b9285723 100644 --- a/src/meta-srv/src/lib.rs +++ b/src/meta-srv/src/lib.rs @@ -14,7 +14,6 @@ #![feature(result_flattening)] #![feature(assert_matches)] -#![feature(extract_if)] #![feature(hash_set_entry)] pub mod bootstrap; diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 34b3cac25e..6c9111dd9c 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -111,6 +111,11 @@ pub struct MetasrvOptions { pub use_memory_store: bool, /// Whether to enable region failover. pub enable_region_failover: bool, + /// Whether to allow region failover on local WAL. + /// + /// If it's true, the region failover will be allowed even if the local WAL is used. + /// Note that this option is not recommended to be set to true, because it may lead to data loss during failover. + pub allow_region_failover_on_local_wal: bool, /// The HTTP server options. pub http: HttpOptions, /// The logging options. @@ -173,6 +178,7 @@ impl Default for MetasrvOptions { selector: SelectorType::default(), use_memory_store: false, enable_region_failover: false, + allow_region_failover_on_local_wal: false, http: HttpOptions::default(), logging: LoggingOptions { dir: format!("{METASRV_HOME}/logs"), diff --git a/src/meta-srv/src/metasrv/builder.rs b/src/meta-srv/src/metasrv/builder.rs index ec8f6ef253..0c93e4e4c7 100644 --- a/src/meta-srv/src/metasrv/builder.rs +++ b/src/meta-srv/src/metasrv/builder.rs @@ -40,7 +40,8 @@ use common_meta::state_store::KvStateStore; use common_meta::wal_options_allocator::{build_kafka_client, build_wal_options_allocator}; use common_procedure::local::{LocalManager, ManagerConfig}; use common_procedure::ProcedureManagerRef; -use snafu::ResultExt; +use common_telemetry::warn; +use snafu::{ensure, ResultExt}; use crate::cache_invalidator::MetasrvCacheInvalidator; use crate::cluster::{MetaPeerClientBuilder, MetaPeerClientRef}; @@ -190,7 +191,7 @@ impl MetasrvBuilder { let meta_peer_client = meta_peer_client .unwrap_or_else(|| build_default_meta_peer_client(&election, &in_memory)); - let selector = selector.unwrap_or_else(|| Arc::new(LeaseBasedSelector)); + let selector = selector.unwrap_or_else(|| Arc::new(LeaseBasedSelector::default())); let pushers = Pushers::default(); let mailbox = build_mailbox(&kv_backend, &pushers); let procedure_manager = build_procedure_manager(&options, &kv_backend); @@ -234,13 +235,17 @@ impl MetasrvBuilder { )) }); + let flow_selector = Arc::new(RoundRobinSelector::new( + SelectTarget::Flownode, + Arc::new(Vec::new()), + )) as SelectorRef; + let flow_metadata_allocator = { // for now flownode just use round-robin selector - let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode); let flow_selector_ctx = selector_ctx.clone(); let peer_allocator = Arc::new(FlowPeerAllocator::new( flow_selector_ctx, - Arc::new(flow_selector), + flow_selector.clone(), )); let seq = Arc::new( SequenceBuilder::new(FLOW_ID_SEQ, kv_backend.clone()) @@ -272,18 +277,25 @@ impl MetasrvBuilder { }, )); let peer_lookup_service = Arc::new(MetaPeerLookupService::new(meta_peer_client.clone())); + if !is_remote_wal && options.enable_region_failover { - return error::UnexpectedSnafu { - violated: "Region failover is not supported in the local WAL implementation!", + ensure!( + options.allow_region_failover_on_local_wal, + error::UnexpectedSnafu { + violated: "Region failover is not supported in the local WAL implementation! + If you want to enable region failover for local WAL, please set `allow_region_failover_on_local_wal` to true.", + } + ); + if options.allow_region_failover_on_local_wal { + warn!("Region failover is force enabled in the local WAL implementation! This may lead to data loss during failover!"); } - .fail(); } let (tx, rx) = RegionSupervisor::channel(); let (region_failure_detector_controller, region_supervisor_ticker): ( RegionFailureDetectorControllerRef, Option>, - ) = if options.enable_region_failover && is_remote_wal { + ) = if options.enable_region_failover { ( Arc::new(RegionFailureDetectorControl::new(tx.clone())) as _, Some(Arc::new(RegionSupervisorTicker::new( @@ -309,7 +321,7 @@ impl MetasrvBuilder { )); region_migration_manager.try_start()?; - let region_failover_handler = if options.enable_region_failover && is_remote_wal { + let region_failover_handler = if options.enable_region_failover { let region_supervisor = RegionSupervisor::new( rx, options.failure_detector, @@ -420,7 +432,7 @@ impl MetasrvBuilder { meta_peer_client: meta_peer_client.clone(), selector, // TODO(jeremy): We do not allow configuring the flow selector. - flow_selector: Arc::new(RoundRobinSelector::new(SelectTarget::Flownode)), + flow_selector, handler_group: RwLock::new(None), handler_group_builder: Mutex::new(Some(handler_group_builder)), election, diff --git a/src/meta-srv/src/metrics.rs b/src/meta-srv/src/metrics.rs index ffbe986d72..2984a91a1c 100644 --- a/src/meta-srv/src/metrics.rs +++ b/src/meta-srv/src/metrics.rs @@ -71,4 +71,13 @@ lazy_static! { /// The remote WAL prune execute counter. pub static ref METRIC_META_REMOTE_WAL_PRUNE_EXECUTE: IntCounterVec = register_int_counter_vec!("greptime_meta_remote_wal_prune_execute", "meta remote wal prune execute", &["topic_name"]).unwrap(); + /// The migration stage elapsed histogram. + pub static ref METRIC_META_REGION_MIGRATION_STAGE_ELAPSED: HistogramVec = register_histogram_vec!( + "greptime_meta_region_migration_stage_elapsed", + "meta region migration stage elapsed", + &["stage"], + // 0.01 ~ 1000 + exponential_buckets(0.01, 10.0, 7).unwrap(), + ) + .unwrap(); } diff --git a/src/meta-srv/src/mocks.rs b/src/meta-srv/src/mocks.rs index 656ceeb3d9..29ed3a5ae8 100644 --- a/src/meta-srv/src/mocks.rs +++ b/src/meta-srv/src/mocks.rs @@ -141,10 +141,7 @@ pub async fn mock( if let Some(client) = client { Ok(TokioIo::new(client)) } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Client already taken", - )) + Err(std::io::Error::other("Client already taken")) } } }), diff --git a/src/meta-srv/src/node_excluder.rs b/src/meta-srv/src/node_excluder.rs index f9e892f092..a7bc6e0f69 100644 --- a/src/meta-srv/src/node_excluder.rs +++ b/src/meta-srv/src/node_excluder.rs @@ -24,3 +24,9 @@ pub trait NodeExcluder: Send + Sync { /// Returns the excluded datanode ids. fn excluded_datanode_ids(&self) -> &Vec; } + +impl NodeExcluder for Vec { + fn excluded_datanode_ids(&self) -> &Vec { + self + } +} diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index b2f1eed711..43b444d3b1 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -25,7 +25,7 @@ pub(crate) mod update_metadata; pub(crate) mod upgrade_candidate_region; use std::any::Any; -use std::fmt::Debug; +use std::fmt::{Debug, Display}; use std::time::Duration; use common_error::ext::BoxedError; @@ -43,7 +43,7 @@ use common_procedure::error::{ Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu, }; use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey}; -use common_telemetry::info; +use common_telemetry::{error, info}; use manager::RegionMigrationProcedureGuard; pub use manager::{ RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker, @@ -55,7 +55,10 @@ use tokio::time::Instant; use self::migration_start::RegionMigrationStart; use crate::error::{self, Result}; -use crate::metrics::{METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE}; +use crate::metrics::{ + METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE, + METRIC_META_REGION_MIGRATION_STAGE_ELAPSED, +}; use crate::service::mailbox::MailboxRef; /// The default timeout for region migration. @@ -103,6 +106,82 @@ impl PersistentContext { } } +/// Metrics of region migration. +#[derive(Debug, Clone, Default)] +pub struct Metrics { + /// Elapsed time of downgrading region and upgrading region. + operations_elapsed: Duration, + /// Elapsed time of downgrading leader region. + downgrade_leader_region_elapsed: Duration, + /// Elapsed time of open candidate region. + open_candidate_region_elapsed: Duration, + /// Elapsed time of upgrade candidate region. + upgrade_candidate_region_elapsed: Duration, +} + +impl Display for Metrics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "operations_elapsed: {:?}, downgrade_leader_region_elapsed: {:?}, open_candidate_region_elapsed: {:?}, upgrade_candidate_region_elapsed: {:?}", + self.operations_elapsed, + self.downgrade_leader_region_elapsed, + self.open_candidate_region_elapsed, + self.upgrade_candidate_region_elapsed + ) + } +} + +impl Metrics { + /// Updates the elapsed time of downgrading region and upgrading region. + pub fn update_operations_elapsed(&mut self, elapsed: Duration) { + self.operations_elapsed += elapsed; + } + + /// Updates the elapsed time of downgrading leader region. + pub fn update_downgrade_leader_region_elapsed(&mut self, elapsed: Duration) { + self.downgrade_leader_region_elapsed += elapsed; + } + + /// Updates the elapsed time of open candidate region. + pub fn update_open_candidate_region_elapsed(&mut self, elapsed: Duration) { + self.open_candidate_region_elapsed += elapsed; + } + + /// Updates the elapsed time of upgrade candidate region. + pub fn update_upgrade_candidate_region_elapsed(&mut self, elapsed: Duration) { + self.upgrade_candidate_region_elapsed += elapsed; + } +} + +impl Drop for Metrics { + fn drop(&mut self) { + if !self.operations_elapsed.is_zero() { + METRIC_META_REGION_MIGRATION_STAGE_ELAPSED + .with_label_values(&["operations"]) + .observe(self.operations_elapsed.as_secs_f64()); + } + + if !self.downgrade_leader_region_elapsed.is_zero() { + METRIC_META_REGION_MIGRATION_STAGE_ELAPSED + .with_label_values(&["downgrade_leader_region"]) + .observe(self.downgrade_leader_region_elapsed.as_secs_f64()); + } + + if !self.open_candidate_region_elapsed.is_zero() { + METRIC_META_REGION_MIGRATION_STAGE_ELAPSED + .with_label_values(&["open_candidate_region"]) + .observe(self.open_candidate_region_elapsed.as_secs_f64()); + } + + if !self.upgrade_candidate_region_elapsed.is_zero() { + METRIC_META_REGION_MIGRATION_STAGE_ELAPSED + .with_label_values(&["upgrade_candidate_region"]) + .observe(self.upgrade_candidate_region_elapsed.as_secs_f64()); + } + } +} + /// It's shared in each step and available in executing (including retrying). /// /// It will be dropped if the procedure runner crashes. @@ -132,8 +211,8 @@ pub struct VolatileContext { leader_region_last_entry_id: Option, /// The last_entry_id of leader metadata region (Only used for metric engine). leader_region_metadata_last_entry_id: Option, - /// Elapsed time of downgrading region and upgrading region. - operations_elapsed: Duration, + /// Metrics of region migration. + metrics: Metrics, } impl VolatileContext { @@ -231,12 +310,35 @@ impl Context { pub fn next_operation_timeout(&self) -> Option { self.persistent_ctx .timeout - .checked_sub(self.volatile_ctx.operations_elapsed) + .checked_sub(self.volatile_ctx.metrics.operations_elapsed) } /// Updates operations elapsed. pub fn update_operations_elapsed(&mut self, instant: Instant) { - self.volatile_ctx.operations_elapsed += instant.elapsed(); + self.volatile_ctx + .metrics + .update_operations_elapsed(instant.elapsed()); + } + + /// Updates the elapsed time of downgrading leader region. + pub fn update_downgrade_leader_region_elapsed(&mut self, instant: Instant) { + self.volatile_ctx + .metrics + .update_downgrade_leader_region_elapsed(instant.elapsed()); + } + + /// Updates the elapsed time of open candidate region. + pub fn update_open_candidate_region_elapsed(&mut self, instant: Instant) { + self.volatile_ctx + .metrics + .update_open_candidate_region_elapsed(instant.elapsed()); + } + + /// Updates the elapsed time of upgrade candidate region. + pub fn update_upgrade_candidate_region_elapsed(&mut self, instant: Instant) { + self.volatile_ctx + .metrics + .update_upgrade_candidate_region_elapsed(instant.elapsed()); } /// Returns address of meta server. @@ -550,6 +652,14 @@ impl Procedure for RegionMigrationProcedure { .inc(); ProcedureError::retry_later(e) } else { + error!( + e; + "Region migration procedure failed, region_id: {}, from_peer: {}, to_peer: {}, {}", + self.context.region_id(), + self.context.persistent_ctx.from_peer, + self.context.persistent_ctx.to_peer, + self.context.volatile_ctx.metrics, + ); METRIC_META_REGION_MIGRATION_ERROR .with_label_values(&[name, "external"]) .inc(); diff --git a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs index 94256ba5ec..ba13f7cdea 100644 --- a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs +++ b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs @@ -46,7 +46,13 @@ impl State for CloseDowngradedRegion { let region_id = ctx.region_id(); warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode); } - + info!( + "Region migration is finished: region_id: {}, from_peer: {}, to_peer: {}, {}", + ctx.region_id(), + ctx.persistent_ctx.from_peer, + ctx.persistent_ctx.to_peer, + ctx.volatile_ctx.metrics, + ); Ok((Box::new(RegionMigrationEnd), Status::done())) } diff --git a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs index 02b7216fe7..93481adc54 100644 --- a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs @@ -54,6 +54,7 @@ impl Default for DowngradeLeaderRegion { #[typetag::serde] impl State for DowngradeLeaderRegion { async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + let now = Instant::now(); // Ensures the `leader_region_lease_deadline` must exist after recovering. ctx.volatile_ctx .set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS)); @@ -77,6 +78,7 @@ impl State for DowngradeLeaderRegion { } } } + ctx.update_downgrade_leader_region_elapsed(now); Ok(( Box::new(UpgradeCandidateRegion::default()), @@ -348,7 +350,8 @@ mod tests { let env = TestingEnv::new(); let mut ctx = env.context_factory().new_context(persistent_context); prepare_table_metadata(&ctx, HashMap::default()).await; - ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1); + ctx.volatile_ctx.metrics.operations_elapsed = + ctx.persistent_ctx.timeout + Duration::from_secs(1); let err = state.downgrade_region(&mut ctx).await.unwrap_err(); @@ -591,7 +594,8 @@ mod tests { let mut ctx = env.context_factory().new_context(persistent_context); let mailbox_ctx = env.mailbox_context(); let mailbox = mailbox_ctx.mailbox().clone(); - ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1); + ctx.volatile_ctx.metrics.operations_elapsed = + ctx.persistent_ctx.timeout + Duration::from_secs(1); let (tx, rx) = tokio::sync::mpsc::channel(1); mailbox_ctx diff --git a/src/meta-srv/src/procedure/region_migration/migration_abort.rs b/src/meta-srv/src/procedure/region_migration/migration_abort.rs index af56843045..d364f0c8b9 100644 --- a/src/meta-srv/src/procedure/region_migration/migration_abort.rs +++ b/src/meta-srv/src/procedure/region_migration/migration_abort.rs @@ -15,6 +15,7 @@ use std::any::Any; use common_procedure::Status; +use common_telemetry::warn; use serde::{Deserialize, Serialize}; use crate::error::{self, Result}; @@ -37,7 +38,15 @@ impl RegionMigrationAbort { #[async_trait::async_trait] #[typetag::serde] impl State for RegionMigrationAbort { - async fn next(&mut self, _: &mut Context) -> Result<(Box, Status)> { + async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + warn!( + "Region migration is aborted: {}, region_id: {}, from_peer: {}, to_peer: {}, {}", + self.reason, + ctx.region_id(), + ctx.persistent_ctx.from_peer, + ctx.persistent_ctx.to_peer, + ctx.volatile_ctx.metrics, + ); error::MigrationAbortSnafu { reason: &self.reason, } diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 6cacf75063..6d1c81d3ed 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -13,7 +13,7 @@ // limitations under the License. use std::any::Any; -use std::time::{Duration, Instant}; +use std::time::Duration; use api::v1::meta::MailboxMessage; use common_meta::distributed_time_constants::REGION_LEASE_SECS; @@ -24,6 +24,7 @@ use common_procedure::Status; use common_telemetry::info; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; +use tokio::time::Instant; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; @@ -42,7 +43,9 @@ pub struct OpenCandidateRegion; impl State for OpenCandidateRegion { async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { let instruction = self.build_open_region_instruction(ctx).await?; + let now = Instant::now(); self.open_candidate_region(ctx, instruction).await?; + ctx.update_open_candidate_region_elapsed(now); Ok(( Box::new(UpdateMetadata::Downgrade), diff --git a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs index 552b9d3863..8f3741dbac 100644 --- a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs @@ -54,9 +54,12 @@ impl Default for UpgradeCandidateRegion { #[typetag::serde] impl State for UpgradeCandidateRegion { async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + let now = Instant::now(); if self.upgrade_region_with_retry(ctx).await { + ctx.update_upgrade_candidate_region_elapsed(now); Ok((Box::new(UpdateMetadata::Upgrade), Status::executing(false))) } else { + ctx.update_upgrade_candidate_region_elapsed(now); Ok((Box::new(UpdateMetadata::Rollback), Status::executing(false))) } } @@ -288,7 +291,8 @@ mod tests { let persistent_context = new_persistent_context(); let env = TestingEnv::new(); let mut ctx = env.context_factory().new_context(persistent_context); - ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1); + ctx.volatile_ctx.metrics.operations_elapsed = + ctx.persistent_ctx.timeout + Duration::from_secs(1); let err = state.upgrade_region(&ctx).await.unwrap_err(); @@ -558,7 +562,8 @@ mod tests { let mut ctx = env.context_factory().new_context(persistent_context); let mailbox_ctx = env.mailbox_context(); let mailbox = mailbox_ctx.mailbox().clone(); - ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1); + ctx.volatile_ctx.metrics.operations_elapsed = + ctx.persistent_ctx.timeout + Duration::from_secs(1); let (tx, rx) = tokio::sync::mpsc::channel(1); mailbox_ctx diff --git a/src/meta-srv/src/selector.rs b/src/meta-srv/src/selector.rs index ce166ae05c..96fbda241d 100644 --- a/src/meta-srv/src/selector.rs +++ b/src/meta-srv/src/selector.rs @@ -18,7 +18,7 @@ pub mod load_based; pub mod round_robin; #[cfg(test)] pub(crate) mod test_utils; -mod weight_compute; +pub mod weight_compute; pub mod weighted_choose; use std::collections::HashSet; diff --git a/src/meta-srv/src/selector/lease_based.rs b/src/meta-srv/src/selector/lease_based.rs index e60157989d..448c26b08e 100644 --- a/src/meta-srv/src/selector/lease_based.rs +++ b/src/meta-srv/src/selector/lease_based.rs @@ -12,17 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; +use std::sync::Arc; + use common_meta::peer::Peer; use crate::error::Result; use crate::lease; use crate::metasrv::SelectorContext; +use crate::node_excluder::NodeExcluderRef; use crate::selector::common::{choose_items, filter_out_excluded_peers}; use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem}; use crate::selector::{Selector, SelectorOptions}; /// Select all alive datanodes based using a random weighted choose. -pub struct LeaseBasedSelector; +pub struct LeaseBasedSelector { + node_excluder: NodeExcluderRef, +} + +impl LeaseBasedSelector { + pub fn new(node_excluder: NodeExcluderRef) -> Self { + Self { node_excluder } + } +} + +impl Default for LeaseBasedSelector { + fn default() -> Self { + Self { + node_excluder: Arc::new(Vec::new()), + } + } +} #[async_trait::async_trait] impl Selector for LeaseBasedSelector { @@ -47,7 +67,14 @@ impl Selector for LeaseBasedSelector { .collect(); // 3. choose peers by weight_array. - filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids); + let mut exclude_peer_ids = self + .node_excluder + .excluded_datanode_ids() + .iter() + .cloned() + .collect::>(); + exclude_peer_ids.extend(opts.exclude_peer_ids.iter()); + filter_out_excluded_peers(&mut weight_array, &exclude_peer_ids); let mut weighted_choose = RandomWeightedChoose::new(weight_array); let selected = choose_items(&opts, &mut weighted_choose)?; diff --git a/src/meta-srv/src/selector/load_based.rs b/src/meta-srv/src/selector/load_based.rs index d98a4ace5d..4f33245a28 100644 --- a/src/meta-srv/src/selector/load_based.rs +++ b/src/meta-srv/src/selector/load_based.rs @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue}; use common_meta::key::TableMetadataManager; @@ -26,6 +27,7 @@ use crate::error::{self, Result}; use crate::key::{DatanodeLeaseKey, LeaseValue}; use crate::lease; use crate::metasrv::SelectorContext; +use crate::node_excluder::NodeExcluderRef; use crate::selector::common::{choose_items, filter_out_excluded_peers}; use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute}; use crate::selector::weighted_choose::RandomWeightedChoose; @@ -33,11 +35,15 @@ use crate::selector::{Selector, SelectorOptions}; pub struct LoadBasedSelector { weight_compute: C, + node_excluder: NodeExcluderRef, } impl LoadBasedSelector { - pub fn new(weight_compute: C) -> Self { - Self { weight_compute } + pub fn new(weight_compute: C, node_excluder: NodeExcluderRef) -> Self { + Self { + weight_compute, + node_excluder, + } } } @@ -45,6 +51,7 @@ impl Default for LoadBasedSelector { fn default() -> Self { Self { weight_compute: RegionNumsBasedWeightCompute, + node_excluder: Arc::new(Vec::new()), } } } @@ -88,7 +95,14 @@ where let mut weight_array = self.weight_compute.compute(&stat_kvs); // 5. choose peers by weight_array. - filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids); + let mut exclude_peer_ids = self + .node_excluder + .excluded_datanode_ids() + .iter() + .cloned() + .collect::>(); + exclude_peer_ids.extend(opts.exclude_peer_ids.iter()); + filter_out_excluded_peers(&mut weight_array, &exclude_peer_ids); let mut weighted_choose = RandomWeightedChoose::new(weight_array); let selected = choose_items(&opts, &mut weighted_choose)?; diff --git a/src/meta-srv/src/selector/round_robin.rs b/src/meta-srv/src/selector/round_robin.rs index 2c849cb194..d930ca06cb 100644 --- a/src/meta-srv/src/selector/round_robin.rs +++ b/src/meta-srv/src/selector/round_robin.rs @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; use std::sync::atomic::AtomicUsize; +use std::sync::Arc; use common_meta::peer::Peer; use snafu::ensure; @@ -20,6 +22,7 @@ use snafu::ensure; use crate::error::{NoEnoughAvailableNodeSnafu, Result}; use crate::lease; use crate::metasrv::{SelectTarget, SelectorContext}; +use crate::node_excluder::NodeExcluderRef; use crate::selector::{Selector, SelectorOptions}; /// Round-robin selector that returns the next peer in the list in sequence. @@ -32,6 +35,7 @@ use crate::selector::{Selector, SelectorOptions}; pub struct RoundRobinSelector { select_target: SelectTarget, counter: AtomicUsize, + node_excluder: NodeExcluderRef, } impl Default for RoundRobinSelector { @@ -39,32 +43,38 @@ impl Default for RoundRobinSelector { Self { select_target: SelectTarget::Datanode, counter: AtomicUsize::new(0), + node_excluder: Arc::new(Vec::new()), } } } impl RoundRobinSelector { - pub fn new(select_target: SelectTarget) -> Self { + pub fn new(select_target: SelectTarget, node_excluder: NodeExcluderRef) -> Self { Self { select_target, + node_excluder, ..Default::default() } } - async fn get_peers( - &self, - min_required_items: usize, - ctx: &SelectorContext, - ) -> Result> { + async fn get_peers(&self, opts: &SelectorOptions, ctx: &SelectorContext) -> Result> { let mut peers = match self.select_target { SelectTarget::Datanode => { // 1. get alive datanodes. let lease_kvs = lease::alive_datanodes(&ctx.meta_peer_client, ctx.datanode_lease_secs).await?; + let mut exclude_peer_ids = self + .node_excluder + .excluded_datanode_ids() + .iter() + .cloned() + .collect::>(); + exclude_peer_ids.extend(opts.exclude_peer_ids.iter()); // 2. map into peers lease_kvs .into_iter() + .filter(|(k, _)| !exclude_peer_ids.contains(&k.node_id)) .map(|(k, v)| Peer::new(k.node_id, v.node_addr)) .collect::>() } @@ -84,8 +94,8 @@ impl RoundRobinSelector { ensure!( !peers.is_empty(), NoEnoughAvailableNodeSnafu { - required: min_required_items, - available: 0usize, + required: opts.min_required_items, + available: peers.len(), select_target: self.select_target } ); @@ -103,7 +113,7 @@ impl Selector for RoundRobinSelector { type Output = Vec; async fn select(&self, ctx: &Self::Context, opts: SelectorOptions) -> Result> { - let peers = self.get_peers(opts.min_required_items, ctx).await?; + let peers = self.get_peers(&opts, ctx).await?; // choose peers let mut selected = Vec::with_capacity(opts.min_required_items); for _ in 0..opts.min_required_items { @@ -176,4 +186,42 @@ mod test { assert_eq!(peers.len(), 2); assert_eq!(peers, vec![peer2.clone(), peer3.clone()]); } + + #[tokio::test] + async fn test_round_robin_selector_with_exclude_peer_ids() { + let selector = RoundRobinSelector::new(SelectTarget::Datanode, Arc::new(vec![5])); + let ctx = create_selector_context(); + // add three nodes + let peer1 = Peer { + id: 2, + addr: "node1".to_string(), + }; + let peer2 = Peer { + id: 5, + addr: "node2".to_string(), + }; + let peer3 = Peer { + id: 8, + addr: "node3".to_string(), + }; + put_datanodes( + &ctx.meta_peer_client, + vec![peer1.clone(), peer2.clone(), peer3.clone()], + ) + .await; + + let peers = selector + .select( + &ctx, + SelectorOptions { + min_required_items: 1, + allow_duplication: true, + exclude_peer_ids: HashSet::from([2]), + }, + ) + .await + .unwrap(); + assert_eq!(peers.len(), 1); + assert_eq!(peers, vec![peer3.clone()]); + } } diff --git a/src/meta-srv/src/service/store/cached_kv.rs b/src/meta-srv/src/service/store/cached_kv.rs index b26c2a558f..f86b42a9e2 100644 --- a/src/meta-srv/src/service/store/cached_kv.rs +++ b/src/meta-srv/src/service/store/cached_kv.rs @@ -278,7 +278,7 @@ impl KvBackend for LeaderCachedKvBackend { let remote_res = self.store.batch_get(remote_req).await?; let put_req = BatchPutRequest { - kvs: remote_res.kvs.clone().into_iter().map(Into::into).collect(), + kvs: remote_res.kvs.clone().into_iter().collect(), ..Default::default() }; let _ = self.cache.batch_put(put_req).await?; diff --git a/src/mito2/src/read/projection.rs b/src/mito2/src/read/projection.rs index 883f554066..6d6de78a74 100644 --- a/src/mito2/src/read/projection.rs +++ b/src/mito2/src/read/projection.rs @@ -363,9 +363,9 @@ mod tests { builder .push_field_array( *column_id, - Arc::new(Int64Array::from_iter_values( - std::iter::repeat(*field).take(num_rows), - )), + Arc::new(Int64Array::from_iter_values(std::iter::repeat_n( + *field, num_rows, + ))), ) .unwrap(); } diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index aefec6a983..e48ca633da 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -206,6 +206,14 @@ impl SeqScan { .build(), )); } + if self.properties.partitions[partition].is_empty() { + return Ok(Box::pin(RecordBatchStreamWrapper::new( + self.stream_ctx.input.mapper.output_schema(), + common_recordbatch::EmptyRecordBatchStream::new( + self.stream_ctx.input.mapper.output_schema(), + ), + ))); + } let stream_ctx = self.stream_ctx.clone(); let semaphore = self.new_semaphore(); diff --git a/src/mito2/src/sst/index/bloom_filter/creator.rs b/src/mito2/src/sst/index/bloom_filter/creator.rs index 5c00b1a19c..53821c7cf2 100644 --- a/src/mito2/src/sst/index/bloom_filter/creator.rs +++ b/src/mito2/src/sst/index/bloom_filter/creator.rs @@ -346,7 +346,6 @@ impl BloomFilterIndexer { #[cfg(test)] pub(crate) mod tests { - use std::iter; use api::v1::SemanticType; use datatypes::data_type::ConcreteDataType; @@ -461,15 +460,15 @@ pub(crate) mod tests { Batch::new( primary_key, - Arc::new(UInt64Vector::from_iter_values( - iter::repeat(0).take(num_rows), - )), - Arc::new(UInt64Vector::from_iter_values( - iter::repeat(0).take(num_rows), - )), - Arc::new(UInt8Vector::from_iter_values( - iter::repeat(1).take(num_rows), - )), + Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n( + 0, num_rows, + ))), + Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n( + 0, num_rows, + ))), + Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n( + 1, num_rows, + ))), vec![u64_field], ) .unwrap() diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index 1d884ac3a5..fc9aae9f42 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -489,12 +489,12 @@ mod tests { Arc::new(UInt64Vector::from_iter_values( (0..num_rows).map(|n| n as u64), )), - Arc::new(UInt64Vector::from_iter_values( - std::iter::repeat(0).take(num_rows), - )), - Arc::new(UInt8Vector::from_iter_values( - std::iter::repeat(1).take(num_rows), - )), + Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n( + 0, num_rows, + ))), + Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n( + 1, num_rows, + ))), vec![ BatchColumn { column_id: 1, diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 8991b72aec..6f44979c78 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -326,7 +326,6 @@ impl InvertedIndexer { #[cfg(test)] mod tests { use std::collections::BTreeSet; - use std::iter; use api::v1::SemanticType; use datafusion_expr::{binary_expr, col, lit, Expr as DfExpr, Operator}; @@ -424,15 +423,15 @@ mod tests { Batch::new( primary_key, - Arc::new(UInt64Vector::from_iter_values( - iter::repeat(0).take(num_rows), - )), - Arc::new(UInt64Vector::from_iter_values( - iter::repeat(0).take(num_rows), - )), - Arc::new(UInt8Vector::from_iter_values( - iter::repeat(1).take(num_rows), - )), + Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n( + 0, num_rows, + ))), + Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n( + 0, num_rows, + ))), + Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n( + 1, num_rows, + ))), vec![u64_field], ) .unwrap() diff --git a/src/mito2/src/sst/parquet/format.rs b/src/mito2/src/sst/parquet/format.rs index c90907f0eb..005e276bbd 100644 --- a/src/mito2/src/sst/parquet/format.rs +++ b/src/mito2/src/sst/parquet/format.rs @@ -755,7 +755,7 @@ mod tests { )); let mut keys = vec![]; for (index, num_rows) in pk_row_nums.iter().map(|v| v.1).enumerate() { - keys.extend(std::iter::repeat(index as u32).take(num_rows)); + keys.extend(std::iter::repeat_n(index as u32, num_rows)); } let keys = UInt32Array::from(keys); Arc::new(DictionaryArray::new(keys, values)) diff --git a/src/operator/src/req_convert/insert/fill_impure_default.rs b/src/operator/src/req_convert/insert/fill_impure_default.rs index a60138c6e5..cf1e1565a8 100644 --- a/src/operator/src/req_convert/insert/fill_impure_default.rs +++ b/src/operator/src/req_convert/insert/fill_impure_default.rs @@ -85,11 +85,9 @@ impl ImpureDefaultFiller { .schema .iter() .filter_map(|schema| { - if self.impure_columns.contains_key(&schema.column_name) { - Some(&schema.column_name) - } else { - None - } + self.impure_columns + .contains_key(&schema.column_name) + .then_some(&schema.column_name) }) .collect(); diff --git a/src/pipeline/src/etl/processor/dissect.rs b/src/pipeline/src/etl/processor/dissect.rs index 8c31f42ace..8034d984d2 100644 --- a/src/pipeline/src/etl/processor/dissect.rs +++ b/src/pipeline/src/etl/processor/dissect.rs @@ -325,7 +325,7 @@ impl std::str::FromStr for Pattern { impl Pattern { fn check(&self) -> Result<()> { - if self.len() == 0 { + if self.is_empty() { return DissectEmptyPatternSnafu.fail(); } diff --git a/src/promql/src/extension_plan/instant_manipulate.rs b/src/promql/src/extension_plan/instant_manipulate.rs index 1071e94f9a..a3339a4e95 100644 --- a/src/promql/src/extension_plan/instant_manipulate.rs +++ b/src/promql/src/extension_plan/instant_manipulate.rs @@ -91,9 +91,9 @@ impl UserDefinedLogicalNodeCore for InstantManipulate { _exprs: Vec, inputs: Vec, ) -> DataFusionResult { - if inputs.is_empty() { + if inputs.len() != 1 { return Err(DataFusionError::Internal( - "InstantManipulate should have at least one input".to_string(), + "InstantManipulate should have exact one input".to_string(), )); } @@ -354,6 +354,9 @@ impl Stream for InstantManipulateStream { fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let poll = match ready!(self.input.poll_next_unpin(cx)) { Some(Ok(batch)) => { + if batch.num_rows() == 0 { + return Poll::Pending; + } let timer = std::time::Instant::now(); self.num_series.add(1); let result = Ok(batch).and_then(|batch| self.manipulate(batch)); diff --git a/src/promql/src/extension_plan/range_manipulate.rs b/src/promql/src/extension_plan/range_manipulate.rs index 1e1cac3555..7af75b0458 100644 --- a/src/promql/src/extension_plan/range_manipulate.rs +++ b/src/promql/src/extension_plan/range_manipulate.rs @@ -42,7 +42,7 @@ use greptime_proto::substrait_extension as pb; use prost::Message; use snafu::ResultExt; -use crate::error::{DataFusionPlanningSnafu, DeserializeSnafu, Result}; +use crate::error::{DeserializeSnafu, Result}; use crate::extension_plan::{Millisecond, METRIC_NUM_SERIES}; use crate::metrics::PROMQL_SERIES_COUNT; use crate::range_array::RangeArray; @@ -194,20 +194,26 @@ impl RangeManipulate { pub fn deserialize(bytes: &[u8]) -> Result { let pb_range_manipulate = pb::RangeManipulate::decode(bytes).context(DeserializeSnafu)?; + let empty_schema = Arc::new(DFSchema::empty()); let placeholder_plan = LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, - schema: Arc::new(DFSchema::empty()), + schema: empty_schema.clone(), }); - Self::new( - pb_range_manipulate.start, - pb_range_manipulate.end, - pb_range_manipulate.interval, - pb_range_manipulate.range, - pb_range_manipulate.time_index, - pb_range_manipulate.tag_columns, - placeholder_plan, - ) - .context(DataFusionPlanningSnafu) + + // Unlike `Self::new()`, this method doesn't check the input schema as it will fail + // because the input schema is empty. + // But this is Ok since datafusion guarantees to call `with_exprs_and_inputs` for the + // deserialized plan. + Ok(Self { + start: pb_range_manipulate.start, + end: pb_range_manipulate.end, + interval: pb_range_manipulate.interval, + range: pb_range_manipulate.range, + time_index: pb_range_manipulate.time_index, + field_columns: pb_range_manipulate.tag_columns, + input: placeholder_plan, + output_schema: empty_schema, + }) } } @@ -270,14 +276,19 @@ impl UserDefinedLogicalNodeCore for RangeManipulate { fn with_exprs_and_inputs( &self, _exprs: Vec, - inputs: Vec, + mut inputs: Vec, ) -> DataFusionResult { - if inputs.is_empty() { + if inputs.len() != 1 { return Err(DataFusionError::Internal( - "RangeManipulate should have at least one input".to_string(), + "RangeManipulate should have at exact one input".to_string(), )); } + let input: LogicalPlan = inputs.pop().unwrap(); + let input_schema = input.schema(); + let output_schema = + Self::calculate_output_schema(input_schema, &self.time_index, &self.field_columns)?; + Ok(Self { start: self.start, end: self.end, @@ -285,8 +296,8 @@ impl UserDefinedLogicalNodeCore for RangeManipulate { range: self.range, time_index: self.time_index.clone(), field_columns: self.field_columns.clone(), - input: inputs.into_iter().next().unwrap(), - output_schema: self.output_schema.clone(), + input, + output_schema, }) } } diff --git a/src/promql/src/extension_plan/series_divide.rs b/src/promql/src/extension_plan/series_divide.rs index 06ef942762..36e1c10f42 100644 --- a/src/promql/src/extension_plan/series_divide.rs +++ b/src/promql/src/extension_plan/series_divide.rs @@ -106,6 +106,10 @@ impl SeriesDivide { }) } + pub fn tags(&self) -> &[String] { + &self.tag_columns + } + pub fn serialize(&self) -> Vec { pb::SeriesDivide { tag_columns: self.tag_columns.clone(), @@ -315,7 +319,9 @@ impl Stream for SeriesDivideStream { let next_batch = ready!(self.as_mut().fetch_next_batch(cx)).transpose()?; let timer = std::time::Instant::now(); if let Some(next_batch) = next_batch { - self.buffer.push(next_batch); + if next_batch.num_rows() != 0 { + self.buffer.push(next_batch); + } continue; } else { // input stream is ended diff --git a/src/promql/src/functions.rs b/src/promql/src/functions.rs index dade00ea7b..fee6387d20 100644 --- a/src/promql/src/functions.rs +++ b/src/promql/src/functions.rs @@ -40,17 +40,17 @@ pub use holt_winters::HoltWinters; pub use idelta::IDelta; pub use predict_linear::PredictLinear; pub use quantile::QuantileOverTime; -pub use quantile_aggr::quantile_udaf; +pub use quantile_aggr::{quantile_udaf, QUANTILE_NAME}; pub use resets::Resets; pub use round::Round; +/// Extracts an array from a `ColumnarValue`. +/// +/// If the `ColumnarValue` is a scalar, it converts it to an array of size 1. pub(crate) fn extract_array(columnar_value: &ColumnarValue) -> Result { - if let ColumnarValue::Array(array) = columnar_value { - Ok(array.clone()) - } else { - Err(DataFusionError::Execution( - "expect array as input, found scalar value".to_string(), - )) + match columnar_value { + ColumnarValue::Array(array) => Ok(array.clone()), + ColumnarValue::Scalar(scalar) => Ok(scalar.to_array_of_size(1)?), } } diff --git a/src/promql/src/functions/aggr_over_time.rs b/src/promql/src/functions/aggr_over_time.rs index 298959ef35..841f28e0df 100644 --- a/src/promql/src/functions/aggr_over_time.rs +++ b/src/promql/src/functions/aggr_over_time.rs @@ -231,6 +231,7 @@ mod test { AvgOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(49.9999995), Some(45.8618844), @@ -253,6 +254,7 @@ mod test { MinOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(12.345678), Some(12.345678), @@ -275,6 +277,7 @@ mod test { MaxOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(87.654321), Some(87.654321), @@ -297,6 +300,7 @@ mod test { SumOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(99.999999), Some(229.309422), @@ -319,6 +323,7 @@ mod test { CountOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(2.0), Some(5.0), @@ -341,6 +346,7 @@ mod test { LastOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(87.654321), Some(70.710678), @@ -363,6 +369,7 @@ mod test { AbsentOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ None, None, @@ -385,6 +392,7 @@ mod test { PresentOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(1.0), Some(1.0), @@ -407,6 +415,7 @@ mod test { StdvarOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(1417.8479276253622), Some(808.999919713209), @@ -442,6 +451,7 @@ mod test { StdvarOverTime::scalar_udf(), RangeArray::from_ranges(ts_array, ranges).unwrap(), RangeArray::from_ranges(values_array, ranges).unwrap(), + vec![], vec![Some(0.0), Some(10.559999999999999)], ); } @@ -453,6 +463,7 @@ mod test { StddevOverTime::scalar_udf(), ts_array, value_array, + vec![], vec![ Some(37.6543215), Some(28.442923895289123), @@ -488,6 +499,7 @@ mod test { StddevOverTime::scalar_udf(), RangeArray::from_ranges(ts_array, ranges).unwrap(), RangeArray::from_ranges(values_array, ranges).unwrap(), + vec![], vec![Some(0.0), Some(3.249615361854384)], ); } diff --git a/src/promql/src/functions/changes.rs b/src/promql/src/functions/changes.rs index 743f941652..21819436e6 100644 --- a/src/promql/src/functions/changes.rs +++ b/src/promql/src/functions/changes.rs @@ -90,6 +90,7 @@ mod test { Changes::scalar_udf(), ts_array_1, value_array_1, + vec![], vec![Some(0.0), Some(3.0), Some(5.0), Some(8.0), None], ); @@ -101,6 +102,7 @@ mod test { Changes::scalar_udf(), ts_array_2, value_array_2, + vec![], vec![Some(0.0), Some(3.0), Some(5.0), Some(9.0), None], ); @@ -111,6 +113,7 @@ mod test { Changes::scalar_udf(), ts_array_3, value_array_3, + vec![], vec![Some(0.0), Some(0.0), Some(1.0), Some(1.0), None], ); } diff --git a/src/promql/src/functions/deriv.rs b/src/promql/src/functions/deriv.rs index 90b09f0d40..49e6718911 100644 --- a/src/promql/src/functions/deriv.rs +++ b/src/promql/src/functions/deriv.rs @@ -74,6 +74,7 @@ mod test { Deriv::scalar_udf(), ts_array, value_array, + vec![], vec![Some(10.606060606060607), None], ); } @@ -99,6 +100,7 @@ mod test { Deriv::scalar_udf(), ts_range_array, value_range_array, + vec![], vec![Some(0.0)], ); } diff --git a/src/promql/src/functions/extrapolate_rate.rs b/src/promql/src/functions/extrapolate_rate.rs index 8977eaf083..aadaab904c 100644 --- a/src/promql/src/functions/extrapolate_rate.rs +++ b/src/promql/src/functions/extrapolate_rate.rs @@ -34,11 +34,11 @@ use std::sync::Arc; use datafusion::arrow::array::{Float64Array, TimestampMillisecondArray}; use datafusion::arrow::datatypes::TimeUnit; -use datafusion::common::DataFusionError; +use datafusion::common::{DataFusionError, Result as DfResult}; use datafusion::logical_expr::{ScalarUDF, Volatility}; use datafusion::physical_plan::ColumnarValue; use datafusion_expr::create_udf; -use datatypes::arrow::array::Array; +use datatypes::arrow::array::{Array, Int64Array}; use datatypes::arrow::datatypes::DataType; use crate::extension_plan::Millisecond; @@ -53,7 +53,7 @@ pub type Increase = ExtrapolatedRate; /// from #[derive(Debug)] pub struct ExtrapolatedRate { - /// Range duration in millisecond + /// Range length in milliseconds. range_length: i64, } @@ -63,7 +63,7 @@ impl ExtrapolatedRate ScalarUDF { + fn scalar_udf_with_name(name: &str) -> ScalarUDF { let input_types = vec![ // timestamp range vector RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)), @@ -71,6 +71,8 @@ impl ExtrapolatedRate ExtrapolatedRate Result { - assert_eq!(input.len(), 3); + fn create_function(inputs: &[ColumnarValue]) -> DfResult { + if inputs.len() != 4 { + return Err(DataFusionError::Plan( + "ExtrapolatedRate function should have 4 inputs".to_string(), + )); + } + + let range_length_array = extract_array(&inputs[3])?; + let range_length = range_length_array + .as_any() + .downcast_ref::() + .unwrap() + .value(0) as i64; + + Ok(Self::new(range_length)) + } + + /// Input parameters: + /// * 0: timestamp range vector + /// * 1: value range vector + /// * 2: timestamp vector + /// * 3: range length. Range duration in millisecond. Not used here + fn calc(&self, input: &[ColumnarValue]) -> DfResult { + assert_eq!(input.len(), 4); // construct matrix from input let ts_array = extract_array(&input[0])?; @@ -204,34 +228,34 @@ impl ExtrapolatedRate { - pub fn name() -> &'static str { + pub const fn name() -> &'static str { "prom_delta" } - pub fn scalar_udf(range_length: i64) -> ScalarUDF { - Self::scalar_udf_with_name(Self::name(), range_length) + pub fn scalar_udf() -> ScalarUDF { + Self::scalar_udf_with_name(Self::name()) } } // rate impl ExtrapolatedRate { - pub fn name() -> &'static str { + pub const fn name() -> &'static str { "prom_rate" } - pub fn scalar_udf(range_length: i64) -> ScalarUDF { - Self::scalar_udf_with_name(Self::name(), range_length) + pub fn scalar_udf() -> ScalarUDF { + Self::scalar_udf_with_name(Self::name()) } } // increase impl ExtrapolatedRate { - pub fn name() -> &'static str { + pub const fn name() -> &'static str { "prom_increase" } - pub fn scalar_udf(range_length: i64) -> ScalarUDF { - Self::scalar_udf_with_name(Self::name(), range_length) + pub fn scalar_udf() -> ScalarUDF { + Self::scalar_udf_with_name(Self::name()) } } @@ -271,6 +295,7 @@ mod test { ColumnarValue::Array(Arc::new(ts_range.into_dict())), ColumnarValue::Array(Arc::new(value_range.into_dict())), ColumnarValue::Array(timestamps), + ColumnarValue::Array(Arc::new(Int64Array::from(vec![5]))), ]; let output = extract_array( &ExtrapolatedRate::::new(5) diff --git a/src/promql/src/functions/holt_winters.rs b/src/promql/src/functions/holt_winters.rs index 3f26abffb1..8e722c8651 100644 --- a/src/promql/src/functions/holt_winters.rs +++ b/src/promql/src/functions/holt_winters.rs @@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::TimeUnit; use datafusion::common::DataFusionError; use datafusion::logical_expr::{ScalarUDF, Volatility}; use datafusion::physical_plan::ColumnarValue; +use datafusion_common::ScalarValue; use datafusion_expr::create_udf; use datatypes::arrow::array::Array; use datatypes::arrow::datatypes::DataType; @@ -62,6 +63,10 @@ impl HoltWinters { vec![ RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)), RangeArray::convert_data_type(DataType::Float64), + // sf + DataType::Float64, + // tf + DataType::Float64, ] } @@ -69,20 +74,39 @@ impl HoltWinters { DataType::Float64 } - pub fn scalar_udf(level: f64, trend: f64) -> ScalarUDF { + pub fn scalar_udf() -> ScalarUDF { create_udf( Self::name(), Self::input_type(), Self::return_type(), Volatility::Volatile, - Arc::new(move |input: &_| Self::new(level, trend).calc(input)) as _, + Arc::new(move |input: &_| Self::create_function(input)?.calc(input)) as _, ) } + fn create_function(inputs: &[ColumnarValue]) -> Result { + if inputs.len() != 4 { + return Err(DataFusionError::Plan( + "HoltWinters function should have 4 inputs".to_string(), + )); + } + let ColumnarValue::Scalar(ScalarValue::Float64(Some(sf))) = inputs[2] else { + return Err(DataFusionError::Plan( + "HoltWinters function's third input should be a scalar float64".to_string(), + )); + }; + let ColumnarValue::Scalar(ScalarValue::Float64(Some(tf))) = inputs[3] else { + return Err(DataFusionError::Plan( + "HoltWinters function's fourth input should be a scalar float64".to_string(), + )); + }; + Ok(Self::new(sf, tf)) + } + fn calc(&self, input: &[ColumnarValue]) -> Result { // construct matrix from input. // The third one is level param, the fourth - trend param which are included in fields. - assert_eq!(input.len(), 2); + assert_eq!(input.len(), 4); let ts_array = extract_array(&input[0])?; let value_array = extract_array(&input[1])?; @@ -264,9 +288,13 @@ mod tests { let ts_range_array = RangeArray::from_ranges(ts_array, ranges).unwrap(); let value_range_array = RangeArray::from_ranges(values_array, ranges).unwrap(); simple_range_udf_runner( - HoltWinters::scalar_udf(0.5, 0.1), + HoltWinters::scalar_udf(), ts_range_array, value_range_array, + vec![ + ScalarValue::Float64(Some(0.5)), + ScalarValue::Float64(Some(0.1)), + ], vec![Some(5.0)], ); } @@ -287,9 +315,13 @@ mod tests { let ts_range_array = RangeArray::from_ranges(ts_array, ranges).unwrap(); let value_range_array = RangeArray::from_ranges(values_array, ranges).unwrap(); simple_range_udf_runner( - HoltWinters::scalar_udf(0.5, 0.1), + HoltWinters::scalar_udf(), ts_range_array, value_range_array, + vec![ + ScalarValue::Float64(Some(0.5)), + ScalarValue::Float64(Some(0.1)), + ], vec![Some(38.18119566835938)], ); } @@ -315,9 +347,13 @@ mod tests { let (ts_range_array, value_range_array) = create_ts_and_value_range_arrays(query, ranges.clone()); simple_range_udf_runner( - HoltWinters::scalar_udf(0.01, 0.1), + HoltWinters::scalar_udf(), ts_range_array, value_range_array, + vec![ + ScalarValue::Float64(Some(0.01)), + ScalarValue::Float64(Some(0.1)), + ], vec![Some(expected)], ); } diff --git a/src/promql/src/functions/idelta.rs b/src/promql/src/functions/idelta.rs index c5d1897a3e..a70a1dee3c 100644 --- a/src/promql/src/functions/idelta.rs +++ b/src/promql/src/functions/idelta.rs @@ -190,6 +190,7 @@ mod test { IDelta::::scalar_udf(), ts_range_array, value_range_array, + vec![], vec![Some(1.0), Some(-5.0), None, Some(6.0), None, None], ); @@ -200,6 +201,7 @@ mod test { IDelta::::scalar_udf(), ts_range_array, value_range_array, + vec![], // the second point represent counter reset vec![Some(0.5), Some(0.0), None, Some(3.0), None, None], ); diff --git a/src/promql/src/functions/predict_linear.rs b/src/promql/src/functions/predict_linear.rs index 4b945cabbb..d3c1e8214c 100644 --- a/src/promql/src/functions/predict_linear.rs +++ b/src/promql/src/functions/predict_linear.rs @@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::TimeUnit; use datafusion::common::DataFusionError; use datafusion::logical_expr::{ScalarUDF, Volatility}; use datafusion::physical_plan::ColumnarValue; +use datafusion_common::ScalarValue; use datafusion_expr::create_udf; use datatypes::arrow::array::Array; use datatypes::arrow::datatypes::DataType; @@ -44,25 +45,41 @@ impl PredictLinear { "prom_predict_linear" } - pub fn scalar_udf(t: i64) -> ScalarUDF { + pub fn scalar_udf() -> ScalarUDF { let input_types = vec![ // time index column RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)), // value column RangeArray::convert_data_type(DataType::Float64), + // t + DataType::Int64, ]; create_udf( Self::name(), input_types, DataType::Float64, Volatility::Volatile, - Arc::new(move |input: &_| Self::new(t).predict_linear(input)) as _, + Arc::new(move |input: &_| Self::create_function(input)?.predict_linear(input)) as _, ) } + fn create_function(inputs: &[ColumnarValue]) -> Result { + if inputs.len() != 3 { + return Err(DataFusionError::Plan( + "PredictLinear function should have 3 inputs".to_string(), + )); + } + let ColumnarValue::Scalar(ScalarValue::Int64(Some(t))) = inputs[2] else { + return Err(DataFusionError::Plan( + "PredictLinear function's third input should be a scalar int64".to_string(), + )); + }; + Ok(Self::new(t)) + } + fn predict_linear(&self, input: &[ColumnarValue]) -> Result { // construct matrix from input. - assert_eq!(input.len(), 2); + assert_eq!(input.len(), 3); let ts_array = extract_array(&input[0])?; let value_array = extract_array(&input[1])?; @@ -190,9 +207,10 @@ mod test { let ts_array = RangeArray::from_ranges(ts_array, ranges).unwrap(); let value_array = RangeArray::from_ranges(values_array, ranges).unwrap(); simple_range_udf_runner( - PredictLinear::scalar_udf(0), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(0))], vec![None, None], ); } @@ -201,9 +219,10 @@ mod test { fn calculate_predict_linear_test1() { let (ts_array, value_array) = build_test_range_arrays(); simple_range_udf_runner( - PredictLinear::scalar_udf(0), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(0))], // value at t = 0 vec![Some(38.63636363636364)], ); @@ -213,9 +232,10 @@ mod test { fn calculate_predict_linear_test2() { let (ts_array, value_array) = build_test_range_arrays(); simple_range_udf_runner( - PredictLinear::scalar_udf(3000), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(3000))], // value at t = 3000 vec![Some(31856.818181818187)], ); @@ -225,9 +245,10 @@ mod test { fn calculate_predict_linear_test3() { let (ts_array, value_array) = build_test_range_arrays(); simple_range_udf_runner( - PredictLinear::scalar_udf(4200), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(4200))], // value at t = 4200 vec![Some(44584.09090909091)], ); @@ -237,9 +258,10 @@ mod test { fn calculate_predict_linear_test4() { let (ts_array, value_array) = build_test_range_arrays(); simple_range_udf_runner( - PredictLinear::scalar_udf(6600), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(6600))], // value at t = 6600 vec![Some(70038.63636363638)], ); @@ -249,9 +271,10 @@ mod test { fn calculate_predict_linear_test5() { let (ts_array, value_array) = build_test_range_arrays(); simple_range_udf_runner( - PredictLinear::scalar_udf(7800), + PredictLinear::scalar_udf(), ts_array, value_array, + vec![ScalarValue::Int64(Some(7800))], // value at t = 7800 vec![Some(82765.9090909091)], ); diff --git a/src/promql/src/functions/quantile.rs b/src/promql/src/functions/quantile.rs index f975a76cf4..7fd553287d 100644 --- a/src/promql/src/functions/quantile.rs +++ b/src/promql/src/functions/quantile.rs @@ -19,6 +19,7 @@ use datafusion::arrow::datatypes::TimeUnit; use datafusion::common::DataFusionError; use datafusion::logical_expr::{ScalarUDF, Volatility}; use datafusion::physical_plan::ColumnarValue; +use datafusion_common::ScalarValue; use datafusion_expr::create_udf; use datatypes::arrow::array::Array; use datatypes::arrow::datatypes::DataType; @@ -40,22 +41,38 @@ impl QuantileOverTime { "prom_quantile_over_time" } - pub fn scalar_udf(quantile: f64) -> ScalarUDF { + pub fn scalar_udf() -> ScalarUDF { let input_types = vec![ // time index column RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)), // value column RangeArray::convert_data_type(DataType::Float64), + // quantile + DataType::Float64, ]; create_udf( Self::name(), input_types, DataType::Float64, Volatility::Volatile, - Arc::new(move |input: &_| Self::new(quantile).quantile_over_time(input)) as _, + Arc::new(move |input: &_| Self::create_function(input)?.quantile_over_time(input)) as _, ) } + fn create_function(inputs: &[ColumnarValue]) -> Result { + if inputs.len() != 3 { + return Err(DataFusionError::Plan( + "QuantileOverTime function should have 3 inputs".to_string(), + )); + } + let ColumnarValue::Scalar(ScalarValue::Float64(Some(quantile))) = inputs[2] else { + return Err(DataFusionError::Plan( + "QuantileOverTime function's third input should be a scalar float64".to_string(), + )); + }; + Ok(Self::new(quantile)) + } + fn quantile_over_time( &self, input: &[ColumnarValue], diff --git a/src/promql/src/functions/quantile_aggr.rs b/src/promql/src/functions/quantile_aggr.rs index 2f8d9edd9d..5652f57342 100644 --- a/src/promql/src/functions/quantile_aggr.rs +++ b/src/promql/src/functions/quantile_aggr.rs @@ -16,16 +16,18 @@ use std::sync::Arc; use datafusion::arrow::array::{ArrayRef, AsArray}; use datafusion::common::cast::{as_list_array, as_primitive_array, as_struct_array}; -use datafusion::error::Result as DfResult; +use datafusion::error::{DataFusionError, Result as DfResult}; use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF, Volatility}; +use datafusion::physical_plan::expressions::Literal; use datafusion::prelude::create_udaf; use datafusion_common::ScalarValue; +use datafusion_expr::function::AccumulatorArgs; use datatypes::arrow::array::{ListArray, StructArray}; use datatypes::arrow::datatypes::{DataType, Field, Float64Type}; use crate::functions::quantile::quantile_impl; -const QUANTILE_NAME: &str = "quantile"; +pub const QUANTILE_NAME: &str = "quantile"; const VALUES_FIELD_NAME: &str = "values"; const DEFAULT_LIST_FIELD_NAME: &str = "item"; @@ -38,16 +40,16 @@ pub struct QuantileAccumulator { /// Create a quantile `AggregateUDF` for PromQL quantile operator, /// which calculates φ-quantile (0 ≤ φ ≤ 1) over dimensions -pub fn quantile_udaf(q: f64) -> Arc { +pub fn quantile_udaf() -> Arc { Arc::new(create_udaf( QUANTILE_NAME, - // Input type: (values) - vec![DataType::Float64], + // Input type: (φ, values) + vec![DataType::Float64, DataType::Float64], // Output type: the φ-quantile Arc::new(DataType::Float64), Volatility::Volatile, // Create the accumulator - Arc::new(move |_| Ok(Box::new(QuantileAccumulator::new(q)))), + Arc::new(QuantileAccumulator::from_args), // Intermediate state types Arc::new(vec![DataType::Struct( vec![Field::new( @@ -65,17 +67,40 @@ pub fn quantile_udaf(q: f64) -> Arc { } impl QuantileAccumulator { - pub fn new(q: f64) -> Self { + fn new(q: f64) -> Self { Self { q, ..Default::default() } } + + pub fn from_args(args: AccumulatorArgs) -> DfResult> { + if args.exprs.len() != 2 { + return Err(DataFusionError::Plan( + "Quantile function should have 2 inputs".to_string(), + )); + } + + let q = match &args.exprs[0] + .as_any() + .downcast_ref::() + .map(|lit| lit.value()) + { + Some(ScalarValue::Float64(Some(q))) => *q, + _ => { + return Err(DataFusionError::Internal( + "Invalid quantile value".to_string(), + )) + } + }; + + Ok(Box::new(Self::new(q))) + } } impl DfAccumulator for QuantileAccumulator { fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> { - let f64_array = values[0].as_primitive::(); + let f64_array = values[1].as_primitive::(); self.values.extend(f64_array); @@ -162,9 +187,10 @@ mod tests { #[test] fn test_quantile_accumulator_single_value() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input = create_f64_array(vec![Some(10.0)]); - accumulator.update_batch(&[input]).unwrap(); + accumulator.update_batch(&[q, input]).unwrap(); let result = accumulator.evaluate().unwrap(); assert_eq!(result, ScalarValue::Float64(Some(10.0))); @@ -173,9 +199,10 @@ mod tests { #[test] fn test_quantile_accumulator_multiple_values() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]); - accumulator.update_batch(&[input]).unwrap(); + accumulator.update_batch(&[q, input]).unwrap(); let result = accumulator.evaluate().unwrap(); assert_eq!(result, ScalarValue::Float64(Some(3.0))); @@ -184,9 +211,10 @@ mod tests { #[test] fn test_quantile_accumulator_with_nulls() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input = create_f64_array(vec![Some(1.0), None, Some(3.0), Some(4.0), Some(5.0)]); - accumulator.update_batch(&[input]).unwrap(); + accumulator.update_batch(&[q, input]).unwrap(); let result = accumulator.evaluate().unwrap(); assert_eq!(result, ScalarValue::Float64(Some(3.0))); @@ -195,11 +223,12 @@ mod tests { #[test] fn test_quantile_accumulator_multiple_batches() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input1 = create_f64_array(vec![Some(1.0), Some(2.0)]); let input2 = create_f64_array(vec![Some(3.0), Some(4.0), Some(5.0)]); - accumulator.update_batch(&[input1]).unwrap(); - accumulator.update_batch(&[input2]).unwrap(); + accumulator.update_batch(&[q.clone(), input1]).unwrap(); + accumulator.update_batch(&[q, input2]).unwrap(); let result = accumulator.evaluate().unwrap(); assert_eq!(result, ScalarValue::Float64(Some(3.0))); @@ -208,29 +237,33 @@ mod tests { #[test] fn test_quantile_accumulator_different_quantiles() { let mut min_accumulator = QuantileAccumulator::new(0.0); + let q = create_f64_array(vec![Some(0.0)]); let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]); - min_accumulator.update_batch(&[input.clone()]).unwrap(); + min_accumulator.update_batch(&[q, input.clone()]).unwrap(); assert_eq!( min_accumulator.evaluate().unwrap(), ScalarValue::Float64(Some(1.0)) ); let mut q1_accumulator = QuantileAccumulator::new(0.25); - q1_accumulator.update_batch(&[input.clone()]).unwrap(); + let q = create_f64_array(vec![Some(0.25)]); + q1_accumulator.update_batch(&[q, input.clone()]).unwrap(); assert_eq!( q1_accumulator.evaluate().unwrap(), ScalarValue::Float64(Some(2.0)) ); let mut q3_accumulator = QuantileAccumulator::new(0.75); - q3_accumulator.update_batch(&[input.clone()]).unwrap(); + let q = create_f64_array(vec![Some(0.75)]); + q3_accumulator.update_batch(&[q, input.clone()]).unwrap(); assert_eq!( q3_accumulator.evaluate().unwrap(), ScalarValue::Float64(Some(4.0)) ); let mut max_accumulator = QuantileAccumulator::new(1.0); - max_accumulator.update_batch(&[input]).unwrap(); + let q = create_f64_array(vec![Some(1.0)]); + max_accumulator.update_batch(&[q, input]).unwrap(); assert_eq!( max_accumulator.evaluate().unwrap(), ScalarValue::Float64(Some(5.0)) @@ -240,10 +273,11 @@ mod tests { #[test] fn test_quantile_accumulator_size() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0)]); let initial_size = accumulator.size(); - accumulator.update_batch(&[input]).unwrap(); + accumulator.update_batch(&[q, input]).unwrap(); let after_update_size = accumulator.size(); assert!(after_update_size >= initial_size); @@ -252,14 +286,16 @@ mod tests { #[test] fn test_quantile_accumulator_state_and_merge() -> DfResult<()> { let mut acc1 = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input1 = create_f64_array(vec![Some(1.0), Some(2.0)]); - acc1.update_batch(&[input1])?; + acc1.update_batch(&[q, input1])?; let state1 = acc1.state()?; let mut acc2 = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input2 = create_f64_array(vec![Some(3.0), Some(4.0), Some(5.0)]); - acc2.update_batch(&[input2])?; + acc2.update_batch(&[q, input2])?; let mut struct_builders = vec![]; for scalar in &state1 { @@ -280,16 +316,16 @@ mod tests { #[test] fn test_quantile_accumulator_with_extreme_values() { let mut accumulator = QuantileAccumulator::new(0.5); + let q = create_f64_array(vec![Some(0.5)]); let input = create_f64_array(vec![Some(f64::MAX), Some(f64::MIN), Some(0.0)]); - accumulator.update_batch(&[input]).unwrap(); + accumulator.update_batch(&[q, input]).unwrap(); let _result = accumulator.evaluate().unwrap(); } #[test] fn test_quantile_udaf_creation() { - let q = 0.5; - let udaf = quantile_udaf(q); + let udaf = quantile_udaf(); assert_eq!(udaf.name(), QUANTILE_NAME); assert_eq!(udaf.return_type(&[]).unwrap(), DataType::Float64); diff --git a/src/promql/src/functions/resets.rs b/src/promql/src/functions/resets.rs index 7df44b5e76..05d091db0d 100644 --- a/src/promql/src/functions/resets.rs +++ b/src/promql/src/functions/resets.rs @@ -90,6 +90,7 @@ mod test { Resets::scalar_udf(), ts_array_1, value_array_1, + vec![], vec![Some(0.0), Some(1.0), Some(2.0), Some(3.0), None], ); @@ -101,6 +102,7 @@ mod test { Resets::scalar_udf(), ts_array_2, value_array_2, + vec![], vec![Some(0.0), Some(0.0), Some(1.0), Some(1.0), None], ); @@ -111,6 +113,7 @@ mod test { Resets::scalar_udf(), ts_array_3, value_array_3, + vec![], vec![Some(0.0), Some(0.0), Some(0.0), Some(0.0), None], ); } diff --git a/src/promql/src/functions/round.rs b/src/promql/src/functions/round.rs index d1c9d318d8..c3931c5424 100644 --- a/src/promql/src/functions/round.rs +++ b/src/promql/src/functions/round.rs @@ -15,6 +15,7 @@ use std::sync::Arc; use datafusion::error::DataFusionError; +use datafusion_common::ScalarValue; use datafusion_expr::{create_udf, ColumnarValue, ScalarUDF, Volatility}; use datatypes::arrow::array::AsArray; use datatypes::arrow::datatypes::{DataType, Float64Type}; @@ -36,25 +37,39 @@ impl Round { } fn input_type() -> Vec { - vec![DataType::Float64] + vec![DataType::Float64, DataType::Float64] } pub fn return_type() -> DataType { DataType::Float64 } - pub fn scalar_udf(nearest: f64) -> ScalarUDF { + pub fn scalar_udf() -> ScalarUDF { create_udf( Self::name(), Self::input_type(), Self::return_type(), Volatility::Volatile, - Arc::new(move |input: &_| Self::new(nearest).calc(input)) as _, + Arc::new(move |input: &_| Self::create_function(input)?.calc(input)) as _, ) } + fn create_function(inputs: &[ColumnarValue]) -> Result { + if inputs.len() != 2 { + return Err(DataFusionError::Plan( + "Round function should have 2 inputs".to_string(), + )); + } + let ColumnarValue::Scalar(ScalarValue::Float64(Some(nearest))) = inputs[1] else { + return Err(DataFusionError::Plan( + "Round function's second input should be a scalar float64".to_string(), + )); + }; + Ok(Self::new(nearest)) + } + fn calc(&self, input: &[ColumnarValue]) -> Result { - assert_eq!(input.len(), 1); + assert_eq!(input.len(), 2); let value_array = extract_array(&input[0])?; @@ -80,8 +95,11 @@ mod tests { use super::*; fn test_round_f64(value: Vec, nearest: f64, expected: Vec) { - let round_udf = Round::scalar_udf(nearest); - let input = vec![ColumnarValue::Array(Arc::new(Float64Array::from(value)))]; + let round_udf = Round::scalar_udf(); + let input = vec![ + ColumnarValue::Array(Arc::new(Float64Array::from(value))), + ColumnarValue::Scalar(ScalarValue::Float64(Some(nearest))), + ]; let args = ScalarFunctionArgs { args: input, number_rows: 1, diff --git a/src/promql/src/functions/test_util.rs b/src/promql/src/functions/test_util.rs index 46ad6ec1a8..fb76ca52b5 100644 --- a/src/promql/src/functions/test_util.rs +++ b/src/promql/src/functions/test_util.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use datafusion::arrow::array::Float64Array; use datafusion::logical_expr::ScalarUDF; use datafusion::physical_plan::ColumnarValue; +use datafusion_common::ScalarValue; use datafusion_expr::ScalarFunctionArgs; use datatypes::arrow::datatypes::DataType; @@ -28,13 +29,17 @@ pub fn simple_range_udf_runner( range_fn: ScalarUDF, input_ts: RangeArray, input_value: RangeArray, + other_args: Vec, expected: Vec>, ) { let num_rows = input_ts.len(); - let input = vec![ + let input = [ ColumnarValue::Array(Arc::new(input_ts.into_dict())), ColumnarValue::Array(Arc::new(input_value.into_dict())), - ]; + ] + .into_iter() + .chain(other_args.into_iter().map(ColumnarValue::Scalar)) + .collect::>(); let args = ScalarFunctionArgs { args: input, number_rows: num_rows, diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 5b3cb0f2db..59871e991c 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -55,12 +55,16 @@ impl Categorizer { LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate), LogicalPlan::Window(_) => Commutativity::Unimplemented, LogicalPlan::Aggregate(aggr) => { - if Self::check_partition(&aggr.group_expr, &partition_cols) { - return Commutativity::Commutative; + if !Self::check_partition(&aggr.group_expr, &partition_cols) { + return Commutativity::NonCommutative; } - - // check all children exprs and uses the strictest level - Commutativity::Unimplemented + for expr in &aggr.aggr_expr { + let commutativity = Self::check_expr(expr); + if !matches!(commutativity, Commutativity::Commutative) { + return commutativity; + } + } + Commutativity::Commutative } LogicalPlan::Sort(_) => { if partition_cols.is_empty() { @@ -94,7 +98,7 @@ impl Categorizer { } } LogicalPlan::Extension(extension) => { - Self::check_extension_plan(extension.node.as_ref() as _) + Self::check_extension_plan(extension.node.as_ref() as _, &partition_cols) } LogicalPlan::Distinct(_) => { if partition_cols.is_empty() { @@ -116,13 +120,30 @@ impl Categorizer { } } - pub fn check_extension_plan(plan: &dyn UserDefinedLogicalNode) -> Commutativity { + pub fn check_extension_plan( + plan: &dyn UserDefinedLogicalNode, + partition_cols: &[String], + ) -> Commutativity { match plan.name() { - name if name == EmptyMetric::name() + name if name == SeriesDivide::name() => { + let series_divide = plan.as_any().downcast_ref::().unwrap(); + let tags = series_divide.tags().iter().collect::>(); + for partition_col in partition_cols { + if !tags.contains(partition_col) { + return Commutativity::NonCommutative; + } + } + Commutativity::Commutative + } + name if name == SeriesNormalize::name() || name == InstantManipulate::name() - || name == SeriesNormalize::name() - || name == RangeManipulate::name() - || name == SeriesDivide::name() + || name == RangeManipulate::name() => + { + // They should always follows Series Divide. + // Either all commutative or all non-commutative (which will be blocked by SeriesDivide). + Commutativity::Commutative + } + name if name == EmptyMetric::name() || name == MergeScanLogicalPlan::name() || name == MergeSortLogicalPlan::name() => { @@ -148,8 +169,9 @@ impl Categorizer { | Expr::Negative(_) | Expr::Between(_) | Expr::Exists(_) - | Expr::InList(_) - | Expr::ScalarFunction(_) => Commutativity::Commutative, + | Expr::InList(_) => Commutativity::Commutative, + Expr::ScalarFunction(_udf) => Commutativity::Commutative, + Expr::AggregateFunction(_udaf) => Commutativity::Commutative, Expr::Like(_) | Expr::SimilarTo(_) @@ -158,7 +180,6 @@ impl Categorizer { | Expr::Case(_) | Expr::Cast(_) | Expr::TryCast(_) - | Expr::AggregateFunction(_) | Expr::WindowFunction(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) diff --git a/src/query/src/lib.rs b/src/query/src/lib.rs index 26fbfb27cd..428aa5cb45 100644 --- a/src/query/src/lib.rs +++ b/src/query/src/lib.rs @@ -14,7 +14,6 @@ #![feature(let_chains)] #![feature(int_roundings)] -#![feature(trait_upcasting)] #![feature(try_blocks)] #![feature(stmt_expr_attributes)] #![feature(iterator_try_collect)] @@ -28,7 +27,7 @@ pub mod error; pub mod executor; pub mod log_query; pub mod metrics; -mod optimizer; +pub mod optimizer; pub mod options; pub mod parser; mod part_sort; diff --git a/src/query/src/optimizer/parallelize_scan.rs b/src/query/src/optimizer/parallelize_scan.rs index 909ae11295..9d2f9215c2 100644 --- a/src/query/src/optimizer/parallelize_scan.rs +++ b/src/query/src/optimizer/parallelize_scan.rs @@ -138,26 +138,18 @@ impl ParallelizeScan { ) -> Vec> { if ranges.is_empty() { // Returns a single partition with no range. - return vec![vec![]]; + return vec![vec![]; expected_partition_num]; } if ranges.len() == 1 { - return vec![ranges]; + let mut vec = vec![vec![]; expected_partition_num]; + vec[0] = ranges; + return vec; } // Sort ranges by number of rows in descending order. ranges.sort_by(|a, b| b.num_rows.cmp(&a.num_rows)); - // Get the max row number of the ranges. Note that the number of rows may be 0 if statistics are not available. - let max_rows = ranges[0].num_rows; - let total_rows = ranges.iter().map(|range| range.num_rows).sum::(); - // Computes the partition num by the max row number. This eliminates the unbalance of the partitions. - let balanced_partition_num = if max_rows > 0 { - total_rows.div_ceil(max_rows) - } else { - ranges.len() - }; - let actual_partition_num = expected_partition_num.min(balanced_partition_num).max(1); - let mut partition_ranges = vec![vec![]; actual_partition_num]; + let mut partition_ranges = vec![vec![]; expected_partition_num]; #[derive(Eq, PartialEq)] struct HeapNode { @@ -179,7 +171,7 @@ impl ParallelizeScan { } let mut part_heap = - BinaryHeap::from_iter((0..actual_partition_num).map(|partition_idx| HeapNode { + BinaryHeap::from_iter((0..expected_partition_num).map(|partition_idx| HeapNode { num_rows: 0, partition_idx, })); @@ -270,7 +262,7 @@ mod test { ]; assert_eq!(result, expected); - // assign 4 ranges to 5 partitions. Only 4 partitions are returned. + // assign 4 ranges to 5 partitions. let expected_partition_num = 5; let result = ParallelizeScan::assign_partition_range(ranges, expected_partition_num); let expected = vec![ @@ -280,32 +272,31 @@ mod test { num_rows: 250, identifier: 4, }], + vec![PartitionRange { + start: Timestamp::new(0, TimeUnit::Second), + end: Timestamp::new(10, TimeUnit::Second), + num_rows: 100, + identifier: 1, + }], vec![PartitionRange { start: Timestamp::new(10, TimeUnit::Second), end: Timestamp::new(20, TimeUnit::Second), num_rows: 200, identifier: 2, }], - vec![ - PartitionRange { - start: Timestamp::new(20, TimeUnit::Second), - end: Timestamp::new(30, TimeUnit::Second), - num_rows: 150, - identifier: 3, - }, - PartitionRange { - start: Timestamp::new(0, TimeUnit::Second), - end: Timestamp::new(10, TimeUnit::Second), - num_rows: 100, - identifier: 1, - }, - ], + vec![], + vec![PartitionRange { + start: Timestamp::new(20, TimeUnit::Second), + end: Timestamp::new(30, TimeUnit::Second), + num_rows: 150, + identifier: 3, + }], ]; assert_eq!(result, expected); - // assign 0 ranges to 5 partitions. Only 1 partition is returned. + // assign 0 ranges to 5 partitions. Should return 5 empty ranges. let result = ParallelizeScan::assign_partition_range(vec![], 5); - assert_eq!(result.len(), 1); + assert_eq!(result.len(), 5); } #[test] diff --git a/src/query/src/part_sort.rs b/src/query/src/part_sort.rs index 1c784c8b33..cd35fb66fb 100644 --- a/src/query/src/part_sort.rs +++ b/src/query/src/part_sort.rs @@ -348,7 +348,7 @@ impl PartSortStream { &self, sort_column: &ArrayRef, ) -> datafusion_common::Result> { - if sort_column.len() == 0 { + if sort_column.is_empty() { return Ok(Some(0)); } diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index f81d2052dc..9f5d67a578 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -31,7 +31,7 @@ use datafusion::functions_aggregate::stddev::stddev_pop_udaf; use datafusion::functions_aggregate::sum::sum_udaf; use datafusion::functions_aggregate::variance::var_pop_udaf; use datafusion::functions_window::row_number::RowNumber; -use datafusion::logical_expr::expr::{AggregateFunction, Alias, ScalarFunction, WindowFunction}; +use datafusion::logical_expr::expr::{Alias, ScalarFunction, WindowFunction}; use datafusion::logical_expr::expr_rewriter::normalize_cols; use datafusion::logical_expr::{ BinaryExpr, Cast, Extension, LogicalPlan, LogicalPlanBuilder, Operator, @@ -1425,15 +1425,18 @@ impl PromPlanner { let field_column_pos = 0; let mut exprs = Vec::with_capacity(self.ctx.field_columns.len()); let scalar_func = match func.name { - "increase" => ScalarFunc::ExtrapolateUdf(Arc::new(Increase::scalar_udf( + "increase" => ScalarFunc::ExtrapolateUdf( + Arc::new(Increase::scalar_udf()), self.ctx.range.context(ExpectRangeSelectorSnafu)?, - ))), - "rate" => ScalarFunc::ExtrapolateUdf(Arc::new(Rate::scalar_udf( + ), + "rate" => ScalarFunc::ExtrapolateUdf( + Arc::new(Rate::scalar_udf()), self.ctx.range.context(ExpectRangeSelectorSnafu)?, - ))), - "delta" => ScalarFunc::ExtrapolateUdf(Arc::new(Delta::scalar_udf( + ), + "delta" => ScalarFunc::ExtrapolateUdf( + Arc::new(Delta::scalar_udf()), self.ctx.range.context(ExpectRangeSelectorSnafu)?, - ))), + ), "idelta" => ScalarFunc::Udf(Arc::new(IDelta::::scalar_udf())), "irate" => ScalarFunc::Udf(Arc::new(IDelta::::scalar_udf())), "resets" => ScalarFunc::Udf(Arc::new(Resets::scalar_udf())), @@ -1449,50 +1452,9 @@ impl PromPlanner { "present_over_time" => ScalarFunc::Udf(Arc::new(PresentOverTime::scalar_udf())), "stddev_over_time" => ScalarFunc::Udf(Arc::new(StddevOverTime::scalar_udf())), "stdvar_over_time" => ScalarFunc::Udf(Arc::new(StdvarOverTime::scalar_udf())), - "quantile_over_time" => { - let quantile_expr = match other_input_exprs.pop_front() { - Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => quantile, - other => UnexpectedPlanExprSnafu { - desc: format!("expected f64 literal as quantile, but found {:?}", other), - } - .fail()?, - }; - ScalarFunc::Udf(Arc::new(QuantileOverTime::scalar_udf(quantile_expr))) - } - "predict_linear" => { - let t_expr = match other_input_exprs.pop_front() { - Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t as i64, - Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t, - other => UnexpectedPlanExprSnafu { - desc: format!("expected i64 literal as t, but found {:?}", other), - } - .fail()?, - }; - ScalarFunc::Udf(Arc::new(PredictLinear::scalar_udf(t_expr))) - } - "holt_winters" => { - let sf_exp = match other_input_exprs.pop_front() { - Some(DfExpr::Literal(ScalarValue::Float64(Some(sf)))) => sf, - other => UnexpectedPlanExprSnafu { - desc: format!( - "expected f64 literal as smoothing factor, but found {:?}", - other - ), - } - .fail()?, - }; - let tf_exp = match other_input_exprs.pop_front() { - Some(DfExpr::Literal(ScalarValue::Float64(Some(tf)))) => tf, - other => UnexpectedPlanExprSnafu { - desc: format!( - "expected f64 literal as trend factor, but found {:?}", - other - ), - } - .fail()?, - }; - ScalarFunc::Udf(Arc::new(HoltWinters::scalar_udf(sf_exp, tf_exp))) - } + "quantile_over_time" => ScalarFunc::Udf(Arc::new(QuantileOverTime::scalar_udf())), + "predict_linear" => ScalarFunc::Udf(Arc::new(PredictLinear::scalar_udf())), + "holt_winters" => ScalarFunc::Udf(Arc::new(HoltWinters::scalar_udf())), "time" => { exprs.push(build_special_time_expr( self.ctx.time_index_column.as_ref().unwrap(), @@ -1627,17 +1589,10 @@ impl PromPlanner { ScalarFunc::GeneratedExpr } "round" => { - let nearest = match other_input_exprs.pop_front() { - Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t, - Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t as f64, - None => 0.0, - other => UnexpectedPlanExprSnafu { - desc: format!("expected f64 literal as t, but found {:?}", other), - } - .fail()?, - }; - - ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf(nearest))) + if other_input_exprs.is_empty() { + other_input_exprs.push_front(DfExpr::Literal(ScalarValue::Float64(Some(0.0)))); + } + ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf())) } _ => { @@ -1695,7 +1650,7 @@ impl PromPlanner { let _ = other_input_exprs.remove(field_column_pos + 1); let _ = other_input_exprs.remove(field_column_pos); } - ScalarFunc::ExtrapolateUdf(func) => { + ScalarFunc::ExtrapolateUdf(func, range_length) => { let ts_range_expr = DfExpr::Column(Column::from_name( RangeManipulate::build_timestamp_range_name( self.ctx.time_index_column.as_ref().unwrap(), @@ -1705,11 +1660,13 @@ impl PromPlanner { other_input_exprs.insert(field_column_pos + 1, col_expr); other_input_exprs .insert(field_column_pos + 2, self.create_time_index_column_expr()?); + other_input_exprs.push_back(lit(range_length)); let fn_expr = DfExpr::ScalarFunction(ScalarFunction { func, args: other_input_exprs.clone().into(), }); exprs.push(fn_expr); + let _ = other_input_exprs.pop_back(); let _ = other_input_exprs.remove(field_column_pos + 2); let _ = other_input_exprs.remove(field_column_pos + 1); let _ = other_input_exprs.remove(field_column_pos); @@ -1972,11 +1929,13 @@ impl PromPlanner { param: &Option>, input_plan: &LogicalPlan, ) -> Result<(Vec, Vec)> { + let mut non_col_args = Vec::new(); let aggr = match op.id() { token::T_SUM => sum_udaf(), token::T_QUANTILE => { let q = Self::get_param_value_as_f64(op, param)?; - quantile_udaf(q) + non_col_args.push(lit(q)); + quantile_udaf() } token::T_AVG => avg_udaf(), token::T_COUNT_VALUES | token::T_COUNT => count_udaf(), @@ -1998,16 +1957,12 @@ impl PromPlanner { .field_columns .iter() .map(|col| { - Ok(DfExpr::AggregateFunction(AggregateFunction { - func: aggr.clone(), - args: vec![DfExpr::Column(Column::from_name(col))], - distinct: false, - filter: None, - order_by: None, - null_treatment: None, - })) + non_col_args.push(DfExpr::Column(Column::from_name(col))); + let expr = aggr.call(non_col_args.clone()); + non_col_args.pop(); + expr }) - .collect::>>()?; + .collect::>(); // if the aggregator is `count_values`, it must be grouped by current fields. let prev_field_exprs = if op.id() == token::T_COUNT_VALUES { @@ -2941,7 +2896,8 @@ enum ScalarFunc { Udf(Arc), // todo(ruihang): maybe merge with Udf later /// UDF that require extra information like range length to be evaluated. - ExtrapolateUdf(Arc), + /// The second argument is range length. + ExtrapolateUdf(Arc, i64), /// Func that doesn't require input, like `time()`. GeneratedExpr, } @@ -3595,8 +3551,8 @@ mod test { async fn increase_aggr() { let query = "increase(some_metric[5m])"; let expected = String::from( - "Filter: prom_increase(timestamp_range,field_0,timestamp) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp):Float64;N, tag_0:Utf8]\ - \n Projection: some_metric.timestamp, prom_increase(timestamp_range, field_0, some_metric.timestamp) AS prom_increase(timestamp_range,field_0,timestamp), some_metric.tag_0 [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp):Float64;N, tag_0:Utf8]\ + "Filter: prom_increase(timestamp_range,field_0,timestamp,Int64(300000)) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp,Int64(300000)):Float64;N, tag_0:Utf8]\ + \n Projection: some_metric.timestamp, prom_increase(timestamp_range, field_0, some_metric.timestamp, Int64(300000)) AS prom_increase(timestamp_range,field_0,timestamp,Int64(300000)), some_metric.tag_0 [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp,Int64(300000)):Float64;N, tag_0:Utf8]\ \n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[300000], time index=[timestamp], values=[\"field_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Dictionary(Int64, Float64);N, timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\ \n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\ \n PromSeriesDivide: tags=[\"tag_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\ @@ -4395,8 +4351,8 @@ mod test { let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) .await .unwrap(); - let expected = "Sort: prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [greptime_timestamp:Timestamp(Millisecond, None), quantile(sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\ - \n Aggregate: groupBy=[[prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[quantile(sum(prometheus_tsdb_head_series.greptime_value))]] [greptime_timestamp:Timestamp(Millisecond, None), quantile(sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\ + let expected = "Sort: prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [greptime_timestamp:Timestamp(Millisecond, None), quantile(Float64(0.3),sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\ + \n Aggregate: groupBy=[[prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[quantile(Float64(0.3), sum(prometheus_tsdb_head_series.greptime_value))]] [greptime_timestamp:Timestamp(Millisecond, None), quantile(Float64(0.3),sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\ \n Sort: prometheus_tsdb_head_series.ip ASC NULLS LAST, prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.greptime_value):Float64;N]\ \n Aggregate: groupBy=[[prometheus_tsdb_head_series.ip, prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[sum(prometheus_tsdb_head_series.greptime_value)]] [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.greptime_value):Float64;N]\ \n PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp] [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\ diff --git a/src/query/src/query_engine/default_serializer.rs b/src/query/src/query_engine/default_serializer.rs index c3feed1d55..4dbf008bb6 100644 --- a/src/query/src/query_engine/default_serializer.rs +++ b/src/query/src/query_engine/default_serializer.rs @@ -29,6 +29,10 @@ use datafusion::execution::{FunctionRegistry, SessionStateBuilder}; use datafusion::logical_expr::LogicalPlan; use datafusion_expr::UserDefinedLogicalNode; use greptime_proto::substrait_extension::MergeScan as PbMergeScan; +use promql::functions::{ + AbsentOverTime, AvgOverTime, Changes, CountOverTime, Deriv, IDelta, LastOverTime, MaxOverTime, + MinOverTime, PresentOverTime, Resets, StddevOverTime, StdvarOverTime, SumOverTime, +}; use prost::Message; use session::context::QueryContextRef; use snafu::ResultExt; @@ -132,6 +136,24 @@ impl SubstraitPlanDecoder for DefaultPlanDecoder { let _ = session_state.register_udaf(Arc::new(HllState::state_udf_impl())); let _ = session_state.register_udaf(Arc::new(HllState::merge_udf_impl())); let _ = session_state.register_udaf(Arc::new(GeoPathAccumulator::udf_impl())); + + // TODO(ruihang): add increase, rate, delta + let _ = session_state.register_udf(Arc::new(IDelta::::scalar_udf())); + let _ = session_state.register_udf(Arc::new(IDelta::::scalar_udf())); + let _ = session_state.register_udf(Arc::new(Resets::scalar_udf())); + let _ = session_state.register_udf(Arc::new(Changes::scalar_udf())); + let _ = session_state.register_udf(Arc::new(Deriv::scalar_udf())); + let _ = session_state.register_udf(Arc::new(AvgOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(MinOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(MaxOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(SumOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(CountOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(LastOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(AbsentOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(PresentOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(StddevOverTime::scalar_udf())); + let _ = session_state.register_udf(Arc::new(StdvarOverTime::scalar_udf())); + // TODO(ruihang): add quantile_over_time, predict_linear, holt_winters, round } let logical_plan = DFLogicalSubstraitConvertor .decode(message, session_state) diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index d55ab471f9..7acd38aa37 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -31,6 +31,7 @@ use datafusion::error::Result as DfResult; use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionContext, SessionState}; use datafusion::execution::runtime_env::RuntimeEnv; use datafusion::execution::SessionStateBuilder; +use datafusion::physical_optimizer::enforce_sorting::EnforceSorting; use datafusion::physical_optimizer::optimizer::PhysicalOptimizer; use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan; use datafusion::physical_optimizer::PhysicalOptimizerRule; @@ -142,6 +143,9 @@ impl QueryEngineState { physical_optimizer .rules .insert(1, Arc::new(PassDistribution)); + physical_optimizer + .rules + .insert(2, Arc::new(EnforceSorting {})); // Add rule for windowed sort physical_optimizer .rules diff --git a/src/servers/src/http/timeout.rs b/src/servers/src/http/timeout.rs index 050ec492e0..adfc29cd95 100644 --- a/src/servers/src/http/timeout.rs +++ b/src/servers/src/http/timeout.rs @@ -117,7 +117,7 @@ where fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { match self.inner.poll_ready(cx) { Poll::Pending => Poll::Pending, - Poll::Ready(r) => Poll::Ready(r.map_err(Into::into)), + Poll::Ready(r) => Poll::Ready(r), } } diff --git a/src/servers/src/lib.rs b/src/servers/src/lib.rs index a13cd0ce1f..f55bc76e17 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -17,7 +17,6 @@ #![feature(exclusive_wrapper)] #![feature(let_chains)] #![feature(if_let_guard)] -#![feature(trait_upcasting)] use datafusion_expr::LogicalPlan; use datatypes::schema::Schema; diff --git a/src/sql/src/statements/transform.rs b/src/sql/src/statements/transform.rs index 2ca642cd11..7bd4218d2f 100644 --- a/src/sql/src/statements/transform.rs +++ b/src/sql/src/statements/transform.rs @@ -55,7 +55,7 @@ pub fn transform_statements(stmts: &mut Vec) -> Result<()> { } } - visit_expressions_mut(stmts, |expr| { + let _ = visit_expressions_mut(stmts, |expr| { for rule in RULES.iter() { rule.visit_expr(expr)?; } diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 426e3f69ca..e8372a8df2 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -290,7 +290,7 @@ impl RegionMetadata { pub fn project(&self, projection: &[ColumnId]) -> Result { // check time index ensure!( - projection.iter().any(|id| *id == self.time_index), + projection.contains(&self.time_index), TimeIndexNotFoundSnafu ); diff --git a/src/table/src/lib.rs b/src/table/src/lib.rs index 64e72029b8..5fb800ab3c 100644 --- a/src/table/src/lib.rs +++ b/src/table/src/lib.rs @@ -14,6 +14,7 @@ #![feature(assert_matches)] #![feature(try_blocks)] +#![feature(let_chains)] pub mod dist_table; pub mod error; diff --git a/src/table/src/table/adapter.rs b/src/table/src/table/adapter.rs index 2cf9a9647f..4ba880b1eb 100644 --- a/src/table/src/table/adapter.rs +++ b/src/table/src/table/adapter.rs @@ -95,7 +95,7 @@ impl TableProvider for DfTableProviderAdapter { filters: &[Expr], limit: Option, ) -> DfResult> { - let filters: Vec = filters.iter().map(Clone::clone).map(Into::into).collect(); + let filters: Vec = filters.iter().map(Clone::clone).collect(); let request = { let mut request = self.scan_req.lock().unwrap(); request.filters = filters; diff --git a/src/table/src/table/scan.rs b/src/table/src/table/scan.rs index cd69b22b5a..53690e84eb 100644 --- a/src/table/src/table/scan.rs +++ b/src/table/src/table/scan.rs @@ -82,11 +82,17 @@ impl RegionScanExec { if scanner.properties().is_logical_region() { pk_names.sort_unstable(); } - let mut pk_columns: Vec = pk_names - .into_iter() + let pk_columns = pk_names + .iter() + .filter_map( + |col| Some(Arc::new(Column::new_with_schema(col, &arrow_schema).ok()?) as _), + ) + .collect::>(); + let mut pk_sort_columns: Vec = pk_names + .iter() .filter_map(|col| { Some(PhysicalSortExpr::new( - Arc::new(Column::new_with_schema(&col, &arrow_schema).ok()?) as _, + Arc::new(Column::new_with_schema(col, &arrow_schema).ok()?) as _, SortOptions { descending: false, nulls_first: true, @@ -113,28 +119,37 @@ impl RegionScanExec { let eq_props = match request.distribution { Some(TimeSeriesDistribution::PerSeries) => { if let Some(ts) = ts_col { - pk_columns.push(ts); + pk_sort_columns.push(ts); } EquivalenceProperties::new_with_orderings( arrow_schema.clone(), - &[LexOrdering::new(pk_columns)], + &[LexOrdering::new(pk_sort_columns)], ) } Some(TimeSeriesDistribution::TimeWindowed) => { if let Some(ts_col) = ts_col { - pk_columns.insert(0, ts_col); + pk_sort_columns.insert(0, ts_col); } EquivalenceProperties::new_with_orderings( arrow_schema.clone(), - &[LexOrdering::new(pk_columns)], + &[LexOrdering::new(pk_sort_columns)], ) } None => EquivalenceProperties::new(arrow_schema.clone()), }; + let partitioning = match request.distribution { + Some(TimeSeriesDistribution::PerSeries) => { + Partitioning::Hash(pk_columns.clone(), num_output_partition) + } + Some(TimeSeriesDistribution::TimeWindowed) | None => { + Partitioning::UnknownPartitioning(num_output_partition) + } + }; + let properties = PlanProperties::new( eq_props, - Partitioning::UnknownPartitioning(num_output_partition), + partitioning, EmissionType::Incremental, Boundedness::Bounded, ); @@ -188,9 +203,14 @@ impl RegionScanExec { warn!("Setting partition ranges more than once for RegionScanExec"); } - let num_partitions = partitions.len(); let mut properties = self.properties.clone(); - properties.partitioning = Partitioning::UnknownPartitioning(num_partitions); + let new_partitioning = match properties.partitioning { + Partitioning::Hash(ref columns, _) => { + Partitioning::Hash(columns.clone(), target_partitions) + } + _ => Partitioning::UnknownPartitioning(target_partitions), + }; + properties.partitioning = new_partitioning; { let mut scanner = self.scanner.lock().unwrap(); diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index f52d75f4da..ec7f1d8b27 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -85,11 +85,7 @@ pub struct UnstableTestVariables { pub fn load_unstable_test_env_variables() -> UnstableTestVariables { let _ = dotenv::dotenv(); let binary_path = env::var(GT_FUZZ_BINARY_PATH).expect("GT_FUZZ_BINARY_PATH not found"); - let root_dir = if let Ok(root) = env::var(GT_FUZZ_INSTANCE_ROOT_DIR) { - Some(root) - } else { - None - }; + let root_dir = env::var(GT_FUZZ_INSTANCE_ROOT_DIR).ok(); UnstableTestVariables { binary_path, diff --git a/tests-fuzz/targets/unstable/fuzz_create_table_standalone.rs b/tests-fuzz/targets/unstable/fuzz_create_table_standalone.rs index 575659ab8e..53369e9792 100644 --- a/tests-fuzz/targets/unstable/fuzz_create_table_standalone.rs +++ b/tests-fuzz/targets/unstable/fuzz_create_table_standalone.rs @@ -157,7 +157,7 @@ async fn execute_unstable_create_table( } Err(err) => { // FIXME(weny): support to retry it later. - if matches!(err, sqlx::Error::PoolTimedOut { .. }) { + if matches!(err, sqlx::Error::PoolTimedOut) { warn!("ignore pool timeout, sql: {sql}"); continue; } diff --git a/tests-integration/src/cluster.rs b/tests-integration/src/cluster.rs index 836c3a5483..0b18a71d3a 100644 --- a/tests-integration/src/cluster.rs +++ b/tests-integration/src/cluster.rs @@ -489,10 +489,7 @@ async fn create_datanode_client(datanode: &Datanode) -> (String, Client) { if let Some(client) = client { Ok(TokioIo::new(client)) } else { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "Client already taken", - )) + Err(std::io::Error::other("Client already taken")) } } }), diff --git a/tests/cases/standalone/common/flow/flow_step_aggr.result b/tests/cases/standalone/common/flow/flow_step_aggr.result new file mode 100644 index 0000000000..ab76a67617 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_step_aggr.result @@ -0,0 +1,266 @@ +CREATE TABLE access_log ( + "url" STRING, + user_id BIGINT, + ts TIMESTAMP TIME INDEX, + PRIMARY KEY ("url", user_id) +); + +Affected Rows: 0 + +CREATE TABLE access_log_10s ( + "url" STRING, + time_window timestamp time INDEX, + state BINARY, + PRIMARY KEY ("url") +); + +Affected Rows: 0 + +CREATE FLOW calc_access_log_10s SINK TO access_log_10s +AS +SELECT + "url", + date_bin('10s'::INTERVAL, ts) AS time_window, + hll(user_id) AS state +FROM + access_log +GROUP BY + "url", + time_window; + +Affected Rows: 0 + +-- insert 4 rows of data +INSERT INTO access_log VALUES + ("/dashboard", 1, "2025-03-04 00:00:00"), + ("/dashboard", 1, "2025-03-04 00:00:01"), + ("/dashboard", 2, "2025-03-04 00:00:05"), + ("/not_found", 3, "2025-03-04 00:00:11"), + ("/dashboard", 4, "2025-03-04 00:00:15"); + +Affected Rows: 5 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_access_log_10s'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('calc_access_log_10s') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- query should return 3 rows +SELECT "url", time_window FROM access_log_10s +ORDER BY + time_window; + ++------------+---------------------+ +| url | time_window | ++------------+---------------------+ +| /dashboard | 2025-03-04T00:00:00 | +| /dashboard | 2025-03-04T00:00:10 | +| /not_found | 2025-03-04T00:00:10 | ++------------+---------------------+ + +-- use hll_count to query the approximate data in access_log_10s +SELECT "url", time_window, hll_count(state) FROM access_log_10s +ORDER BY + time_window; + ++------------+---------------------+---------------------------------+ +| url | time_window | hll_count(access_log_10s.state) | ++------------+---------------------+---------------------------------+ +| /dashboard | 2025-03-04T00:00:00 | 2 | +| /dashboard | 2025-03-04T00:00:10 | 1 | +| /not_found | 2025-03-04T00:00:10 | 1 | ++------------+---------------------+---------------------------------+ + +-- further, we can aggregate 10 seconds of data to every minute, by using hll_merge to merge 10 seconds of hyperloglog state +SELECT + "url", + date_bin('1 minute'::INTERVAL, time_window) AS time_window_1m, + hll_count(hll_merge(state)) as uv_per_min +FROM + access_log_10s +GROUP BY + "url", + time_window_1m +ORDER BY + time_window_1m; + ++------------+---------------------+------------+ +| url | time_window_1m | uv_per_min | ++------------+---------------------+------------+ +| /not_found | 2025-03-04T00:00:00 | 1 | +| /dashboard | 2025-03-04T00:00:00 | 3 | ++------------+---------------------+------------+ + +DROP FLOW calc_access_log_10s; + +Affected Rows: 0 + +DROP TABLE access_log_10s; + +Affected Rows: 0 + +DROP TABLE access_log; + +Affected Rows: 0 + +CREATE TABLE percentile_base ( + "id" INT PRIMARY KEY, + "value" DOUBLE, + ts timestamp(0) time index +); + +Affected Rows: 0 + +CREATE TABLE percentile_5s ( + "percentile_state" BINARY, + time_window timestamp(0) time index +); + +Affected Rows: 0 + +CREATE FLOW calc_percentile_5s SINK TO percentile_5s +AS +SELECT + uddsketch_state(128, 0.01, "value") AS "value", + date_bin('5 seconds'::INTERVAL, ts) AS time_window +FROM + percentile_base +WHERE + "value" > 0 AND "value" < 70 +GROUP BY + time_window; + +Affected Rows: 0 + +INSERT INTO percentile_base ("id", "value", ts) VALUES + (1, 10.0, 1), + (2, 20.0, 2), + (3, 30.0, 3), + (4, 40.0, 4), + (5, 50.0, 5), + (6, 60.0, 6), + (7, 70.0, 7), + (8, 80.0, 8), + (9, 90.0, 9), + (10, 100.0, 10); + +Affected Rows: 10 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_percentile_5s'); + ++----------------------------------------+ +| ADMIN FLUSH_FLOW('calc_percentile_5s') | ++----------------------------------------+ +| FLOW_FLUSHED | ++----------------------------------------+ + +SELECT + time_window, + uddsketch_calc(0.99, `percentile_state`) AS p99 +FROM + percentile_5s +ORDER BY + time_window; + ++---------------------+--------------------+ +| time_window | p99 | ++---------------------+--------------------+ +| 1970-01-01T00:00:00 | 40.04777053326359 | +| 1970-01-01T00:00:05 | 59.745049810145126 | ++---------------------+--------------------+ + +DROP FLOW calc_percentile_5s; + +Affected Rows: 0 + +DROP TABLE percentile_5s; + +Affected Rows: 0 + +DROP TABLE percentile_base; + +Affected Rows: 0 + +CREATE TABLE percentile_base ( + "id" INT PRIMARY KEY, + "value" DOUBLE, + ts timestamp(0) time index +); + +Affected Rows: 0 + +CREATE TABLE percentile_5s ( + "percentile_state" BINARY, + time_window timestamp(0) time index +); + +Affected Rows: 0 + +CREATE FLOW calc_percentile_5s SINK TO percentile_5s +AS +SELECT + uddsketch_state(128, 0.01, CASE WHEN "value" > 0 AND "value" < 70 THEN "value" ELSE NULL END) AS "value", + date_bin('5 seconds'::INTERVAL, ts) AS time_window +FROM + percentile_base +GROUP BY + time_window; + +Affected Rows: 0 + +INSERT INTO percentile_base ("id", "value", ts) VALUES + (1, 10.0, 1), + (2, 20.0, 2), + (3, 30.0, 3), + (4, 40.0, 4), + (5, 50.0, 5), + (6, 60.0, 6), + (7, 70.0, 7), + (8, 80.0, 8), + (9, 90.0, 9), + (10, 100.0, 10); + +Affected Rows: 10 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_percentile_5s'); + ++----------------------------------------+ +| ADMIN FLUSH_FLOW('calc_percentile_5s') | ++----------------------------------------+ +| FLOW_FLUSHED | ++----------------------------------------+ + +SELECT + time_window, + uddsketch_calc(0.99, percentile_state) AS p99 +FROM + percentile_5s +ORDER BY + time_window; + ++---------------------+--------------------+ +| time_window | p99 | ++---------------------+--------------------+ +| 1970-01-01T00:00:00 | 40.04777053326359 | +| 1970-01-01T00:00:05 | 59.745049810145126 | +| 1970-01-01T00:00:10 | | ++---------------------+--------------------+ + +DROP FLOW calc_percentile_5s; + +Affected Rows: 0 + +DROP TABLE percentile_5s; + +Affected Rows: 0 + +DROP TABLE percentile_base; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_step_aggr.sql b/tests/cases/standalone/common/flow/flow_step_aggr.sql new file mode 100644 index 0000000000..44dde88912 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_step_aggr.sql @@ -0,0 +1,161 @@ +CREATE TABLE access_log ( + "url" STRING, + user_id BIGINT, + ts TIMESTAMP TIME INDEX, + PRIMARY KEY ("url", user_id) +); + +CREATE TABLE access_log_10s ( + "url" STRING, + time_window timestamp time INDEX, + state BINARY, + PRIMARY KEY ("url") +); + +CREATE FLOW calc_access_log_10s SINK TO access_log_10s +AS +SELECT + "url", + date_bin('10s'::INTERVAL, ts) AS time_window, + hll(user_id) AS state +FROM + access_log +GROUP BY + "url", + time_window; + +-- insert 4 rows of data +INSERT INTO access_log VALUES + ("/dashboard", 1, "2025-03-04 00:00:00"), + ("/dashboard", 1, "2025-03-04 00:00:01"), + ("/dashboard", 2, "2025-03-04 00:00:05"), + ("/not_found", 3, "2025-03-04 00:00:11"), + ("/dashboard", 4, "2025-03-04 00:00:15"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_access_log_10s'); + +-- query should return 3 rows +SELECT "url", time_window FROM access_log_10s +ORDER BY + time_window; + +-- use hll_count to query the approximate data in access_log_10s +SELECT "url", time_window, hll_count(state) FROM access_log_10s +ORDER BY + time_window; + +-- further, we can aggregate 10 seconds of data to every minute, by using hll_merge to merge 10 seconds of hyperloglog state +SELECT + "url", + date_bin('1 minute'::INTERVAL, time_window) AS time_window_1m, + hll_count(hll_merge(state)) as uv_per_min +FROM + access_log_10s +GROUP BY + "url", + time_window_1m +ORDER BY + time_window_1m; + +DROP FLOW calc_access_log_10s; +DROP TABLE access_log_10s; +DROP TABLE access_log; + +CREATE TABLE percentile_base ( + "id" INT PRIMARY KEY, + "value" DOUBLE, + ts timestamp(0) time index +); + +CREATE TABLE percentile_5s ( + "percentile_state" BINARY, + time_window timestamp(0) time index +); + +CREATE FLOW calc_percentile_5s SINK TO percentile_5s +AS +SELECT + uddsketch_state(128, 0.01, "value") AS "value", + date_bin('5 seconds'::INTERVAL, ts) AS time_window +FROM + percentile_base +WHERE + "value" > 0 AND "value" < 70 +GROUP BY + time_window; + +INSERT INTO percentile_base ("id", "value", ts) VALUES + (1, 10.0, 1), + (2, 20.0, 2), + (3, 30.0, 3), + (4, 40.0, 4), + (5, 50.0, 5), + (6, 60.0, 6), + (7, 70.0, 7), + (8, 80.0, 8), + (9, 90.0, 9), + (10, 100.0, 10); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_percentile_5s'); + +SELECT + time_window, + uddsketch_calc(0.99, `percentile_state`) AS p99 +FROM + percentile_5s +ORDER BY + time_window; + +DROP FLOW calc_percentile_5s; +DROP TABLE percentile_5s; +DROP TABLE percentile_base; + +CREATE TABLE percentile_base ( + "id" INT PRIMARY KEY, + "value" DOUBLE, + ts timestamp(0) time index +); + +CREATE TABLE percentile_5s ( + "percentile_state" BINARY, + time_window timestamp(0) time index +); + +CREATE FLOW calc_percentile_5s SINK TO percentile_5s +AS +SELECT + uddsketch_state(128, 0.01, CASE WHEN "value" > 0 AND "value" < 70 THEN "value" ELSE NULL END) AS "value", + date_bin('5 seconds'::INTERVAL, ts) AS time_window +FROM + percentile_base +GROUP BY + time_window; + +INSERT INTO percentile_base ("id", "value", ts) VALUES + (1, 10.0, 1), + (2, 20.0, 2), + (3, 30.0, 3), + (4, 40.0, 4), + (5, 50.0, 5), + (6, 60.0, 6), + (7, 70.0, 7), + (8, 80.0, 8), + (9, 90.0, 9), + (10, 100.0, 10); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('calc_percentile_5s'); + +SELECT + time_window, + uddsketch_calc(0.99, percentile_state) AS p99 +FROM + percentile_5s +ORDER BY + time_window; + +DROP FLOW calc_percentile_5s; +DROP TABLE percentile_5s; +DROP TABLE percentile_base; diff --git a/tests/cases/standalone/common/promql/quantile.result b/tests/cases/standalone/common/promql/quantile.result index 8676bbbb77..c3aa1ef1ec 100644 --- a/tests/cases/standalone/common/promql/quantile.result +++ b/tests/cases/standalone/common/promql/quantile.result @@ -30,40 +30,40 @@ Affected Rows: 16 TQL EVAL (0, 15, '5s') quantile(0.5, test); -+---------------------+--------------------+ -| ts | quantile(test.val) | -+---------------------+--------------------+ -| 1970-01-01T00:00:00 | 2.5 | -| 1970-01-01T00:00:05 | 6.5 | -| 1970-01-01T00:00:10 | 10.5 | -| 1970-01-01T00:00:15 | 14.5 | -+---------------------+--------------------+ ++---------------------+---------------------------------+ +| ts | quantile(Float64(0.5),test.val) | ++---------------------+---------------------------------+ +| 1970-01-01T00:00:00 | 2.5 | +| 1970-01-01T00:00:05 | 6.5 | +| 1970-01-01T00:00:10 | 10.5 | +| 1970-01-01T00:00:15 | 14.5 | ++---------------------+---------------------------------+ TQL EVAL (0, 15, '5s') quantile(0.5, test) by (idc); -+------+---------------------+--------------------+ -| idc | ts | quantile(test.val) | -+------+---------------------+--------------------+ -| idc1 | 1970-01-01T00:00:00 | 1.5 | -| idc1 | 1970-01-01T00:00:05 | 5.5 | -| idc1 | 1970-01-01T00:00:10 | 9.5 | -| idc1 | 1970-01-01T00:00:15 | 13.5 | -| idc2 | 1970-01-01T00:00:00 | 3.5 | -| idc2 | 1970-01-01T00:00:05 | 7.5 | -| idc2 | 1970-01-01T00:00:10 | 11.5 | -| idc2 | 1970-01-01T00:00:15 | 15.5 | -+------+---------------------+--------------------+ ++------+---------------------+---------------------------------+ +| idc | ts | quantile(Float64(0.5),test.val) | ++------+---------------------+---------------------------------+ +| idc1 | 1970-01-01T00:00:00 | 1.5 | +| idc1 | 1970-01-01T00:00:05 | 5.5 | +| idc1 | 1970-01-01T00:00:10 | 9.5 | +| idc1 | 1970-01-01T00:00:15 | 13.5 | +| idc2 | 1970-01-01T00:00:00 | 3.5 | +| idc2 | 1970-01-01T00:00:05 | 7.5 | +| idc2 | 1970-01-01T00:00:10 | 11.5 | +| idc2 | 1970-01-01T00:00:15 | 15.5 | ++------+---------------------+---------------------------------+ TQL EVAL (0, 15, '5s') quantile(0.5, sum(test) by (idc)); -+---------------------+-------------------------+ -| ts | quantile(sum(test.val)) | -+---------------------+-------------------------+ -| 1970-01-01T00:00:00 | 5.0 | -| 1970-01-01T00:00:05 | 13.0 | -| 1970-01-01T00:00:10 | 21.0 | -| 1970-01-01T00:00:15 | 29.0 | -+---------------------+-------------------------+ ++---------------------+--------------------------------------+ +| ts | quantile(Float64(0.5),sum(test.val)) | ++---------------------+--------------------------------------+ +| 1970-01-01T00:00:00 | 5.0 | +| 1970-01-01T00:00:05 | 13.0 | +| 1970-01-01T00:00:10 | 21.0 | +| 1970-01-01T00:00:15 | 29.0 | ++---------------------+--------------------------------------+ DROP TABLE test; diff --git a/tests/cases/standalone/common/promql/round_fn.result b/tests/cases/standalone/common/promql/round_fn.result index fe12ca6f67..5fe7e2beb0 100644 --- a/tests/cases/standalone/common/promql/round_fn.result +++ b/tests/cases/standalone/common/promql/round_fn.result @@ -18,62 +18,62 @@ Affected Rows: 4 -- SQLNESS SORT_RESULT 3 1 tql eval (3, 4, '1s') round(cache_hit, 0.01); -+---------------------+----------------------------+-------+ -| ts | prom_round(greptime_value) | job | -+---------------------+----------------------------+-------+ -| 1970-01-01T00:00:03 | 123.45 | read | -| 1970-01-01T00:00:03 | 234.57 | write | -| 1970-01-01T00:00:04 | 345.68 | read | -| 1970-01-01T00:00:04 | 456.79 | write | -+---------------------+----------------------------+-------+ ++---------------------+------------------------------------------+-------+ +| ts | prom_round(greptime_value,Float64(0.01)) | job | ++---------------------+------------------------------------------+-------+ +| 1970-01-01T00:00:03 | 123.45 | read | +| 1970-01-01T00:00:03 | 234.57 | write | +| 1970-01-01T00:00:04 | 345.68 | read | +| 1970-01-01T00:00:04 | 456.79 | write | ++---------------------+------------------------------------------+-------+ -- SQLNESS SORT_RESULT 3 1 tql eval (3, 4, '1s') round(cache_hit, 0.1); -+---------------------+----------------------------+-------+ -| ts | prom_round(greptime_value) | job | -+---------------------+----------------------------+-------+ -| 1970-01-01T00:00:03 | 123.5 | read | -| 1970-01-01T00:00:03 | 234.60000000000002 | write | -| 1970-01-01T00:00:04 | 345.70000000000005 | read | -| 1970-01-01T00:00:04 | 456.8 | write | -+---------------------+----------------------------+-------+ ++---------------------+-----------------------------------------+-------+ +| ts | prom_round(greptime_value,Float64(0.1)) | job | ++---------------------+-----------------------------------------+-------+ +| 1970-01-01T00:00:03 | 123.5 | read | +| 1970-01-01T00:00:03 | 234.60000000000002 | write | +| 1970-01-01T00:00:04 | 345.70000000000005 | read | +| 1970-01-01T00:00:04 | 456.8 | write | ++---------------------+-----------------------------------------+-------+ -- SQLNESS SORT_RESULT 3 1 tql eval (3, 4, '1s') round(cache_hit, 1.0); -+---------------------+----------------------------+-------+ -| ts | prom_round(greptime_value) | job | -+---------------------+----------------------------+-------+ -| 1970-01-01T00:00:03 | 123.0 | read | -| 1970-01-01T00:00:03 | 235.0 | write | -| 1970-01-01T00:00:04 | 346.0 | read | -| 1970-01-01T00:00:04 | 457.0 | write | -+---------------------+----------------------------+-------+ ++---------------------+---------------------------------------+-------+ +| ts | prom_round(greptime_value,Float64(1)) | job | ++---------------------+---------------------------------------+-------+ +| 1970-01-01T00:00:03 | 123.0 | read | +| 1970-01-01T00:00:03 | 235.0 | write | +| 1970-01-01T00:00:04 | 346.0 | read | +| 1970-01-01T00:00:04 | 457.0 | write | ++---------------------+---------------------------------------+-------+ -- SQLNESS SORT_RESULT 3 1 tql eval (3, 4, '1s') round(cache_hit); -+---------------------+----------------------------+-------+ -| ts | prom_round(greptime_value) | job | -+---------------------+----------------------------+-------+ -| 1970-01-01T00:00:03 | 123.0 | read | -| 1970-01-01T00:00:03 | 235.0 | write | -| 1970-01-01T00:00:04 | 346.0 | read | -| 1970-01-01T00:00:04 | 457.0 | write | -+---------------------+----------------------------+-------+ ++---------------------+---------------------------------------+-------+ +| ts | prom_round(greptime_value,Float64(0)) | job | ++---------------------+---------------------------------------+-------+ +| 1970-01-01T00:00:03 | 123.0 | read | +| 1970-01-01T00:00:03 | 235.0 | write | +| 1970-01-01T00:00:04 | 346.0 | read | +| 1970-01-01T00:00:04 | 457.0 | write | ++---------------------+---------------------------------------+-------+ -- SQLNESS SORT_RESULT 3 1 tql eval (3, 4, '1s') round(cache_hit, 10.0); -+---------------------+----------------------------+-------+ -| ts | prom_round(greptime_value) | job | -+---------------------+----------------------------+-------+ -| 1970-01-01T00:00:03 | 120.0 | read | -| 1970-01-01T00:00:03 | 230.0 | write | -| 1970-01-01T00:00:04 | 350.0 | read | -| 1970-01-01T00:00:04 | 460.0 | write | -+---------------------+----------------------------+-------+ ++---------------------+----------------------------------------+-------+ +| ts | prom_round(greptime_value,Float64(10)) | job | ++---------------------+----------------------------------------+-------+ +| 1970-01-01T00:00:03 | 120.0 | read | +| 1970-01-01T00:00:03 | 230.0 | write | +| 1970-01-01T00:00:04 | 350.0 | read | +| 1970-01-01T00:00:04 | 460.0 | write | ++---------------------+----------------------------------------+-------+ drop table cache_hit; diff --git a/tests/cases/standalone/common/promql/simple_histogram.result b/tests/cases/standalone/common/promql/simple_histogram.result index 1b4e35e934..132b6d333d 100644 --- a/tests/cases/standalone/common/promql/simple_histogram.result +++ b/tests/cases/standalone/common/promql/simple_histogram.result @@ -130,8 +130,7 @@ tql eval (3000, 3000, '1s') label_replace(histogram_quantile(0.8, histogram_buck -- quantile with rate is covered in other cases tql eval (3000, 3000, '1s') histogram_quantile(0.2, rate(histogram_bucket[5m])); -++ -++ +Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64) drop table histogram_bucket; @@ -228,27 +227,27 @@ tql eval (420, 420, '1s') histogram_quantile(0.833, histogram2_bucket); tql eval (2820, 2820, '1s') histogram_quantile(0.166, rate(histogram2_bucket[15m])); -+---------------------+----------------------------+ -| ts | prom_rate(ts_range,val,ts) | -+---------------------+----------------------------+ -| 1970-01-01T00:47:00 | 0.996 | -+---------------------+----------------------------+ ++---------------------+------------------------------------------+ +| ts | prom_rate(ts_range,val,ts,Int64(900000)) | ++---------------------+------------------------------------------+ +| 1970-01-01T00:47:00 | 0.996 | ++---------------------+------------------------------------------+ tql eval (2820, 2820, '1s') histogram_quantile(0.5, rate(histogram2_bucket[15m])); -+---------------------+----------------------------+ -| ts | prom_rate(ts_range,val,ts) | -+---------------------+----------------------------+ -| 1970-01-01T00:47:00 | 3.0 | -+---------------------+----------------------------+ ++---------------------+------------------------------------------+ +| ts | prom_rate(ts_range,val,ts,Int64(900000)) | ++---------------------+------------------------------------------+ +| 1970-01-01T00:47:00 | 3.0 | ++---------------------+------------------------------------------+ tql eval (2820, 2820, '1s') histogram_quantile(0.833, rate(histogram2_bucket[15m])); -+---------------------+----------------------------+ -| ts | prom_rate(ts_range,val,ts) | -+---------------------+----------------------------+ -| 1970-01-01T00:47:00 | 4.998 | -+---------------------+----------------------------+ ++---------------------+------------------------------------------+ +| ts | prom_rate(ts_range,val,ts,Int64(900000)) | ++---------------------+------------------------------------------+ +| 1970-01-01T00:47:00 | 4.998 | ++---------------------+------------------------------------------+ drop table histogram2_bucket; @@ -284,12 +283,12 @@ Affected Rows: 12 tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m]))); -+---+---------------------+---------------------------------+ -| s | ts | sum(prom_rate(ts_range,val,ts)) | -+---+---------------------+---------------------------------+ -| a | 1970-01-01T00:50:00 | 0.55 | -| a | 1970-01-01T00:50:03 | 0.5500000000000002 | -+---+---------------------+---------------------------------+ ++---+---------------------+-----------------------------------------------+ +| s | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) | ++---+---------------------+-----------------------------------------------+ +| a | 1970-01-01T00:50:00 | 0.55 | +| a | 1970-01-01T00:50:03 | 0.5500000000000002 | ++---+---------------------+-----------------------------------------------+ drop table histogram3_bucket; diff --git a/tests/cases/standalone/common/promql/subquery.result b/tests/cases/standalone/common/promql/subquery.result index d088468b17..12e65c4310 100644 --- a/tests/cases/standalone/common/promql/subquery.result +++ b/tests/cases/standalone/common/promql/subquery.result @@ -45,19 +45,19 @@ tql eval (359, 359, '1s') sum_over_time(metric_total[60s:10s]); tql eval (10, 10, '1s') rate(metric_total[20s:10s]); -+---------------------+----------------------------+ -| ts | prom_rate(ts_range,val,ts) | -+---------------------+----------------------------+ -| 1970-01-01T00:00:10 | 0.1 | -+---------------------+----------------------------+ ++---------------------+-----------------------------------------+ +| ts | prom_rate(ts_range,val,ts,Int64(20000)) | ++---------------------+-----------------------------------------+ +| 1970-01-01T00:00:10 | 0.1 | ++---------------------+-----------------------------------------+ tql eval (20, 20, '1s') rate(metric_total[20s:5s]); -+---------------------+----------------------------+ -| ts | prom_rate(ts_range,val,ts) | -+---------------------+----------------------------+ -| 1970-01-01T00:00:20 | 0.06666666666666667 | -+---------------------+----------------------------+ ++---------------------+-----------------------------------------+ +| ts | prom_rate(ts_range,val,ts,Int64(20000)) | ++---------------------+-----------------------------------------+ +| 1970-01-01T00:00:20 | 0.06666666666666667 | ++---------------------+-----------------------------------------+ drop table metric_total; diff --git a/tests/cases/standalone/common/range/error.result b/tests/cases/standalone/common/range/error.result index f7236d6096..e3f12646e7 100644 --- a/tests/cases/standalone/common/range/error.result +++ b/tests/cases/standalone/common/range/error.result @@ -54,7 +54,11 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Can't use the -- 2.2 no align param SELECT min(val) RANGE '5s' FROM host; -Error: 3000(PlanQuery), Error during planning: Missing argument in range select query +Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: ALIGN argument cannot be omitted in the range select query + +SELECT min(val) RANGE '5s' FILL PREV FROM host; + +Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: ALIGN argument cannot be omitted in the range select query -- 2.3 type mismatch SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s'; diff --git a/tests/cases/standalone/common/range/error.sql b/tests/cases/standalone/common/range/error.sql index ba3d1f63e2..3659be1c79 100644 --- a/tests/cases/standalone/common/range/error.sql +++ b/tests/cases/standalone/common/range/error.sql @@ -40,6 +40,8 @@ SELECT 1 RANGE '10s' FILL NULL FROM host ALIGN '1h' FILL NULL; SELECT min(val) RANGE '5s' FROM host; +SELECT min(val) RANGE '5s' FILL PREV FROM host; + -- 2.3 type mismatch SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s'; diff --git a/tests/cases/standalone/common/select/tql_filter.result b/tests/cases/standalone/common/select/tql_filter.result index 5c10d77a52..0d0c09e27c 100644 --- a/tests/cases/standalone/common/select/tql_filter.result +++ b/tests/cases/standalone/common/select/tql_filter.result @@ -17,11 +17,14 @@ tql analyze (1, 3, '1s') t1{ a = "a" }; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED -|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 3_| +-+-+-+ @@ -37,11 +40,14 @@ tql analyze (1, 3, '1s') t1{ a =~ ".*" }; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED -|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 6_| +-+-+-+ @@ -57,11 +63,14 @@ tql analyze (1, 3, '1s') t1{ a =~ "a.*" }; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED -|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED +|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED +|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 3_| +-+-+-+ diff --git a/tests/cases/standalone/common/system/pg_catalog.result b/tests/cases/standalone/common/system/pg_catalog.result index 092e9cab06..9e154b115c 100644 --- a/tests/cases/standalone/common/system/pg_catalog.result +++ b/tests/cases/standalone/common/system/pg_catalog.result @@ -3,7 +3,7 @@ create database pg_catalog; Error: 1004(InvalidArguments), Schema pg_catalog already exists --- session_user because session_user is based on the current user so is not null is for test +-- session_user because session_user is based on the current user so is not null is for test -- SQLNESS PROTOCOL POSTGRES SELECT session_user is not null; @@ -107,12 +107,13 @@ select * from pg_catalog.pg_type order by oid; +-----+-----------+--------+ -- SQLNESS PROTOCOL POSTGRES +-- SQLNESS REPLACE (\d+\s*) OID select * from pg_catalog.pg_database where datname = 'public'; +------------+---------+ | oid | datname | +------------+---------+ -| 3927743705 | public | +| OID| public | +------------+---------+ -- \d @@ -159,15 +160,16 @@ ORDER BY 1,2; -- make sure oid of namespace keep stable -- SQLNESS PROTOCOL POSTGRES -SELECT * FROM pg_namespace ORDER BY oid; +-- SQLNESS REPLACE (\d+\s*) OID +SELECT * FROM pg_namespace ORDER BY nspname; +------------+--------------------+ | oid | nspname | +------------+--------------------+ -| 667359454 | pg_catalog | -| 3174397350 | information_schema | -| 3338153620 | greptime_private | -| 3927743705 | public | +| OID| greptime_private | +| OID| information_schema | +| OID| pg_catalog | +| OID| public | +------------+--------------------+ -- SQLNESS PROTOCOL POSTGRES @@ -260,6 +262,7 @@ where relnamespace in ( +---------+ -- SQLNESS PROTOCOL POSTGRES +-- SQLNESS REPLACE (\d+\s*) OID select relnamespace, relname, relkind from pg_catalog.pg_class where relnamespace in ( @@ -274,7 +277,7 @@ order by relnamespace, relname; +--------------+---------+---------+ | relnamespace | relname | relkind | +--------------+---------+---------+ -| 434869349 | foo | r | +| OID| foo | r | +--------------+---------+---------+ -- SQLNESS PROTOCOL POSTGRES diff --git a/tests/cases/standalone/common/system/pg_catalog.sql b/tests/cases/standalone/common/system/pg_catalog.sql index 4a110a8f07..0b79c62afe 100644 --- a/tests/cases/standalone/common/system/pg_catalog.sql +++ b/tests/cases/standalone/common/system/pg_catalog.sql @@ -1,7 +1,7 @@ -- should not able to create pg_catalog create database pg_catalog; --- session_user because session_user is based on the current user so is not null is for test +-- session_user because session_user is based on the current user so is not null is for test -- SQLNESS PROTOCOL POSTGRES SELECT session_user is not null; @@ -34,6 +34,7 @@ select * from pg_catalog.pg_database; select * from pg_catalog.pg_type order by oid; -- SQLNESS PROTOCOL POSTGRES +-- SQLNESS REPLACE (\d+\s*) OID select * from pg_catalog.pg_database where datname = 'public'; -- \d @@ -68,7 +69,8 @@ ORDER BY 1,2; -- make sure oid of namespace keep stable -- SQLNESS PROTOCOL POSTGRES -SELECT * FROM pg_namespace ORDER BY oid; +-- SQLNESS REPLACE (\d+\s*) OID +SELECT * FROM pg_namespace ORDER BY nspname; -- SQLNESS PROTOCOL POSTGRES create database my_db; @@ -128,6 +130,7 @@ where relnamespace in ( ); -- SQLNESS PROTOCOL POSTGRES +-- SQLNESS REPLACE (\d+\s*) OID select relnamespace, relname, relkind from pg_catalog.pg_class where relnamespace in ( diff --git a/tests/cases/standalone/common/tql-explain-analyze/analyze.result b/tests/cases/standalone/common/tql-explain-analyze/analyze.result index af1a3d4fce..2f3b9474eb 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/analyze.result +++ b/tests/cases/standalone/common/tql-explain-analyze/analyze.result @@ -19,11 +19,14 @@ TQL ANALYZE (0, 10, '5s') test; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED -|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -41,11 +44,14 @@ TQL ANALYZE (0, 10, '1s', '2s') test; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[2000], interval=[1000], time index=[j] REDACTED -|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[2000], interval=[1000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -62,11 +68,14 @@ TQL ANALYZE ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED -|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -85,11 +94,14 @@ TQL ANALYZE VERBOSE (0, 10, '5s') test; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED -|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED -|_|_|_MergeScanExec: REDACTED +| 0_| 0_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries, projection=["i", "j", "k"], filters=[j >= TimestampMillisecond(-300000, None), j <= TimestampMillisecond(310000, None)], REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries, projection=["i", "j", "k"], filters=[j >= TimestampMillisecond(-300000, None), j <= TimestampMillisecond(310000, None)], REDACTED |_|_|_| |_|_| Total rows: 4_| +-+-+-+ @@ -114,13 +126,23 @@ TQL ANALYZE (0, 10, '5s') test; +-+-+-+ | stage | node | plan_| +-+-+-+ -| 0_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED -|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +| 0_| 0_|_SortPreservingMergeExec: [k@2 ASC, l@3 ASC, j@1 ASC] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC, j@1 ASC], preserve_partitioning=[true] REDACTED |_|_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| -| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 1_|_PromInstantManipulateExec: range=[0..10000], lookback=[300000], interval=[5000], time index=[j] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 0_| +-+-+-+ @@ -137,16 +159,28 @@ TQL ANALYZE (0, 10, '5s') rate(test[10s]); | stage | node | plan_| +-+-+-+ | 0_| 0_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED -|_|_|_FilterExec: prom_rate(j_range,i,j)@1 IS NOT NULL REDACTED -|_|_|_ProjectionExec: expr=[j@1 as j, prom_rate(j_range@4, i@0, j@1) as prom_rate(j_range,i,j), k@2 as k, l@3 as l] REDACTED +|_|_|_FilterExec: prom_rate(j_range,i,j,Int64(10000))@1 IS NOT NULL REDACTED +|_|_|_ProjectionExec: expr=[j@1 as j, prom_rate(j_range@4, i@0, j@1, 10000) as prom_rate(j_range,i,j,Int64(10000)), k@2 as k, l@3 as l] REDACTED |_|_|_PromRangeManipulateExec: req range=[0..10000], interval=[5000], eval range=[10000], time index=[j] REDACTED |_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED |_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED |_|_|_MergeScanExec: REDACTED |_|_|_| -| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 0_|_PromRangeManipulateExec: req range=[0..10000], interval=[5000], eval range=[10000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| -| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +| 1_| 1_|_PromRangeManipulateExec: req range=[0..10000], interval=[5000], eval range=[10000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED |_|_|_| |_|_| Total rows: 0_| +-+-+-+ diff --git a/tests/cases/standalone/common/tql-explain-analyze/explain.result b/tests/cases/standalone/common/tql-explain-analyze/explain.result index 200ec5c814..e4465eed0d 100644 --- a/tests/cases/standalone/common/tql-explain-analyze/explain.result +++ b/tests/cases/standalone/common/tql-explain-analyze/explain.result @@ -12,18 +12,13 @@ Affected Rows: 3 -- SQLNESS REPLACE (peers.*) REDACTED TQL EXPLAIN (0, 10, '5s') test; -+---------------+-----------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------+ -| logical_plan | PromInstantManipulate: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | -| | PromSeriesDivide: tags=["k"] | -| | Projection: test.i, test.j, test.k | -| | MergeScan [is_placeholder=false] | -| physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | -| | PromSeriesDivideExec: tags=["k"] | -| | MergeScanExec: REDACTED -| | | -+---------------+-----------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------+ +| logical_plan | MergeScan [is_placeholder=false] | +| physical_plan | MergeScanExec: REDACTED +| | | ++---------------+-------------------------------------------------+ -- 'lookback' parameter is not fully supported, the test has to be updated -- explain at 0s, 5s and 10s. No point at 0s. @@ -31,36 +26,26 @@ TQL EXPLAIN (0, 10, '5s') test; -- SQLNESS REPLACE (peers.*) REDACTED TQL EXPLAIN (0, 10, '1s', '2s') test; -+---------------+---------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------+ -| logical_plan | PromInstantManipulate: range=[0..0], lookback=[2000], interval=[300000], time index=[j] | -| | PromSeriesDivide: tags=["k"] | -| | Projection: test.i, test.j, test.k | -| | MergeScan [is_placeholder=false] | -| physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[2000], interval=[300000], time index=[j] | -| | PromSeriesDivideExec: tags=["k"] | -| | MergeScanExec: REDACTED -| | | -+---------------+---------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------+ +| logical_plan | MergeScan [is_placeholder=false] | +| physical_plan | MergeScanExec: REDACTED +| | | ++---------------+-------------------------------------------------+ -- explain at 0s, 5s and 10s. No point at 0s. -- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED -- SQLNESS REPLACE (peers.*) REDACTED TQL EXPLAIN ('1970-01-01T00:00:00'::timestamp, '1970-01-01T00:00:00'::timestamp + '10 seconds'::interval, '5s') test; -+---------------+-----------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------+ -| logical_plan | PromInstantManipulate: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | -| | PromSeriesDivide: tags=["k"] | -| | Projection: test.i, test.j, test.k | -| | MergeScan [is_placeholder=false] | -| physical_plan | PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j] | -| | PromSeriesDivideExec: tags=["k"] | -| | MergeScanExec: REDACTED -| | | -+---------------+-----------------------------------------------------------------------------------------------+ ++---------------+-------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------+ +| logical_plan | MergeScan [is_placeholder=false] | +| physical_plan | MergeScanExec: REDACTED +| | | ++---------------+-------------------------------------------------+ -- explain verbose at 0s, 5s and 10s. No point at 0s. -- SQLNESS REPLACE (-+) - @@ -85,9 +70,7 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | logical_plan after expand_wildcard_rule_| SAME TEXT AS ABOVE_| | logical_plan after resolve_grouping_function_| SAME TEXT AS ABOVE_| | logical_plan after type_coercion_| SAME TEXT AS ABOVE_| -| logical_plan after DistPlannerAnalyzer_| PromInstantManipulate: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivide: tags=["k"]_| -|_|_Projection: test.i, test.j, test.k_| +| logical_plan after DistPlannerAnalyzer_| Projection: test.i, test.j, test.k_| |_|_MergeScan [is_placeholder=false]_| | analyzed_logical_plan_| SAME TEXT AS ABOVE_| | logical_plan after eliminate_nested_union_| SAME TEXT AS ABOVE_| @@ -114,37 +97,45 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | logical_plan after unwrap_cast_in_comparison_| SAME TEXT AS ABOVE_| | logical_plan after common_sub_expression_eliminate_| SAME TEXT AS ABOVE_| | logical_plan after eliminate_group_by_constant_| SAME TEXT AS ABOVE_| +| logical_plan after optimize_projections_| MergeScan [is_placeholder=false]_| +| logical_plan after ScanHintRule_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_nested_union_| SAME TEXT AS ABOVE_| +| logical_plan after simplify_expressions_| SAME TEXT AS ABOVE_| +| logical_plan after unwrap_cast_in_comparison_| SAME TEXT AS ABOVE_| +| logical_plan after replace_distinct_aggregate_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_join_| SAME TEXT AS ABOVE_| +| logical_plan after decorrelate_predicate_subquery_| SAME TEXT AS ABOVE_| +| logical_plan after scalar_subquery_to_join_| SAME TEXT AS ABOVE_| +| logical_plan after extract_equijoin_predicate_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_duplicated_expr_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_filter_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_cross_join_| SAME TEXT AS ABOVE_| +| logical_plan after common_sub_expression_eliminate_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_limit_| SAME TEXT AS ABOVE_| +| logical_plan after propagate_empty_relation_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_one_union_| SAME TEXT AS ABOVE_| +| logical_plan after filter_null_join_keys_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_outer_join_| SAME TEXT AS ABOVE_| +| logical_plan after push_down_limit_| SAME TEXT AS ABOVE_| +| logical_plan after push_down_filter_| SAME TEXT AS ABOVE_| +| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE_| +| logical_plan after simplify_expressions_| SAME TEXT AS ABOVE_| +| logical_plan after unwrap_cast_in_comparison_| SAME TEXT AS ABOVE_| +| logical_plan after common_sub_expression_eliminate_| SAME TEXT AS ABOVE_| +| logical_plan after eliminate_group_by_constant_| SAME TEXT AS ABOVE_| | logical_plan after optimize_projections_| SAME TEXT AS ABOVE_| | logical_plan after ScanHintRule_| SAME TEXT AS ABOVE_| -| logical_plan_| PromInstantManipulate: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivide: tags=["k"]_| -|_|_Projection: test.i, test.j, test.k_| -|_|_MergeScan [is_placeholder=false]_| -| initial_physical_plan_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_ProjectionExec: expr=[i@0 as i, j@1 as j, k@2 as k]_| -|_|_MergeScanExec: REDACTED +| logical_plan_| MergeScan [is_placeholder=false]_| +| initial_physical_plan_| MergeScanExec: REDACTED |_|_| -| initial_physical_plan_with_stats_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], statistics=[Rows=Inexact(0), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] | -|_|_PromSeriesDivideExec: tags=["k"], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| -|_|_ProjectionExec: expr=[i@0 as i, j@1 as j, k@2 as k], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| -|_|_MergeScanExec: REDACTED +| initial_physical_plan_with_stats_| MergeScanExec: REDACTED |_|_| -| initial_physical_plan_with_schema_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], schema=[i:Float64;N, j:Timestamp(Millisecond, None), k:Utf8;N]_| -|_|_PromSeriesDivideExec: tags=["k"], schema=[i:Float64;N, j:Timestamp(Millisecond, None), k:Utf8;N]_| -|_|_ProjectionExec: expr=[i@0 as i, j@1 as j, k@2 as k], schema=[i:Float64;N, j:Timestamp(Millisecond, None), k:Utf8;N]_| -|_|_MergeScanExec: REDACTED +| initial_physical_plan_with_schema_| MergeScanExec: REDACTED |_|_| -| physical_plan after parallelize_scan_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_ProjectionExec: expr=[i@0 as i, j@1 as j, k@2 as k]_| -|_|_MergeScanExec: REDACTED +| physical_plan after parallelize_scan_| MergeScanExec: REDACTED |_|_| | physical_plan after PassDistributionRule_| SAME TEXT AS ABOVE_| | physical_plan after OutputRequirements_| OutputRequirementExec_| -|_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_ProjectionExec: expr=[i@0 as i, j@1 as j, k@2 as k]_| |_|_MergeScanExec: REDACTED |_|_| | physical_plan after aggregate_statistics_| SAME TEXT AS ABOVE_| @@ -154,15 +145,9 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | physical_plan after CombinePartialFinalAggregate_| SAME TEXT AS ABOVE_| | physical_plan after EnforceSorting_| SAME TEXT AS ABOVE_| | physical_plan after OptimizeAggregateOrder_| SAME TEXT AS ABOVE_| -| physical_plan after ProjectionPushdown_| OutputRequirementExec_| -|_|_PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_MergeScanExec: REDACTED -|_|_| +| physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| | physical_plan after coalesce_batches_| SAME TEXT AS ABOVE_| -| physical_plan after OutputRequirements_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_MergeScanExec: REDACTED +| physical_plan after OutputRequirements_| MergeScanExec: REDACTED |_|_| | physical_plan after LimitAggregation_| SAME TEXT AS ABOVE_| | physical_plan after ProjectionPushdown_| SAME TEXT AS ABOVE_| @@ -171,17 +156,11 @@ TQL EXPLAIN VERBOSE (0, 10, '5s') test; | physical_plan after MatchesConstantTerm_| SAME TEXT AS ABOVE_| | physical_plan after RemoveDuplicateRule_| SAME TEXT AS ABOVE_| | physical_plan after SanityCheckPlan_| SAME TEXT AS ABOVE_| -| physical_plan_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j]_| -|_|_PromSeriesDivideExec: tags=["k"]_| -|_|_MergeScanExec: REDACTED +| physical_plan_| MergeScanExec: REDACTED |_|_| -| physical_plan_with_stats_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], statistics=[Rows=Inexact(0), Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]] | -|_|_PromSeriesDivideExec: tags=["k"], statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:)]]_| -|_|_MergeScanExec: REDACTED +| physical_plan_with_stats_| MergeScanExec: REDACTED |_|_| -| physical_plan_with_schema_| PromInstantManipulateExec: range=[0..0], lookback=[300000], interval=[300000], time index=[j], schema=[i:Float64;N, j:Timestamp(Millisecond, None), k:Utf8;N]_| -|_|_PromSeriesDivideExec: tags=["k"], schema=[i:Float64;N, j:Timestamp(Millisecond, None), k:Utf8;N]_| -|_|_MergeScanExec: REDACTED +| physical_plan_with_schema_| MergeScanExec: REDACTED |_|_| +-+-+ diff --git a/tests/cases/standalone/common/tql/partition.result b/tests/cases/standalone/common/tql/partition.result new file mode 100644 index 0000000000..ebaf11129f --- /dev/null +++ b/tests/cases/standalone/common/tql/partition.result @@ -0,0 +1,164 @@ +-- no partition +create table t ( + i double, + j timestamp time index, + k string primary key +); + +Affected Rows: 0 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[k@0 as k, j@1 as j, 100 - avg(prom_irate(j_range,i))@2 * 100 as Float64(100) - avg(prom_irate(j_range,i)) * Float64(100)] REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SortPreservingMergeExec: [k@0 ASC NULLS LAST, j@1 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[k@0 ASC NULLS LAST, j@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_AggregateExec: mode=FinalPartitioned, gby=[k@0 as k, j@1 as j], aggr=[avg(prom_irate(j_range,i))] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@0, j@1], 32), input_partitions=32 REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[k@2 as k, j@0 as j], aggr=[avg(prom_irate(j_range,i))] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: prom_irate(j_range,i)@1 IS NOT NULL REDACTED +|_|_|_ProjectionExec: expr=[j@1 as j, prom_irate(j_range@3, i@0) as prom_irate(j_range,i), k@2 as k] REDACTED +|_|_|_PromRangeManipulateExec: req range=[0..10000], interval=[1000], eval range=[60000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +|_|_|_| +|_|_| Total rows: 0_| ++-+-+-+ + +drop table t; + +Affected Rows: 0 + +-- partition on tag +create table t ( + i double, + j timestamp time index, + k string, + l string, + primary key (k, l) +) partition on columns (k, l) (k < 'a', k >= 'a'); + +Affected Rows: 0 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_ProjectionExec: expr=[k@0 as k, j@1 as j, 100 - avg(prom_irate(j_range,i))@2 * 100 as Float64(100) - avg(prom_irate(j_range,i)) * Float64(100)] REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SortPreservingMergeExec: [k@0 ASC NULLS LAST, j@1 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[k@0 ASC NULLS LAST, j@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_AggregateExec: mode=FinalPartitioned, gby=[k@0 as k, j@1 as j], aggr=[avg(prom_irate(j_range,i))], ordering_mode=PartiallySorted([0]) REDACTED +|_|_|_SortExec: expr=[k@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@0, j@1], 32), input_partitions=32 REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[k@2 as k, j@0 as j], aggr=[avg(prom_irate(j_range,i))], ordering_mode=PartiallySorted([0]) REDACTED +|_|_|_ProjectionExec: expr=[j@0 as j, prom_irate(j_range,i)@1 as prom_irate(j_range,i), k@2 as k] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC, j@0 ASC], preserve_partitioning=[true] REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: prom_irate(j_range,i)@1 IS NOT NULL REDACTED +|_|_|_ProjectionExec: expr=[j@1 as j, prom_irate(j_range@4, i@0) as prom_irate(j_range,i), k@2 as k, l@3 as l] REDACTED +|_|_|_PromRangeManipulateExec: req range=[0..10000], interval=[1000], eval range=[60000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +|_|_|_| +| 1_| 1_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: prom_irate(j_range,i)@1 IS NOT NULL REDACTED +|_|_|_ProjectionExec: expr=[j@1 as j, prom_irate(j_range@4, i@0) as prom_irate(j_range,i), k@2 as k, l@3 as l] REDACTED +|_|_|_PromRangeManipulateExec: req range=[0..10000], interval=[1000], eval range=[60000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=1 REDACTED +|_|_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +|_|_|_| +|_|_| Total rows: 0_| ++-+-+-+ + +drop table t; + +Affected Rows: 0 + +-- partition on value +create table t ( + i double, + j timestamp time index, + k string, + l string, + primary key (k, l) +) partition on columns (i) (i < 1.0, i >= 1.0); + +Affected Rows: 0 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_ProjectionExec: expr=[k@0 as k, j@1 as j, 100 - avg(prom_irate(j_range,i))@2 * 100 as Float64(100) - avg(prom_irate(j_range,i)) * Float64(100)] REDACTED +|_|_|_RepartitionExec: partitioning=REDACTED +|_|_|_SortPreservingMergeExec: [k@0 ASC NULLS LAST, j@1 ASC NULLS LAST] REDACTED +|_|_|_SortExec: expr=[k@0 ASC NULLS LAST, j@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED +|_|_|_AggregateExec: mode=FinalPartitioned, gby=[k@0 as k, j@1 as j], aggr=[avg(prom_irate(j_range,i))] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@0, j@1], 32), input_partitions=32 REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[k@2 as k, j@0 as j], aggr=[avg(prom_irate(j_range,i))] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: prom_irate(j_range,i)@1 IS NOT NULL REDACTED +|_|_|_ProjectionExec: expr=[j@1 as j, prom_irate(j_range@4, i@0) as prom_irate(j_range,i), k@2 as k] REDACTED +|_|_|_PromRangeManipulateExec: req range=[0..10000], interval=[1000], eval range=[60000], time index=[j] REDACTED +|_|_|_PromSeriesNormalizeExec: offset=[0], time index=[j], filter NaN: [true] REDACTED +|_|_|_PromSeriesDivideExec: tags=["k", "l"] REDACTED +|_|_|_SortExec: expr=[k@2 ASC, l@3 ASC], preserve_partitioning=[true] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_RepartitionExec: partitioning=Hash([k@2, l@3], 32), input_partitions=32 REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +|_|_|_| +| 1_| 1_|_SeqScan: region=REDACTED, partition_count=0 (0 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED +|_|_|_| +|_|_| Total rows: 0_| ++-+-+-+ + +drop table t; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/tql/partition.sql b/tests/cases/standalone/common/tql/partition.sql new file mode 100644 index 0000000000..442c0a34c8 --- /dev/null +++ b/tests/cases/standalone/common/tql/partition.sql @@ -0,0 +1,54 @@ +-- no partition +create table t ( + i double, + j timestamp time index, + k string primary key +); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + +drop table t; + +-- partition on tag +create table t ( + i double, + j timestamp time index, + k string, + l string, + primary key (k, l) +) partition on columns (k, l) (k < 'a', k >= 'a'); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + +drop table t; + +-- partition on value +create table t ( + i double, + j timestamp time index, + k string, + l string, + primary key (k, l) +) partition on columns (i) (i < 1.0, i >= 1.0); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +tql analyze (0, 10, '1s') 100 - (avg by (k) (irate(t[1m])) * 100); + +drop table t;