Merge branch 'prom-plan-commutativity' into jkt

This commit is contained in:
Ruihang Xia
2025-04-27 19:23:42 +08:00
106 changed files with 2028 additions and 820 deletions

View File

@@ -1,15 +0,0 @@
# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
language: "en-US"
early_access: false
reviews:
profile: "chill"
request_changes_workflow: false
high_level_summary: true
poem: true
review_status: true
collapse_walkthrough: false
auto_review:
enabled: false
drafts: false
chat:
auto_reply: true

View File

@@ -10,17 +10,17 @@ set -e
function create_version() {
# Read from environment variables.
if [ -z "$GITHUB_EVENT_NAME" ]; then
echo "GITHUB_EVENT_NAME is empty"
echo "GITHUB_EVENT_NAME is empty" >&2
exit 1
fi
if [ -z "$NEXT_RELEASE_VERSION" ]; then
echo "NEXT_RELEASE_VERSION is empty"
exit 1
echo "NEXT_RELEASE_VERSION is empty, use version from Cargo.toml" >&2
export NEXT_RELEASE_VERSION=$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
fi
if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
echo "NIGHTLY_RELEASE_PREFIX is empty"
echo "NIGHTLY_RELEASE_PREFIX is empty" >&2
exit 1
fi
@@ -35,7 +35,7 @@ function create_version() {
# It will be like 'dev-2023080819-f0e7216c'.
if [ "$NEXT_RELEASE_VERSION" = dev ]; then
if [ -z "$COMMIT_SHA" ]; then
echo "COMMIT_SHA is empty in dev build"
echo "COMMIT_SHA is empty in dev build" >&2
exit 1
fi
echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)"
@@ -45,7 +45,7 @@ function create_version() {
# Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs.
if [ "$GITHUB_EVENT_NAME" = push ]; then
if [ -z "$GITHUB_REF_NAME" ]; then
echo "GITHUB_REF_NAME is empty in push event"
echo "GITHUB_REF_NAME is empty in push event" >&2
exit 1
fi
echo "$GITHUB_REF_NAME"
@@ -54,7 +54,7 @@ function create_version() {
elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
else
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME"
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2
exit 1
fi
}

View File

@@ -90,8 +90,6 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.14.0
jobs:
allocate-runners:
@@ -135,7 +133,6 @@ jobs:
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
- name: Allocate linux-amd64 runner

261
Cargo.lock generated
View File

@@ -173,9 +173,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.89"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "anymap2"
@@ -185,7 +185,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-base",
"common-decimal",
@@ -915,7 +915,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -1537,7 +1537,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"catalog",
"common-error",
@@ -1561,7 +1561,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arrow 54.2.1",
@@ -1597,7 +1597,7 @@ dependencies = [
"partition",
"paste",
"prometheus",
"rustc-hash 2.0.0",
"rustc-hash 2.1.1",
"serde_json",
"session",
"snafu 0.8.5",
@@ -1619,9 +1619,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.1.24"
version = "1.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938"
checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a"
dependencies = [
"jobserver",
"libc",
@@ -1874,7 +1874,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "cli"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"auth",
@@ -1917,7 +1917,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tempfile",
"tokio",
@@ -1926,7 +1926,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arc-swap",
@@ -1955,7 +1955,7 @@ dependencies = [
"rand 0.9.0",
"serde_json",
"snafu 0.8.5",
"substrait 0.14.0",
"substrait 0.15.0",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1996,7 +1996,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"auth",
@@ -2056,7 +2056,7 @@ dependencies = [
"similar-asserts",
"snafu 0.8.5",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"temp-env",
"tempfile",
@@ -2102,7 +2102,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"anymap2",
"async-trait",
@@ -2124,11 +2124,11 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.14.0"
version = "0.15.0"
[[package]]
name = "common-config"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-base",
"common-error",
@@ -2153,7 +2153,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
@@ -2190,7 +2190,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"bigdecimal 0.4.8",
"common-error",
@@ -2203,7 +2203,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-macro",
"http 1.1.0",
@@ -2214,7 +2214,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"common-error",
@@ -2224,7 +2224,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -2277,7 +2277,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"common-runtime",
@@ -2294,7 +2294,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arrow-flight",
@@ -2325,7 +2325,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"common-base",
@@ -2344,7 +2344,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arc-swap",
"common-query",
@@ -2358,7 +2358,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-error",
"common-macro",
@@ -2371,7 +2371,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"anymap2",
"api",
@@ -2432,7 +2432,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2441,11 +2441,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.14.0"
version = "0.15.0"
[[package]]
name = "common-pprof"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-error",
"common-macro",
@@ -2457,7 +2457,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-stream",
"async-trait",
@@ -2484,7 +2484,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -2493,7 +2493,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -2510,7 +2510,7 @@ dependencies = [
"futures-util",
"serde",
"snafu 0.8.5",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"sqlparser_derive 0.1.1",
"statrs",
"store-api",
@@ -2519,7 +2519,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arc-swap",
"common-error",
@@ -2539,7 +2539,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -2569,14 +2569,14 @@ dependencies = [
[[package]]
name = "common-session"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"strum 0.27.1",
]
[[package]]
name = "common-telemetry"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"atty",
"backtrace",
@@ -2604,7 +2604,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"client",
"common-query",
@@ -2616,7 +2616,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arrow 54.2.1",
"chrono",
@@ -2634,7 +2634,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"build-data",
"const_format",
@@ -2644,7 +2644,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"common-base",
"common-error",
@@ -2946,9 +2946,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.13"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
@@ -3110,14 +3110,14 @@ dependencies = [
[[package]]
name = "data-encoding"
version = "2.6.0"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2"
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
name = "datafusion"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
@@ -3168,7 +3168,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"async-trait",
@@ -3188,7 +3188,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog-listing"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
@@ -3211,7 +3211,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3236,7 +3236,7 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"log",
"tokio",
@@ -3245,12 +3245,12 @@ dependencies = [
[[package]]
name = "datafusion-doc"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
[[package]]
name = "datafusion-execution"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"dashmap",
@@ -3268,7 +3268,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"chrono",
@@ -3288,7 +3288,7 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"datafusion-common",
@@ -3299,7 +3299,7 @@ dependencies = [
[[package]]
name = "datafusion-functions"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-buffer 54.3.1",
@@ -3328,7 +3328,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3349,7 +3349,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3361,7 +3361,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
@@ -3383,7 +3383,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"async-trait",
@@ -3398,7 +3398,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"datafusion-common",
"datafusion-doc",
@@ -3414,7 +3414,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -3423,7 +3423,7 @@ dependencies = [
[[package]]
name = "datafusion-macros"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"datafusion-expr",
"quote",
@@ -3433,7 +3433,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"chrono",
@@ -3451,7 +3451,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3474,7 +3474,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3487,7 +3487,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-schema 54.3.1",
@@ -3508,7 +3508,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"ahash 0.8.11",
"arrow 54.2.1",
@@ -3538,7 +3538,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
@@ -3556,7 +3556,7 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
version = "45.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=5bbedc6704162afb03478f56ffb629405a4e1220#5bbedc6704162afb03478f56ffb629405a4e1220"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=e104c7cf62b11dd5fe41461b82514978234326b4#e104c7cf62b11dd5fe41461b82514978234326b4"
dependencies = [
"async-recursion",
"async-trait",
@@ -3572,7 +3572,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arrow-flight",
@@ -3624,7 +3624,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tokio",
"toml 0.8.19",
@@ -3633,7 +3633,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arrow 54.2.1",
"arrow-array 54.2.1",
@@ -3656,7 +3656,7 @@ dependencies = [
"serde",
"serde_json",
"snafu 0.8.5",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"sqlparser_derive 0.1.1",
]
@@ -4259,7 +4259,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -4382,7 +4382,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arrow 54.2.1",
@@ -4444,7 +4444,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"strum 0.27.1",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tokio",
"tonic 0.12.3",
@@ -4499,7 +4499,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arc-swap",
@@ -4553,10 +4553,10 @@ dependencies = [
"session",
"snafu 0.8.5",
"sql",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"store-api",
"strfmt",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tokio",
"toml 0.8.19",
@@ -5795,7 +5795,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -6599,13 +6599,13 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.22"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "log-query"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"chrono",
"common-error",
@@ -6617,7 +6617,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-stream",
"async-trait",
@@ -6911,7 +6911,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -6939,7 +6939,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -7029,7 +7029,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"aquamarine",
@@ -7118,7 +7118,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"aquamarine",
@@ -7780,7 +7780,7 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
dependencies = [
"proc-macro-crate 1.3.1",
"proc-macro-crate 3.2.0",
"proc-macro2",
"quote",
"syn 2.0.100",
@@ -7824,7 +7824,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"anyhow",
"bytes",
@@ -8119,7 +8119,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8166,9 +8166,9 @@ dependencies = [
"session",
"snafu 0.8.5",
"sql",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tokio",
"tokio-util",
@@ -8423,7 +8423,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -8443,7 +8443,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"sql",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"store-api",
"table",
]
@@ -8705,7 +8705,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8847,7 +8847,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"auth",
"clap 4.5.19",
@@ -9127,7 +9127,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"async-trait",
@@ -9373,7 +9373,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-compression 0.4.13",
"async-trait",
@@ -9414,7 +9414,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -9477,10 +9477,10 @@ dependencies = [
"session",
"snafu 0.8.5",
"sql",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"statrs",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tokio",
"tokio-stream",
@@ -9527,7 +9527,7 @@ dependencies = [
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash 2.0.0",
"rustc-hash 2.1.1",
"rustls",
"socket2",
"thiserror 1.0.64",
@@ -9544,7 +9544,7 @@ dependencies = [
"bytes",
"rand 0.8.5",
"ring",
"rustc-hash 2.0.0",
"rustc-hash 2.1.1",
"rustls",
"slab",
"thiserror 1.0.64",
@@ -9821,9 +9821,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.11.0"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
@@ -10005,15 +10005,14 @@ dependencies = [
[[package]]
name = "ring"
version = "0.17.8"
version = "0.17.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if",
"getrandom 0.2.15",
"libc",
"spin",
"untrusted",
"windows-sys 0.52.0",
]
@@ -10334,9 +10333,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustc_version"
@@ -10831,7 +10830,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -10951,7 +10950,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arc-swap",
@@ -11159,9 +11158,9 @@ dependencies = [
[[package]]
name = "smallbitvec"
version = "2.5.3"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcc3fc564a4b53fd1e8589628efafe57602d91bde78be18186b5f61e8faea470"
checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f"
[[package]]
name = "smallvec"
@@ -11276,7 +11275,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"chrono",
@@ -11304,7 +11303,7 @@ dependencies = [
"serde",
"serde_json",
"snafu 0.8.5",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"sqlparser_derive 0.1.1",
"store-api",
"table",
@@ -11331,7 +11330,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -11373,7 +11372,7 @@ dependencies = [
[[package]]
name = "sqlparser"
version = "0.54.0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089#e98e6b322426a9d397a71efef17075966223c089"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e#0cf6c04490d59435ee965edd2078e8855bd8471e"
dependencies = [
"lazy_static",
"log",
@@ -11381,7 +11380,7 @@ dependencies = [
"regex",
"serde",
"sqlparser 0.54.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sqlparser_derive 0.3.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser_derive 0.3.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
]
[[package]]
@@ -11409,7 +11408,7 @@ dependencies = [
[[package]]
name = "sqlparser_derive"
version = "0.3.0"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089#e98e6b322426a9d397a71efef17075966223c089"
source = "git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e#0cf6c04490d59435ee965edd2078e8855bd8471e"
dependencies = [
"proc-macro2",
"quote",
@@ -11650,7 +11649,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"aquamarine",
@@ -11799,7 +11798,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"async-trait",
"bytes",
@@ -11979,7 +11978,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"async-trait",
@@ -12230,7 +12229,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-fuzz"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"arbitrary",
"async-trait",
@@ -12264,7 +12263,7 @@ dependencies = [
"serde_yaml",
"snafu 0.8.5",
"sql",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=e98e6b322426a9d397a71efef17075966223c089)",
"sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)",
"sqlx",
"store-api",
"strum 0.27.1",
@@ -12274,7 +12273,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.14.0"
version = "0.15.0"
dependencies = [
"api",
"arrow-flight",
@@ -12341,7 +12340,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.14.0",
"substrait 0.15.0",
"table",
"tempfile",
"time",

View File

@@ -68,15 +68,16 @@ members = [
resolver = "2"
[workspace.package]
version = "0.14.0"
version = "0.15.0"
edition = "2021"
license = "Apache-2.0"
[workspace.lints]
clippy.print_stdout = "warn"
clippy.print_stderr = "warn"
clippy.dbg_macro = "warn"
clippy.implicit_clone = "warn"
clippy.result_large_err = "allow"
clippy.large_enum_variant = "allow"
clippy.doc_overindented_list_items = "allow"
rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
@@ -112,15 +113,15 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "6.1"
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "5bbedc6704162afb03478f56ffb629405a4e1220" }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "e104c7cf62b11dd5fe41461b82514978234326b4" }
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"
@@ -191,7 +192,7 @@ simd-json = "0.15"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "e98e6b322426a9d397a71efef17075966223c089", features = [
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "0cf6c04490d59435ee965edd2078e8855bd8471e", features = [
"visitor",
"serde",
] } # branch = "v0.54.x"

View File

@@ -319,6 +319,7 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |

View File

@@ -50,6 +50,10 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
## Whether to allow region failover on local WAL.
## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
allow_region_failover_on_local_wal = false
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"

18
flake.lock generated
View File

@@ -8,11 +8,11 @@
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1737613896,
"narHash": "sha256-ldqXIglq74C7yKMFUzrS9xMT/EVs26vZpOD68Sh7OcU=",
"lastModified": 1742452566,
"narHash": "sha256-sVuLDQ2UIWfXUBbctzrZrXM2X05YjX08K7XHMztt36E=",
"owner": "nix-community",
"repo": "fenix",
"rev": "303a062fdd8e89f233db05868468975d17855d80",
"rev": "7d9ba794daf5e8cc7ee728859bc688d8e26d5f06",
"type": "github"
},
"original": {
@@ -41,11 +41,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1737569578,
"narHash": "sha256-6qY0pk2QmUtBT9Mywdvif0i/CLVgpCjMUn6g9vB+f3M=",
"lastModified": 1743576891,
"narHash": "sha256-vXiKURtntURybE6FMNFAVpRPr8+e8KoLPrYs9TGuAKc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "47addd76727f42d351590c905d9d1905ca895b82",
"rev": "44a69ed688786e98a101f02b712c313f1ade37ab",
"type": "github"
},
"original": {
@@ -65,11 +65,11 @@
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1737581772,
"narHash": "sha256-t1P2Pe3FAX9TlJsCZbmJ3wn+C4qr6aSMypAOu8WNsN0=",
"lastModified": 1742296961,
"narHash": "sha256-gCpvEQOrugHWLimD1wTFOJHagnSEP6VYBDspq96Idu0=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "582af7ee9c8d84f5d534272fc7de9f292bd849be",
"rev": "15d87419f1a123d8f888d608129c3ce3ff8f13d4",
"type": "github"
},
"original": {

View File

@@ -21,7 +21,7 @@
lib = nixpkgs.lib;
rustToolchain = fenix.packages.${system}.fromToolchainName {
name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
sha256 = "sha256-i0Sh/ZFFsHlZ3oFZFc24qdk6Cd8Do8OPU4HJQsrKOeM=";
};
in
{

View File

@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2024-12-25"
channel = "nightly-2025-04-15"

View File

@@ -84,12 +84,6 @@ mod tests {
let key1 = "3178510";
let key2 = "4215648";
// have collision
assert_eq!(
oid_map.hasher.hash_one(key1) as u32,
oid_map.hasher.hash_one(key2) as u32
);
// insert them into oid_map
let oid1 = oid_map.get_oid(key1);
let oid2 = oid_map.get_oid(key2);

View File

@@ -12,8 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::iter::repeat_n;
use std::sync::Arc;
use std::{fmt, iter};
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Volatility;
@@ -126,9 +127,10 @@ impl Function for MatchesTermFunction {
let term = term_column.get_ref(0).as_string().unwrap();
match term {
None => {
return Ok(Arc::new(BooleanVector::from_iter(
iter::repeat(None).take(text_column.len()),
)));
return Ok(Arc::new(BooleanVector::from_iter(repeat_n(
None,
text_column.len(),
))));
}
Some(term) => Some(MatchesTermFinder::new(term)),
}
@@ -217,7 +219,7 @@ impl MatchesTermFinder {
}
let mut pos = 0;
while let Some(found_pos) = self.finder.find(text[pos..].as_bytes()) {
while let Some(found_pos) = self.finder.find(&text.as_bytes()[pos..]) {
let actual_pos = pos + found_pos;
let prev_ok = self.starts_with_non_alnum

View File

@@ -37,7 +37,7 @@ impl fmt::Display for RateFunction {
impl Function for RateFunction {
fn name(&self) -> &str {
"prom_rate"
"rate"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -82,7 +82,7 @@ mod tests {
#[test]
fn test_rate_function() {
let rate = RateFunction;
assert_eq!("prom_rate", rate.name());
assert_eq!("rate", rate.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
rate.return_type(&[]).unwrap()

View File

@@ -115,6 +115,13 @@ impl Function for UddSketchCalcFunction {
}
};
// If the sketch is empty, push null instead of estimating a quantile.
// This avoids a panic when calling estimate_quantile on an empty sketch.
// In practice, this happens when the input is all null.
if sketch.bucket_iter().count() == 0 {
builder.push_null();
continue;
}
// Compute the estimated quantile from the sketch
let result = sketch.estimate_quantile(perc);
builder.push(Some(result));

View File

@@ -15,8 +15,6 @@
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(let_chains)]
#![feature(extract_if)]
#![feature(hash_extract_if)]
pub mod cache;
pub mod cache_invalidator;

View File

@@ -176,15 +176,12 @@ impl TableRoute {
})?
.into();
let leader_peer = peers
.get(region_route.leader_peer_index as usize)
.cloned()
.map(Into::into);
let leader_peer = peers.get(region_route.leader_peer_index as usize).cloned();
let follower_peers = region_route
.follower_peer_indexes
.into_iter()
.filter_map(|x| peers.get(x as usize).cloned().map(Into::into))
.filter_map(|x| peers.get(x as usize).cloned())
.collect::<Vec<_>>();
region_routes.push(RegionRoute {

View File

@@ -24,7 +24,7 @@ use datatypes::prelude::*;
use datatypes::vectors::{Helper as VectorHelper, VectorRef};
use snafu::ResultExt;
use crate::error::{self, Error, FromScalarValueSnafu, IntoVectorSnafu, Result};
use crate::error::{self, FromScalarValueSnafu, IntoVectorSnafu, Result};
use crate::prelude::*;
pub type AggregateFunctionCreatorRef = Arc<dyn AggregateFunctionCreator>;
@@ -166,8 +166,7 @@ impl DfAccumulator for DfAccumulatorAdaptor {
let output_type = self.creator.output_type()?;
let scalar_value = value
.try_to_scalar_value(&output_type)
.context(error::ToScalarValueSnafu)
.map_err(Error::from)?;
.context(error::ToScalarValueSnafu)?;
Ok(scalar_value)
}

View File

@@ -253,9 +253,10 @@ fn create_current_timestamp_vector(
data_type: &ConcreteDataType,
num_rows: usize,
) -> Result<VectorRef> {
let current_timestamp_vector = TimestampMillisecondVector::from_values(
std::iter::repeat(util::current_time_millis()).take(num_rows),
);
let current_timestamp_vector = TimestampMillisecondVector::from_values(std::iter::repeat_n(
util::current_time_millis(),
num_rows,
));
if data_type.is_timestamp() {
current_timestamp_vector.cast(data_type)
} else {

View File

@@ -198,8 +198,7 @@ impl fmt::Debug for ConstantVector {
impl Serializable for ConstantVector {
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
std::iter::repeat(self.get(0))
.take(self.len())
std::iter::repeat_n(self.get(0), self.len())
.map(serde_json::Value::try_from)
.collect::<serde_json::Result<_>>()
.context(SerializeSnafu)

View File

@@ -412,7 +412,7 @@ pub(crate) fn replicate_decimal128(
// Safety: the iterator yields exactly `repeat_times` items; std::iter::RepeatN implements TrustedLen.
builder
.mutable_array
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
.append_trusted_len_iter(std::iter::repeat_n(data, repeat_times));
}
}
None => {

View File

@@ -16,8 +16,8 @@ use std::any::Any;
use std::sync::Arc;
use arrow::array::Array;
use arrow::datatypes::Int32Type;
use arrow_array::{ArrayRef, DictionaryArray, Int32Array};
use arrow::datatypes::Int64Type;
use arrow_array::{ArrayRef, DictionaryArray, Int64Array};
use serde_json::Value as JsonValue;
use snafu::ResultExt;
@@ -32,7 +32,7 @@ use crate::vectors::{self, Helper, Validity, Vector, VectorRef};
/// Vector of dictionaries, basically backed by Arrow's `DictionaryArray`.
#[derive(Debug, PartialEq)]
pub struct DictionaryVector {
array: DictionaryArray<Int32Type>,
array: DictionaryArray<Int64Type>,
/// The datatype of the items in the dictionary.
item_type: ConcreteDataType,
/// The vector of items in the dictionary.
@@ -41,7 +41,7 @@ pub struct DictionaryVector {
impl DictionaryVector {
/// Create a new instance of `DictionaryVector` from a dictionary array and item type
pub fn new(array: DictionaryArray<Int32Type>, item_type: ConcreteDataType) -> Result<Self> {
pub fn new(array: DictionaryArray<Int64Type>, item_type: ConcreteDataType) -> Result<Self> {
let item_vector = Helper::try_into_vector(array.values())?;
Ok(Self {
@@ -52,12 +52,12 @@ impl DictionaryVector {
}
/// Returns the underlying Arrow dictionary array
pub fn array(&self) -> &DictionaryArray<Int32Type> {
pub fn array(&self) -> &DictionaryArray<Int64Type> {
&self.array
}
/// Returns the keys array of this dictionary
pub fn keys(&self) -> &arrow_array::PrimitiveArray<Int32Type> {
pub fn keys(&self) -> &arrow_array::PrimitiveArray<Int64Type> {
self.array.keys()
}
@@ -74,7 +74,7 @@ impl DictionaryVector {
impl Vector for DictionaryVector {
fn data_type(&self) -> ConcreteDataType {
ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
self.item_type.clone(),
))
}
@@ -163,10 +163,10 @@ impl Serializable for DictionaryVector {
}
}
impl TryFrom<DictionaryArray<Int32Type>> for DictionaryVector {
impl TryFrom<DictionaryArray<Int64Type>> for DictionaryVector {
type Error = crate::error::Error;
fn try_from(array: DictionaryArray<Int32Type>) -> Result<Self> {
fn try_from(array: DictionaryArray<Int64Type>) -> Result<Self> {
let item_type = ConcreteDataType::from_arrow_type(array.values().data_type());
let item_vector = Helper::try_into_vector(array.values())?;
@@ -243,7 +243,7 @@ impl VectorOp for DictionaryVector {
previous_offset = offset;
}
let new_keys = Int32Array::from(replicated_keys);
let new_keys = Int64Array::from(replicated_keys);
let new_array = DictionaryArray::try_new(new_keys, self.values().clone())
.expect("Failed to create replicated dictionary array");
@@ -261,7 +261,7 @@ impl VectorOp for DictionaryVector {
let filtered_key_array = filtered_key_vector.to_arrow_array();
let filtered_key_array = filtered_key_array
.as_any()
.downcast_ref::<Int32Array>()
.downcast_ref::<Int64Array>()
.unwrap();
let new_array = DictionaryArray::try_new(filtered_key_array.clone(), self.values().clone())
@@ -291,7 +291,7 @@ impl VectorOp for DictionaryVector {
let key_vector = Helper::try_into_vector(&key_array)?;
let new_key_vector = key_vector.take(indices)?;
let new_key_array = new_key_vector.to_arrow_array();
let new_key_array = new_key_array.as_any().downcast_ref::<Int32Array>().unwrap();
let new_key_array = new_key_array.as_any().downcast_ref::<Int64Array>().unwrap();
let new_array = DictionaryArray::try_new(new_key_array.clone(), self.values().clone())
.expect("Failed to create filtered dictionary array");
@@ -318,7 +318,7 @@ mod tests {
// Keys: [0, 1, 2, null, 1, 3]
// Resulting in: ["a", "b", "c", null, "b", "d"]
let values = StringArray::from(vec!["a", "b", "c", "d"]);
let keys = Int32Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let keys = Int64Array::from(vec![Some(0), Some(1), Some(2), None, Some(1), Some(3)]);
let dict_array = DictionaryArray::new(keys, Arc::new(values));
DictionaryVector::try_from(dict_array).unwrap()
}
@@ -404,7 +404,7 @@ mod tests {
assert_eq!(
casted.data_type(),
ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::string_datatype(),
))
);

View File

@@ -20,7 +20,7 @@ use std::sync::Arc;
use arrow::array::{Array, ArrayRef, StringArray};
use arrow::compute;
use arrow::compute::kernels::comparison;
use arrow::datatypes::{DataType as ArrowDataType, Int32Type, TimeUnit};
use arrow::datatypes::{DataType as ArrowDataType, Int64Type, TimeUnit};
use arrow_array::DictionaryArray;
use arrow_schema::IntervalUnit;
use datafusion_common::ScalarValue;
@@ -348,11 +348,11 @@ impl Helper {
ArrowDataType::Decimal128(_, _) => {
Arc::new(Decimal128Vector::try_from_arrow_array(array)?)
}
ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int32) => {
ArrowDataType::Dictionary(key, value) if matches!(&**key, ArrowDataType::Int64) => {
let array = array
.as_ref()
.as_any()
.downcast_ref::<DictionaryArray<Int32Type>>()
.downcast_ref::<DictionaryArray<Int64Type>>()
.unwrap(); // Safety: the type is guarded by match arm condition
Arc::new(DictionaryVector::new(
array.clone(),

View File

@@ -120,9 +120,7 @@ impl fmt::Debug for NullVector {
impl Serializable for NullVector {
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
Ok(std::iter::repeat(serde_json::Value::Null)
.take(self.len())
.collect())
Ok(std::iter::repeat_n(serde_json::Value::Null, self.len()).collect())
}
}

View File

@@ -388,7 +388,7 @@ pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
// Safety: the iterator yields exactly `repeat_times` items; std::iter::RepeatN implements TrustedLen.
builder
.mutable_array
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
.append_trusted_len_iter(std::iter::repeat_n(data, repeat_times));
}
}
None => {

View File

@@ -32,3 +32,9 @@ pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
/// The minimum duration between two queries execution by batching mode task
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
/// Timeout for establishing a gRPC connection
const GRPC_CONN_TIMEOUT: Duration = Duration::from_secs(5);
/// Maximum number of retries for a failed gRPC request
const GRPC_MAX_RETRIES: u32 = 3;

View File

@@ -25,12 +25,15 @@ use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use common_query::Output;
use common_telemetry::warn;
use meta_client::client::MetaClient;
use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};
use crate::batching_mode::DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT;
use crate::batching_mode::{
DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, GRPC_CONN_TIMEOUT, GRPC_MAX_RETRIES,
};
use crate::error::{ExternalSnafu, InvalidRequestSnafu, UnexpectedSnafu};
use crate::Error;
@@ -99,7 +102,9 @@ impl FrontendClient {
Self::Distributed {
meta_client,
chnl_mgr: {
let cfg = ChannelConfig::new().timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT);
let cfg = ChannelConfig::new()
.connect_timeout(GRPC_CONN_TIMEOUT)
.timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
},
}
@@ -223,12 +228,32 @@ impl FrontendClient {
peer: db.peer.clone(),
});
db.database
.handle(req.clone())
.await
.with_context(|_| InvalidRequestSnafu {
context: format!("Failed to handle request: {:?}", req),
})
let mut retry = 0;
loop {
let ret = db.database.handle(req.clone()).await.with_context(|_| {
InvalidRequestSnafu {
context: format!("Failed to handle request: {:?}", req),
}
});
if let Err(err) = ret {
if retry < GRPC_MAX_RETRIES {
retry += 1;
warn!(
"Failed to send request to grpc handle at Peer={:?}, retry = {}, error = {:?}",
db.peer, retry, err
);
continue;
} else {
common_telemetry::error!(
"Failed to send request to grpc handle at Peer={:?} after {} retries, error = {:?}",
db.peer, retry, err
);
return Err(err);
}
}
return ret;
}
}
FrontendClient::Standalone { database_client } => {
let ctx = QueryContextBuilder::default()

View File

@@ -53,6 +53,7 @@ use crate::batching_mode::utils::{
use crate::batching_mode::{
DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, MIN_REFRESH_DURATION, SLOW_QUERY_THRESHOLD,
};
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{
ConvertColumnSchemaSnafu, DatafusionSnafu, ExternalSnafu, InvalidQuerySnafu,
SubstraitEncodeLogicalPlanSnafu, UnexpectedSnafu,
@@ -541,7 +542,10 @@ impl BatchingTask {
.clone()
.rewrite(&mut add_auto_column)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {:?}", self.config.plan),
context: format!(
"Failed to rewrite plan:\n {}\n",
self.config.plan
),
})?
.data;
let schema_len = plan.schema().fields().len();
@@ -573,16 +577,19 @@ impl BatchingTask {
let mut add_filter = AddFilterRewriter::new(expr);
let mut add_auto_column = AddAutoColumnRewriter::new(sink_table_schema.clone());
// build an unoptimized plan so that the unparsed output is clearer
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.config.query, false)
.await?;
plan.clone()
let rewrite = plan
.clone()
.rewrite(&mut add_filter)
.and_then(|p| p.data.rewrite(&mut add_auto_column))
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {plan:?}"),
context: format!("Failed to rewrite plan:\n {}\n", plan),
})?
.data
.data;
// only apply the optimizer after the complex rewrite is done
apply_df_optimizer(rewrite).await?
};
Ok(Some((new_plan, schema_len)))

View File

@@ -704,6 +704,28 @@ mod test {
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// complex time window index with where
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE number in (2, 3, 4) GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE numbers_with_ts.number IN (2, 3, 4) AND ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// complex time window index with between and
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE number BETWEEN 2 AND 4 GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE (numbers_with_ts.number BETWEEN 2 AND 4) AND ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",

View File

@@ -342,8 +342,8 @@ impl TreeNodeRewriter for AddAutoColumnRewriter {
}
} else {
return Err(DataFusionError::Plan(format!(
"Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?} at node {:?}",
query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas(), node
"Expect table have 0,1 or 2 columns more than query columns, found {} query columns {:?}, {} table columns {:?}",
query_col_cnt, exprs, table_col_cnt, self.schema.column_schemas()
)));
}
@@ -406,7 +406,9 @@ mod test {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use pretty_assertions::assert_eq;
use query::query_engine::DefaultSerializer;
use session::context::QueryContext;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use super::*;
use crate::test_utils::create_test_query_engine;
@@ -701,4 +703,18 @@ mod test {
);
}
}
/// Checks that a query selecting a typed NULL (`NULL::DOUBLE`) can be planned
/// and then encoded into a Substrait plan. The test passes when neither step
/// returns an error (both results are unwrapped).
#[tokio::test]
async fn test_null_cast() {
    let engine = create_test_query_engine();
    let query = "SELECT NULL::DOUBLE FROM numbers_with_ts";

    // Build the DataFusion logical plan for the query.
    let logical_plan = sql_to_df_plan(QueryContext::arc(), engine.clone(), query, false)
        .await
        .unwrap();

    // Encoding must succeed; the encoded bytes themselves are not inspected.
    let convertor = DFLogicalSubstraitConvertor {};
    let _encoded = convertor
        .encode(&logical_plan, DefaultSerializer)
        .unwrap();
}
}

View File

@@ -25,7 +25,6 @@ use datafusion::config::ConfigOptions;
use datafusion::error::DataFusionError;
use datafusion::functions_aggregate::count::count_udaf;
use datafusion::functions_aggregate::sum::sum_udaf;
use datafusion::optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
use datafusion::optimizer::common_subexpr_eliminate::CommonSubexprEliminate;
use datafusion::optimizer::optimize_projections::OptimizeProjections;
@@ -42,6 +41,7 @@ use datafusion_expr::{
BinaryExpr, ColumnarValue, Expr, Operator, Projection, ScalarFunctionArgs, ScalarUDFImpl,
Signature, TypeSignature, Volatility,
};
use query::optimizer::count_wildcard::CountWildcardToTimeIndexRule;
use query::parser::QueryLanguageParser;
use query::query_engine::DefaultSerializer;
use query::QueryEngine;
@@ -61,9 +61,9 @@ pub async fn apply_df_optimizer(
) -> Result<datafusion_expr::LogicalPlan, Error> {
let cfg = ConfigOptions::new();
let analyzer = Analyzer::with_rules(vec![
Arc::new(CountWildcardRule::new()),
Arc::new(AvgExpandRule::new()),
Arc::new(TumbleExpandRule::new()),
Arc::new(CountWildcardToTimeIndexRule),
Arc::new(AvgExpandRule),
Arc::new(TumbleExpandRule),
Arc::new(CheckGroupByRule::new()),
Arc::new(TypeCoercion::new()),
]);
@@ -128,13 +128,7 @@ pub async fn sql_to_flow_plan(
}
#[derive(Debug)]
struct AvgExpandRule {}
impl AvgExpandRule {
pub fn new() -> Self {
Self {}
}
}
struct AvgExpandRule;
impl AnalyzerRule for AvgExpandRule {
fn analyze(
@@ -331,13 +325,7 @@ impl TreeNodeRewriter for ExpandAvgRewriter<'_> {
/// expand tumble in aggr expr to tumble_start and tumble_end with column name like `window_start`
#[derive(Debug)]
struct TumbleExpandRule {}
impl TumbleExpandRule {
pub fn new() -> Self {
Self {}
}
}
struct TumbleExpandRule;
impl AnalyzerRule for TumbleExpandRule {
fn analyze(

View File

@@ -46,7 +46,11 @@ pub struct ChineseTokenizer;
impl Tokenizer for ChineseTokenizer {
fn tokenize<'a>(&self, text: &'a str) -> Vec<&'a str> {
JIEBA.cut(text, false)
if text.is_ascii() {
EnglishTokenizer {}.tokenize(text)
} else {
JIEBA.cut(text, false)
}
}
}

View File

@@ -481,7 +481,7 @@ mod tests {
let mock_values = dic_values
.iter()
.flat_map(|(value, size)| iter::repeat(value.clone()).take(*size))
.flat_map(|(value, size)| std::iter::repeat_n(value.clone(), *size))
.collect::<Vec<_>>();
let sorted_result = sorted_result(&mock_values, segment_row_count);

View File

@@ -66,10 +66,12 @@ use crate::election::postgres::PgElection;
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
use crate::election::CANDIDATE_LEASE_SECS;
use crate::metasrv::builder::MetasrvBuilder;
use crate::metasrv::{BackendImpl, Metasrv, MetasrvOptions, SelectorRef};
use crate::metasrv::{BackendImpl, Metasrv, MetasrvOptions, SelectTarget, SelectorRef};
use crate::node_excluder::NodeExcluderRef;
use crate::selector::lease_based::LeaseBasedSelector;
use crate::selector::load_based::LoadBasedSelector;
use crate::selector::round_robin::RoundRobinSelector;
use crate::selector::weight_compute::RegionNumsBasedWeightCompute;
use crate::selector::SelectorType;
use crate::service::admin;
use crate::{error, Result};
@@ -294,14 +296,25 @@ pub async fn metasrv_builder(
let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef;
let node_excluder = plugins
.get::<NodeExcluderRef>()
.unwrap_or_else(|| Arc::new(Vec::new()) as NodeExcluderRef);
let selector = if let Some(selector) = plugins.get::<SelectorRef>() {
info!("Using selector from plugins");
selector
} else {
let selector = match opts.selector {
SelectorType::LoadBased => Arc::new(LoadBasedSelector::default()) as SelectorRef,
SelectorType::LeaseBased => Arc::new(LeaseBasedSelector) as SelectorRef,
SelectorType::RoundRobin => Arc::new(RoundRobinSelector::default()) as SelectorRef,
SelectorType::LoadBased => Arc::new(LoadBasedSelector::new(
RegionNumsBasedWeightCompute,
node_excluder,
)) as SelectorRef,
SelectorType::LeaseBased => {
Arc::new(LeaseBasedSelector::new(node_excluder)) as SelectorRef
}
SelectorType::RoundRobin => Arc::new(RoundRobinSelector::new(
SelectTarget::Datanode,
node_excluder,
)) as SelectorRef,
};
info!(
"Using selector from options, selector type: {}",

View File

@@ -14,7 +14,6 @@
#![feature(result_flattening)]
#![feature(assert_matches)]
#![feature(extract_if)]
#![feature(hash_set_entry)]
pub mod bootstrap;

View File

@@ -111,6 +111,11 @@ pub struct MetasrvOptions {
pub use_memory_store: bool,
/// Whether to enable region failover.
pub enable_region_failover: bool,
/// Whether to allow region failover on local WAL.
///
/// If it's true, the region failover will be allowed even if the local WAL is used.
/// Note that this option is not recommended to be set to true, because it may lead to data loss during failover.
pub allow_region_failover_on_local_wal: bool,
/// The HTTP server options.
pub http: HttpOptions,
/// The logging options.
@@ -173,6 +178,7 @@ impl Default for MetasrvOptions {
selector: SelectorType::default(),
use_memory_store: false,
enable_region_failover: false,
allow_region_failover_on_local_wal: false,
http: HttpOptions::default(),
logging: LoggingOptions {
dir: format!("{METASRV_HOME}/logs"),

View File

@@ -40,7 +40,8 @@ use common_meta::state_store::KvStateStore;
use common_meta::wal_options_allocator::{build_kafka_client, build_wal_options_allocator};
use common_procedure::local::{LocalManager, ManagerConfig};
use common_procedure::ProcedureManagerRef;
use snafu::ResultExt;
use common_telemetry::warn;
use snafu::{ensure, ResultExt};
use crate::cache_invalidator::MetasrvCacheInvalidator;
use crate::cluster::{MetaPeerClientBuilder, MetaPeerClientRef};
@@ -190,7 +191,7 @@ impl MetasrvBuilder {
let meta_peer_client = meta_peer_client
.unwrap_or_else(|| build_default_meta_peer_client(&election, &in_memory));
let selector = selector.unwrap_or_else(|| Arc::new(LeaseBasedSelector));
let selector = selector.unwrap_or_else(|| Arc::new(LeaseBasedSelector::default()));
let pushers = Pushers::default();
let mailbox = build_mailbox(&kv_backend, &pushers);
let procedure_manager = build_procedure_manager(&options, &kv_backend);
@@ -234,13 +235,17 @@ impl MetasrvBuilder {
))
});
let flow_selector = Arc::new(RoundRobinSelector::new(
SelectTarget::Flownode,
Arc::new(Vec::new()),
)) as SelectorRef;
let flow_metadata_allocator = {
// for now, flownodes just use the round-robin selector
let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode);
let flow_selector_ctx = selector_ctx.clone();
let peer_allocator = Arc::new(FlowPeerAllocator::new(
flow_selector_ctx,
Arc::new(flow_selector),
flow_selector.clone(),
));
let seq = Arc::new(
SequenceBuilder::new(FLOW_ID_SEQ, kv_backend.clone())
@@ -272,18 +277,25 @@ impl MetasrvBuilder {
},
));
let peer_lookup_service = Arc::new(MetaPeerLookupService::new(meta_peer_client.clone()));
if !is_remote_wal && options.enable_region_failover {
return error::UnexpectedSnafu {
violated: "Region failover is not supported in the local WAL implementation!",
ensure!(
options.allow_region_failover_on_local_wal,
error::UnexpectedSnafu {
violated: "Region failover is not supported in the local WAL implementation!
If you want to enable region failover for local WAL, please set `allow_region_failover_on_local_wal` to true.",
}
);
if options.allow_region_failover_on_local_wal {
warn!("Region failover is force enabled in the local WAL implementation! This may lead to data loss during failover!");
}
.fail();
}
let (tx, rx) = RegionSupervisor::channel();
let (region_failure_detector_controller, region_supervisor_ticker): (
RegionFailureDetectorControllerRef,
Option<std::sync::Arc<RegionSupervisorTicker>>,
) = if options.enable_region_failover && is_remote_wal {
) = if options.enable_region_failover {
(
Arc::new(RegionFailureDetectorControl::new(tx.clone())) as _,
Some(Arc::new(RegionSupervisorTicker::new(
@@ -309,7 +321,7 @@ impl MetasrvBuilder {
));
region_migration_manager.try_start()?;
let region_failover_handler = if options.enable_region_failover && is_remote_wal {
let region_failover_handler = if options.enable_region_failover {
let region_supervisor = RegionSupervisor::new(
rx,
options.failure_detector,
@@ -420,7 +432,7 @@ impl MetasrvBuilder {
meta_peer_client: meta_peer_client.clone(),
selector,
// TODO(jeremy): We do not allow configuring the flow selector.
flow_selector: Arc::new(RoundRobinSelector::new(SelectTarget::Flownode)),
flow_selector,
handler_group: RwLock::new(None),
handler_group_builder: Mutex::new(Some(handler_group_builder)),
election,

View File

@@ -71,4 +71,13 @@ lazy_static! {
/// The remote WAL prune execute counter.
pub static ref METRIC_META_REMOTE_WAL_PRUNE_EXECUTE: IntCounterVec =
register_int_counter_vec!("greptime_meta_remote_wal_prune_execute", "meta remote wal prune execute", &["topic_name"]).unwrap();
/// The migration stage elapsed histogram, labeled by `stage`.
pub static ref METRIC_META_REGION_MIGRATION_STAGE_ELAPSED: HistogramVec = register_histogram_vec!(
"greptime_meta_region_migration_stage_elapsed",
"meta region migration stage elapsed",
&["stage"],
// 7 exponential buckets: 0.01, 0.1, 1, 10, 100, 1000, 10000
exponential_buckets(0.01, 10.0, 7).unwrap(),
)
.unwrap();
}

View File

@@ -141,10 +141,7 @@ pub async fn mock(
if let Some(client) = client {
Ok(TokioIo::new(client))
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
"Client already taken",
))
Err(std::io::Error::other("Client already taken"))
}
}
}),

View File

@@ -24,3 +24,9 @@ pub trait NodeExcluder: Send + Sync {
/// Returns the excluded datanode ids.
fn excluded_datanode_ids(&self) -> &Vec<DatanodeId>;
}
/// A plain list of datanode ids acts as a static excluder: the list itself is the excluded set.
impl NodeExcluder for Vec<DatanodeId> {
fn excluded_datanode_ids(&self) -> &Vec<DatanodeId> {
self
}
}

View File

@@ -25,7 +25,7 @@ pub(crate) mod update_metadata;
pub(crate) mod upgrade_candidate_region;
use std::any::Any;
use std::fmt::Debug;
use std::fmt::{Debug, Display};
use std::time::Duration;
use common_error::ext::BoxedError;
@@ -43,7 +43,7 @@ use common_procedure::error::{
Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
};
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey};
use common_telemetry::info;
use common_telemetry::{error, info};
use manager::RegionMigrationProcedureGuard;
pub use manager::{
RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker,
@@ -55,7 +55,10 @@ use tokio::time::Instant;
use self::migration_start::RegionMigrationStart;
use crate::error::{self, Result};
use crate::metrics::{METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE};
use crate::metrics::{
METRIC_META_REGION_MIGRATION_ERROR, METRIC_META_REGION_MIGRATION_EXECUTE,
METRIC_META_REGION_MIGRATION_STAGE_ELAPSED,
};
use crate::service::mailbox::MailboxRef;
/// The default timeout for region migration.
@@ -103,6 +106,82 @@ impl PersistentContext {
}
}
/// Metrics of region migration.
///
/// Each field accumulates the time spent in one stage of the migration. Non-zero
/// values are observed into `METRIC_META_REGION_MIGRATION_STAGE_ELAPSED` when the
/// value is dropped.
#[derive(Debug, Clone, Default)]
pub struct Metrics {
/// Elapsed time of downgrading region and upgrading region.
/// It is also subtracted from the migration timeout to get the next operation timeout.
operations_elapsed: Duration,
/// Elapsed time of downgrading leader region.
downgrade_leader_region_elapsed: Duration,
/// Elapsed time of open candidate region.
open_candidate_region_elapsed: Duration,
/// Elapsed time of upgrade candidate region.
upgrade_candidate_region_elapsed: Duration,
}
impl Display for Metrics {
    /// Renders every stage as `name: duration`, separated by commas, using the
    /// `Debug` representation of each [`Duration`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructuring keeps this in sync with the struct: adding a field
        // without formatting it becomes a compile error.
        let Self {
            operations_elapsed,
            downgrade_leader_region_elapsed,
            open_candidate_region_elapsed,
            upgrade_candidate_region_elapsed,
        } = self;

        write!(
            f,
            "operations_elapsed: {:?}, downgrade_leader_region_elapsed: {:?}, open_candidate_region_elapsed: {:?}, upgrade_candidate_region_elapsed: {:?}",
            operations_elapsed,
            downgrade_leader_region_elapsed,
            open_candidate_region_elapsed,
            upgrade_candidate_region_elapsed
        )
    }
}
impl Metrics {
/// Adds `elapsed` to the accumulated time of downgrading region and upgrading region.
pub fn update_operations_elapsed(&mut self, elapsed: Duration) {
self.operations_elapsed += elapsed;
}
/// Adds `elapsed` to the accumulated time of downgrading leader region.
pub fn update_downgrade_leader_region_elapsed(&mut self, elapsed: Duration) {
self.downgrade_leader_region_elapsed += elapsed;
}
/// Adds `elapsed` to the accumulated time of open candidate region.
pub fn update_open_candidate_region_elapsed(&mut self, elapsed: Duration) {
self.open_candidate_region_elapsed += elapsed;
}
/// Adds `elapsed` to the accumulated time of upgrade candidate region.
pub fn update_upgrade_candidate_region_elapsed(&mut self, elapsed: Duration) {
self.upgrade_candidate_region_elapsed += elapsed;
}
}
impl Drop for Metrics {
    /// Reports every stage that recorded a non-zero duration to the
    /// stage-elapsed histogram, in seconds.
    fn drop(&mut self) {
        // (label, accumulated duration) pairs, in the order they are reported.
        let stages = [
            ("operations", self.operations_elapsed),
            (
                "downgrade_leader_region",
                self.downgrade_leader_region_elapsed,
            ),
            ("open_candidate_region", self.open_candidate_region_elapsed),
            (
                "upgrade_candidate_region",
                self.upgrade_candidate_region_elapsed,
            ),
        ];

        for (stage, elapsed) in stages {
            // Stages that never ran are skipped so they do not pollute the histogram.
            if elapsed.is_zero() {
                continue;
            }
            METRIC_META_REGION_MIGRATION_STAGE_ELAPSED
                .with_label_values(&[stage])
                .observe(elapsed.as_secs_f64());
        }
    }
}
/// It's shared in each step and available in executing (including retrying).
///
/// It will be dropped if the procedure runner crashes.
@@ -132,8 +211,8 @@ pub struct VolatileContext {
leader_region_last_entry_id: Option<u64>,
/// The last_entry_id of leader metadata region (Only used for metric engine).
leader_region_metadata_last_entry_id: Option<u64>,
/// Elapsed time of downgrading region and upgrading region.
operations_elapsed: Duration,
/// Metrics of region migration.
metrics: Metrics,
}
impl VolatileContext {
@@ -231,12 +310,35 @@ impl Context {
pub fn next_operation_timeout(&self) -> Option<Duration> {
self.persistent_ctx
.timeout
.checked_sub(self.volatile_ctx.operations_elapsed)
.checked_sub(self.volatile_ctx.metrics.operations_elapsed)
}
/// Updates operations elapsed.
pub fn update_operations_elapsed(&mut self, instant: Instant) {
self.volatile_ctx.operations_elapsed += instant.elapsed();
self.volatile_ctx
.metrics
.update_operations_elapsed(instant.elapsed());
}
/// Records the time passed since `instant` as time spent downgrading the leader region.
pub fn update_downgrade_leader_region_elapsed(&mut self, instant: Instant) {
    let elapsed = instant.elapsed();
    let metrics = &mut self.volatile_ctx.metrics;
    metrics.update_downgrade_leader_region_elapsed(elapsed);
}
/// Records the time passed since `instant` as time spent opening the candidate region.
pub fn update_open_candidate_region_elapsed(&mut self, instant: Instant) {
    let elapsed = instant.elapsed();
    let metrics = &mut self.volatile_ctx.metrics;
    metrics.update_open_candidate_region_elapsed(elapsed);
}
/// Records the time passed since `instant` as time spent upgrading the candidate region.
pub fn update_upgrade_candidate_region_elapsed(&mut self, instant: Instant) {
    let elapsed = instant.elapsed();
    let metrics = &mut self.volatile_ctx.metrics;
    metrics.update_upgrade_candidate_region_elapsed(elapsed);
}
/// Returns address of meta server.
@@ -550,6 +652,14 @@ impl Procedure for RegionMigrationProcedure {
.inc();
ProcedureError::retry_later(e)
} else {
error!(
e;
"Region migration procedure failed, region_id: {}, from_peer: {}, to_peer: {}, {}",
self.context.region_id(),
self.context.persistent_ctx.from_peer,
self.context.persistent_ctx.to_peer,
self.context.volatile_ctx.metrics,
);
METRIC_META_REGION_MIGRATION_ERROR
.with_label_values(&[name, "external"])
.inc();

View File

@@ -46,7 +46,13 @@ impl State for CloseDowngradedRegion {
let region_id = ctx.region_id();
warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode);
}
info!(
"Region migration is finished: region_id: {}, from_peer: {}, to_peer: {}, {}",
ctx.region_id(),
ctx.persistent_ctx.from_peer,
ctx.persistent_ctx.to_peer,
ctx.volatile_ctx.metrics,
);
Ok((Box::new(RegionMigrationEnd), Status::done()))
}

View File

@@ -54,6 +54,7 @@ impl Default for DowngradeLeaderRegion {
#[typetag::serde]
impl State for DowngradeLeaderRegion {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
let now = Instant::now();
// Ensures the `leader_region_lease_deadline` must exist after recovering.
ctx.volatile_ctx
.set_leader_region_lease_deadline(Duration::from_secs(REGION_LEASE_SECS));
@@ -77,6 +78,7 @@ impl State for DowngradeLeaderRegion {
}
}
}
ctx.update_downgrade_leader_region_elapsed(now);
Ok((
Box::new(UpgradeCandidateRegion::default()),
@@ -348,7 +350,8 @@ mod tests {
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
prepare_table_metadata(&ctx, HashMap::default()).await;
ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1);
ctx.volatile_ctx.metrics.operations_elapsed =
ctx.persistent_ctx.timeout + Duration::from_secs(1);
let err = state.downgrade_region(&mut ctx).await.unwrap_err();
@@ -591,7 +594,8 @@ mod tests {
let mut ctx = env.context_factory().new_context(persistent_context);
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1);
ctx.volatile_ctx.metrics.operations_elapsed =
ctx.persistent_ctx.timeout + Duration::from_secs(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx

View File

@@ -15,6 +15,7 @@
use std::any::Any;
use common_procedure::Status;
use common_telemetry::warn;
use serde::{Deserialize, Serialize};
use crate::error::{self, Result};
@@ -37,7 +38,15 @@ impl RegionMigrationAbort {
#[async_trait::async_trait]
#[typetag::serde]
impl State for RegionMigrationAbort {
async fn next(&mut self, _: &mut Context) -> Result<(Box<dyn State>, Status)> {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
warn!(
"Region migration is aborted: {}, region_id: {}, from_peer: {}, to_peer: {}, {}",
self.reason,
ctx.region_id(),
ctx.persistent_ctx.from_peer,
ctx.persistent_ctx.to_peer,
ctx.volatile_ctx.metrics,
);
error::MigrationAbortSnafu {
reason: &self.reason,
}

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::time::{Duration, Instant};
use std::time::Duration;
use api::v1::meta::MailboxMessage;
use common_meta::distributed_time_constants::REGION_LEASE_SECS;
@@ -24,6 +24,7 @@ use common_procedure::Status;
use common_telemetry::info;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use tokio::time::Instant;
use crate::error::{self, Result};
use crate::handler::HeartbeatMailbox;
@@ -42,7 +43,9 @@ pub struct OpenCandidateRegion;
impl State for OpenCandidateRegion {
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
let instruction = self.build_open_region_instruction(ctx).await?;
let now = Instant::now();
self.open_candidate_region(ctx, instruction).await?;
ctx.update_open_candidate_region_elapsed(now);
Ok((
Box::new(UpdateMetadata::Downgrade),

View File

@@ -54,9 +54,12 @@ impl Default for UpgradeCandidateRegion {
#[typetag::serde]
impl State for UpgradeCandidateRegion {
/// Tries to upgrade the candidate region (with retries) and records how long it took.
///
/// Moves on to `UpdateMetadata::Upgrade` on success and to
/// `UpdateMetadata::Rollback` otherwise.
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> {
    let started_at = Instant::now();
    let upgraded = self.upgrade_region_with_retry(ctx).await;
    // The elapsed time is recorded regardless of the outcome.
    ctx.update_upgrade_candidate_region_elapsed(started_at);

    let next_state = if upgraded {
        UpdateMetadata::Upgrade
    } else {
        UpdateMetadata::Rollback
    };
    Ok((Box::new(next_state), Status::executing(false)))
}
@@ -288,7 +291,8 @@ mod tests {
let persistent_context = new_persistent_context();
let env = TestingEnv::new();
let mut ctx = env.context_factory().new_context(persistent_context);
ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1);
ctx.volatile_ctx.metrics.operations_elapsed =
ctx.persistent_ctx.timeout + Duration::from_secs(1);
let err = state.upgrade_region(&ctx).await.unwrap_err();
@@ -558,7 +562,8 @@ mod tests {
let mut ctx = env.context_factory().new_context(persistent_context);
let mailbox_ctx = env.mailbox_context();
let mailbox = mailbox_ctx.mailbox().clone();
ctx.volatile_ctx.operations_elapsed = ctx.persistent_ctx.timeout + Duration::from_secs(1);
ctx.volatile_ctx.metrics.operations_elapsed =
ctx.persistent_ctx.timeout + Duration::from_secs(1);
let (tx, rx) = tokio::sync::mpsc::channel(1);
mailbox_ctx

View File

@@ -18,7 +18,7 @@ pub mod load_based;
pub mod round_robin;
#[cfg(test)]
pub(crate) mod test_utils;
mod weight_compute;
pub mod weight_compute;
pub mod weighted_choose;
use std::collections::HashSet;

View File

@@ -12,17 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::sync::Arc;
use common_meta::peer::Peer;
use crate::error::Result;
use crate::lease;
use crate::metasrv::SelectorContext;
use crate::node_excluder::NodeExcluderRef;
use crate::selector::common::{choose_items, filter_out_excluded_peers};
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
use crate::selector::{Selector, SelectorOptions};
/// Selects from all alive datanodes using a random weighted choice.
pub struct LeaseBasedSelector;
pub struct LeaseBasedSelector {
node_excluder: NodeExcluderRef,
}
impl LeaseBasedSelector {
/// Creates a selector that additionally skips the datanodes reported by `node_excluder`.
pub fn new(node_excluder: NodeExcluderRef) -> Self {
Self { node_excluder }
}
}
impl Default for LeaseBasedSelector {
    /// Builds a selector with an empty exclusion list.
    fn default() -> Self {
        Self::new(Arc::new(Vec::new()))
    }
}
#[async_trait::async_trait]
impl Selector for LeaseBasedSelector {
@@ -47,7 +67,14 @@ impl Selector for LeaseBasedSelector {
.collect();
// 3. choose peers by weight_array.
filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids);
let mut exclude_peer_ids = self
.node_excluder
.excluded_datanode_ids()
.iter()
.cloned()
.collect::<HashSet<_>>();
exclude_peer_ids.extend(opts.exclude_peer_ids.iter());
filter_out_excluded_peers(&mut weight_array, &exclude_peer_ids);
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
let selected = choose_items(&opts, &mut weighted_choose)?;

View File

@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue};
use common_meta::key::TableMetadataManager;
@@ -26,6 +27,7 @@ use crate::error::{self, Result};
use crate::key::{DatanodeLeaseKey, LeaseValue};
use crate::lease;
use crate::metasrv::SelectorContext;
use crate::node_excluder::NodeExcluderRef;
use crate::selector::common::{choose_items, filter_out_excluded_peers};
use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute};
use crate::selector::weighted_choose::RandomWeightedChoose;
@@ -33,11 +35,15 @@ use crate::selector::{Selector, SelectorOptions};
pub struct LoadBasedSelector<C> {
weight_compute: C,
node_excluder: NodeExcluderRef,
}
impl<C> LoadBasedSelector<C> {
pub fn new(weight_compute: C) -> Self {
Self { weight_compute }
pub fn new(weight_compute: C, node_excluder: NodeExcluderRef) -> Self {
Self {
weight_compute,
node_excluder,
}
}
}
@@ -45,6 +51,7 @@ impl Default for LoadBasedSelector<RegionNumsBasedWeightCompute> {
fn default() -> Self {
Self {
weight_compute: RegionNumsBasedWeightCompute,
node_excluder: Arc::new(Vec::new()),
}
}
}
@@ -88,7 +95,14 @@ where
let mut weight_array = self.weight_compute.compute(&stat_kvs);
// 5. choose peers by weight_array.
filter_out_excluded_peers(&mut weight_array, &opts.exclude_peer_ids);
let mut exclude_peer_ids = self
.node_excluder
.excluded_datanode_ids()
.iter()
.cloned()
.collect::<HashSet<_>>();
exclude_peer_ids.extend(opts.exclude_peer_ids.iter());
filter_out_excluded_peers(&mut weight_array, &exclude_peer_ids);
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
let selected = choose_items(&opts, &mut weighted_choose)?;

View File

@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::sync::atomic::AtomicUsize;
use std::sync::Arc;
use common_meta::peer::Peer;
use snafu::ensure;
@@ -20,6 +22,7 @@ use snafu::ensure;
use crate::error::{NoEnoughAvailableNodeSnafu, Result};
use crate::lease;
use crate::metasrv::{SelectTarget, SelectorContext};
use crate::node_excluder::NodeExcluderRef;
use crate::selector::{Selector, SelectorOptions};
/// Round-robin selector that returns the next peer in the list in sequence.
@@ -32,6 +35,7 @@ use crate::selector::{Selector, SelectorOptions};
pub struct RoundRobinSelector {
select_target: SelectTarget,
counter: AtomicUsize,
node_excluder: NodeExcluderRef,
}
impl Default for RoundRobinSelector {
@@ -39,32 +43,38 @@ impl Default for RoundRobinSelector {
Self {
select_target: SelectTarget::Datanode,
counter: AtomicUsize::new(0),
node_excluder: Arc::new(Vec::new()),
}
}
}
impl RoundRobinSelector {
pub fn new(select_target: SelectTarget) -> Self {
pub fn new(select_target: SelectTarget, node_excluder: NodeExcluderRef) -> Self {
Self {
select_target,
node_excluder,
..Default::default()
}
}
async fn get_peers(
&self,
min_required_items: usize,
ctx: &SelectorContext,
) -> Result<Vec<Peer>> {
async fn get_peers(&self, opts: &SelectorOptions, ctx: &SelectorContext) -> Result<Vec<Peer>> {
let mut peers = match self.select_target {
SelectTarget::Datanode => {
// 1. get alive datanodes.
let lease_kvs =
lease::alive_datanodes(&ctx.meta_peer_client, ctx.datanode_lease_secs).await?;
let mut exclude_peer_ids = self
.node_excluder
.excluded_datanode_ids()
.iter()
.cloned()
.collect::<HashSet<_>>();
exclude_peer_ids.extend(opts.exclude_peer_ids.iter());
// 2. map into peers
lease_kvs
.into_iter()
.filter(|(k, _)| !exclude_peer_ids.contains(&k.node_id))
.map(|(k, v)| Peer::new(k.node_id, v.node_addr))
.collect::<Vec<_>>()
}
@@ -84,8 +94,8 @@ impl RoundRobinSelector {
ensure!(
!peers.is_empty(),
NoEnoughAvailableNodeSnafu {
required: min_required_items,
available: 0usize,
required: opts.min_required_items,
available: peers.len(),
select_target: self.select_target
}
);
@@ -103,7 +113,7 @@ impl Selector for RoundRobinSelector {
type Output = Vec<Peer>;
async fn select(&self, ctx: &Self::Context, opts: SelectorOptions) -> Result<Vec<Peer>> {
let peers = self.get_peers(opts.min_required_items, ctx).await?;
let peers = self.get_peers(&opts, ctx).await?;
// choose peers
let mut selected = Vec::with_capacity(opts.min_required_items);
for _ in 0..opts.min_required_items {
@@ -176,4 +186,42 @@ mod test {
assert_eq!(peers.len(), 2);
assert_eq!(peers, vec![peer2.clone(), peer3.clone()]);
}
#[tokio::test]
async fn test_round_robin_selector_with_exclude_peer_ids() {
    // Datanode 5 is excluded by the selector-level node excluder.
    let selector = RoundRobinSelector::new(SelectTarget::Datanode, Arc::new(vec![5]));
    let ctx = create_selector_context();

    // Register three alive datanodes: 2, 5 and 8.
    let [peer1, peer2, peer3] =
        [(2, "node1"), (5, "node2"), (8, "node3")].map(|(id, addr)| Peer {
            id,
            addr: addr.to_string(),
        });
    put_datanodes(&ctx.meta_peer_client, vec![peer1, peer2, peer3.clone()]).await;

    // Datanode 2 is excluded by the request options, so only datanode 8 can be chosen.
    let opts = SelectorOptions {
        min_required_items: 1,
        allow_duplication: true,
        exclude_peer_ids: HashSet::from([2]),
    };
    let peers = selector.select(&ctx, opts).await.unwrap();

    assert_eq!(peers.len(), 1);
    assert_eq!(peers, vec![peer3]);
}
}

View File

@@ -278,7 +278,7 @@ impl KvBackend for LeaderCachedKvBackend {
let remote_res = self.store.batch_get(remote_req).await?;
let put_req = BatchPutRequest {
kvs: remote_res.kvs.clone().into_iter().map(Into::into).collect(),
kvs: remote_res.kvs.clone().into_iter().collect(),
..Default::default()
};
let _ = self.cache.batch_put(put_req).await?;

View File

@@ -363,9 +363,9 @@ mod tests {
builder
.push_field_array(
*column_id,
Arc::new(Int64Array::from_iter_values(
std::iter::repeat(*field).take(num_rows),
)),
Arc::new(Int64Array::from_iter_values(std::iter::repeat_n(
*field, num_rows,
))),
)
.unwrap();
}

View File

@@ -206,6 +206,14 @@ impl SeqScan {
.build(),
));
}
if self.properties.partitions[partition].is_empty() {
return Ok(Box::pin(RecordBatchStreamWrapper::new(
self.stream_ctx.input.mapper.output_schema(),
common_recordbatch::EmptyRecordBatchStream::new(
self.stream_ctx.input.mapper.output_schema(),
),
)));
}
let stream_ctx = self.stream_ctx.clone();
let semaphore = self.new_semaphore();

View File

@@ -346,7 +346,6 @@ impl BloomFilterIndexer {
#[cfg(test)]
pub(crate) mod tests {
use std::iter;
use api::v1::SemanticType;
use datatypes::data_type::ConcreteDataType;
@@ -461,15 +460,15 @@ pub(crate) mod tests {
Batch::new(
primary_key,
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt8Vector::from_iter_values(
iter::repeat(1).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
0, num_rows,
))),
Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
0, num_rows,
))),
Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n(
1, num_rows,
))),
vec![u64_field],
)
.unwrap()

View File

@@ -489,12 +489,12 @@ mod tests {
Arc::new(UInt64Vector::from_iter_values(
(0..num_rows).map(|n| n as u64),
)),
Arc::new(UInt64Vector::from_iter_values(
std::iter::repeat(0).take(num_rows),
)),
Arc::new(UInt8Vector::from_iter_values(
std::iter::repeat(1).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
0, num_rows,
))),
Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n(
1, num_rows,
))),
vec![
BatchColumn {
column_id: 1,

View File

@@ -326,7 +326,6 @@ impl InvertedIndexer {
#[cfg(test)]
mod tests {
use std::collections::BTreeSet;
use std::iter;
use api::v1::SemanticType;
use datafusion_expr::{binary_expr, col, lit, Expr as DfExpr, Operator};
@@ -424,15 +423,15 @@ mod tests {
Batch::new(
primary_key,
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(
iter::repeat(0).take(num_rows),
)),
Arc::new(UInt8Vector::from_iter_values(
iter::repeat(1).take(num_rows),
)),
Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
0, num_rows,
))),
Arc::new(UInt64Vector::from_iter_values(std::iter::repeat_n(
0, num_rows,
))),
Arc::new(UInt8Vector::from_iter_values(std::iter::repeat_n(
1, num_rows,
))),
vec![u64_field],
)
.unwrap()

View File

@@ -755,7 +755,7 @@ mod tests {
));
let mut keys = vec![];
for (index, num_rows) in pk_row_nums.iter().map(|v| v.1).enumerate() {
keys.extend(std::iter::repeat(index as u32).take(num_rows));
keys.extend(std::iter::repeat_n(index as u32, num_rows));
}
let keys = UInt32Array::from(keys);
Arc::new(DictionaryArray::new(keys, values))

View File

@@ -85,11 +85,9 @@ impl ImpureDefaultFiller {
.schema
.iter()
.filter_map(|schema| {
if self.impure_columns.contains_key(&schema.column_name) {
Some(&schema.column_name)
} else {
None
}
self.impure_columns
.contains_key(&schema.column_name)
.then_some(&schema.column_name)
})
.collect();

View File

@@ -325,7 +325,7 @@ impl std::str::FromStr for Pattern {
impl Pattern {
fn check(&self) -> Result<()> {
if self.len() == 0 {
if self.is_empty() {
return DissectEmptyPatternSnafu.fail();
}

View File

@@ -91,9 +91,9 @@ impl UserDefinedLogicalNodeCore for InstantManipulate {
_exprs: Vec<Expr>,
inputs: Vec<LogicalPlan>,
) -> DataFusionResult<Self> {
if inputs.is_empty() {
if inputs.len() != 1 {
return Err(DataFusionError::Internal(
"InstantManipulate should have at least one input".to_string(),
"InstantManipulate should have exact one input".to_string(),
));
}
@@ -354,6 +354,9 @@ impl Stream for InstantManipulateStream {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let poll = match ready!(self.input.poll_next_unpin(cx)) {
Some(Ok(batch)) => {
if batch.num_rows() == 0 {
return Poll::Pending;
}
let timer = std::time::Instant::now();
self.num_series.add(1);
let result = Ok(batch).and_then(|batch| self.manipulate(batch));

View File

@@ -42,7 +42,7 @@ use greptime_proto::substrait_extension as pb;
use prost::Message;
use snafu::ResultExt;
use crate::error::{DataFusionPlanningSnafu, DeserializeSnafu, Result};
use crate::error::{DeserializeSnafu, Result};
use crate::extension_plan::{Millisecond, METRIC_NUM_SERIES};
use crate::metrics::PROMQL_SERIES_COUNT;
use crate::range_array::RangeArray;
@@ -194,20 +194,26 @@ impl RangeManipulate {
pub fn deserialize(bytes: &[u8]) -> Result<Self> {
let pb_range_manipulate = pb::RangeManipulate::decode(bytes).context(DeserializeSnafu)?;
let empty_schema = Arc::new(DFSchema::empty());
let placeholder_plan = LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: Arc::new(DFSchema::empty()),
schema: empty_schema.clone(),
});
Self::new(
pb_range_manipulate.start,
pb_range_manipulate.end,
pb_range_manipulate.interval,
pb_range_manipulate.range,
pb_range_manipulate.time_index,
pb_range_manipulate.tag_columns,
placeholder_plan,
)
.context(DataFusionPlanningSnafu)
// Unlike `Self::new()`, this method doesn't check the input schema as it will fail
// because the input schema is empty.
// But this is Ok since datafusion guarantees to call `with_exprs_and_inputs` for the
// deserialized plan.
Ok(Self {
start: pb_range_manipulate.start,
end: pb_range_manipulate.end,
interval: pb_range_manipulate.interval,
range: pb_range_manipulate.range,
time_index: pb_range_manipulate.time_index,
field_columns: pb_range_manipulate.tag_columns,
input: placeholder_plan,
output_schema: empty_schema,
})
}
}
@@ -270,14 +276,19 @@ impl UserDefinedLogicalNodeCore for RangeManipulate {
fn with_exprs_and_inputs(
&self,
_exprs: Vec<Expr>,
inputs: Vec<LogicalPlan>,
mut inputs: Vec<LogicalPlan>,
) -> DataFusionResult<Self> {
if inputs.is_empty() {
if inputs.len() != 1 {
return Err(DataFusionError::Internal(
"RangeManipulate should have at least one input".to_string(),
"RangeManipulate should have at exact one input".to_string(),
));
}
let input: LogicalPlan = inputs.pop().unwrap();
let input_schema = input.schema();
let output_schema =
Self::calculate_output_schema(input_schema, &self.time_index, &self.field_columns)?;
Ok(Self {
start: self.start,
end: self.end,
@@ -285,8 +296,8 @@ impl UserDefinedLogicalNodeCore for RangeManipulate {
range: self.range,
time_index: self.time_index.clone(),
field_columns: self.field_columns.clone(),
input: inputs.into_iter().next().unwrap(),
output_schema: self.output_schema.clone(),
input,
output_schema,
})
}
}

View File

@@ -106,6 +106,10 @@ impl SeriesDivide {
})
}
/// Returns the tag column names held by this plan node.
pub fn tags(&self) -> &[String] {
&self.tag_columns
}
pub fn serialize(&self) -> Vec<u8> {
pb::SeriesDivide {
tag_columns: self.tag_columns.clone(),
@@ -315,7 +319,9 @@ impl Stream for SeriesDivideStream {
let next_batch = ready!(self.as_mut().fetch_next_batch(cx)).transpose()?;
let timer = std::time::Instant::now();
if let Some(next_batch) = next_batch {
self.buffer.push(next_batch);
if next_batch.num_rows() != 0 {
self.buffer.push(next_batch);
}
continue;
} else {
// input stream is ended

View File

@@ -40,17 +40,17 @@ pub use holt_winters::HoltWinters;
pub use idelta::IDelta;
pub use predict_linear::PredictLinear;
pub use quantile::QuantileOverTime;
pub use quantile_aggr::quantile_udaf;
pub use quantile_aggr::{quantile_udaf, QUANTILE_NAME};
pub use resets::Resets;
pub use round::Round;
/// Extracts an array from a `ColumnarValue`.
///
/// If the `ColumnarValue` is a scalar, it converts it to an array of size 1.
pub(crate) fn extract_array(columnar_value: &ColumnarValue) -> Result<ArrayRef, DataFusionError> {
if let ColumnarValue::Array(array) = columnar_value {
Ok(array.clone())
} else {
Err(DataFusionError::Execution(
"expect array as input, found scalar value".to_string(),
))
match columnar_value {
ColumnarValue::Array(array) => Ok(array.clone()),
ColumnarValue::Scalar(scalar) => Ok(scalar.to_array_of_size(1)?),
}
}

View File

@@ -231,6 +231,7 @@ mod test {
AvgOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(49.9999995),
Some(45.8618844),
@@ -253,6 +254,7 @@ mod test {
MinOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(12.345678),
Some(12.345678),
@@ -275,6 +277,7 @@ mod test {
MaxOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(87.654321),
Some(87.654321),
@@ -297,6 +300,7 @@ mod test {
SumOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(99.999999),
Some(229.309422),
@@ -319,6 +323,7 @@ mod test {
CountOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(2.0),
Some(5.0),
@@ -341,6 +346,7 @@ mod test {
LastOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(87.654321),
Some(70.710678),
@@ -363,6 +369,7 @@ mod test {
AbsentOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
None,
None,
@@ -385,6 +392,7 @@ mod test {
PresentOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(1.0),
Some(1.0),
@@ -407,6 +415,7 @@ mod test {
StdvarOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(1417.8479276253622),
Some(808.999919713209),
@@ -442,6 +451,7 @@ mod test {
StdvarOverTime::scalar_udf(),
RangeArray::from_ranges(ts_array, ranges).unwrap(),
RangeArray::from_ranges(values_array, ranges).unwrap(),
vec![],
vec![Some(0.0), Some(10.559999999999999)],
);
}
@@ -453,6 +463,7 @@ mod test {
StddevOverTime::scalar_udf(),
ts_array,
value_array,
vec![],
vec![
Some(37.6543215),
Some(28.442923895289123),
@@ -488,6 +499,7 @@ mod test {
StddevOverTime::scalar_udf(),
RangeArray::from_ranges(ts_array, ranges).unwrap(),
RangeArray::from_ranges(values_array, ranges).unwrap(),
vec![],
vec![Some(0.0), Some(3.249615361854384)],
);
}

View File

@@ -90,6 +90,7 @@ mod test {
Changes::scalar_udf(),
ts_array_1,
value_array_1,
vec![],
vec![Some(0.0), Some(3.0), Some(5.0), Some(8.0), None],
);
@@ -101,6 +102,7 @@ mod test {
Changes::scalar_udf(),
ts_array_2,
value_array_2,
vec![],
vec![Some(0.0), Some(3.0), Some(5.0), Some(9.0), None],
);
@@ -111,6 +113,7 @@ mod test {
Changes::scalar_udf(),
ts_array_3,
value_array_3,
vec![],
vec![Some(0.0), Some(0.0), Some(1.0), Some(1.0), None],
);
}

View File

@@ -74,6 +74,7 @@ mod test {
Deriv::scalar_udf(),
ts_array,
value_array,
vec![],
vec![Some(10.606060606060607), None],
);
}
@@ -99,6 +100,7 @@ mod test {
Deriv::scalar_udf(),
ts_range_array,
value_range_array,
vec![],
vec![Some(0.0)],
);
}

View File

@@ -34,11 +34,11 @@ use std::sync::Arc;
use datafusion::arrow::array::{Float64Array, TimestampMillisecondArray};
use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::common::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::{ScalarUDF, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datafusion_expr::create_udf;
use datatypes::arrow::array::Array;
use datatypes::arrow::array::{Array, Int64Array};
use datatypes::arrow::datatypes::DataType;
use crate::extension_plan::Millisecond;
@@ -53,7 +53,7 @@ pub type Increase = ExtrapolatedRate<true, false>;
/// from <https://github.com/prometheus/prometheus/blob/v0.40.1/promql/functions.go#L66>
#[derive(Debug)]
pub struct ExtrapolatedRate<const IS_COUNTER: bool, const IS_RATE: bool> {
/// Range duration in millisecond
/// Range length in milliseconds.
range_length: i64,
}
@@ -63,7 +63,7 @@ impl<const IS_COUNTER: bool, const IS_RATE: bool> ExtrapolatedRate<IS_COUNTER, I
Self { range_length }
}
fn scalar_udf_with_name(name: &str, range_length: i64) -> ScalarUDF {
fn scalar_udf_with_name(name: &str) -> ScalarUDF {
let input_types = vec![
// timestamp range vector
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
@@ -71,6 +71,8 @@ impl<const IS_COUNTER: bool, const IS_RATE: bool> ExtrapolatedRate<IS_COUNTER, I
RangeArray::convert_data_type(DataType::Float64),
// timestamp vector
DataType::Timestamp(TimeUnit::Millisecond, None),
// range length
DataType::Int64,
];
create_udf(
@@ -78,12 +80,34 @@ impl<const IS_COUNTER: bool, const IS_RATE: bool> ExtrapolatedRate<IS_COUNTER, I
input_types,
DataType::Float64,
Volatility::Volatile,
Arc::new(move |input: &_| Self::new(range_length).calc(input)) as _,
Arc::new(move |input: &_| Self::create_function(input)?.calc(input)) as _,
)
}
fn calc(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), 3);
/// Builds an `ExtrapolatedRate` from the UDF's runtime arguments.
///
/// Only the fourth argument (index 3, the range length) is inspected here;
/// the remaining arguments are consumed later by `calc`.
///
/// # Errors
/// Returns `DataFusionError::Plan` when:
/// * the number of inputs is not exactly 4, or
/// * the fourth input is not an `Int64` array, is empty, or holds a null in
///   its first slot.
///
/// Any error from `extract_array` is propagated unchanged.
fn create_function(inputs: &[ColumnarValue]) -> DfResult<Self> {
    if inputs.len() != 4 {
        return Err(DataFusionError::Plan(
            "ExtrapolatedRate function should have 4 inputs".to_string(),
        ));
    }
    let range_length_array = extract_array(&inputs[3])?;
    // Validate instead of unwrapping: a wrong array type, an empty array or a
    // null first slot must surface as a query error rather than a panic.
    let range_length = range_length_array
        .as_any()
        .downcast_ref::<Int64Array>()
        .filter(|array| !array.is_empty() && array.is_valid(0))
        .map(|array| array.value(0))
        .ok_or_else(|| {
            DataFusionError::Plan(
                "ExtrapolatedRate function's fourth input should be a non-null int64"
                    .to_string(),
            )
        })?;
    Ok(Self::new(range_length))
}
/// Input parameters:
/// * 0: timestamp range vector
/// * 1: value range vector
/// * 2: timestamp vector
/// * 3: range length. Range duration in millisecond. Not used here
fn calc(&self, input: &[ColumnarValue]) -> DfResult<ColumnarValue> {
assert_eq!(input.len(), 4);
// construct matrix from input
let ts_array = extract_array(&input[0])?;
@@ -204,34 +228,34 @@ impl<const IS_COUNTER: bool, const IS_RATE: bool> ExtrapolatedRate<IS_COUNTER, I
// delta
impl ExtrapolatedRate<false, false> {
pub fn name() -> &'static str {
pub const fn name() -> &'static str {
"prom_delta"
}
pub fn scalar_udf(range_length: i64) -> ScalarUDF {
Self::scalar_udf_with_name(Self::name(), range_length)
pub fn scalar_udf() -> ScalarUDF {
Self::scalar_udf_with_name(Self::name())
}
}
// rate
impl ExtrapolatedRate<true, true> {
pub fn name() -> &'static str {
pub const fn name() -> &'static str {
"prom_rate"
}
pub fn scalar_udf(range_length: i64) -> ScalarUDF {
Self::scalar_udf_with_name(Self::name(), range_length)
pub fn scalar_udf() -> ScalarUDF {
Self::scalar_udf_with_name(Self::name())
}
}
// increase
impl ExtrapolatedRate<true, false> {
pub fn name() -> &'static str {
pub const fn name() -> &'static str {
"prom_increase"
}
pub fn scalar_udf(range_length: i64) -> ScalarUDF {
Self::scalar_udf_with_name(Self::name(), range_length)
pub fn scalar_udf() -> ScalarUDF {
Self::scalar_udf_with_name(Self::name())
}
}
@@ -271,6 +295,7 @@ mod test {
ColumnarValue::Array(Arc::new(ts_range.into_dict())),
ColumnarValue::Array(Arc::new(value_range.into_dict())),
ColumnarValue::Array(timestamps),
ColumnarValue::Array(Arc::new(Int64Array::from(vec![5]))),
];
let output = extract_array(
&ExtrapolatedRate::<IS_COUNTER, IS_RATE>::new(5)

View File

@@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::{ScalarUDF, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datafusion_common::ScalarValue;
use datafusion_expr::create_udf;
use datatypes::arrow::array::Array;
use datatypes::arrow::datatypes::DataType;
@@ -62,6 +63,10 @@ impl HoltWinters {
vec![
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
RangeArray::convert_data_type(DataType::Float64),
// sf
DataType::Float64,
// tf
DataType::Float64,
]
}
@@ -69,20 +74,39 @@ impl HoltWinters {
DataType::Float64
}
pub fn scalar_udf(level: f64, trend: f64) -> ScalarUDF {
pub fn scalar_udf() -> ScalarUDF {
create_udf(
Self::name(),
Self::input_type(),
Self::return_type(),
Volatility::Volatile,
Arc::new(move |input: &_| Self::new(level, trend).calc(input)) as _,
Arc::new(move |input: &_| Self::create_function(input)?.calc(input)) as _,
)
}
/// Builds a `HoltWinters` instance from the UDF's runtime arguments.
///
/// Expects exactly four inputs; the third and fourth must be non-null scalar
/// `Float64` values and are passed, in that order, to `Self::new`.
/// Anything else yields a `DataFusionError::Plan`.
fn create_function(inputs: &[ColumnarValue]) -> Result<Self, DataFusionError> {
    // The slice pattern doubles as the arity check.
    let [_, _, sf_arg, tf_arg] = inputs else {
        return Err(DataFusionError::Plan(
            "HoltWinters function should have 4 inputs".to_string(),
        ));
    };

    let sf = match sf_arg {
        ColumnarValue::Scalar(ScalarValue::Float64(Some(value))) => *value,
        _ => {
            return Err(DataFusionError::Plan(
                "HoltWinters function's third input should be a scalar float64".to_string(),
            ))
        }
    };

    let tf = match tf_arg {
        ColumnarValue::Scalar(ScalarValue::Float64(Some(value))) => *value,
        _ => {
            return Err(DataFusionError::Plan(
                "HoltWinters function's fourth input should be a scalar float64".to_string(),
            ))
        }
    };

    Ok(Self::new(sf, tf))
}
fn calc(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
// construct matrix from input.
// The third one is level param, the fourth - trend param which are included in fields.
assert_eq!(input.len(), 2);
assert_eq!(input.len(), 4);
let ts_array = extract_array(&input[0])?;
let value_array = extract_array(&input[1])?;
@@ -264,9 +288,13 @@ mod tests {
let ts_range_array = RangeArray::from_ranges(ts_array, ranges).unwrap();
let value_range_array = RangeArray::from_ranges(values_array, ranges).unwrap();
simple_range_udf_runner(
HoltWinters::scalar_udf(0.5, 0.1),
HoltWinters::scalar_udf(),
ts_range_array,
value_range_array,
vec![
ScalarValue::Float64(Some(0.5)),
ScalarValue::Float64(Some(0.1)),
],
vec![Some(5.0)],
);
}
@@ -287,9 +315,13 @@ mod tests {
let ts_range_array = RangeArray::from_ranges(ts_array, ranges).unwrap();
let value_range_array = RangeArray::from_ranges(values_array, ranges).unwrap();
simple_range_udf_runner(
HoltWinters::scalar_udf(0.5, 0.1),
HoltWinters::scalar_udf(),
ts_range_array,
value_range_array,
vec![
ScalarValue::Float64(Some(0.5)),
ScalarValue::Float64(Some(0.1)),
],
vec![Some(38.18119566835938)],
);
}
@@ -315,9 +347,13 @@ mod tests {
let (ts_range_array, value_range_array) =
create_ts_and_value_range_arrays(query, ranges.clone());
simple_range_udf_runner(
HoltWinters::scalar_udf(0.01, 0.1),
HoltWinters::scalar_udf(),
ts_range_array,
value_range_array,
vec![
ScalarValue::Float64(Some(0.01)),
ScalarValue::Float64(Some(0.1)),
],
vec![Some(expected)],
);
}

View File

@@ -190,6 +190,7 @@ mod test {
IDelta::<false>::scalar_udf(),
ts_range_array,
value_range_array,
vec![],
vec![Some(1.0), Some(-5.0), None, Some(6.0), None, None],
);
@@ -200,6 +201,7 @@ mod test {
IDelta::<true>::scalar_udf(),
ts_range_array,
value_range_array,
vec![],
// the second point represent counter reset
vec![Some(0.5), Some(0.0), None, Some(3.0), None, None],
);

View File

@@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::{ScalarUDF, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datafusion_common::ScalarValue;
use datafusion_expr::create_udf;
use datatypes::arrow::array::Array;
use datatypes::arrow::datatypes::DataType;
@@ -44,25 +45,41 @@ impl PredictLinear {
"prom_predict_linear"
}
pub fn scalar_udf(t: i64) -> ScalarUDF {
pub fn scalar_udf() -> ScalarUDF {
let input_types = vec![
// time index column
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
// value column
RangeArray::convert_data_type(DataType::Float64),
// t
DataType::Int64,
];
create_udf(
Self::name(),
input_types,
DataType::Float64,
Volatility::Volatile,
Arc::new(move |input: &_| Self::new(t).predict_linear(input)) as _,
Arc::new(move |input: &_| Self::create_function(input)?.predict_linear(input)) as _,
)
}
/// Builds a `PredictLinear` instance from the UDF's runtime arguments.
///
/// Expects exactly three inputs, the last of which must be a non-null scalar
/// `Int64` that is handed to `Self::new`. Anything else yields a
/// `DataFusionError::Plan`.
fn create_function(inputs: &[ColumnarValue]) -> Result<Self, DataFusionError> {
    match inputs {
        // Right arity and a usable scalar in the third position.
        [_, _, ColumnarValue::Scalar(ScalarValue::Int64(Some(t)))] => Ok(Self::new(*t)),
        // Right arity, but the third argument has the wrong shape or type.
        [_, _, _] => Err(DataFusionError::Plan(
            "PredictLinear function's third input should be a scalar int64".to_string(),
        )),
        // Wrong number of arguments.
        _ => Err(DataFusionError::Plan(
            "PredictLinear function should have 3 inputs".to_string(),
        )),
    }
}
fn predict_linear(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
// construct matrix from input.
assert_eq!(input.len(), 2);
assert_eq!(input.len(), 3);
let ts_array = extract_array(&input[0])?;
let value_array = extract_array(&input[1])?;
@@ -190,9 +207,10 @@ mod test {
let ts_array = RangeArray::from_ranges(ts_array, ranges).unwrap();
let value_array = RangeArray::from_ranges(values_array, ranges).unwrap();
simple_range_udf_runner(
PredictLinear::scalar_udf(0),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(0))],
vec![None, None],
);
}
@@ -201,9 +219,10 @@ mod test {
fn calculate_predict_linear_test1() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PredictLinear::scalar_udf(0),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(0))],
// value at t = 0
vec![Some(38.63636363636364)],
);
@@ -213,9 +232,10 @@ mod test {
fn calculate_predict_linear_test2() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PredictLinear::scalar_udf(3000),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(3000))],
// value at t = 3000
vec![Some(31856.818181818187)],
);
@@ -225,9 +245,10 @@ mod test {
fn calculate_predict_linear_test3() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PredictLinear::scalar_udf(4200),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(4200))],
// value at t = 4200
vec![Some(44584.09090909091)],
);
@@ -237,9 +258,10 @@ mod test {
fn calculate_predict_linear_test4() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PredictLinear::scalar_udf(6600),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(6600))],
// value at t = 6600
vec![Some(70038.63636363638)],
);
@@ -249,9 +271,10 @@ mod test {
fn calculate_predict_linear_test5() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PredictLinear::scalar_udf(7800),
PredictLinear::scalar_udf(),
ts_array,
value_array,
vec![ScalarValue::Int64(Some(7800))],
// value at t = 7800
vec![Some(82765.9090909091)],
);

View File

@@ -19,6 +19,7 @@ use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::{ScalarUDF, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datafusion_common::ScalarValue;
use datafusion_expr::create_udf;
use datatypes::arrow::array::Array;
use datatypes::arrow::datatypes::DataType;
@@ -40,22 +41,38 @@ impl QuantileOverTime {
"prom_quantile_over_time"
}
pub fn scalar_udf(quantile: f64) -> ScalarUDF {
pub fn scalar_udf() -> ScalarUDF {
let input_types = vec![
// time index column
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
// value column
RangeArray::convert_data_type(DataType::Float64),
// quantile
DataType::Float64,
];
create_udf(
Self::name(),
input_types,
DataType::Float64,
Volatility::Volatile,
Arc::new(move |input: &_| Self::new(quantile).quantile_over_time(input)) as _,
Arc::new(move |input: &_| Self::create_function(input)?.quantile_over_time(input)) as _,
)
}
/// Builds a `QuantileOverTime` instance from the UDF's runtime arguments.
///
/// Expects exactly three inputs; the third must be a non-null scalar
/// `Float64` and is handed to `Self::new`. Anything else yields a
/// `DataFusionError::Plan`.
fn create_function(inputs: &[ColumnarValue]) -> Result<Self, DataFusionError> {
    let arg_count = inputs.len();
    if arg_count != 3 {
        return Err(DataFusionError::Plan(
            "QuantileOverTime function should have 3 inputs".to_string(),
        ));
    }

    match &inputs[2] {
        ColumnarValue::Scalar(ScalarValue::Float64(Some(quantile))) => Ok(Self::new(*quantile)),
        _ => Err(DataFusionError::Plan(
            "QuantileOverTime function's third input should be a scalar float64".to_string(),
        )),
    }
}
fn quantile_over_time(
&self,
input: &[ColumnarValue],

View File

@@ -16,16 +16,18 @@ use std::sync::Arc;
use datafusion::arrow::array::{ArrayRef, AsArray};
use datafusion::common::cast::{as_list_array, as_primitive_array, as_struct_array};
use datafusion::error::Result as DfResult;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF, Volatility};
use datafusion::physical_plan::expressions::Literal;
use datafusion::prelude::create_udaf;
use datafusion_common::ScalarValue;
use datafusion_expr::function::AccumulatorArgs;
use datatypes::arrow::array::{ListArray, StructArray};
use datatypes::arrow::datatypes::{DataType, Field, Float64Type};
use crate::functions::quantile::quantile_impl;
const QUANTILE_NAME: &str = "quantile";
pub const QUANTILE_NAME: &str = "quantile";
const VALUES_FIELD_NAME: &str = "values";
const DEFAULT_LIST_FIELD_NAME: &str = "item";
@@ -38,16 +40,16 @@ pub struct QuantileAccumulator {
/// Create a quantile `AggregateUDF` for PromQL quantile operator,
/// which calculates φ-quantile (0 ≤ φ ≤ 1) over dimensions
pub fn quantile_udaf(q: f64) -> Arc<AggregateUDF> {
pub fn quantile_udaf() -> Arc<AggregateUDF> {
Arc::new(create_udaf(
QUANTILE_NAME,
// Input type: (values)
vec![DataType::Float64],
// Input type: (φ, values)
vec![DataType::Float64, DataType::Float64],
// Output type: the φ-quantile
Arc::new(DataType::Float64),
Volatility::Volatile,
// Create the accumulator
Arc::new(move |_| Ok(Box::new(QuantileAccumulator::new(q)))),
Arc::new(QuantileAccumulator::from_args),
// Intermediate state types
Arc::new(vec![DataType::Struct(
vec![Field::new(
@@ -65,17 +67,40 @@ pub fn quantile_udaf(q: f64) -> Arc<AggregateUDF> {
}
impl QuantileAccumulator {
pub fn new(q: f64) -> Self {
fn new(q: f64) -> Self {
Self {
q,
..Default::default()
}
}
/// Accumulator factory used by the quantile UDAF.
///
/// Requires exactly two argument expressions. The first one must be a
/// `Literal` holding a non-null `Float64`; that value is passed to
/// `Self::new`.
///
/// # Errors
/// * `DataFusionError::Plan` when the number of expressions is not 2.
/// * `DataFusionError::Internal` when the first expression is not a
///   non-null `Float64` literal.
pub fn from_args(args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
    if args.exprs.len() != 2 {
        return Err(DataFusionError::Plan(
            "Quantile function should have 2 inputs".to_string(),
        ));
    }

    let literal = args.exprs[0].as_any().downcast_ref::<Literal>();
    let Some(ScalarValue::Float64(Some(q))) = literal.map(|lit| lit.value()) else {
        return Err(DataFusionError::Internal(
            "Invalid quantile value".to_string(),
        ));
    };

    Ok(Box::new(Self::new(*q)))
}
}
impl DfAccumulator for QuantileAccumulator {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let f64_array = values[0].as_primitive::<Float64Type>();
let f64_array = values[1].as_primitive::<Float64Type>();
self.values.extend(f64_array);
@@ -162,9 +187,10 @@ mod tests {
#[test]
fn test_quantile_accumulator_single_value() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input = create_f64_array(vec![Some(10.0)]);
accumulator.update_batch(&[input]).unwrap();
accumulator.update_batch(&[q, input]).unwrap();
let result = accumulator.evaluate().unwrap();
assert_eq!(result, ScalarValue::Float64(Some(10.0)));
@@ -173,9 +199,10 @@ mod tests {
#[test]
fn test_quantile_accumulator_multiple_values() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]);
accumulator.update_batch(&[input]).unwrap();
accumulator.update_batch(&[q, input]).unwrap();
let result = accumulator.evaluate().unwrap();
assert_eq!(result, ScalarValue::Float64(Some(3.0)));
@@ -184,9 +211,10 @@ mod tests {
#[test]
fn test_quantile_accumulator_with_nulls() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input = create_f64_array(vec![Some(1.0), None, Some(3.0), Some(4.0), Some(5.0)]);
accumulator.update_batch(&[input]).unwrap();
accumulator.update_batch(&[q, input]).unwrap();
let result = accumulator.evaluate().unwrap();
assert_eq!(result, ScalarValue::Float64(Some(3.0)));
@@ -195,11 +223,12 @@ mod tests {
#[test]
fn test_quantile_accumulator_multiple_batches() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input1 = create_f64_array(vec![Some(1.0), Some(2.0)]);
let input2 = create_f64_array(vec![Some(3.0), Some(4.0), Some(5.0)]);
accumulator.update_batch(&[input1]).unwrap();
accumulator.update_batch(&[input2]).unwrap();
accumulator.update_batch(&[q.clone(), input1]).unwrap();
accumulator.update_batch(&[q, input2]).unwrap();
let result = accumulator.evaluate().unwrap();
assert_eq!(result, ScalarValue::Float64(Some(3.0)));
@@ -208,29 +237,33 @@ mod tests {
#[test]
fn test_quantile_accumulator_different_quantiles() {
let mut min_accumulator = QuantileAccumulator::new(0.0);
let q = create_f64_array(vec![Some(0.0)]);
let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0), Some(4.0), Some(5.0)]);
min_accumulator.update_batch(&[input.clone()]).unwrap();
min_accumulator.update_batch(&[q, input.clone()]).unwrap();
assert_eq!(
min_accumulator.evaluate().unwrap(),
ScalarValue::Float64(Some(1.0))
);
let mut q1_accumulator = QuantileAccumulator::new(0.25);
q1_accumulator.update_batch(&[input.clone()]).unwrap();
let q = create_f64_array(vec![Some(0.25)]);
q1_accumulator.update_batch(&[q, input.clone()]).unwrap();
assert_eq!(
q1_accumulator.evaluate().unwrap(),
ScalarValue::Float64(Some(2.0))
);
let mut q3_accumulator = QuantileAccumulator::new(0.75);
q3_accumulator.update_batch(&[input.clone()]).unwrap();
let q = create_f64_array(vec![Some(0.75)]);
q3_accumulator.update_batch(&[q, input.clone()]).unwrap();
assert_eq!(
q3_accumulator.evaluate().unwrap(),
ScalarValue::Float64(Some(4.0))
);
let mut max_accumulator = QuantileAccumulator::new(1.0);
max_accumulator.update_batch(&[input]).unwrap();
let q = create_f64_array(vec![Some(1.0)]);
max_accumulator.update_batch(&[q, input]).unwrap();
assert_eq!(
max_accumulator.evaluate().unwrap(),
ScalarValue::Float64(Some(5.0))
@@ -240,10 +273,11 @@ mod tests {
#[test]
fn test_quantile_accumulator_size() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input = create_f64_array(vec![Some(1.0), Some(2.0), Some(3.0)]);
let initial_size = accumulator.size();
accumulator.update_batch(&[input]).unwrap();
accumulator.update_batch(&[q, input]).unwrap();
let after_update_size = accumulator.size();
assert!(after_update_size >= initial_size);
@@ -252,14 +286,16 @@ mod tests {
#[test]
fn test_quantile_accumulator_state_and_merge() -> DfResult<()> {
let mut acc1 = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input1 = create_f64_array(vec![Some(1.0), Some(2.0)]);
acc1.update_batch(&[input1])?;
acc1.update_batch(&[q, input1])?;
let state1 = acc1.state()?;
let mut acc2 = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input2 = create_f64_array(vec![Some(3.0), Some(4.0), Some(5.0)]);
acc2.update_batch(&[input2])?;
acc2.update_batch(&[q, input2])?;
let mut struct_builders = vec![];
for scalar in &state1 {
@@ -280,16 +316,16 @@ mod tests {
#[test]
fn test_quantile_accumulator_with_extreme_values() {
let mut accumulator = QuantileAccumulator::new(0.5);
let q = create_f64_array(vec![Some(0.5)]);
let input = create_f64_array(vec![Some(f64::MAX), Some(f64::MIN), Some(0.0)]);
accumulator.update_batch(&[input]).unwrap();
accumulator.update_batch(&[q, input]).unwrap();
let _result = accumulator.evaluate().unwrap();
}
#[test]
fn test_quantile_udaf_creation() {
let q = 0.5;
let udaf = quantile_udaf(q);
let udaf = quantile_udaf();
assert_eq!(udaf.name(), QUANTILE_NAME);
assert_eq!(udaf.return_type(&[]).unwrap(), DataType::Float64);

View File

@@ -90,6 +90,7 @@ mod test {
Resets::scalar_udf(),
ts_array_1,
value_array_1,
vec![],
vec![Some(0.0), Some(1.0), Some(2.0), Some(3.0), None],
);
@@ -101,6 +102,7 @@ mod test {
Resets::scalar_udf(),
ts_array_2,
value_array_2,
vec![],
vec![Some(0.0), Some(0.0), Some(1.0), Some(1.0), None],
);
@@ -111,6 +113,7 @@ mod test {
Resets::scalar_udf(),
ts_array_3,
value_array_3,
vec![],
vec![Some(0.0), Some(0.0), Some(0.0), Some(0.0), None],
);
}

View File

@@ -15,6 +15,7 @@
use std::sync::Arc;
use datafusion::error::DataFusionError;
use datafusion_common::ScalarValue;
use datafusion_expr::{create_udf, ColumnarValue, ScalarUDF, Volatility};
use datatypes::arrow::array::AsArray;
use datatypes::arrow::datatypes::{DataType, Float64Type};
@@ -36,25 +37,39 @@ impl Round {
}
fn input_type() -> Vec<DataType> {
vec![DataType::Float64]
vec![DataType::Float64, DataType::Float64]
}
pub fn return_type() -> DataType {
DataType::Float64
}
pub fn scalar_udf(nearest: f64) -> ScalarUDF {
pub fn scalar_udf() -> ScalarUDF {
create_udf(
Self::name(),
Self::input_type(),
Self::return_type(),
Volatility::Volatile,
Arc::new(move |input: &_| Self::new(nearest).calc(input)) as _,
Arc::new(move |input: &_| Self::create_function(input)?.calc(input)) as _,
)
}
/// Builds a `Round` instance from the UDF's runtime arguments.
///
/// Expects exactly two inputs; the second must be a non-null scalar
/// `Float64` and is handed to `Self::new`. Anything else yields a
/// `DataFusionError::Plan`.
fn create_function(inputs: &[ColumnarValue]) -> Result<Self, DataFusionError> {
    // The slice pattern doubles as the arity check.
    let [_, nearest_arg] = inputs else {
        return Err(DataFusionError::Plan(
            "Round function should have 2 inputs".to_string(),
        ));
    };

    if let ColumnarValue::Scalar(ScalarValue::Float64(Some(nearest))) = nearest_arg {
        Ok(Self::new(*nearest))
    } else {
        Err(DataFusionError::Plan(
            "Round function's second input should be a scalar float64".to_string(),
        ))
    }
}
fn calc(&self, input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), 1);
assert_eq!(input.len(), 2);
let value_array = extract_array(&input[0])?;
@@ -80,8 +95,11 @@ mod tests {
use super::*;
fn test_round_f64(value: Vec<f64>, nearest: f64, expected: Vec<f64>) {
let round_udf = Round::scalar_udf(nearest);
let input = vec![ColumnarValue::Array(Arc::new(Float64Array::from(value)))];
let round_udf = Round::scalar_udf();
let input = vec![
ColumnarValue::Array(Arc::new(Float64Array::from(value))),
ColumnarValue::Scalar(ScalarValue::Float64(Some(nearest))),
];
let args = ScalarFunctionArgs {
args: input,
number_rows: 1,

View File

@@ -17,6 +17,7 @@ use std::sync::Arc;
use datafusion::arrow::array::Float64Array;
use datafusion::logical_expr::ScalarUDF;
use datafusion::physical_plan::ColumnarValue;
use datafusion_common::ScalarValue;
use datafusion_expr::ScalarFunctionArgs;
use datatypes::arrow::datatypes::DataType;
@@ -28,13 +29,17 @@ pub fn simple_range_udf_runner(
range_fn: ScalarUDF,
input_ts: RangeArray,
input_value: RangeArray,
other_args: Vec<ScalarValue>,
expected: Vec<Option<f64>>,
) {
let num_rows = input_ts.len();
let input = vec![
let input = [
ColumnarValue::Array(Arc::new(input_ts.into_dict())),
ColumnarValue::Array(Arc::new(input_value.into_dict())),
];
]
.into_iter()
.chain(other_args.into_iter().map(ColumnarValue::Scalar))
.collect::<Vec<_>>();
let args = ScalarFunctionArgs {
args: input,
number_rows: num_rows,

View File

@@ -55,12 +55,16 @@ impl Categorizer {
LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate),
LogicalPlan::Window(_) => Commutativity::Unimplemented,
LogicalPlan::Aggregate(aggr) => {
if Self::check_partition(&aggr.group_expr, &partition_cols) {
return Commutativity::Commutative;
if !Self::check_partition(&aggr.group_expr, &partition_cols) {
return Commutativity::NonCommutative;
}
// check all children exprs and uses the strictest level
Commutativity::Unimplemented
for expr in &aggr.aggr_expr {
let commutativity = Self::check_expr(expr);
if !matches!(commutativity, Commutativity::Commutative) {
return commutativity;
}
}
Commutativity::Commutative
}
LogicalPlan::Sort(_) => {
if partition_cols.is_empty() {
@@ -94,7 +98,7 @@ impl Categorizer {
}
}
LogicalPlan::Extension(extension) => {
Self::check_extension_plan(extension.node.as_ref() as _)
Self::check_extension_plan(extension.node.as_ref() as _, &partition_cols)
}
LogicalPlan::Distinct(_) => {
if partition_cols.is_empty() {
@@ -116,13 +120,30 @@ impl Categorizer {
}
}
pub fn check_extension_plan(plan: &dyn UserDefinedLogicalNode) -> Commutativity {
pub fn check_extension_plan(
plan: &dyn UserDefinedLogicalNode,
partition_cols: &[String],
) -> Commutativity {
match plan.name() {
name if name == EmptyMetric::name()
name if name == SeriesDivide::name() => {
let series_divide = plan.as_any().downcast_ref::<SeriesDivide>().unwrap();
let tags = series_divide.tags().iter().collect::<HashSet<_>>();
for partition_col in partition_cols {
if !tags.contains(partition_col) {
return Commutativity::NonCommutative;
}
}
Commutativity::Commutative
}
name if name == SeriesNormalize::name()
|| name == InstantManipulate::name()
|| name == SeriesNormalize::name()
|| name == RangeManipulate::name()
|| name == SeriesDivide::name()
|| name == RangeManipulate::name() =>
{
// They should always follow SeriesDivide.
// Either all commutative or all non-commutative (which will be blocked by SeriesDivide).
Commutativity::Commutative
}
name if name == EmptyMetric::name()
|| name == MergeScanLogicalPlan::name()
|| name == MergeSortLogicalPlan::name() =>
{
@@ -148,8 +169,9 @@ impl Categorizer {
| Expr::Negative(_)
| Expr::Between(_)
| Expr::Exists(_)
| Expr::InList(_)
| Expr::ScalarFunction(_) => Commutativity::Commutative,
| Expr::InList(_) => Commutativity::Commutative,
Expr::ScalarFunction(_udf) => Commutativity::Commutative,
Expr::AggregateFunction(_udaf) => Commutativity::Commutative,
Expr::Like(_)
| Expr::SimilarTo(_)
@@ -158,7 +180,6 @@ impl Categorizer {
| Expr::Case(_)
| Expr::Cast(_)
| Expr::TryCast(_)
| Expr::AggregateFunction(_)
| Expr::WindowFunction(_)
| Expr::InSubquery(_)
| Expr::ScalarSubquery(_)

View File

@@ -14,7 +14,6 @@
#![feature(let_chains)]
#![feature(int_roundings)]
#![feature(trait_upcasting)]
#![feature(try_blocks)]
#![feature(stmt_expr_attributes)]
#![feature(iterator_try_collect)]
@@ -28,7 +27,7 @@ pub mod error;
pub mod executor;
pub mod log_query;
pub mod metrics;
mod optimizer;
pub mod optimizer;
pub mod options;
pub mod parser;
mod part_sort;

View File

@@ -138,26 +138,18 @@ impl ParallelizeScan {
) -> Vec<Vec<PartitionRange>> {
if ranges.is_empty() {
// Returns `expected_partition_num` partitions, each with no range.
return vec![vec![]];
return vec![vec![]; expected_partition_num];
}
if ranges.len() == 1 {
return vec![ranges];
let mut vec = vec![vec![]; expected_partition_num];
vec[0] = ranges;
return vec;
}
// Sort ranges by number of rows in descending order.
ranges.sort_by(|a, b| b.num_rows.cmp(&a.num_rows));
// Get the max row number of the ranges. Note that the number of rows may be 0 if statistics are not available.
let max_rows = ranges[0].num_rows;
let total_rows = ranges.iter().map(|range| range.num_rows).sum::<usize>();
// Computes the partition num by the max row number. This eliminates the unbalance of the partitions.
let balanced_partition_num = if max_rows > 0 {
total_rows.div_ceil(max_rows)
} else {
ranges.len()
};
let actual_partition_num = expected_partition_num.min(balanced_partition_num).max(1);
let mut partition_ranges = vec![vec![]; actual_partition_num];
let mut partition_ranges = vec![vec![]; expected_partition_num];
#[derive(Eq, PartialEq)]
struct HeapNode {
@@ -179,7 +171,7 @@ impl ParallelizeScan {
}
let mut part_heap =
BinaryHeap::from_iter((0..actual_partition_num).map(|partition_idx| HeapNode {
BinaryHeap::from_iter((0..expected_partition_num).map(|partition_idx| HeapNode {
num_rows: 0,
partition_idx,
}));
@@ -270,7 +262,7 @@ mod test {
];
assert_eq!(result, expected);
// assign 4 ranges to 5 partitions. Only 4 partitions are returned.
// assign 4 ranges to 5 partitions.
let expected_partition_num = 5;
let result = ParallelizeScan::assign_partition_range(ranges, expected_partition_num);
let expected = vec![
@@ -280,32 +272,31 @@ mod test {
num_rows: 250,
identifier: 4,
}],
vec![PartitionRange {
start: Timestamp::new(0, TimeUnit::Second),
end: Timestamp::new(10, TimeUnit::Second),
num_rows: 100,
identifier: 1,
}],
vec![PartitionRange {
start: Timestamp::new(10, TimeUnit::Second),
end: Timestamp::new(20, TimeUnit::Second),
num_rows: 200,
identifier: 2,
}],
vec![
PartitionRange {
start: Timestamp::new(20, TimeUnit::Second),
end: Timestamp::new(30, TimeUnit::Second),
num_rows: 150,
identifier: 3,
},
PartitionRange {
start: Timestamp::new(0, TimeUnit::Second),
end: Timestamp::new(10, TimeUnit::Second),
num_rows: 100,
identifier: 1,
},
],
vec![],
vec![PartitionRange {
start: Timestamp::new(20, TimeUnit::Second),
end: Timestamp::new(30, TimeUnit::Second),
num_rows: 150,
identifier: 3,
}],
];
assert_eq!(result, expected);
// assign 0 ranges to 5 partitions. Only 1 partition is returned.
// assign 0 ranges to 5 partitions. Should return 5 empty ranges.
let result = ParallelizeScan::assign_partition_range(vec![], 5);
assert_eq!(result.len(), 1);
assert_eq!(result.len(), 5);
}
#[test]

View File

@@ -348,7 +348,7 @@ impl PartSortStream {
&self,
sort_column: &ArrayRef,
) -> datafusion_common::Result<Option<usize>> {
if sort_column.len() == 0 {
if sort_column.is_empty() {
return Ok(Some(0));
}

View File

@@ -31,7 +31,7 @@ use datafusion::functions_aggregate::stddev::stddev_pop_udaf;
use datafusion::functions_aggregate::sum::sum_udaf;
use datafusion::functions_aggregate::variance::var_pop_udaf;
use datafusion::functions_window::row_number::RowNumber;
use datafusion::logical_expr::expr::{AggregateFunction, Alias, ScalarFunction, WindowFunction};
use datafusion::logical_expr::expr::{Alias, ScalarFunction, WindowFunction};
use datafusion::logical_expr::expr_rewriter::normalize_cols;
use datafusion::logical_expr::{
BinaryExpr, Cast, Extension, LogicalPlan, LogicalPlanBuilder, Operator,
@@ -1425,15 +1425,18 @@ impl PromPlanner {
let field_column_pos = 0;
let mut exprs = Vec::with_capacity(self.ctx.field_columns.len());
let scalar_func = match func.name {
"increase" => ScalarFunc::ExtrapolateUdf(Arc::new(Increase::scalar_udf(
"increase" => ScalarFunc::ExtrapolateUdf(
Arc::new(Increase::scalar_udf()),
self.ctx.range.context(ExpectRangeSelectorSnafu)?,
))),
"rate" => ScalarFunc::ExtrapolateUdf(Arc::new(Rate::scalar_udf(
),
"rate" => ScalarFunc::ExtrapolateUdf(
Arc::new(Rate::scalar_udf()),
self.ctx.range.context(ExpectRangeSelectorSnafu)?,
))),
"delta" => ScalarFunc::ExtrapolateUdf(Arc::new(Delta::scalar_udf(
),
"delta" => ScalarFunc::ExtrapolateUdf(
Arc::new(Delta::scalar_udf()),
self.ctx.range.context(ExpectRangeSelectorSnafu)?,
))),
),
"idelta" => ScalarFunc::Udf(Arc::new(IDelta::<false>::scalar_udf())),
"irate" => ScalarFunc::Udf(Arc::new(IDelta::<true>::scalar_udf())),
"resets" => ScalarFunc::Udf(Arc::new(Resets::scalar_udf())),
@@ -1449,50 +1452,9 @@ impl PromPlanner {
"present_over_time" => ScalarFunc::Udf(Arc::new(PresentOverTime::scalar_udf())),
"stddev_over_time" => ScalarFunc::Udf(Arc::new(StddevOverTime::scalar_udf())),
"stdvar_over_time" => ScalarFunc::Udf(Arc::new(StdvarOverTime::scalar_udf())),
"quantile_over_time" => {
let quantile_expr = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => quantile,
other => UnexpectedPlanExprSnafu {
desc: format!("expected f64 literal as quantile, but found {:?}", other),
}
.fail()?,
};
ScalarFunc::Udf(Arc::new(QuantileOverTime::scalar_udf(quantile_expr)))
}
"predict_linear" => {
let t_expr = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t as i64,
Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t,
other => UnexpectedPlanExprSnafu {
desc: format!("expected i64 literal as t, but found {:?}", other),
}
.fail()?,
};
ScalarFunc::Udf(Arc::new(PredictLinear::scalar_udf(t_expr)))
}
"holt_winters" => {
let sf_exp = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(sf)))) => sf,
other => UnexpectedPlanExprSnafu {
desc: format!(
"expected f64 literal as smoothing factor, but found {:?}",
other
),
}
.fail()?,
};
let tf_exp = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(tf)))) => tf,
other => UnexpectedPlanExprSnafu {
desc: format!(
"expected f64 literal as trend factor, but found {:?}",
other
),
}
.fail()?,
};
ScalarFunc::Udf(Arc::new(HoltWinters::scalar_udf(sf_exp, tf_exp)))
}
"quantile_over_time" => ScalarFunc::Udf(Arc::new(QuantileOverTime::scalar_udf())),
"predict_linear" => ScalarFunc::Udf(Arc::new(PredictLinear::scalar_udf())),
"holt_winters" => ScalarFunc::Udf(Arc::new(HoltWinters::scalar_udf())),
"time" => {
exprs.push(build_special_time_expr(
self.ctx.time_index_column.as_ref().unwrap(),
@@ -1627,17 +1589,10 @@ impl PromPlanner {
ScalarFunc::GeneratedExpr
}
"round" => {
let nearest = match other_input_exprs.pop_front() {
Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t,
Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t as f64,
None => 0.0,
other => UnexpectedPlanExprSnafu {
desc: format!("expected f64 literal as t, but found {:?}", other),
}
.fail()?,
};
ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf(nearest)))
if other_input_exprs.is_empty() {
other_input_exprs.push_front(DfExpr::Literal(ScalarValue::Float64(Some(0.0))));
}
ScalarFunc::DataFusionUdf(Arc::new(Round::scalar_udf()))
}
_ => {
@@ -1695,7 +1650,7 @@ impl PromPlanner {
let _ = other_input_exprs.remove(field_column_pos + 1);
let _ = other_input_exprs.remove(field_column_pos);
}
ScalarFunc::ExtrapolateUdf(func) => {
ScalarFunc::ExtrapolateUdf(func, range_length) => {
let ts_range_expr = DfExpr::Column(Column::from_name(
RangeManipulate::build_timestamp_range_name(
self.ctx.time_index_column.as_ref().unwrap(),
@@ -1705,11 +1660,13 @@ impl PromPlanner {
other_input_exprs.insert(field_column_pos + 1, col_expr);
other_input_exprs
.insert(field_column_pos + 2, self.create_time_index_column_expr()?);
other_input_exprs.push_back(lit(range_length));
let fn_expr = DfExpr::ScalarFunction(ScalarFunction {
func,
args: other_input_exprs.clone().into(),
});
exprs.push(fn_expr);
let _ = other_input_exprs.pop_back();
let _ = other_input_exprs.remove(field_column_pos + 2);
let _ = other_input_exprs.remove(field_column_pos + 1);
let _ = other_input_exprs.remove(field_column_pos);
@@ -1972,11 +1929,13 @@ impl PromPlanner {
param: &Option<Box<PromExpr>>,
input_plan: &LogicalPlan,
) -> Result<(Vec<DfExpr>, Vec<DfExpr>)> {
let mut non_col_args = Vec::new();
let aggr = match op.id() {
token::T_SUM => sum_udaf(),
token::T_QUANTILE => {
let q = Self::get_param_value_as_f64(op, param)?;
quantile_udaf(q)
non_col_args.push(lit(q));
quantile_udaf()
}
token::T_AVG => avg_udaf(),
token::T_COUNT_VALUES | token::T_COUNT => count_udaf(),
@@ -1998,16 +1957,12 @@ impl PromPlanner {
.field_columns
.iter()
.map(|col| {
Ok(DfExpr::AggregateFunction(AggregateFunction {
func: aggr.clone(),
args: vec![DfExpr::Column(Column::from_name(col))],
distinct: false,
filter: None,
order_by: None,
null_treatment: None,
}))
non_col_args.push(DfExpr::Column(Column::from_name(col)));
let expr = aggr.call(non_col_args.clone());
non_col_args.pop();
expr
})
.collect::<Result<Vec<_>>>()?;
.collect::<Vec<_>>();
// if the aggregator is `count_values`, it must be grouped by current fields.
let prev_field_exprs = if op.id() == token::T_COUNT_VALUES {
@@ -2941,7 +2896,8 @@ enum ScalarFunc {
Udf(Arc<ScalarUdfDef>),
// todo(ruihang): maybe merge with Udf later
/// UDF that requires extra information, such as the range length, to be evaluated.
ExtrapolateUdf(Arc<ScalarUdfDef>),
/// The second argument is range length.
ExtrapolateUdf(Arc<ScalarUdfDef>, i64),
/// Func that doesn't require input, like `time()`.
GeneratedExpr,
}
@@ -3595,8 +3551,8 @@ mod test {
async fn increase_aggr() {
let query = "increase(some_metric[5m])";
let expected = String::from(
"Filter: prom_increase(timestamp_range,field_0,timestamp) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp):Float64;N, tag_0:Utf8]\
\n Projection: some_metric.timestamp, prom_increase(timestamp_range, field_0, some_metric.timestamp) AS prom_increase(timestamp_range,field_0,timestamp), some_metric.tag_0 [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp):Float64;N, tag_0:Utf8]\
"Filter: prom_increase(timestamp_range,field_0,timestamp,Int64(300000)) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp,Int64(300000)):Float64;N, tag_0:Utf8]\
\n Projection: some_metric.timestamp, prom_increase(timestamp_range, field_0, some_metric.timestamp, Int64(300000)) AS prom_increase(timestamp_range,field_0,timestamp,Int64(300000)), some_metric.tag_0 [timestamp:Timestamp(Millisecond, None), prom_increase(timestamp_range,field_0,timestamp,Int64(300000)):Float64;N, tag_0:Utf8]\
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[300000], time index=[timestamp], values=[\"field_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Dictionary(Int64, Float64);N, timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
\n PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [true] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n PromSeriesDivide: tags=[\"tag_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
@@ -4395,8 +4351,8 @@ mod test {
let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state())
.await
.unwrap();
let expected = "Sort: prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [greptime_timestamp:Timestamp(Millisecond, None), quantile(sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\
\n Aggregate: groupBy=[[prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[quantile(sum(prometheus_tsdb_head_series.greptime_value))]] [greptime_timestamp:Timestamp(Millisecond, None), quantile(sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\
let expected = "Sort: prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [greptime_timestamp:Timestamp(Millisecond, None), quantile(Float64(0.3),sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\
\n Aggregate: groupBy=[[prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[quantile(Float64(0.3), sum(prometheus_tsdb_head_series.greptime_value))]] [greptime_timestamp:Timestamp(Millisecond, None), quantile(Float64(0.3),sum(prometheus_tsdb_head_series.greptime_value)):Float64;N]\
\n Sort: prometheus_tsdb_head_series.ip ASC NULLS LAST, prometheus_tsdb_head_series.greptime_timestamp ASC NULLS LAST [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.greptime_value):Float64;N]\
\n Aggregate: groupBy=[[prometheus_tsdb_head_series.ip, prometheus_tsdb_head_series.greptime_timestamp]], aggr=[[sum(prometheus_tsdb_head_series.greptime_value)]] [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), sum(prometheus_tsdb_head_series.greptime_value):Float64;N]\
\n PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[greptime_timestamp] [ip:Utf8, greptime_timestamp:Timestamp(Millisecond, None), greptime_value:Float64;N]\

View File

@@ -29,6 +29,10 @@ use datafusion::execution::{FunctionRegistry, SessionStateBuilder};
use datafusion::logical_expr::LogicalPlan;
use datafusion_expr::UserDefinedLogicalNode;
use greptime_proto::substrait_extension::MergeScan as PbMergeScan;
use promql::functions::{
AbsentOverTime, AvgOverTime, Changes, CountOverTime, Deriv, IDelta, LastOverTime, MaxOverTime,
MinOverTime, PresentOverTime, Resets, StddevOverTime, StdvarOverTime, SumOverTime,
};
use prost::Message;
use session::context::QueryContextRef;
use snafu::ResultExt;
@@ -132,6 +136,24 @@ impl SubstraitPlanDecoder for DefaultPlanDecoder {
let _ = session_state.register_udaf(Arc::new(HllState::state_udf_impl()));
let _ = session_state.register_udaf(Arc::new(HllState::merge_udf_impl()));
let _ = session_state.register_udaf(Arc::new(GeoPathAccumulator::udf_impl()));
// TODO(ruihang): add increase, rate, delta
let _ = session_state.register_udf(Arc::new(IDelta::<false>::scalar_udf()));
let _ = session_state.register_udf(Arc::new(IDelta::<true>::scalar_udf()));
let _ = session_state.register_udf(Arc::new(Resets::scalar_udf()));
let _ = session_state.register_udf(Arc::new(Changes::scalar_udf()));
let _ = session_state.register_udf(Arc::new(Deriv::scalar_udf()));
let _ = session_state.register_udf(Arc::new(AvgOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(MinOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(MaxOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(SumOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(CountOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(LastOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(AbsentOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(PresentOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(StddevOverTime::scalar_udf()));
let _ = session_state.register_udf(Arc::new(StdvarOverTime::scalar_udf()));
// TODO(ruihang): add quantile_over_time, predict_linear, holt_winters, round
}
let logical_plan = DFLogicalSubstraitConvertor
.decode(message, session_state)

View File

@@ -31,6 +31,7 @@ use datafusion::error::Result as DfResult;
use datafusion::execution::context::{QueryPlanner, SessionConfig, SessionContext, SessionState};
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_optimizer::enforce_sorting::EnforceSorting;
use datafusion::physical_optimizer::optimizer::PhysicalOptimizer;
use datafusion::physical_optimizer::sanity_checker::SanityCheckPlan;
use datafusion::physical_optimizer::PhysicalOptimizerRule;
@@ -142,6 +143,9 @@ impl QueryEngineState {
physical_optimizer
.rules
.insert(1, Arc::new(PassDistribution));
physical_optimizer
.rules
.insert(2, Arc::new(EnforceSorting {}));
// Add rule for windowed sort
physical_optimizer
.rules

View File

@@ -117,7 +117,7 @@ where
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
match self.inner.poll_ready(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(r) => Poll::Ready(r.map_err(Into::into)),
Poll::Ready(r) => Poll::Ready(r),
}
}

View File

@@ -17,7 +17,6 @@
#![feature(exclusive_wrapper)]
#![feature(let_chains)]
#![feature(if_let_guard)]
#![feature(trait_upcasting)]
use datafusion_expr::LogicalPlan;
use datatypes::schema::Schema;

View File

@@ -55,7 +55,7 @@ pub fn transform_statements(stmts: &mut Vec<Statement>) -> Result<()> {
}
}
visit_expressions_mut(stmts, |expr| {
let _ = visit_expressions_mut(stmts, |expr| {
for rule in RULES.iter() {
rule.visit_expr(expr)?;
}

View File

@@ -290,7 +290,7 @@ impl RegionMetadata {
pub fn project(&self, projection: &[ColumnId]) -> Result<RegionMetadata> {
// check time index
ensure!(
projection.iter().any(|id| *id == self.time_index),
projection.contains(&self.time_index),
TimeIndexNotFoundSnafu
);

View File

@@ -14,6 +14,7 @@
#![feature(assert_matches)]
#![feature(try_blocks)]
#![feature(let_chains)]
pub mod dist_table;
pub mod error;

View File

@@ -95,7 +95,7 @@ impl TableProvider for DfTableProviderAdapter {
filters: &[Expr],
limit: Option<usize>,
) -> DfResult<Arc<dyn ExecutionPlan>> {
let filters: Vec<Expr> = filters.iter().map(Clone::clone).map(Into::into).collect();
let filters: Vec<Expr> = filters.iter().map(Clone::clone).collect();
let request = {
let mut request = self.scan_req.lock().unwrap();
request.filters = filters;

View File

@@ -82,11 +82,17 @@ impl RegionScanExec {
if scanner.properties().is_logical_region() {
pk_names.sort_unstable();
}
let mut pk_columns: Vec<PhysicalSortExpr> = pk_names
.into_iter()
let pk_columns = pk_names
.iter()
.filter_map(
|col| Some(Arc::new(Column::new_with_schema(col, &arrow_schema).ok()?) as _),
)
.collect::<Vec<_>>();
let mut pk_sort_columns: Vec<PhysicalSortExpr> = pk_names
.iter()
.filter_map(|col| {
Some(PhysicalSortExpr::new(
Arc::new(Column::new_with_schema(&col, &arrow_schema).ok()?) as _,
Arc::new(Column::new_with_schema(col, &arrow_schema).ok()?) as _,
SortOptions {
descending: false,
nulls_first: true,
@@ -113,28 +119,37 @@ impl RegionScanExec {
let eq_props = match request.distribution {
Some(TimeSeriesDistribution::PerSeries) => {
if let Some(ts) = ts_col {
pk_columns.push(ts);
pk_sort_columns.push(ts);
}
EquivalenceProperties::new_with_orderings(
arrow_schema.clone(),
&[LexOrdering::new(pk_columns)],
&[LexOrdering::new(pk_sort_columns)],
)
}
Some(TimeSeriesDistribution::TimeWindowed) => {
if let Some(ts_col) = ts_col {
pk_columns.insert(0, ts_col);
pk_sort_columns.insert(0, ts_col);
}
EquivalenceProperties::new_with_orderings(
arrow_schema.clone(),
&[LexOrdering::new(pk_columns)],
&[LexOrdering::new(pk_sort_columns)],
)
}
None => EquivalenceProperties::new(arrow_schema.clone()),
};
let partitioning = match request.distribution {
Some(TimeSeriesDistribution::PerSeries) => {
Partitioning::Hash(pk_columns.clone(), num_output_partition)
}
Some(TimeSeriesDistribution::TimeWindowed) | None => {
Partitioning::UnknownPartitioning(num_output_partition)
}
};
let properties = PlanProperties::new(
eq_props,
Partitioning::UnknownPartitioning(num_output_partition),
partitioning,
EmissionType::Incremental,
Boundedness::Bounded,
);
@@ -188,9 +203,14 @@ impl RegionScanExec {
warn!("Setting partition ranges more than once for RegionScanExec");
}
let num_partitions = partitions.len();
let mut properties = self.properties.clone();
properties.partitioning = Partitioning::UnknownPartitioning(num_partitions);
let new_partitioning = match properties.partitioning {
Partitioning::Hash(ref columns, _) => {
Partitioning::Hash(columns.clone(), target_partitions)
}
_ => Partitioning::UnknownPartitioning(target_partitions),
};
properties.partitioning = new_partitioning;
{
let mut scanner = self.scanner.lock().unwrap();

View File

@@ -85,11 +85,7 @@ pub struct UnstableTestVariables {
pub fn load_unstable_test_env_variables() -> UnstableTestVariables {
let _ = dotenv::dotenv();
let binary_path = env::var(GT_FUZZ_BINARY_PATH).expect("GT_FUZZ_BINARY_PATH not found");
let root_dir = if let Ok(root) = env::var(GT_FUZZ_INSTANCE_ROOT_DIR) {
Some(root)
} else {
None
};
let root_dir = env::var(GT_FUZZ_INSTANCE_ROOT_DIR).ok();
UnstableTestVariables {
binary_path,

View File

@@ -157,7 +157,7 @@ async fn execute_unstable_create_table(
}
Err(err) => {
// FIXME(weny): support to retry it later.
if matches!(err, sqlx::Error::PoolTimedOut { .. }) {
if matches!(err, sqlx::Error::PoolTimedOut) {
warn!("ignore pool timeout, sql: {sql}");
continue;
}

View File

@@ -489,10 +489,7 @@ async fn create_datanode_client(datanode: &Datanode) -> (String, Client) {
if let Some(client) = client {
Ok(TokioIo::new(client))
} else {
Err(std::io::Error::new(
std::io::ErrorKind::Other,
"Client already taken",
))
Err(std::io::Error::other("Client already taken"))
}
}
}),

View File

@@ -0,0 +1,266 @@
CREATE TABLE access_log (
"url" STRING,
user_id BIGINT,
ts TIMESTAMP TIME INDEX,
PRIMARY KEY ("url", user_id)
);
Affected Rows: 0
CREATE TABLE access_log_10s (
"url" STRING,
time_window timestamp time INDEX,
state BINARY,
PRIMARY KEY ("url")
);
Affected Rows: 0
CREATE FLOW calc_access_log_10s SINK TO access_log_10s
AS
SELECT
"url",
date_bin('10s'::INTERVAL, ts) AS time_window,
hll(user_id) AS state
FROM
access_log
GROUP BY
"url",
time_window;
Affected Rows: 0
-- insert 5 rows of data
INSERT INTO access_log VALUES
("/dashboard", 1, "2025-03-04 00:00:00"),
("/dashboard", 1, "2025-03-04 00:00:01"),
("/dashboard", 2, "2025-03-04 00:00:05"),
("/not_found", 3, "2025-03-04 00:00:11"),
("/dashboard", 4, "2025-03-04 00:00:15");
Affected Rows: 5
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_access_log_10s');
+-----------------------------------------+
| ADMIN FLUSH_FLOW('calc_access_log_10s') |
+-----------------------------------------+
| FLOW_FLUSHED |
+-----------------------------------------+
-- query should return 3 rows
SELECT "url", time_window FROM access_log_10s
ORDER BY
time_window;
+------------+---------------------+
| url | time_window |
+------------+---------------------+
| /dashboard | 2025-03-04T00:00:00 |
| /dashboard | 2025-03-04T00:00:10 |
| /not_found | 2025-03-04T00:00:10 |
+------------+---------------------+
-- use hll_count to query the approximate data in access_log_10s
SELECT "url", time_window, hll_count(state) FROM access_log_10s
ORDER BY
time_window;
+------------+---------------------+---------------------------------+
| url | time_window | hll_count(access_log_10s.state) |
+------------+---------------------+---------------------------------+
| /dashboard | 2025-03-04T00:00:00 | 2 |
| /dashboard | 2025-03-04T00:00:10 | 1 |
| /not_found | 2025-03-04T00:00:10 | 1 |
+------------+---------------------+---------------------------------+
-- further, we can aggregate 10 seconds of data to every minute, by using hll_merge to merge 10 seconds of hyperloglog state
SELECT
"url",
date_bin('1 minute'::INTERVAL, time_window) AS time_window_1m,
hll_count(hll_merge(state)) as uv_per_min
FROM
access_log_10s
GROUP BY
"url",
time_window_1m
ORDER BY
time_window_1m;
+------------+---------------------+------------+
| url | time_window_1m | uv_per_min |
+------------+---------------------+------------+
| /not_found | 2025-03-04T00:00:00 | 1 |
| /dashboard | 2025-03-04T00:00:00 | 3 |
+------------+---------------------+------------+
DROP FLOW calc_access_log_10s;
Affected Rows: 0
DROP TABLE access_log_10s;
Affected Rows: 0
DROP TABLE access_log;
Affected Rows: 0
CREATE TABLE percentile_base (
"id" INT PRIMARY KEY,
"value" DOUBLE,
ts timestamp(0) time index
);
Affected Rows: 0
CREATE TABLE percentile_5s (
"percentile_state" BINARY,
time_window timestamp(0) time index
);
Affected Rows: 0
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
uddsketch_state(128, 0.01, "value") AS "value",
date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
percentile_base
WHERE
"value" > 0 AND "value" < 70
GROUP BY
time_window;
Affected Rows: 0
INSERT INTO percentile_base ("id", "value", ts) VALUES
(1, 10.0, 1),
(2, 20.0, 2),
(3, 30.0, 3),
(4, 40.0, 4),
(5, 50.0, 5),
(6, 60.0, 6),
(7, 70.0, 7),
(8, 80.0, 8),
(9, 90.0, 9),
(10, 100.0, 10);
Affected Rows: 10
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');
+----------------------------------------+
| ADMIN FLUSH_FLOW('calc_percentile_5s') |
+----------------------------------------+
| FLOW_FLUSHED |
+----------------------------------------+
SELECT
time_window,
uddsketch_calc(0.99, `percentile_state`) AS p99
FROM
percentile_5s
ORDER BY
time_window;
+---------------------+--------------------+
| time_window | p99 |
+---------------------+--------------------+
| 1970-01-01T00:00:00 | 40.04777053326359 |
| 1970-01-01T00:00:05 | 59.745049810145126 |
+---------------------+--------------------+
DROP FLOW calc_percentile_5s;
Affected Rows: 0
DROP TABLE percentile_5s;
Affected Rows: 0
DROP TABLE percentile_base;
Affected Rows: 0
CREATE TABLE percentile_base (
"id" INT PRIMARY KEY,
"value" DOUBLE,
ts timestamp(0) time index
);
Affected Rows: 0
CREATE TABLE percentile_5s (
"percentile_state" BINARY,
time_window timestamp(0) time index
);
Affected Rows: 0
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
uddsketch_state(128, 0.01, CASE WHEN "value" > 0 AND "value" < 70 THEN "value" ELSE NULL END) AS "value",
date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
percentile_base
GROUP BY
time_window;
Affected Rows: 0
INSERT INTO percentile_base ("id", "value", ts) VALUES
(1, 10.0, 1),
(2, 20.0, 2),
(3, 30.0, 3),
(4, 40.0, 4),
(5, 50.0, 5),
(6, 60.0, 6),
(7, 70.0, 7),
(8, 80.0, 8),
(9, 90.0, 9),
(10, 100.0, 10);
Affected Rows: 10
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');
+----------------------------------------+
| ADMIN FLUSH_FLOW('calc_percentile_5s') |
+----------------------------------------+
| FLOW_FLUSHED |
+----------------------------------------+
SELECT
time_window,
uddsketch_calc(0.99, percentile_state) AS p99
FROM
percentile_5s
ORDER BY
time_window;
+---------------------+--------------------+
| time_window | p99 |
+---------------------+--------------------+
| 1970-01-01T00:00:00 | 40.04777053326359 |
| 1970-01-01T00:00:05 | 59.745049810145126 |
| 1970-01-01T00:00:10 | |
+---------------------+--------------------+
DROP FLOW calc_percentile_5s;
Affected Rows: 0
DROP TABLE percentile_5s;
Affected Rows: 0
DROP TABLE percentile_base;
Affected Rows: 0

View File

@@ -0,0 +1,161 @@
CREATE TABLE access_log (
"url" STRING,
user_id BIGINT,
ts TIMESTAMP TIME INDEX,
PRIMARY KEY ("url", user_id)
);
CREATE TABLE access_log_10s (
"url" STRING,
time_window timestamp time INDEX,
state BINARY,
PRIMARY KEY ("url")
);
CREATE FLOW calc_access_log_10s SINK TO access_log_10s
AS
SELECT
"url",
date_bin('10s'::INTERVAL, ts) AS time_window,
hll(user_id) AS state
FROM
access_log
GROUP BY
"url",
time_window;
-- insert 5 rows of data
INSERT INTO access_log VALUES
("/dashboard", 1, "2025-03-04 00:00:00"),
("/dashboard", 1, "2025-03-04 00:00:01"),
("/dashboard", 2, "2025-03-04 00:00:05"),
("/not_found", 3, "2025-03-04 00:00:11"),
("/dashboard", 4, "2025-03-04 00:00:15");
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_access_log_10s');
-- query should return 3 rows
SELECT "url", time_window FROM access_log_10s
ORDER BY
time_window;
-- use hll_count to query the approximate data in access_log_10s
SELECT "url", time_window, hll_count(state) FROM access_log_10s
ORDER BY
time_window;
-- further, we can aggregate 10 seconds of data to every minute, by using hll_merge to merge 10 seconds of hyperloglog state
SELECT
"url",
date_bin('1 minute'::INTERVAL, time_window) AS time_window_1m,
hll_count(hll_merge(state)) as uv_per_min
FROM
access_log_10s
GROUP BY
"url",
time_window_1m
ORDER BY
time_window_1m;
DROP FLOW calc_access_log_10s;
DROP TABLE access_log_10s;
DROP TABLE access_log;
CREATE TABLE percentile_base (
"id" INT PRIMARY KEY,
"value" DOUBLE,
ts timestamp(0) time index
);
CREATE TABLE percentile_5s (
"percentile_state" BINARY,
time_window timestamp(0) time index
);
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
uddsketch_state(128, 0.01, "value") AS "value",
date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
percentile_base
WHERE
"value" > 0 AND "value" < 70
GROUP BY
time_window;
INSERT INTO percentile_base ("id", "value", ts) VALUES
(1, 10.0, 1),
(2, 20.0, 2),
(3, 30.0, 3),
(4, 40.0, 4),
(5, 50.0, 5),
(6, 60.0, 6),
(7, 70.0, 7),
(8, 80.0, 8),
(9, 90.0, 9),
(10, 100.0, 10);
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');
SELECT
time_window,
uddsketch_calc(0.99, `percentile_state`) AS p99
FROM
percentile_5s
ORDER BY
time_window;
DROP FLOW calc_percentile_5s;
DROP TABLE percentile_5s;
DROP TABLE percentile_base;
CREATE TABLE percentile_base (
"id" INT PRIMARY KEY,
"value" DOUBLE,
ts timestamp(0) time index
);
CREATE TABLE percentile_5s (
"percentile_state" BINARY,
time_window timestamp(0) time index
);
CREATE FLOW calc_percentile_5s SINK TO percentile_5s
AS
SELECT
uddsketch_state(128, 0.01, CASE WHEN "value" > 0 AND "value" < 70 THEN "value" ELSE NULL END) AS "value",
date_bin('5 seconds'::INTERVAL, ts) AS time_window
FROM
percentile_base
GROUP BY
time_window;
INSERT INTO percentile_base ("id", "value", ts) VALUES
(1, 10.0, 1),
(2, 20.0, 2),
(3, 30.0, 3),
(4, 40.0, 4),
(5, 50.0, 5),
(6, 60.0, 6),
(7, 70.0, 7),
(8, 80.0, 8),
(9, 90.0, 9),
(10, 100.0, 10);
-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED |
ADMIN FLUSH_FLOW('calc_percentile_5s');
SELECT
time_window,
uddsketch_calc(0.99, percentile_state) AS p99
FROM
percentile_5s
ORDER BY
time_window;
DROP FLOW calc_percentile_5s;
DROP TABLE percentile_5s;
DROP TABLE percentile_base;

View File

@@ -30,40 +30,40 @@ Affected Rows: 16
TQL EVAL (0, 15, '5s') quantile(0.5, test);
+---------------------+--------------------+
| ts | quantile(test.val) |
+---------------------+--------------------+
| 1970-01-01T00:00:00 | 2.5 |
| 1970-01-01T00:00:05 | 6.5 |
| 1970-01-01T00:00:10 | 10.5 |
| 1970-01-01T00:00:15 | 14.5 |
+---------------------+--------------------+
+---------------------+---------------------------------+
| ts | quantile(Float64(0.5),test.val) |
+---------------------+---------------------------------+
| 1970-01-01T00:00:00 | 2.5 |
| 1970-01-01T00:00:05 | 6.5 |
| 1970-01-01T00:00:10 | 10.5 |
| 1970-01-01T00:00:15 | 14.5 |
+---------------------+---------------------------------+
TQL EVAL (0, 15, '5s') quantile(0.5, test) by (idc);
+------+---------------------+--------------------+
| idc | ts | quantile(test.val) |
+------+---------------------+--------------------+
| idc1 | 1970-01-01T00:00:00 | 1.5 |
| idc1 | 1970-01-01T00:00:05 | 5.5 |
| idc1 | 1970-01-01T00:00:10 | 9.5 |
| idc1 | 1970-01-01T00:00:15 | 13.5 |
| idc2 | 1970-01-01T00:00:00 | 3.5 |
| idc2 | 1970-01-01T00:00:05 | 7.5 |
| idc2 | 1970-01-01T00:00:10 | 11.5 |
| idc2 | 1970-01-01T00:00:15 | 15.5 |
+------+---------------------+--------------------+
+------+---------------------+---------------------------------+
| idc | ts | quantile(Float64(0.5),test.val) |
+------+---------------------+---------------------------------+
| idc1 | 1970-01-01T00:00:00 | 1.5 |
| idc1 | 1970-01-01T00:00:05 | 5.5 |
| idc1 | 1970-01-01T00:00:10 | 9.5 |
| idc1 | 1970-01-01T00:00:15 | 13.5 |
| idc2 | 1970-01-01T00:00:00 | 3.5 |
| idc2 | 1970-01-01T00:00:05 | 7.5 |
| idc2 | 1970-01-01T00:00:10 | 11.5 |
| idc2 | 1970-01-01T00:00:15 | 15.5 |
+------+---------------------+---------------------------------+
TQL EVAL (0, 15, '5s') quantile(0.5, sum(test) by (idc));
+---------------------+-------------------------+
| ts | quantile(sum(test.val)) |
+---------------------+-------------------------+
| 1970-01-01T00:00:00 | 5.0 |
| 1970-01-01T00:00:05 | 13.0 |
| 1970-01-01T00:00:10 | 21.0 |
| 1970-01-01T00:00:15 | 29.0 |
+---------------------+-------------------------+
+---------------------+--------------------------------------+
| ts | quantile(Float64(0.5),sum(test.val)) |
+---------------------+--------------------------------------+
| 1970-01-01T00:00:00 | 5.0 |
| 1970-01-01T00:00:05 | 13.0 |
| 1970-01-01T00:00:10 | 21.0 |
| 1970-01-01T00:00:15 | 29.0 |
+---------------------+--------------------------------------+
DROP TABLE test;

View File

@@ -18,62 +18,62 @@ Affected Rows: 4
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.01);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.45 | read |
| 1970-01-01T00:00:03 | 234.57 | write |
| 1970-01-01T00:00:04 | 345.68 | read |
| 1970-01-01T00:00:04 | 456.79 | write |
+---------------------+----------------------------+-------+
+---------------------+------------------------------------------+-------+
| ts | prom_round(greptime_value,Float64(0.01)) | job |
+---------------------+------------------------------------------+-------+
| 1970-01-01T00:00:03 | 123.45 | read |
| 1970-01-01T00:00:03 | 234.57 | write |
| 1970-01-01T00:00:04 | 345.68 | read |
| 1970-01-01T00:00:04 | 456.79 | write |
+---------------------+------------------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 0.1);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.5 | read |
| 1970-01-01T00:00:03 | 234.60000000000002 | write |
| 1970-01-01T00:00:04 | 345.70000000000005 | read |
| 1970-01-01T00:00:04 | 456.8 | write |
+---------------------+----------------------------+-------+
+---------------------+-----------------------------------------+-------+
| ts | prom_round(greptime_value,Float64(0.1)) | job |
+---------------------+-----------------------------------------+-------+
| 1970-01-01T00:00:03 | 123.5 | read |
| 1970-01-01T00:00:03 | 234.60000000000002 | write |
| 1970-01-01T00:00:04 | 345.70000000000005 | read |
| 1970-01-01T00:00:04 | 456.8 | write |
+---------------------+-----------------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 1.0);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+----------------------------+-------+
+---------------------+---------------------------------------+-------+
| ts | prom_round(greptime_value,Float64(1)) | job |
+---------------------+---------------------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+---------------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+----------------------------+-------+
+---------------------+---------------------------------------+-------+
| ts | prom_round(greptime_value,Float64(0)) | job |
+---------------------+---------------------------------------+-------+
| 1970-01-01T00:00:03 | 123.0 | read |
| 1970-01-01T00:00:03 | 235.0 | write |
| 1970-01-01T00:00:04 | 346.0 | read |
| 1970-01-01T00:00:04 | 457.0 | write |
+---------------------+---------------------------------------+-------+
-- SQLNESS SORT_RESULT 3 1
tql eval (3, 4, '1s') round(cache_hit, 10.0);
+---------------------+----------------------------+-------+
| ts | prom_round(greptime_value) | job |
+---------------------+----------------------------+-------+
| 1970-01-01T00:00:03 | 120.0 | read |
| 1970-01-01T00:00:03 | 230.0 | write |
| 1970-01-01T00:00:04 | 350.0 | read |
| 1970-01-01T00:00:04 | 460.0 | write |
+---------------------+----------------------------+-------+
+---------------------+----------------------------------------+-------+
| ts | prom_round(greptime_value,Float64(10)) | job |
+---------------------+----------------------------------------+-------+
| 1970-01-01T00:00:03 | 120.0 | read |
| 1970-01-01T00:00:03 | 230.0 | write |
| 1970-01-01T00:00:04 | 350.0 | read |
| 1970-01-01T00:00:04 | 460.0 | write |
+---------------------+----------------------------------------+-------+
drop table cache_hit;

View File

@@ -130,8 +130,7 @@ tql eval (3000, 3000, '1s') label_replace(histogram_quantile(0.8, histogram_buck
-- quantile with rate is covered in other cases
tql eval (3000, 3000, '1s') histogram_quantile(0.2, rate(histogram_bucket[5m]));
++
++
Error: 3001(EngineExecuteQuery), Unsupported arrow data type, type: Dictionary(Int64, Float64)
drop table histogram_bucket;
@@ -228,27 +227,27 @@ tql eval (420, 420, '1s') histogram_quantile(0.833, histogram2_bucket);
tql eval (2820, 2820, '1s') histogram_quantile(0.166, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 0.996 |
+---------------------+----------------------------+
+---------------------+------------------------------------------+
| ts | prom_rate(ts_range,val,ts,Int64(900000)) |
+---------------------+------------------------------------------+
| 1970-01-01T00:47:00 | 0.996 |
+---------------------+------------------------------------------+
tql eval (2820, 2820, '1s') histogram_quantile(0.5, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 3.0 |
+---------------------+----------------------------+
+---------------------+------------------------------------------+
| ts | prom_rate(ts_range,val,ts,Int64(900000)) |
+---------------------+------------------------------------------+
| 1970-01-01T00:47:00 | 3.0 |
+---------------------+------------------------------------------+
tql eval (2820, 2820, '1s') histogram_quantile(0.833, rate(histogram2_bucket[15m]));
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:47:00 | 4.998 |
+---------------------+----------------------------+
+---------------------+------------------------------------------+
| ts | prom_rate(ts_range,val,ts,Int64(900000)) |
+---------------------+------------------------------------------+
| 1970-01-01T00:47:00 | 4.998 |
+---------------------+------------------------------------------+
drop table histogram2_bucket;
@@ -284,12 +283,12 @@ Affected Rows: 12
tql eval (3000, 3005, '3s') histogram_quantile(0.5, sum by(le, s) (rate(histogram3_bucket[5m])));
+---+---------------------+---------------------------------+
| s | ts | sum(prom_rate(ts_range,val,ts)) |
+---+---------------------+---------------------------------+
| a | 1970-01-01T00:50:00 | 0.55 |
| a | 1970-01-01T00:50:03 | 0.5500000000000002 |
+---+---------------------+---------------------------------+
+---+---------------------+-----------------------------------------------+
| s | ts | sum(prom_rate(ts_range,val,ts,Int64(300000))) |
+---+---------------------+-----------------------------------------------+
| a | 1970-01-01T00:50:00 | 0.55 |
| a | 1970-01-01T00:50:03 | 0.5500000000000002 |
+---+---------------------+-----------------------------------------------+
drop table histogram3_bucket;

View File

@@ -45,19 +45,19 @@ tql eval (359, 359, '1s') sum_over_time(metric_total[60s:10s]);
tql eval (10, 10, '1s') rate(metric_total[20s:10s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:10 | 0.1 |
+---------------------+----------------------------+
+---------------------+-----------------------------------------+
| ts | prom_rate(ts_range,val,ts,Int64(20000)) |
+---------------------+-----------------------------------------+
| 1970-01-01T00:00:10 | 0.1 |
+---------------------+-----------------------------------------+
tql eval (20, 20, '1s') rate(metric_total[20s:5s]);
+---------------------+----------------------------+
| ts | prom_rate(ts_range,val,ts) |
+---------------------+----------------------------+
| 1970-01-01T00:00:20 | 0.06666666666666667 |
+---------------------+----------------------------+
+---------------------+-----------------------------------------+
| ts | prom_rate(ts_range,val,ts,Int64(20000)) |
+---------------------+-----------------------------------------+
| 1970-01-01T00:00:20 | 0.06666666666666667 |
+---------------------+-----------------------------------------+
drop table metric_total;

View File

@@ -54,7 +54,11 @@ Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: Can't use the
-- 2.2 no align param
SELECT min(val) RANGE '5s' FROM host;
Error: 3000(PlanQuery), Error during planning: Missing argument in range select query
Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: ALIGN argument cannot be omitted in the range select query
SELECT min(val) RANGE '5s' FILL PREV FROM host;
Error: 2000(InvalidSyntax), Invalid SQL syntax: sql parser error: ALIGN argument cannot be omitted in the range select query
-- 2.3 type mismatch
SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s';

View File

@@ -40,6 +40,8 @@ SELECT 1 RANGE '10s' FILL NULL FROM host ALIGN '1h' FILL NULL;
SELECT min(val) RANGE '5s' FROM host;
SELECT min(val) RANGE '5s' FILL PREV FROM host;
-- 2.3 type mismatch
SELECT covar(ceil(val), floor(val)) RANGE '20s' FROM host ALIGN '10s';

View File

@@ -17,11 +17,14 @@ tql analyze (1, 3, '1s') t1{ a = "a" };
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_MergeScanExec: REDACTED
| 0_| 0_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED
|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
|_|_|_|
|_|_| Total rows: 3_|
+-+-+-+
@@ -37,11 +40,14 @@ tql analyze (1, 3, '1s') t1{ a =~ ".*" };
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_MergeScanExec: REDACTED
| 0_| 0_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED
|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
|_|_|_|
|_|_| Total rows: 6_|
+-+-+-+
@@ -57,11 +63,14 @@ tql analyze (1, 3, '1s') t1{ a =~ "a.*" };
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_MergeScanExec: REDACTED
| 0_| 0_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
| 1_| 0_|_PromInstantManipulateExec: range=[1000..3000], lookback=[300000], interval=[1000], time index=[b] REDACTED
|_|_|_PromSeriesDivideExec: tags=["a"] REDACTED
|_|_|_SortExec: expr=[a@0 ASC], preserve_partitioning=[true] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=Hash([a@0], 32), input_partitions=1 REDACTED
|_|_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges), distribution=PerSeries REDACTED
|_|_|_|
|_|_| Total rows: 3_|
+-+-+-+

Some files were not shown because too many files have changed in this diff Show More