Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-27 08:29:59 +00:00

Compare commits (32 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 5fc0c5706c | |
| | 4d768b2c31 | |
| | b62f219810 | |
| | 5d330fad17 | |
| | dfdfae1a7b | |
| | 822f0caf4b | |
| | 09f3d72d2d | |
| | ca0c1282ed | |
| | b719c020ba | |
| | 717c1d1807 | |
| | 291f3c89fe | |
| | 602cc38056 | |
| | 46b3593021 | |
| | ff402fd6f6 | |
| | b83e6e2b18 | |
| | cb74337dbe | |
| | 32bffbb668 | |
| | 941906dc74 | |
| | cbf251d0f0 | |
| | 1519379262 | |
| | 4bfe02ec7f | |
| | ecacf1333e | |
| | 92fa33c250 | |
| | 8b2d1a3753 | |
| | 13401c94e0 | |
| | fd637dae47 | |
| | 69fac19770 | |
| | 6435b97314 | |
| | 726e3909fe | |
| | 00d759e828 | |
| | 0042ea6462 | |
| | d06450715f | |
@@ -12,3 +12,6 @@ fetch = true
checkout = true
list_files = true
internal_use_git2 = false
+
+[env]
+CARGO_WORKSPACE_DIR = { value = "", relative = true }
Cargo.lock (generated; 168 changed lines)
The generated Cargo.lock diff reduces to the following changes:

- Workspace crates bumped from version 0.15.0 to 0.15.4: api, auth, cache, catalog, cli, client, cmd, common-base, common-catalog, common-config, common-datasource, common-decimal, common-error, common-frontend, common-function, common-greptimedb-telemetry, common-grpc, common-grpc-expr, common-macro, common-mem-prof, common-meta, common-options, common-plugins, common-pprof, common-procedure, common-procedure-test, common-query, common-recordbatch, common-runtime, common-session, common-telemetry, common-test-util, common-time, common-version, common-wal, common-workload, datanode, datatypes, file-engine, flow, frontend, index, log-query, log-store, meta-client, meta-srv, metric-engine, mito-codec, mito2, object-store, operator, partition, pipeline, plugins, promql, puffin, query, servers, session, sql, sqlness-runner, stat, store-api, substrait, table, tests-fuzz, tests-integration.
- Internal dependency references change accordingly from "substrait 0.15.0" to "substrait 0.15.4" in cli, client, cmd, datanode, flow, frontend, operator, query, and tests-integration.
- A new registry package is added:

  [[package]]
  name = "cargo-manifest"
  version = "0.19.1"
  source = "registry+https://github.com/rust-lang/crates.io-index"
  checksum = "a1d8af896b707212cd0e99c112a78c9497dd32994192a463ed2f7419d29bd8c6"
  dependencies = [
   "serde",
   "thiserror 2.0.12",
   "toml 0.8.19",
  ]

- The greptime-proto 0.1.0 entry switches its git source from rev 96c733f8472284d3c83a4c011dc6de9cf830c353 to rev a5d256ba4abb7393e0859ffbf7fca1e38f3433dc.
- common-telemetry gains a greptime-proto dependency, and common-version gains a cargo-manifest dependency.
- The toml 0.8.19 entry gains one dependency line (hunk @@ -13073,6 +13086,7 @@; the excerpt lists indexmap 2.9.0, serde, serde_spanned, and toml_datetime without marking which line is new).
@@ -71,7 +71,7 @@ members = [
resolver = "2"

[workspace.package]
-version = "0.15.0"
+version = "0.15.4"
edition = "2021"
license = "Apache-2.0"

@@ -134,7 +134,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
-greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96c733f8472284d3c83a4c011dc6de9cf830c353" }
+greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a5d256ba4abb7393e0859ffbf7fca1e38f3433dc" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -147,6 +147,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |

@@ -496,6 +497,7 @@
| `region_engine.mito.write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
+| `region_engine.mito.max_concurrent_scan_files` | Integer | `128` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
@@ -474,6 +474,9 @@ sst_write_buffer_size = "8MB"
## Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false

@@ -565,6 +565,9 @@ sst_write_buffer_size = "8MB"
## Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size = 32

+## Maximum number of SST files to scan concurrently.
+max_concurrent_scan_files = 128
+
## Whether to allow stale WAL entries read during replay.
allow_stale_entries = false
@@ -211,12 +211,18 @@ impl Database {
    retries += 1;
    warn!("Retrying {} times with error = {:?}", retries, err);
    continue;
} else {
    error!(
        err; "Failed to send request to grpc handle, retries = {}, not retryable error, aborting",
        retries
    );
    return Err(err.into());
}
}
(Err(err), false) => {
    error!(
-       "Failed to send request to grpc handle after {} retries, error = {:?}",
-       retries, err
+       err; "Failed to send request to grpc handle after {} retries",
+       retries,
    );
    return Err(err.into());
}
@@ -163,19 +163,70 @@ impl RegionRequester {
    let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
        "poll_flight_data_stream"
    ));
-   while let Some(flight_message) = flight_message_stream.next().await {
-       let flight_message = flight_message
-           .map_err(BoxedError::new)
-           .context(ExternalSnafu)?;
+   let mut buffered_message: Option<FlightMessage> = None;
+   let mut stream_ended = false;
+
+   while !stream_ended {
+       // get the next message from the buffered message or read from the flight message stream
+       let flight_message_item = if let Some(msg) = buffered_message.take() {
+           Some(Ok(msg))
+       } else {
+           flight_message_stream.next().await
+       };
+
+       let flight_message = match flight_message_item {
+           Some(Ok(message)) => message,
+           Some(Err(e)) => {
+               yield Err(BoxedError::new(e)).context(ExternalSnafu);
+               break;
+           }
+           None => break,
+       };

        match flight_message {
            FlightMessage::RecordBatch(record_batch) => {
-               yield RecordBatch::try_from_df_record_batch(
+               let result_to_yield = RecordBatch::try_from_df_record_batch(
                    schema_cloned.clone(),
                    record_batch,
-               )
+               );
+
+               // get the next message from the stream. normally it should be a metrics message.
+               if let Some(next_flight_message_result) = flight_message_stream.next().await
+               {
+                   match next_flight_message_result {
+                       Ok(FlightMessage::Metrics(s)) => {
+                           let m = serde_json::from_str(&s).ok().map(Arc::new);
+                           metrics_ref.swap(m);
+                       }
+                       Ok(FlightMessage::RecordBatch(rb)) => {
+                           // for some reason it's not a metrics message, so we need to buffer this record batch
+                           // and yield it in the next iteration.
+                           buffered_message = Some(FlightMessage::RecordBatch(rb));
+                       }
+                       Ok(_) => {
+                           yield IllegalFlightMessagesSnafu {
+                               reason: "A RecordBatch message can only be succeeded by a Metrics message or another RecordBatch message"
+                           }
+                           .fail()
+                           .map_err(BoxedError::new)
+                           .context(ExternalSnafu);
+                           break;
+                       }
+                       Err(e) => {
+                           yield Err(BoxedError::new(e)).context(ExternalSnafu);
+                           break;
+                       }
+                   }
+               } else {
+                   // the stream has ended
+                   stream_ended = true;
+               }
+
+               yield result_to_yield;
            }
            FlightMessage::Metrics(s) => {
                // just a branch in case of some metrics message comes after other things.
                let m = serde_json::from_str(&s).ok().map(Arc::new);
                metrics_ref.swap(m);
                break;
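The hunk above changes the Flight read loop so that, after a RecordBatch, the client peeks at the following message: a Metrics message is applied immediately, while another RecordBatch is stashed and replayed on the next iteration. Below is a minimal, self-contained sketch of that buffering pattern only; it uses a plain iterator and a stand-in enum, not the client's async stream or real FlightMessage types.

```rust
// Simplified stand-in for the client's FlightMessage; payloads are illustrative only.
enum Message {
    RecordBatch(&'static str),
    Metrics(&'static str),
}

fn drain(messages: Vec<Message>) -> Vec<String> {
    let mut out = Vec::new();
    let mut stream = messages.into_iter();
    // Holds a RecordBatch that arrived where a Metrics message was expected.
    let mut buffered: Option<Message> = None;

    while let Some(msg) = buffered.take().or_else(|| stream.next()) {
        match msg {
            Message::RecordBatch(batch) => {
                // Peek at the follower: metrics are consumed here, while another batch
                // is buffered and handled on the next loop iteration.
                match stream.next() {
                    Some(Message::Metrics(m)) => out.push(format!("metrics {m}")),
                    Some(other) => buffered = Some(other),
                    None => {}
                }
                out.push(format!("batch {batch}"));
            }
            Message::Metrics(m) => out.push(format!("metrics {m}")),
        }
    }
    out
}

fn main() {
    let msgs = vec![
        Message::RecordBatch("rb-0"),
        Message::RecordBatch("rb-1"),
        Message::Metrics("{\"elapsed_compute\":1}"),
    ];
    // rb-1 is buffered while peeking after rb-0, then yielded on the next iteration.
    println!("{:?}", drain(msgs));
}
```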
@@ -20,11 +20,11 @@ use cmd::error::{InitTlsProviderSnafu, Result};
use cmd::options::GlobalOptions;
use cmd::{cli, datanode, flownode, frontend, metasrv, standalone, App};
use common_base::Plugins;
-use common_version::version;
+use common_version::{verbose_version, version};
use servers::install_ring_crypto_provider;

#[derive(Parser)]
-#[command(name = "greptime", author, version, long_version = version(), about)]
+#[command(name = "greptime", author, version, long_version = verbose_version(), about)]
#[command(propagate_version = true)]
pub(crate) struct Command {
    #[clap(subcommand)]

@@ -143,10 +143,8 @@ async fn start(cli: Command) -> Result<()> {
}

fn setup_human_panic() {
-   human_panic::setup_panic!(
-       human_panic::Metadata::new("GreptimeDB", env!("CARGO_PKG_VERSION"))
-           .homepage("https://github.com/GreptimeTeam/greptimedb/discussions")
-   );
+   human_panic::setup_panic!(human_panic::Metadata::new("GreptimeDB", version())
+       .homepage("https://github.com/GreptimeTeam/greptimedb/discussions"));

    common_telemetry::set_panic_hook();
}
@@ -19,7 +19,7 @@ use catalog::kvbackend::MetaKvBackend;
use common_base::Plugins;
use common_meta::cache::LayeredCacheRegistryBuilder;
use common_telemetry::info;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
use datanode::datanode::DatanodeBuilder;
use datanode::service::DatanodeServiceBuilder;
use meta_client::MetaClientType;

@@ -67,7 +67,7 @@ impl InstanceBuilder {
    None,
);

-log_versions(version(), short_version(), APP_NAME);
+log_versions(verbose_version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);

plugins::setup_datanode_plugins(plugins, &opts.plugins, dn_opts)
@@ -32,7 +32,7 @@ use common_meta::key::flow::FlowMetadataManager;
use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
use flow::{
    get_flow_auth_options, FlownodeBuilder, FlownodeInstance, FlownodeServiceBuilder,
    FrontendClient, FrontendInvoker,

@@ -279,7 +279,7 @@ impl StartCommand {
    None,
);

-log_versions(version(), short_version(), APP_NAME);
+log_versions(verbose_version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);

info!("Flownode start command: {:#?}", self);
@@ -33,7 +33,7 @@ use common_meta::heartbeat::handler::HandlerGroupExecutor;
use common_telemetry::info;
use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
use common_time::timezone::set_default_timezone;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
use frontend::frontend::Frontend;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;

@@ -282,7 +282,7 @@ impl StartCommand {
    opts.component.slow_query.as_ref(),
);

-log_versions(version(), short_version(), APP_NAME);
+log_versions(verbose_version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);

info!("Frontend start command: {:#?}", self);
@@ -112,7 +112,7 @@ pub trait App: Send {
pub fn log_versions(version: &str, short_version: &str, app: &str) {
    // Report app version as gauge.
    APP_VERSION
-       .with_label_values(&[env!("CARGO_PKG_VERSION"), short_version, app])
+       .with_label_values(&[common_version::version(), short_version, app])
        .inc();

    // Log version and argument flags.
@@ -22,7 +22,7 @@ use common_base::Plugins;
use common_config::Configurable;
use common_telemetry::info;
use common_telemetry::logging::{TracingOptions, DEFAULT_LOGGING_DIR};
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
use meta_srv::bootstrap::MetasrvInstance;
use meta_srv::metasrv::BackendImpl;
use snafu::ResultExt;

@@ -320,7 +320,7 @@ impl StartCommand {
    None,
);

-log_versions(version(), short_version(), APP_NAME);
+log_versions(verbose_version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);

info!("Metasrv start command: {:#?}", self);
@@ -51,7 +51,7 @@ use common_telemetry::logging::{
    LoggingOptions, SlowQueryOptions, TracingOptions, DEFAULT_LOGGING_DIR,
};
use common_time::timezone::set_default_timezone;
-use common_version::{short_version, version};
+use common_version::{short_version, verbose_version};
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};

@@ -466,7 +466,7 @@ impl StartCommand {
    opts.component.slow_query.as_ref(),
);

-log_versions(version(), short_version(), APP_NAME);
+log_versions(verbose_version(), short_version(), APP_NAME);
create_resource_limit_metrics(APP_NAME);

info!("Standalone start command: {:#?}", self);
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-use std::fmt;
use std::sync::Arc;
+use std::{env, fmt};

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};

@@ -47,7 +47,7 @@ impl Function for PGVersionFunction {
fn eval(&self, _func_ctx: &FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
    let result = StringVector::from(vec![format!(
        "PostgreSQL 16.3 GreptimeDB {}",
-       env!("CARGO_PKG_VERSION")
+       common_version::version()
    )]);
    Ok(Arc::new(result))
}
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-use std::fmt;
use std::sync::Arc;
+use std::{env, fmt};

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};

@@ -52,13 +52,13 @@ impl Function for VersionFunction {
    "{}-greptimedb-{}",
    std::env::var("GREPTIMEDB_MYSQL_SERVER_VERSION")
        .unwrap_or_else(|_| "8.4.2".to_string()),
-   env!("CARGO_PKG_VERSION")
+   common_version::version()
)
}
Channel::Postgres => {
-   format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION"))
+   format!("16.3-greptimedb-{}", common_version::version())
}
-_ => env!("CARGO_PKG_VERSION").to_string(),
+_ => common_version::version().to_string(),
};
let result = StringVector::from(vec![version]);
Ok(Arc::new(result))
src/common/meta/src/cache/flow/table_flownode.rs (vendored; 19 changed lines)
@@ -15,6 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;

+use common_telemetry::info;
use futures::future::BoxFuture;
use moka::future::Cache;
use moka::ops::compute::Op;

@@ -89,6 +90,12 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
            // we have a corresponding cache invalidation mechanism to invalidate `(Key, EmptyHashSet)`.
            .map(Arc::new)
            .map(Some)
+           .inspect(|set| {
+               info!(
+                   "Initialized table_flownode cache for table_id: {}, set: {:?}",
+                   table_id, set
+               );
+           })
        })
    })
}

@@ -167,6 +174,13 @@ fn invalidator<'a>(
    match ident {
        CacheIdent::CreateFlow(create_flow) => handle_create_flow(cache, create_flow).await,
        CacheIdent::DropFlow(drop_flow) => handle_drop_flow(cache, drop_flow).await,
+       CacheIdent::FlowNodeAddressChange(node_id) => {
+           info!(
+               "Invalidate flow node cache for node_id in table_flownode: {}",
+               node_id
+           );
+           cache.invalidate_all();
+       }
        _ => {}
    }
    Ok(())

@@ -174,7 +188,10 @@ fn invalidator<'a>(
}

fn filter(ident: &CacheIdent) -> bool {
-   matches!(ident, CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_))
+   matches!(
+       ident,
+       CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_) | CacheIdent::FlowNodeAddressChange(_)
+   )
}

#[cfg(test)]
@@ -22,6 +22,7 @@ use crate::key::flow::flow_name::FlowNameKey;
use crate::key::flow::flow_route::FlowRouteKey;
use crate::key::flow::flownode_flow::FlownodeFlowKey;
use crate::key::flow::table_flow::TableFlowKey;
+use crate::key::node_address::NodeAddressKey;
use crate::key::schema_name::SchemaNameKey;
use crate::key::table_info::TableInfoKey;
use crate::key::table_name::TableNameKey;

@@ -53,6 +54,10 @@ pub struct Context {
#[async_trait::async_trait]
pub trait CacheInvalidator: Send + Sync {
    async fn invalidate(&self, ctx: &Context, caches: &[CacheIdent]) -> Result<()>;
+
+   fn name(&self) -> &'static str {
+       std::any::type_name::<Self>()
+   }
}

pub type CacheInvalidatorRef = Arc<dyn CacheInvalidator>;

@@ -137,6 +142,13 @@ where
    let key = FlowInfoKey::new(*flow_id);
    self.invalidate_key(&key.to_bytes()).await;
}
+CacheIdent::FlowNodeAddressChange(node_id) => {
+   // other caches doesn't need to be invalidated
+   // since this is only for flownode address change not id change
+   common_telemetry::info!("Invalidate flow node cache for node_id: {}", node_id);
+   let key = NodeAddressKey::with_flownode(*node_id);
+   self.invalidate_key(&key.to_bytes()).await;
+}
}
}
Ok(())
@@ -174,6 +174,8 @@ pub struct UpgradeRegion {
/// The identifier of cache.
pub enum CacheIdent {
    FlowId(FlowId),
+   /// Indicate change of address of flownode.
+   FlowNodeAddressChange(u64),
    FlowName(FlowName),
    TableId(TableId),
    TableName(TableName),
@@ -222,6 +222,7 @@ pub struct RecordBatchStreamAdapter {
enum Metrics {
    Unavailable,
    Unresolved(Arc<dyn ExecutionPlan>),
+   PartialResolved(Arc<dyn ExecutionPlan>, RecordBatchMetrics),
    Resolved(RecordBatchMetrics),
}

@@ -275,7 +276,9 @@ impl RecordBatchStream for RecordBatchStreamAdapter {

fn metrics(&self) -> Option<RecordBatchMetrics> {
    match &self.metrics_2 {
-       Metrics::Resolved(metrics) => Some(metrics.clone()),
+       Metrics::Resolved(metrics) | Metrics::PartialResolved(_, metrics) => {
+           Some(metrics.clone())
+       }
        Metrics::Unavailable | Metrics::Unresolved(_) => None,
    }
}

@@ -299,13 +302,25 @@ impl Stream for RecordBatchStreamAdapter {
    Poll::Pending => Poll::Pending,
    Poll::Ready(Some(df_record_batch)) => {
        let df_record_batch = df_record_batch?;
+       if let Metrics::Unresolved(df_plan) | Metrics::PartialResolved(df_plan, _) =
+           &self.metrics_2
+       {
+           let mut metric_collector = MetricCollector::new(self.explain_verbose);
+           accept(df_plan.as_ref(), &mut metric_collector).unwrap();
+           self.metrics_2 = Metrics::PartialResolved(
+               df_plan.clone(),
+               metric_collector.record_batch_metrics,
+           );
+       }
        Poll::Ready(Some(RecordBatch::try_from_df_record_batch(
            self.schema(),
            df_record_batch,
        )))
    }
    Poll::Ready(None) => {
-       if let Metrics::Unresolved(df_plan) = &self.metrics_2 {
+       if let Metrics::Unresolved(df_plan) | Metrics::PartialResolved(df_plan, _) =
+           &self.metrics_2
+       {
            let mut metric_collector = MetricCollector::new(self.explain_verbose);
            accept(df_plan.as_ref(), &mut metric_collector).unwrap();
            self.metrics_2 = Metrics::Resolved(metric_collector.record_batch_metrics);
@@ -19,7 +19,8 @@ use datafusion::execution::registry::SerializerRegistry;
use datafusion_common::DataFusionError;
use datafusion_expr::UserDefinedLogicalNode;
use promql::extension_plan::{
-   EmptyMetric, InstantManipulate, RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize,
+   Absent, EmptyMetric, InstantManipulate, RangeManipulate, ScalarCalculate, SeriesDivide,
+   SeriesNormalize,
};

#[derive(Debug)]

@@ -65,6 +66,13 @@ impl SerializerRegistry for ExtensionSerializer {
        .expect("Failed to downcast to SeriesDivide");
    Ok(series_divide.serialize())
}
+name if name == Absent::name() => {
+   let absent = node
+       .as_any()
+       .downcast_ref::<Absent>()
+       .expect("Failed to downcast to Absent");
+   Ok(absent.serialize())
+}
name if name == EmptyMetric::name() => Err(DataFusionError::Substrait(
    "EmptyMetric should not be serialized".to_string(),
)),

@@ -103,6 +111,10 @@ impl SerializerRegistry for ExtensionSerializer {
    let scalar_calculate = ScalarCalculate::deserialize(bytes)?;
    Ok(Arc::new(scalar_calculate))
}
+name if name == Absent::name() => {
+   let absent = Absent::deserialize(bytes)?;
+   Ok(Arc::new(absent))
+}
name if name == EmptyMetric::name() => Err(DataFusionError::Substrait(
    "EmptyMetric should not be deserialized".to_string(),
)),
@@ -14,6 +14,7 @@ workspace = true
[dependencies]
backtrace = "0.3"
common-error.workspace = true
common-version.workspace = true
console-subscriber = { version = "0.1", optional = true }
+greptime-proto.workspace = true
humantime-serde.workspace = true
@@ -384,7 +384,7 @@ pub fn init_global_logging(
        resource::SERVICE_INSTANCE_ID,
        node_id.unwrap_or("none".to_string()),
    ),
-   KeyValue::new(resource::SERVICE_VERSION, env!("CARGO_PKG_VERSION")),
+   KeyValue::new(resource::SERVICE_VERSION, common_version::version()),
    KeyValue::new(resource::PROCESS_PID, std::process::id().to_string()),
]));
@@ -17,4 +17,5 @@ shadow-rs.workspace = true

[build-dependencies]
build-data = "0.2"
+cargo-manifest = "0.19"
shadow-rs.workspace = true
@@ -14,8 +14,10 @@

use std::collections::BTreeSet;
use std::env;
+use std::path::PathBuf;

use build_data::{format_timestamp, get_source_time};
+use cargo_manifest::Manifest;
use shadow_rs::{BuildPattern, ShadowBuilder, CARGO_METADATA, CARGO_TREE};

fn main() -> shadow_rs::SdResult<()> {

@@ -33,6 +35,24 @@ fn main() -> shadow_rs::SdResult<()> {
    // solve the problem where the "CARGO_MANIFEST_DIR" is not what we want when this repo is
    // made as a submodule in another repo.
    let src_path = env::var("CARGO_WORKSPACE_DIR").or_else(|_| env::var("CARGO_MANIFEST_DIR"))?;

+   let manifest = Manifest::from_path(PathBuf::from(&src_path).join("Cargo.toml"))
+       .expect("Failed to parse Cargo.toml");
+   if let Some(product_version) = manifest.workspace.as_ref().and_then(|w| {
+       w.metadata.as_ref().and_then(|m| {
+           m.get("greptime")
+               .and_then(|g| g.get("product_version").and_then(|v| v.as_str()))
+       })
+   }) {
+       println!(
+           "cargo:rustc-env=GREPTIME_PRODUCT_VERSION={}",
+           product_version
+       );
+   } else {
+       let version = env::var("CARGO_PKG_VERSION").unwrap();
+       println!("cargo:rustc-env=GREPTIME_PRODUCT_VERSION={}", version,);
+   }
+
    let out_path = env::var("OUT_DIR")?;

    let _ = ShadowBuilder::builder()
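A hedged sketch of what the added build-script lookup resolves: it reads `workspace.metadata.greptime.product_version` from the workspace `Cargo.toml` and falls back to `CARGO_PKG_VERSION` when the key is absent. The sketch below performs the same lookup with the `toml` crate instead of `cargo_manifest`; the inline manifest text and the version value are illustrative, not taken from the repository.

```rust
// Assumes a `toml` dependency; the manifest snippet in main() is an example layout only.
fn product_version(manifest: &str) -> Option<String> {
    let value: toml::Value = toml::from_str(manifest).ok()?;
    value
        .get("workspace")?
        .get("metadata")?
        .get("greptime")?
        .get("product_version")?
        .as_str()
        .map(str::to_owned)
}

fn main() {
    let manifest = r#"
        [workspace.metadata.greptime]
        product_version = "0.15.4"
    "#;
    // The real build script emits the same cargo directive, falling back to
    // CARGO_PKG_VERSION when the metadata key is missing.
    let version = product_version(manifest)
        .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string());
    println!("cargo:rustc-env=GREPTIME_PRODUCT_VERSION={}", version);
}
```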
@@ -105,13 +105,17 @@ pub const fn build_info() -> BuildInfo {
        build_time: env!("BUILD_TIMESTAMP"),
        rustc: build::RUST_VERSION,
        target: build::BUILD_TARGET,
-       version: build::PKG_VERSION,
+       version: env!("GREPTIME_PRODUCT_VERSION"),
    }
}

const BUILD_INFO: BuildInfo = build_info();

+pub const fn version() -> &'static str {
+   BUILD_INFO.version
+}
+
pub const fn verbose_version() -> &'static str {
    const_format::formatcp!(
        "\nbranch: {}\ncommit: {}\nclean: {}\nversion: {}",
        BUILD_INFO.branch,
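The `verbose_version()` shown above builds its multi-line banner at compile time with `const_format::formatcp!`. A tiny standalone illustration of that mechanism, with placeholder field values rather than the real shadow-rs build constants:

```rust
// Compile-time string formatting with const_format; the constant values are placeholders.
const BRANCH: &str = "main";
const COMMIT: &str = "abcdef0";
const VERSION: &str = "0.15.4";

const BANNER: &str = const_format::formatcp!(
    "\nbranch: {}\ncommit: {}\nversion: {}",
    BRANCH,
    COMMIT,
    VERSION
);

fn main() {
    // BANNER is a &'static str assembled entirely at compile time.
    println!("{}", BANNER);
}
```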
@@ -27,14 +27,14 @@ lazy_static! {
    pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
        "greptime_datanode_handle_region_request_elapsed",
        "datanode handle region request elapsed",
-       &[REGION_ID, REGION_REQUEST_TYPE]
+       &[REGION_REQUEST_TYPE]
    )
    .unwrap();
    /// The number of rows in region request received by region server, labeled with request type.
    pub static ref REGION_CHANGED_ROW_COUNT: IntCounterVec = register_int_counter_vec!(
        "greptime_datanode_region_changed_row_count",
        "datanode region changed row count",
-       &[REGION_ID, REGION_REQUEST_TYPE]
+       &[REGION_REQUEST_TYPE]
    )
    .unwrap();
    /// The elapsed time since the last received heartbeat.
@@ -51,7 +51,7 @@ use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as S
use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
use servers::grpc::region_server::RegionServerHandler;
use servers::grpc::FlightCompression;
-use session::context::{QueryContextBuilder, QueryContextRef};
+use session::context::{QueryContext, QueryContextBuilder, QueryContextRef};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metric_engine_consts::{
    FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,

@@ -194,6 +194,7 @@ impl RegionServer {
pub async fn handle_remote_read(
    &self,
    request: api::v1::region::QueryRequest,
+   query_ctx: QueryContextRef,
) -> Result<SendableRecordBatchStream> {
    let _permit = if let Some(p) = &self.inner.parallelism {
        Some(p.acquire().await?)

@@ -201,12 +202,6 @@
        None
    };

-   let query_ctx: QueryContextRef = request
-       .header
-       .as_ref()
-       .map(|h| Arc::new(h.into()))
-       .unwrap_or_else(|| Arc::new(QueryContextBuilder::default().build()));
-
    let region_id = RegionId::from_u64(request.region_id);
    let provider = self.table_provider(region_id, Some(&query_ctx)).await?;
    let catalog_list = Arc::new(DummyCatalogList::with_table_provider(provider));

@@ -214,7 +209,7 @@
    let decoder = self
        .inner
        .query_engine
-       .engine_context(query_ctx)
+       .engine_context(query_ctx.clone())
        .new_plan_decoder()
        .context(NewPlanDecoderSnafu)?;

@@ -224,11 +219,14 @@
        .context(DecodeLogicalPlanSnafu)?;

    self.inner
-       .handle_read(QueryRequest {
-           header: request.header,
-           region_id,
-           plan,
-       })
+       .handle_read(
+           QueryRequest {
+               header: request.header,
+               region_id,
+               plan,
+           },
+           query_ctx,
+       )
        .await
}

@@ -243,6 +241,7 @@
    let ctx: Option<session::context::QueryContext> = request.header.as_ref().map(|h| h.into());

    let provider = self.table_provider(request.region_id, ctx.as_ref()).await?;
+   let query_ctx = Arc::new(ctx.unwrap_or_else(|| QueryContextBuilder::default().build()));

    struct RegionDataSourceInjector {
        source: Arc<dyn TableSource>,

@@ -271,7 +270,7 @@
        .data;

    self.inner
-       .handle_read(QueryRequest { plan, ..request })
+       .handle_read(QueryRequest { plan, ..request }, query_ctx)
        .await
}

@@ -536,9 +535,14 @@ impl FlightCraft for RegionServer {
        .as_ref()
        .map(|h| TracingContext::from_w3c(&h.tracing_context))
        .unwrap_or_default();
+   let query_ctx = request
+       .header
+       .as_ref()
+       .map(|h| Arc::new(QueryContext::from(h)))
+       .unwrap_or(QueryContext::arc());

    let result = self
-       .handle_remote_read(request)
+       .handle_remote_read(request, query_ctx.clone())
        .trace(tracing_context.attach(info_span!("RegionServer::handle_read")))
        .await?;

@@ -546,6 +550,7 @@
        result,
        tracing_context,
        self.flight_compression,
+       query_ctx,
    ));
    Ok(Response::new(stream))
}

@@ -915,9 +920,8 @@ impl RegionServerInner {
    request: RegionRequest,
) -> Result<RegionResponse> {
    let request_type = request.request_type();
-   let region_id_str = region_id.to_string();
    let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
-       .with_label_values(&[&region_id_str, request_type])
+       .with_label_values(&[request_type])
        .start_timer();

    let region_change = match &request {

@@ -957,7 +961,7 @@
    // Update metrics
    if matches!(region_change, RegionChange::Ingest) {
        crate::metrics::REGION_CHANGED_ROW_COUNT
-           .with_label_values(&[&region_id_str, request_type])
+           .with_label_values(&[request_type])
            .inc_by(result.affected_rows as u64);
    }
    // Sets corresponding region status to ready.

@@ -1124,16 +1128,13 @@
    Ok(())
}

-pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
+pub async fn handle_read(
+   &self,
+   request: QueryRequest,
+   query_ctx: QueryContextRef,
+) -> Result<SendableRecordBatchStream> {
    // TODO(ruihang): add metrics and set trace id

-   // Build query context from gRPC header
-   let query_ctx: QueryContextRef = request
-       .header
-       .as_ref()
-       .map(|h| Arc::new(h.into()))
-       .unwrap_or_else(|| QueryContextBuilder::default().build().into());
-
    let result = self
        .query_engine
        .execute(request.plan, query_ctx)
@@ -527,7 +527,7 @@ pub struct FulltextOptions {
    #[serde(default = "fulltext_options_default_granularity")]
    pub granularity: u32,
    /// The false positive rate of the fulltext index (for bloom backend only)
-   #[serde(default = "fulltext_options_default_false_positive_rate_in_10000")]
+   #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
}

@@ -535,7 +535,7 @@ fn fulltext_options_default_granularity() -> u32 {
    DEFAULT_GRANULARITY
}

-fn fulltext_options_default_false_positive_rate_in_10000() -> u32 {
+fn index_options_default_false_positive_rate_in_10000() -> u32 {
    (DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
}

@@ -773,6 +773,7 @@ pub struct SkippingIndexOptions {
    /// The granularity of the skip index.
    pub granularity: u32,
    /// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
+   #[serde(default = "index_options_default_false_positive_rate_in_10000")]
    pub false_positive_rate_in_10000: u32,
    /// The type of the skip index.
    #[serde(default)]

@@ -1179,4 +1180,59 @@ mod tests {
    assert!(column_schema.default_constraint.is_none());
    assert!(column_schema.metadata.is_empty());
}

+#[test]
+fn test_skipping_index_options_deserialization() {
+   let original_options = "{\"granularity\":1024,\"false-positive-rate-in-10000\":10,\"index-type\":\"BloomFilter\"}";
+   let options = serde_json::from_str::<SkippingIndexOptions>(original_options).unwrap();
+   assert_eq!(1024, options.granularity);
+   assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
+   assert_eq!(0.001, options.false_positive_rate());
+
+   let options_str = serde_json::to_string(&options).unwrap();
+   assert_eq!(options_str, original_options);
+}
+
+#[test]
+fn test_skipping_index_options_deserialization_v0_14_to_v0_15() {
+   let options = "{\"granularity\":10240,\"index-type\":\"BloomFilter\"}";
+   let options = serde_json::from_str::<SkippingIndexOptions>(options).unwrap();
+   assert_eq!(10240, options.granularity);
+   assert_eq!(SkippingIndexType::BloomFilter, options.index_type);
+   assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());
+
+   let options_str = serde_json::to_string(&options).unwrap();
+   assert_eq!(options_str, "{\"granularity\":10240,\"false-positive-rate-in-10000\":100,\"index-type\":\"BloomFilter\"}");
+}
+
+#[test]
+fn test_fulltext_options_deserialization() {
+   let original_options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":1024,\"false-positive-rate-in-10000\":10}";
+   let options = serde_json::from_str::<FulltextOptions>(original_options).unwrap();
+   assert!(!options.case_sensitive);
+   assert!(options.enable);
+   assert_eq!(FulltextBackend::Bloom, options.backend);
+   assert_eq!(FulltextAnalyzer::default(), options.analyzer);
+   assert_eq!(1024, options.granularity);
+   assert_eq!(0.001, options.false_positive_rate());
+
+   let options_str = serde_json::to_string(&options).unwrap();
+   assert_eq!(options_str, original_options);
+}
+
+#[test]
+fn test_fulltext_options_deserialization_v0_14_to_v0_15() {
+   // 0.14 to 0.15
+   let options = "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}";
+   let options = serde_json::from_str::<FulltextOptions>(options).unwrap();
+   assert!(!options.case_sensitive);
+   assert!(options.enable);
+   assert_eq!(FulltextBackend::Bloom, options.backend);
+   assert_eq!(FulltextAnalyzer::default(), options.analyzer);
+   assert_eq!(DEFAULT_GRANULARITY, options.granularity);
+   assert_eq!(DEFAULT_FALSE_POSITIVE_RATE, options.false_positive_rate());
+
+   let options_str = serde_json::to_string(&options).unwrap();
+   assert_eq!(options_str, "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}");
+}
}
@@ -12,6 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

+use arrow_array::{
+   ArrayRef, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
+   TimestampNanosecondArray, TimestampSecondArray,
+};
+use arrow_schema::DataType;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use paste::paste;

@@ -138,6 +143,41 @@ define_timestamp_with_unit!(Millisecond);
define_timestamp_with_unit!(Microsecond);
define_timestamp_with_unit!(Nanosecond);

+pub fn timestamp_array_to_primitive(
+   ts_array: &ArrayRef,
+) -> Option<(
+   PrimitiveArray<arrow_array::types::Int64Type>,
+   arrow::datatypes::TimeUnit,
+)> {
+   let DataType::Timestamp(unit, _) = ts_array.data_type() else {
+       return None;
+   };
+
+   let ts_primitive = match unit {
+       arrow_schema::TimeUnit::Second => ts_array
+           .as_any()
+           .downcast_ref::<TimestampSecondArray>()
+           .unwrap()
+           .reinterpret_cast::<arrow_array::types::Int64Type>(),
+       arrow_schema::TimeUnit::Millisecond => ts_array
+           .as_any()
+           .downcast_ref::<TimestampMillisecondArray>()
+           .unwrap()
+           .reinterpret_cast::<arrow_array::types::Int64Type>(),
+       arrow_schema::TimeUnit::Microsecond => ts_array
+           .as_any()
+           .downcast_ref::<TimestampMicrosecondArray>()
+           .unwrap()
+           .reinterpret_cast::<arrow_array::types::Int64Type>(),
+       arrow_schema::TimeUnit::Nanosecond => ts_array
+           .as_any()
+           .downcast_ref::<TimestampNanosecondArray>()
+           .unwrap()
+           .reinterpret_cast::<arrow_array::types::Int64Type>(),
+   };
+   Some((ts_primitive, *unit))
+}

#[cfg(test)]
mod tests {
    use common_time::timezone::set_default_timezone;
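A hedged usage sketch of the core idea behind the new `timestamp_array_to_primitive` helper: a timestamp column arriving as an `ArrayRef` is downcast to its concrete array type and its buffer is reinterpreted as plain `i64` values (the helper additionally returns the detected time unit). The array contents below are illustrative only.

```rust
use std::sync::Arc;

use arrow_array::types::Int64Type;
use arrow_array::{Array, ArrayRef, TimestampMillisecondArray};

fn main() {
    // A millisecond-precision timestamp column, as it would arrive as an ArrayRef.
    let ts: ArrayRef = Arc::new(TimestampMillisecondArray::from(vec![1_000_i64, 2_000, 3_000]));

    // Downcast to the concrete array and reinterpret the buffer as raw i64 values.
    let primitive = ts
        .as_any()
        .downcast_ref::<TimestampMillisecondArray>()
        .expect("timestamp column")
        .reinterpret_cast::<Int64Type>();

    assert_eq!(primitive.values().to_vec(), vec![1_000_i64, 2_000, 3_000]);
    println!("{:?}", primitive.values());
}
```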
@@ -14,7 +14,7 @@

//! Batching mode engine

-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;

use api::v1::flow::{DirtyWindowRequests, FlowResponse};

@@ -142,7 +142,7 @@ impl BatchingEngine {

    let handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
        let src_table_names = &task.config.source_table_names;
-       let mut all_dirty_windows = vec![];
+       let mut all_dirty_windows = HashSet::new();
        for src_table_name in src_table_names {
            if let Some((timestamps, unit)) = group_by_table_name.get(src_table_name) {
                let Some(expr) = &task.config.time_window_expr else {

@@ -155,7 +155,7 @@ impl BatchingEngine {
                .context(UnexpectedSnafu {
                    reason: "Failed to eval start value",
                })?;
-               all_dirty_windows.push(align_start);
+               all_dirty_windows.insert(align_start);
            }
        }
    }
@@ -50,7 +50,8 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::adapter::util::from_proto_to_data_type;
|
||||
use crate::error::{
|
||||
ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, PlanSnafu, UnexpectedSnafu,
|
||||
ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, PlanSnafu, TimeSnafu,
|
||||
UnexpectedSnafu,
|
||||
};
|
||||
use crate::expr::error::DataTypeSnafu;
|
||||
use crate::Error;
|
||||
@@ -74,6 +75,7 @@ pub struct TimeWindowExpr {
|
||||
logical_expr: Expr,
|
||||
df_schema: DFSchema,
|
||||
eval_time_window_size: Option<std::time::Duration>,
|
||||
eval_time_original: Option<Timestamp>,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for TimeWindowExpr {
|
||||
@@ -106,10 +108,11 @@ impl TimeWindowExpr {
|
||||
logical_expr: expr.clone(),
|
||||
df_schema: df_schema.clone(),
|
||||
eval_time_window_size: None,
|
||||
eval_time_original: None,
|
||||
};
|
||||
let test_ts = DEFAULT_TEST_TIMESTAMP;
|
||||
let (l, u) = zelf.eval(test_ts)?;
|
||||
let time_window_size = match (l, u) {
|
||||
let (lower, upper) = zelf.eval(test_ts)?;
|
||||
let time_window_size = match (lower, upper) {
|
||||
(Some(l), Some(u)) => u.sub(&l).map(|r| r.to_std()).transpose().map_err(|_| {
|
||||
UnexpectedSnafu {
|
||||
reason: format!(
|
||||
@@ -121,13 +124,59 @@ impl TimeWindowExpr {
|
||||
_ => None,
|
||||
};
|
||||
zelf.eval_time_window_size = time_window_size;
|
||||
zelf.eval_time_original = lower;
|
||||
|
||||
Ok(zelf)
|
||||
}
|
||||
|
||||
/// TODO(discord9): add `eval_batch` too
|
||||
pub fn eval(
|
||||
&self,
|
||||
current: Timestamp,
|
||||
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
|
||||
fn compute_distance(time_diff_ns: i64, stride_ns: i64) -> i64 {
|
||||
if stride_ns == 0 {
|
||||
return time_diff_ns;
|
||||
}
|
||||
// a - (a % n) truncates the diff toward zero; with the negative-case fixup below this floors to the bin start
|
||||
let time_delta = time_diff_ns - (time_diff_ns % stride_ns);
|
||||
|
||||
if time_diff_ns < 0 && time_delta != time_diff_ns {
|
||||
// The origin is later than the source timestamp, round down to the previous bin
|
||||
|
||||
time_delta - stride_ns
|
||||
} else {
|
||||
time_delta
|
||||
}
|
||||
}
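
To make the fast-path alignment concrete, the sketch below copies `compute_distance` (it is local to `eval`, so the copy is purely illustrative) and checks two worked cases: with a 5-minute window, a timestamp 12m34s after the origin lands in the [10m, 15m) bin, and a timestamp 1s before the origin lands in the [-5m, 0m) bin.

// Copy of the helper above, for illustration only.
fn compute_distance(time_diff_ns: i64, stride_ns: i64) -> i64 {
    if stride_ns == 0 {
        return time_diff_ns;
    }
    let time_delta = time_diff_ns - (time_diff_ns % stride_ns);
    if time_diff_ns < 0 && time_delta != time_diff_ns {
        time_delta - stride_ns
    } else {
        time_delta
    }
}

fn main() {
    let five_minutes_ns = 300_000_000_000_i64;
    // 12m34s after the origin aligns down to the 10m boundary.
    assert_eq!(compute_distance(754_000_000_000, five_minutes_ns), 600_000_000_000);
    // 1s before the origin aligns down to the -5m boundary (the previous bin).
    assert_eq!(compute_distance(-1_000_000_000, five_minutes_ns), -300_000_000_000);
}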
|
||||
|
||||
// FAST PATH: if we have eval_time_original and eval_time_window_size,
|
||||
// we can compute the bounds directly
|
||||
if let (Some(original), Some(window_size)) =
|
||||
(self.eval_time_original, self.eval_time_window_size)
|
||||
{
|
||||
// date_bin align current to lower bound
|
||||
let time_diff_ns = current.sub(&original).and_then(|s| s.num_nanoseconds()).with_context(|| UnexpectedSnafu {
|
||||
reason: format!(
|
||||
"Failed to compute time difference between current {current:?} and original {original:?}"
|
||||
),
|
||||
})?;
|
||||
|
||||
let window_size_ns = window_size.as_nanos() as i64;
|
||||
|
||||
let distance_ns = compute_distance(time_diff_ns, window_size_ns);
|
||||
|
||||
let lower_bound = if distance_ns >= 0 {
|
||||
original.add_duration(std::time::Duration::from_nanos(distance_ns as u64))
|
||||
} else {
|
||||
original.sub_duration(std::time::Duration::from_nanos((-distance_ns) as u64))
|
||||
}
|
||||
.context(TimeSnafu)?;
|
||||
let upper_bound = lower_bound.add_duration(window_size).context(TimeSnafu)?;
|
||||
|
||||
return Ok((Some(lower_bound), Some(upper_bound)));
|
||||
}
|
||||
|
||||
let lower_bound =
|
||||
calc_expr_time_window_lower_bound(&self.phy_expr, &self.df_schema, current)?;
|
||||
let upper_bound =
|
||||
|
||||
@@ -380,6 +380,13 @@ impl SqlQueryHandler for Instance {
|
||||
.and_then(|stmts| query_interceptor.post_parsing(stmts, query_ctx.clone()))
|
||||
{
|
||||
Ok(stmts) => {
|
||||
if stmts.is_empty() {
|
||||
return vec![InvalidSqlSnafu {
|
||||
err_msg: "empty statements",
|
||||
}
|
||||
.fail()];
|
||||
}
|
||||
|
||||
let mut results = Vec::with_capacity(stmts.len());
|
||||
for stmt in stmts {
|
||||
if let Err(e) = checker
|
||||
|
||||
@@ -21,9 +21,10 @@ use common_catalog::format_full_table_name;
|
||||
use common_recordbatch::util;
|
||||
use common_telemetry::tracing;
|
||||
use datatypes::prelude::Value;
|
||||
use promql_parser::label::{Matcher, Matchers};
|
||||
use promql_parser::label::{MatchOp, Matcher, Matchers};
|
||||
use query::promql;
|
||||
use query::promql::planner::PromPlanner;
|
||||
use servers::prom_store::{DATABASE_LABEL, SCHEMA_LABEL};
|
||||
use servers::prometheus;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -114,7 +115,17 @@ impl Instance {
|
||||
end: SystemTime,
|
||||
ctx: &QueryContextRef,
|
||||
) -> Result<Vec<String>> {
|
||||
let table_schema = ctx.current_schema();
|
||||
let table_schema = matchers
|
||||
.iter()
|
||||
.find_map(|m| {
|
||||
if (m.name == SCHEMA_LABEL || m.name == DATABASE_LABEL) && m.op == MatchOp::Equal {
|
||||
Some(m.value.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|| ctx.current_schema());
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(ctx.current_catalog(), &table_schema, &metric, Some(ctx))
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, Peer, Role};
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
|
||||
use common_meta::key::{MetadataKey, MetadataValue};
|
||||
use common_meta::rpc::store::PutRequest;
|
||||
@@ -80,7 +81,19 @@ async fn rewrite_node_address(ctx: &mut Context, peer: &Peer) {
|
||||
match ctx.leader_cached_kv_backend.put(put).await {
|
||||
Ok(_) => {
|
||||
info!("Successfully updated flow `NodeAddressValue`: {:?}", peer);
|
||||
// TODO(discord): broadcast invalidating cache to all frontends
|
||||
// broadcast invalidating cache to all frontends
|
||||
let cache_idents = vec![CacheIdent::FlowNodeAddressChange(peer.id)];
|
||||
info!(
|
||||
"Invalidate flow node cache for new address with cache idents: {:?}",
|
||||
cache_idents
|
||||
);
|
||||
if let Err(e) = ctx
|
||||
.cache_invalidator
|
||||
.invalidate(&Default::default(), &cache_idents)
|
||||
.await
|
||||
{
|
||||
error!(e; "Failed to invalidate {} `NodeAddressKey` cache, peer: {:?}", cache_idents.len(), peer);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!(e; "Failed to update flow `NodeAddressValue`: {:?}", peer);
|
||||
|
||||
@@ -473,8 +473,9 @@ struct MetricEngineInner {
|
||||
mod test {
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_telemetry::info;
|
||||
use store_api::metric_engine_consts::PHYSICAL_TABLE_METADATA_KEY;
|
||||
use store_api::region_request::{RegionCloseRequest, RegionOpenRequest};
|
||||
use store_api::region_request::{RegionCloseRequest, RegionFlushRequest, RegionOpenRequest};
|
||||
|
||||
use super::*;
|
||||
use crate::test_util::TestEnv;
|
||||
@@ -559,4 +560,90 @@ mod test {
|
||||
assert!(env.metric().region_statistic(logical_region_id).is_none());
|
||||
assert!(env.metric().region_statistic(physical_region_id).is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open_region_failure() {
|
||||
let env = TestEnv::new().await;
|
||||
env.init_metric_region().await;
|
||||
let physical_region_id = env.default_physical_region_id();
|
||||
|
||||
let metric_engine = env.metric();
|
||||
metric_engine
|
||||
.handle_request(
|
||||
physical_region_id,
|
||||
RegionRequest::Flush(RegionFlushRequest {
|
||||
row_group_size: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let path = format!("{}/metadata/", env.default_region_dir());
|
||||
let object_store = env.get_object_store().unwrap();
|
||||
let list = object_store.list(&path).await.unwrap();
|
||||
// Delete parquet files in metadata region
|
||||
for entry in list {
|
||||
if entry.metadata().is_dir() {
|
||||
continue;
|
||||
}
|
||||
if entry.name().ends_with("parquet") {
|
||||
info!("deleting {}", entry.path());
|
||||
object_store.delete(entry.path()).await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let physical_region_option = [(PHYSICAL_TABLE_METADATA_KEY.to_string(), String::new())]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let open_request = RegionOpenRequest {
|
||||
engine: METRIC_ENGINE_NAME.to_string(),
|
||||
region_dir: env.default_region_dir(),
|
||||
options: physical_region_option,
|
||||
skip_wal_replay: false,
|
||||
};
|
||||
// Opening an already opened region should succeed.
|
||||
// Since the region is already open, no metadata recovery operations will be performed.
|
||||
metric_engine
|
||||
.handle_request(physical_region_id, RegionRequest::Open(open_request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Close the region
|
||||
metric_engine
|
||||
.handle_request(
|
||||
physical_region_id,
|
||||
RegionRequest::Close(RegionCloseRequest {}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Try to reopen region.
|
||||
let physical_region_option = [(PHYSICAL_TABLE_METADATA_KEY.to_string(), String::new())]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let open_request = RegionOpenRequest {
|
||||
engine: METRIC_ENGINE_NAME.to_string(),
|
||||
region_dir: env.default_region_dir(),
|
||||
options: physical_region_option,
|
||||
skip_wal_replay: false,
|
||||
};
|
||||
let err = metric_engine
|
||||
.handle_request(physical_region_id, RegionRequest::Open(open_request))
|
||||
.await
|
||||
.unwrap_err();
|
||||
// Failed to open region because of missing parquet files.
|
||||
assert_eq!(err.status_code(), StatusCode::StorageUnavailable);
|
||||
|
||||
let mito_engine = metric_engine.mito();
|
||||
let data_region_id = utils::to_data_region_id(physical_region_id);
|
||||
let metadata_region_id = utils::to_metadata_region_id(physical_region_id);
|
||||
// The metadata/data region should be closed.
|
||||
let err = mito_engine.get_metadata(data_region_id).await.unwrap_err();
|
||||
assert_eq!(err.status_code(), StatusCode::RegionNotFound);
|
||||
let err = mito_engine
|
||||
.get_metadata(metadata_region_id)
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert_eq!(err.status_code(), StatusCode::RegionNotFound);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ impl MetricEngineInner {
|
||||
}
|
||||
}
|
||||
|
||||
async fn close_physical_region(&self, region_id: RegionId) -> Result<AffectedRows> {
|
||||
pub(crate) async fn close_physical_region(&self, region_id: RegionId) -> Result<AffectedRows> {
|
||||
let data_region_id = utils::to_data_region_id(region_id);
|
||||
let metadata_region_id = utils::to_metadata_region_id(region_id);
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::SemanticType;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use datafusion::common::HashMap;
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use object_store::util::join_dir;
|
||||
@@ -94,6 +94,21 @@ impl MetricEngineInner {
|
||||
Ok(responses)
|
||||
}
|
||||
|
||||
// If the metadata region is opened with a stale manifest,
|
||||
// the metric engine may fail to recover logical tables from the metadata region,
|
||||
// as the manifest could reference files that have already been deleted
|
||||
// due to compaction operations performed by the region leader.
|
||||
async fn close_physical_region_on_recovery_failure(&self, physical_region_id: RegionId) {
|
||||
info!(
|
||||
"Closing metadata region {} and data region {} on metadata recovery failure",
|
||||
utils::to_metadata_region_id(physical_region_id),
|
||||
utils::to_data_region_id(physical_region_id)
|
||||
);
|
||||
if let Err(err) = self.close_physical_region(physical_region_id).await {
|
||||
error!(err; "Failed to close physical region {}", physical_region_id);
|
||||
}
|
||||
}
|
||||
|
||||
async fn open_physical_region_with_results(
|
||||
&self,
|
||||
metadata_region_result: Option<std::result::Result<RegionResponse, BoxedError>>,
|
||||
@@ -119,8 +134,14 @@ impl MetricEngineInner {
|
||||
region_type: "data",
|
||||
})?;
|
||||
|
||||
self.recover_states(physical_region_id, physical_region_options)
|
||||
.await?;
|
||||
if let Err(err) = self
|
||||
.recover_states(physical_region_id, physical_region_options)
|
||||
.await
|
||||
{
|
||||
self.close_physical_region_on_recovery_failure(physical_region_id)
|
||||
.await;
|
||||
return Err(err);
|
||||
}
|
||||
Ok(data_region_response)
|
||||
}
|
||||
|
||||
@@ -139,11 +160,31 @@ impl MetricEngineInner {
|
||||
request: RegionOpenRequest,
|
||||
) -> Result<AffectedRows> {
|
||||
if request.is_physical_table() {
|
||||
if self
|
||||
.state
|
||||
.read()
|
||||
.unwrap()
|
||||
.physical_region_states()
|
||||
.get(®ion_id)
|
||||
.is_some()
|
||||
{
|
||||
warn!(
|
||||
"The physical region {} is already open, ignore the open request",
|
||||
region_id
|
||||
);
|
||||
return Ok(0);
|
||||
}
|
||||
// open physical region and recover states
|
||||
let physical_region_options = PhysicalRegionOptions::try_from(&request.options)?;
|
||||
self.open_physical_region(region_id, request).await?;
|
||||
self.recover_states(region_id, physical_region_options)
|
||||
.await?;
|
||||
if let Err(err) = self
|
||||
.recover_states(region_id, physical_region_options)
|
||||
.await
|
||||
{
|
||||
self.close_physical_region_on_recovery_failure(region_id)
|
||||
.await;
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
Ok(0)
|
||||
} else {
|
||||
|
||||
@@ -23,6 +23,7 @@ use mito2::config::MitoConfig;
|
||||
use mito2::engine::MitoEngine;
|
||||
use mito2::test_util::TestEnv as MitoTestEnv;
|
||||
use object_store::util::join_dir;
|
||||
use object_store::ObjectStore;
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::metric_engine_consts::{
|
||||
LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
|
||||
@@ -74,6 +75,10 @@ impl TestEnv {
|
||||
join_dir(&env_root, "data")
|
||||
}
|
||||
|
||||
pub fn get_object_store(&self) -> Option<ObjectStore> {
|
||||
self.mito_env.get_object_store()
|
||||
}
|
||||
|
||||
/// Returns a reference to the engine.
|
||||
pub fn mito(&self) -> MitoEngine {
|
||||
self.mito.clone()
|
||||
|
||||
@@ -62,7 +62,7 @@ use crate::read::BoxedBatchReader;
|
||||
use crate::region::options::MergeMode;
|
||||
use crate::region::version::VersionControlRef;
|
||||
use crate::region::ManifestContextRef;
|
||||
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
|
||||
use crate::request::{OptionOutputTx, OutputTx, WorkerRequestWithTime};
|
||||
use crate::schedule::remote_job_scheduler::{
|
||||
CompactionJob, DefaultNotifier, RemoteJob, RemoteJobSchedulerRef,
|
||||
};
|
||||
@@ -77,7 +77,7 @@ pub struct CompactionRequest {
|
||||
pub(crate) current_version: CompactionVersion,
|
||||
pub(crate) access_layer: AccessLayerRef,
|
||||
/// Sender to send notification to the region worker.
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequest>,
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequestWithTime>,
|
||||
/// Waiters of the compaction request.
|
||||
pub(crate) waiters: Vec<OutputTx>,
|
||||
/// Start time of compaction task.
|
||||
@@ -101,7 +101,7 @@ pub(crate) struct CompactionScheduler {
|
||||
/// Compacting regions.
|
||||
region_status: HashMap<RegionId, CompactionStatus>,
|
||||
/// Request sender of the worker that this scheduler belongs to.
|
||||
request_sender: Sender<WorkerRequest>,
|
||||
request_sender: Sender<WorkerRequestWithTime>,
|
||||
cache_manager: CacheManagerRef,
|
||||
engine_config: Arc<MitoConfig>,
|
||||
listener: WorkerListener,
|
||||
@@ -112,7 +112,7 @@ pub(crate) struct CompactionScheduler {
|
||||
impl CompactionScheduler {
|
||||
pub(crate) fn new(
|
||||
scheduler: SchedulerRef,
|
||||
request_sender: Sender<WorkerRequest>,
|
||||
request_sender: Sender<WorkerRequestWithTime>,
|
||||
cache_manager: CacheManagerRef,
|
||||
engine_config: Arc<MitoConfig>,
|
||||
listener: WorkerListener,
|
||||
@@ -559,7 +559,7 @@ impl CompactionStatus {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new_compaction_request(
|
||||
&mut self,
|
||||
request_sender: Sender<WorkerRequest>,
|
||||
request_sender: Sender<WorkerRequestWithTime>,
|
||||
mut waiter: OptionOutputTx,
|
||||
engine_config: Arc<MitoConfig>,
|
||||
cache_manager: CacheManagerRef,
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::manifest::action::RegionEdit;
|
||||
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED};
|
||||
use crate::request::{
|
||||
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
|
||||
WorkerRequestWithTime,
|
||||
};
|
||||
use crate::worker::WorkerListener;
|
||||
use crate::{error, metrics};
|
||||
@@ -37,7 +38,7 @@ pub const MAX_PARALLEL_COMPACTION: usize = 1;
|
||||
pub(crate) struct CompactionTaskImpl {
|
||||
pub compaction_region: CompactionRegion,
|
||||
/// Request sender to notify the worker.
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequest>,
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequestWithTime>,
|
||||
/// Senders that are used to notify waiters waiting for pending compaction tasks.
|
||||
pub waiters: Vec<OutputTx>,
|
||||
/// Start time of compaction task
|
||||
@@ -135,7 +136,11 @@ impl CompactionTaskImpl {
|
||||
|
||||
/// Notifies region worker to handle post-compaction tasks.
|
||||
async fn send_to_worker(&self, request: WorkerRequest) {
|
||||
if let Err(e) = self.request_sender.send(request).await {
|
||||
if let Err(e) = self
|
||||
.request_sender
|
||||
.send(WorkerRequestWithTime::new(request))
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
"Failed to notify compaction job status for region {}, request: {:?}",
|
||||
self.compaction_region.region_id, e.0
|
||||
|
||||
@@ -30,6 +30,8 @@ use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
|
||||
const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
|
||||
/// Default channel size for parallel scan task.
|
||||
pub(crate) const DEFAULT_SCAN_CHANNEL_SIZE: usize = 32;
|
||||
/// Default maximum number of SST files to scan concurrently.
|
||||
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 128;
|
||||
|
||||
// Use `1/GLOBAL_WRITE_BUFFER_SIZE_FACTOR` of OS memory as global write buffer size in default mode
|
||||
const GLOBAL_WRITE_BUFFER_SIZE_FACTOR: u64 = 8;
|
||||
@@ -107,6 +109,8 @@ pub struct MitoConfig {
|
||||
pub sst_write_buffer_size: ReadableSize,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
pub parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently (default 128).
|
||||
pub max_concurrent_scan_files: usize,
|
||||
/// Whether to allow stale entries read during replay.
|
||||
pub allow_stale_entries: bool,
|
||||
|
||||
@@ -152,6 +156,7 @@ impl Default for MitoConfig {
|
||||
write_cache_ttl: None,
|
||||
sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
allow_stale_entries: false,
|
||||
index: IndexConfig::default(),
|
||||
inverted_index: InvertedIndexConfig::default(),
|
||||
|
||||
@@ -506,6 +506,7 @@ impl EngineInner {
|
||||
CacheStrategy::EnableAll(cache_manager),
|
||||
)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_max_concurrent_scan_files(self.config.max_concurrent_scan_files)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
.with_ignore_bloom_filter(self.config.bloom_filter_index.apply_on_query.disabled())
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::Rows;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use futures::TryStreamExt;
|
||||
@@ -151,6 +153,58 @@ async fn test_scan_with_min_sst_sequence() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_max_concurrent_scan_files() {
|
||||
let mut env = TestEnv::with_prefix("test_max_concurrent_scan_files").await;
|
||||
let config = MitoConfig {
|
||||
max_concurrent_scan_files: 2,
|
||||
..Default::default()
|
||||
};
|
||||
let engine = env.create_engine(config).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let column_schemas = test_util::rows_schema(&request);
|
||||
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let put_and_flush = async |start, end| {
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: test_util::build_rows(start, end),
|
||||
};
|
||||
test_util::put_rows(&engine, region_id, rows).await;
|
||||
test_util::flush_region(&engine, region_id, None).await;
|
||||
};
|
||||
|
||||
// Write overlapping files.
|
||||
put_and_flush(0, 4).await;
|
||||
put_and_flush(3, 7).await;
|
||||
put_and_flush(6, 9).await;
|
||||
|
||||
let request = ScanRequest::default();
|
||||
let scanner = engine.scanner(region_id, request).await.unwrap();
|
||||
let Scanner::Seq(scanner) = scanner else {
|
||||
panic!("Scanner should be seq scan");
|
||||
};
|
||||
let error = scanner.check_scan_limit().unwrap_err();
|
||||
assert_eq!(StatusCode::RateLimited, error.status_code());
|
||||
|
||||
let request = ScanRequest {
|
||||
distribution: Some(TimeSeriesDistribution::PerSeries),
|
||||
..Default::default()
|
||||
};
|
||||
let scanner = engine.scanner(region_id, request).await.unwrap();
|
||||
let Scanner::Series(scanner) = scanner else {
|
||||
panic!("Scanner should be series scan");
|
||||
};
|
||||
let error = scanner.check_scan_limit().unwrap_err();
|
||||
assert_eq!(StatusCode::RateLimited, error.status_code());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_series_scan() {
|
||||
let mut env = TestEnv::with_prefix("test_series_scan").await;
|
||||
|
||||
@@ -1020,6 +1020,30 @@ pub enum Error {
|
||||
location: Location,
|
||||
source: mito_codec::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Inconsistent timestamp column length, expect: {}, actual: {}",
|
||||
expected,
|
||||
actual
|
||||
))]
|
||||
InconsistentTimestampLength {
|
||||
expected: usize,
|
||||
actual: usize,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Too many files to read concurrently: {}, max allowed: {}",
|
||||
actual,
|
||||
max
|
||||
))]
|
||||
TooManyFilesToRead {
|
||||
actual: usize,
|
||||
max: usize,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
@@ -1175,6 +1199,10 @@ impl ErrorExt for Error {
|
||||
ConvertBulkWalEntry { source, .. } => source.status_code(),
|
||||
|
||||
Encode { source, .. } | Decode { source, .. } => source.status_code(),
|
||||
|
||||
InconsistentTimestampLength { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
TooManyFilesToRead { .. } => StatusCode::RateLimited,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ use crate::region::version::{VersionControlData, VersionControlRef};
|
||||
use crate::region::{ManifestContextRef, RegionLeaderState};
|
||||
use crate::request::{
|
||||
BackgroundNotify, FlushFailed, FlushFinished, OptionOutputTx, OutputTx, SenderBulkRequest,
|
||||
SenderDdlRequest, SenderWriteRequest, WorkerRequest,
|
||||
SenderDdlRequest, SenderWriteRequest, WorkerRequest, WorkerRequestWithTime,
|
||||
};
|
||||
use crate::schedule::scheduler::{Job, SchedulerRef};
|
||||
use crate::sst::file::FileMeta;
|
||||
@@ -223,7 +223,7 @@ pub(crate) struct RegionFlushTask {
|
||||
/// Flush result senders.
|
||||
pub(crate) senders: Vec<OutputTx>,
|
||||
/// Request sender to notify the worker.
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequest>,
|
||||
pub(crate) request_sender: mpsc::Sender<WorkerRequestWithTime>,
|
||||
|
||||
pub(crate) access_layer: AccessLayerRef,
|
||||
pub(crate) listener: WorkerListener,
|
||||
@@ -441,7 +441,11 @@ impl RegionFlushTask {
|
||||
|
||||
/// Notify flush job status.
|
||||
async fn send_worker_request(&self, request: WorkerRequest) {
|
||||
if let Err(e) = self.request_sender.send(request).await {
|
||||
if let Err(e) = self
|
||||
.request_sender
|
||||
.send(WorkerRequestWithTime::new(request))
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
"Failed to notify flush job status for region {}, request: {:?}",
|
||||
self.region_id, e.0
|
||||
|
||||
@@ -126,7 +126,12 @@ impl From<&BulkPart> for BulkWalEntry {
|
||||
|
||||
impl BulkPart {
|
||||
pub(crate) fn estimated_size(&self) -> usize {
|
||||
self.batch.get_array_memory_size()
|
||||
self.batch
|
||||
.columns()
|
||||
.iter()
|
||||
// If can not get slice memory size, assume 0 here.
|
||||
.map(|c| c.to_data().get_slice_memory_size().unwrap_or(0))
|
||||
.sum()
|
||||
}
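
The switch from `get_array_memory_size` to summing per-column `get_slice_memory_size` matters for sliced batches: a slice still references the full backing buffers, so the old estimate over-counted. A small standalone check of that difference follows; the `arrow_array` / `arrow_schema` paths are assumed, any arrow re-export behaves the same way.

use std::sync::Arc;

use arrow_array::{ArrayRef, Int64Array, RecordBatch};
use arrow_schema::{DataType, Field, Schema};

fn main() {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(Int64Array::from_iter_values(0..1024_i64)) as ArrayRef],
    )
    .unwrap();
    let sliced = batch.slice(0, 8);

    // Whole-array accounting still reports the full 1024-value backing buffer...
    let whole = sliced.get_array_memory_size();
    // ...while per-column slice accounting only counts the 8 visible rows.
    let visible: usize = sliced
        .columns()
        .iter()
        .map(|c| c.to_data().get_slice_memory_size().unwrap_or(0))
        .sum();
    assert!(visible < whole);
}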
|
||||
|
||||
/// Converts [BulkPart] to [Mutation] for fallback `write_bulk` implementation.
|
||||
|
||||
@@ -94,12 +94,7 @@ lazy_static! {
|
||||
|
||||
|
||||
// ------ Write related metrics
|
||||
/// Number of stalled write requests in each worker.
|
||||
pub static ref WRITE_STALL_TOTAL: IntGaugeVec = register_int_gauge_vec!(
|
||||
"greptime_mito_write_stall_total",
|
||||
"mito stalled write request in each worker",
|
||||
&[WORKER_LABEL]
|
||||
).unwrap();
|
||||
//
|
||||
/// Counter of rejected write requests.
|
||||
pub static ref WRITE_REJECT_TOTAL: IntCounter =
|
||||
register_int_counter!("greptime_mito_write_reject_total", "mito write reject total").unwrap();
|
||||
@@ -402,6 +397,7 @@ lazy_static! {
|
||||
|
||||
}
|
||||
|
||||
// Use another block to avoid reaching the recursion limit.
|
||||
lazy_static! {
|
||||
/// Counter for compaction input file size.
|
||||
pub static ref COMPACTION_INPUT_BYTES: Counter = register_counter!(
|
||||
@@ -426,6 +422,27 @@ lazy_static! {
|
||||
"greptime_mito_memtable_field_builder_count",
|
||||
"active field builder count in TimeSeriesMemtable",
|
||||
).unwrap();
|
||||
|
||||
/// Number of stalling write requests in each worker.
|
||||
pub static ref WRITE_STALLING: IntGaugeVec = register_int_gauge_vec!(
|
||||
"greptime_mito_write_stalling_count",
|
||||
"mito stalled write request in each worker",
|
||||
&[WORKER_LABEL]
|
||||
).unwrap();
|
||||
/// Total number of stalled write requests.
|
||||
pub static ref WRITE_STALL_TOTAL: IntCounter = register_int_counter!(
|
||||
"greptime_mito_write_stall_total",
|
||||
"Total number of stalled write requests"
|
||||
).unwrap();
|
||||
/// Time waiting for requests to be handled by the region worker.
|
||||
pub static ref REQUEST_WAIT_TIME: HistogramVec = register_histogram_vec!(
|
||||
"greptime_mito_request_wait_time",
|
||||
"mito request wait time before being handled by region worker",
|
||||
&[WORKER_LABEL],
|
||||
// 0.001 ~ 10000
|
||||
exponential_buckets(0.001, 10.0, 8).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Stager notifier to collect metrics.
|
||||
|
||||
@@ -39,7 +39,7 @@ use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
use crate::access_layer::AccessLayerRef;
|
||||
use crate::cache::CacheStrategy;
|
||||
use crate::config::DEFAULT_SCAN_CHANNEL_SIZE;
|
||||
use crate::config::{DEFAULT_MAX_CONCURRENT_SCAN_FILES, DEFAULT_SCAN_CHANNEL_SIZE};
|
||||
use crate::error::Result;
|
||||
use crate::memtable::MemtableRange;
|
||||
use crate::metrics::READ_SST_COUNT;
|
||||
@@ -187,6 +187,8 @@ pub(crate) struct ScanRegion {
|
||||
cache_strategy: CacheStrategy,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files: usize,
|
||||
/// Whether to ignore inverted index.
|
||||
ignore_inverted_index: bool,
|
||||
/// Whether to ignore fulltext index.
|
||||
@@ -214,6 +216,7 @@ impl ScanRegion {
|
||||
request,
|
||||
cache_strategy,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
ignore_inverted_index: false,
|
||||
ignore_fulltext_index: false,
|
||||
ignore_bloom_filter: false,
|
||||
@@ -232,6 +235,16 @@ impl ScanRegion {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets maximum number of SST files to scan concurrently.
|
||||
#[must_use]
|
||||
pub(crate) fn with_max_concurrent_scan_files(
|
||||
mut self,
|
||||
max_concurrent_scan_files: usize,
|
||||
) -> Self {
|
||||
self.max_concurrent_scan_files = max_concurrent_scan_files;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether to ignore inverted index.
|
||||
#[must_use]
|
||||
pub(crate) fn with_ignore_inverted_index(mut self, ignore: bool) -> Self {
|
||||
@@ -421,6 +434,7 @@ impl ScanRegion {
|
||||
.with_bloom_filter_index_applier(bloom_filter_applier)
|
||||
.with_fulltext_index_applier(fulltext_index_applier)
|
||||
.with_parallel_scan_channel_size(self.parallel_scan_channel_size)
|
||||
.with_max_concurrent_scan_files(self.max_concurrent_scan_files)
|
||||
.with_start_time(self.start_time)
|
||||
.with_append_mode(self.version.options.append_mode)
|
||||
.with_filter_deleted(self.filter_deleted)
|
||||
@@ -597,6 +611,8 @@ pub struct ScanInput {
|
||||
ignore_file_not_found: bool,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
pub(crate) parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently.
|
||||
pub(crate) max_concurrent_scan_files: usize,
|
||||
/// Index appliers.
|
||||
inverted_index_applier: Option<InvertedIndexApplierRef>,
|
||||
bloom_filter_index_applier: Option<BloomFilterIndexApplierRef>,
|
||||
@@ -629,6 +645,7 @@ impl ScanInput {
|
||||
cache_strategy: CacheStrategy::Disabled,
|
||||
ignore_file_not_found: false,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
inverted_index_applier: None,
|
||||
bloom_filter_index_applier: None,
|
||||
fulltext_index_applier: None,
|
||||
@@ -693,6 +710,16 @@ impl ScanInput {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets maximum number of SST files to scan concurrently.
|
||||
#[must_use]
|
||||
pub(crate) fn with_max_concurrent_scan_files(
|
||||
mut self,
|
||||
max_concurrent_scan_files: usize,
|
||||
) -> Self {
|
||||
self.max_concurrent_scan_files = max_concurrent_scan_files;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets inverted index applier.
|
||||
#[must_use]
|
||||
pub(crate) fn with_inverted_index_applier(
|
||||
|
||||
@@ -33,11 +33,11 @@ use store_api::region_engine::{PartitionRange, PrepareRequest, RegionScanner, Sc
|
||||
use store_api::storage::TimeSeriesRowSelector;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use crate::error::{PartitionOutOfRangeSnafu, Result};
|
||||
use crate::error::{PartitionOutOfRangeSnafu, Result, TooManyFilesToReadSnafu};
|
||||
use crate::read::dedup::{DedupReader, LastNonNull, LastRow};
|
||||
use crate::read::last_row::LastRowReader;
|
||||
use crate::read::merge::MergeReaderBuilder;
|
||||
use crate::read::range::RangeBuilderList;
|
||||
use crate::read::range::{RangeBuilderList, RangeMeta};
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{
|
||||
scan_file_ranges, scan_mem_ranges, PartitionMetrics, PartitionMetricsList,
|
||||
@@ -347,6 +347,40 @@ impl SeqScan {
|
||||
|
||||
metrics
|
||||
}
|
||||
|
||||
/// Finds the maximum number of files to read in a single partition range.
|
||||
fn max_files_in_partition(ranges: &[RangeMeta], partition_ranges: &[PartitionRange]) -> usize {
|
||||
partition_ranges
|
||||
.iter()
|
||||
.map(|part_range| {
|
||||
let range_meta = &ranges[part_range.identifier];
|
||||
range_meta.indices.len()
|
||||
})
|
||||
.max()
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Checks resource limit for the scanner.
|
||||
pub(crate) fn check_scan_limit(&self) -> Result<()> {
|
||||
// Check max file count limit for all partitions since we scan them in parallel.
|
||||
let total_max_files: usize = self
|
||||
.properties
|
||||
.partitions
|
||||
.iter()
|
||||
.map(|partition| Self::max_files_in_partition(&self.stream_ctx.ranges, partition))
|
||||
.sum();
|
||||
|
||||
let max_concurrent_files = self.stream_ctx.input.max_concurrent_scan_files;
|
||||
if total_max_files > max_concurrent_files {
|
||||
return TooManyFilesToReadSnafu {
|
||||
actual: total_max_files,
|
||||
max: max_concurrent_files,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
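
The limit above sums, per partition, the largest file count of any partition range, because ranges inside a partition are scanned one after another while partitions run in parallel. A tiny standalone sketch of that arithmetic with made-up numbers:

fn peak_open_files(partitions: &[Vec<usize>]) -> usize {
    partitions
        .iter()
        .map(|ranges| ranges.iter().copied().max().unwrap_or(0))
        .sum()
}

fn main() {
    // Two partitions whose ranges touch [3, 1] and [2, 2] files respectively:
    // ranges in a partition run sequentially, partitions run in parallel,
    // so the peak is max(3, 1) + max(2, 2) = 5 concurrently open files.
    assert_eq!(peak_open_files(&[vec![3, 1], vec![2, 2]]), 5);
}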
|
||||
|
||||
impl RegionScanner for SeqScan {
|
||||
@@ -372,6 +406,9 @@ impl RegionScanner for SeqScan {
|
||||
|
||||
fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
|
||||
self.properties.prepare(request);
|
||||
|
||||
self.check_scan_limit().map_err(BoxedError::new)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ use tokio::sync::Semaphore;
|
||||
|
||||
use crate::error::{
|
||||
ComputeArrowSnafu, Error, InvalidSenderSnafu, PartitionOutOfRangeSnafu, Result,
|
||||
ScanMultiTimesSnafu, ScanSeriesSnafu,
|
||||
ScanMultiTimesSnafu, ScanSeriesSnafu, TooManyFilesToReadSnafu,
|
||||
};
|
||||
use crate::read::range::RangeBuilderList;
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
@@ -201,6 +201,32 @@ impl SeriesScan {
|
||||
let chained_stream = ChainedRecordBatchStream::new(streams).map_err(BoxedError::new)?;
|
||||
Ok(Box::pin(chained_stream))
|
||||
}
|
||||
|
||||
/// Checks resource limit for the scanner.
|
||||
pub(crate) fn check_scan_limit(&self) -> Result<()> {
|
||||
// Sum the total number of files across all partitions
|
||||
let total_files: usize = self
|
||||
.properties
|
||||
.partitions
|
||||
.iter()
|
||||
.flat_map(|partition| partition.iter())
|
||||
.map(|part_range| {
|
||||
let range_meta = &self.stream_ctx.ranges[part_range.identifier];
|
||||
range_meta.indices.len()
|
||||
})
|
||||
.sum();
|
||||
|
||||
let max_concurrent_files = self.stream_ctx.input.max_concurrent_scan_files;
|
||||
if total_files > max_concurrent_files {
|
||||
return TooManyFilesToReadSnafu {
|
||||
actual: total_files,
|
||||
max: max_concurrent_files,
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn new_channel_list(num_partitions: usize) -> (SenderList, ReceiverList) {
|
||||
@@ -236,6 +262,9 @@ impl RegionScanner for SeriesScan {
|
||||
|
||||
fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
|
||||
self.properties.prepare(request);
|
||||
|
||||
self.check_scan_limit().map_err(BoxedError::new)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -242,6 +242,7 @@ impl RegionScanner for UnorderedScan {
|
||||
|
||||
fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
|
||||
self.properties.prepare(request);
|
||||
// UnorderedScan only scans one row group per partition so the resource requirement won't be too high.
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -542,6 +542,22 @@ pub(crate) struct SenderBulkRequest {
|
||||
pub(crate) region_metadata: RegionMetadataRef,
|
||||
}
|
||||
|
||||
/// Request sent to a worker, tagged with its creation time.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct WorkerRequestWithTime {
|
||||
pub(crate) request: WorkerRequest,
|
||||
pub(crate) created_at: Instant,
|
||||
}
|
||||
|
||||
impl WorkerRequestWithTime {
|
||||
pub(crate) fn new(request: WorkerRequest) -> Self {
|
||||
Self {
|
||||
request,
|
||||
created_at: Instant::now(),
|
||||
}
|
||||
}
|
||||
}
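
Below is a sketch of the send/receive pattern this wrapper enables, matching how the worker loop uses it further down in this diff. The worker-id label "0" and the free-standing async function are illustrative only, since the types involved are crate-private.

// Inside the mito2 crate (everything here is pub(crate)); the "0" label is a placeholder.
async fn demo_request_wait_time() {
    let (tx, mut rx) = tokio::sync::mpsc::channel::<WorkerRequestWithTime>(8);
    let _ = tx.send(WorkerRequestWithTime::new(WorkerRequest::Stop)).await;
    if let Some(req) = rx.recv().await {
        // Time the request spent queued before the worker loop picked it up.
        crate::metrics::REQUEST_WAIT_TIME
            .with_label_values(&["0"])
            .observe(req.created_at.elapsed().as_secs_f64());
        // ...then dispatch `req.request` exactly as before.
    }
}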
|
||||
|
||||
/// Request sent to a worker
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum WorkerRequest {
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::manifest::action::RegionEdit;
|
||||
use crate::metrics::{COMPACTION_FAILURE_COUNT, INFLIGHT_COMPACTION_COUNT};
|
||||
use crate::request::{
|
||||
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
|
||||
WorkerRequestWithTime,
|
||||
};
|
||||
|
||||
pub type RemoteJobSchedulerRef = Arc<dyn RemoteJobScheduler>;
|
||||
@@ -130,7 +131,7 @@ pub struct CompactionJobResult {
|
||||
/// DefaultNotifier is a default implementation of Notifier that sends WorkerRequest to the mito engine.
|
||||
pub(crate) struct DefaultNotifier {
|
||||
/// The sender to send WorkerRequest to the mito engine. This is used to notify the mito engine when a remote job is completed.
|
||||
pub(crate) request_sender: Sender<WorkerRequest>,
|
||||
pub(crate) request_sender: Sender<WorkerRequestWithTime>,
|
||||
}
|
||||
|
||||
impl DefaultNotifier {
|
||||
@@ -173,10 +174,10 @@ impl Notifier for DefaultNotifier {
|
||||
|
||||
if let Err(e) = self
|
||||
.request_sender
|
||||
.send(WorkerRequest::Background {
|
||||
.send(WorkerRequestWithTime::new(WorkerRequest::Background {
|
||||
region_id: result.region_id,
|
||||
notify,
|
||||
})
|
||||
}))
|
||||
.await
|
||||
{
|
||||
error!(
|
||||
|
||||
@@ -294,7 +294,7 @@ impl RowGroupSelection {
|
||||
let Some(y) = self.selection_in_rg.get(rg_id) else {
|
||||
continue;
|
||||
};
|
||||
let selection = x.selection.intersection(&y.selection);
|
||||
let selection = intersect_row_selections(&x.selection, &y.selection);
|
||||
let row_count = selection.row_count();
|
||||
let selector_len = selector_len(&selection);
|
||||
if row_count > 0 {
|
||||
@@ -423,6 +423,68 @@ impl RowGroupSelection {
|
||||
}
|
||||
}
|
||||
|
||||
/// Ported from `parquet`, but trailing rows are removed.
///
/// Combines two `RowSelection`s and returns their intersection.
/// For example:
/// self: NNYYYYNNYYNYN
/// other: NYNNNNNNY
///
/// returned: NNNNNNNNY (modified)
/// NNNNNNNNYYNYN (original)
|
||||
fn intersect_row_selections(left: &RowSelection, right: &RowSelection) -> RowSelection {
|
||||
let mut l_iter = left.iter().copied().peekable();
|
||||
let mut r_iter = right.iter().copied().peekable();
|
||||
|
||||
let iter = std::iter::from_fn(move || {
|
||||
loop {
|
||||
let l = l_iter.peek_mut();
|
||||
let r = r_iter.peek_mut();
|
||||
|
||||
match (l, r) {
|
||||
(Some(a), _) if a.row_count == 0 => {
|
||||
l_iter.next().unwrap();
|
||||
}
|
||||
(_, Some(b)) if b.row_count == 0 => {
|
||||
r_iter.next().unwrap();
|
||||
}
|
||||
(Some(l), Some(r)) => {
|
||||
return match (l.skip, r.skip) {
|
||||
// Keep both ranges
|
||||
(false, false) => {
|
||||
if l.row_count < r.row_count {
|
||||
r.row_count -= l.row_count;
|
||||
l_iter.next()
|
||||
} else {
|
||||
l.row_count -= r.row_count;
|
||||
r_iter.next()
|
||||
}
|
||||
}
|
||||
// skip at least one
|
||||
_ => {
|
||||
if l.row_count < r.row_count {
|
||||
let skip = l.row_count;
|
||||
r.row_count -= l.row_count;
|
||||
l_iter.next();
|
||||
Some(RowSelector::skip(skip))
|
||||
} else {
|
||||
let skip = r.row_count;
|
||||
l.row_count -= skip;
|
||||
r_iter.next();
|
||||
Some(RowSelector::skip(skip))
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
(None, _) => return None,
|
||||
(_, None) => return None,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
iter.collect()
|
||||
}
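
A test-style usage sketch matching the doc-comment example; the function is module-private, so this would live in the same module's tests. Only the selected row count is asserted here, since the exact selector layout after normalization is not spelled out in the diff.

#[test]
fn intersect_matches_doc_example() {
    // self:  NNYYYYNNYYNYN
    let left = RowSelection::from(vec![
        RowSelector::skip(2),
        RowSelector::select(4),
        RowSelector::skip(2),
        RowSelector::select(2),
        RowSelector::skip(1),
        RowSelector::select(1),
        RowSelector::skip(1),
    ]);
    // other: NYNNNNNNY
    let right = RowSelection::from(vec![
        RowSelector::skip(1),
        RowSelector::select(1),
        RowSelector::skip(6),
        RowSelector::select(1),
    ]);

    let out = intersect_row_selections(&left, &right);
    // Only row 8 is selected on both sides; rows after `other` ends are dropped.
    assert_eq!(out.row_count(), 1);
}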
|
||||
|
||||
/// Converts an iterator of row ranges into a `RowSelection` by creating a sequence of `RowSelector`s.
|
||||
///
|
||||
/// This function processes each range in the input and either creates a new selector or merges
|
||||
@@ -448,10 +510,6 @@ pub(crate) fn row_selection_from_row_ranges(
|
||||
last_processed_end = end;
|
||||
}
|
||||
|
||||
if last_processed_end < total_row_count {
|
||||
add_or_merge_selector(&mut selectors, total_row_count - last_processed_end, true);
|
||||
}
|
||||
|
||||
RowSelection::from(selectors)
|
||||
}
|
||||
|
||||
@@ -546,7 +604,6 @@ mod tests {
|
||||
RowSelector::select(2),
|
||||
RowSelector::skip(2),
|
||||
RowSelector::select(3),
|
||||
RowSelector::skip(2),
|
||||
]);
|
||||
assert_eq!(selection, expected);
|
||||
}
|
||||
@@ -555,7 +612,7 @@ mod tests {
|
||||
fn test_empty_range() {
|
||||
let ranges = [];
|
||||
let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
|
||||
let expected = RowSelection::from(vec![RowSelector::skip(10)]);
|
||||
let expected = RowSelection::from(vec![]);
|
||||
assert_eq!(selection, expected);
|
||||
}
|
||||
|
||||
@@ -563,11 +620,7 @@ mod tests {
|
||||
fn test_adjacent_ranges() {
|
||||
let ranges = [1..2, 2..3];
|
||||
let selection = row_selection_from_row_ranges(ranges.iter().cloned(), 10);
|
||||
let expected = RowSelection::from(vec![
|
||||
RowSelector::skip(1),
|
||||
RowSelector::select(2),
|
||||
RowSelector::skip(7),
|
||||
]);
|
||||
let expected = RowSelection::from(vec![RowSelector::skip(1), RowSelector::select(2)]);
|
||||
assert_eq!(selection, expected);
|
||||
}
|
||||
|
||||
@@ -580,7 +633,6 @@ mod tests {
|
||||
RowSelector::select(1),
|
||||
RowSelector::skip(98),
|
||||
RowSelector::select(1),
|
||||
RowSelector::skip(10139),
|
||||
]);
|
||||
assert_eq!(selection, expected);
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ use crate::error::Result;
|
||||
use crate::flush::FlushScheduler;
|
||||
use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
|
||||
use crate::region::{ManifestContext, ManifestContextRef, RegionLeaderState, RegionRoleState};
|
||||
use crate::request::WorkerRequest;
|
||||
use crate::request::{WorkerRequest, WorkerRequestWithTime};
|
||||
use crate::schedule::scheduler::{Job, LocalScheduler, Scheduler, SchedulerRef};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::puffin_manager::PuffinManagerFactory;
|
||||
@@ -85,7 +85,7 @@ impl SchedulerEnv {
|
||||
/// Creates a new compaction scheduler.
|
||||
pub(crate) fn mock_compaction_scheduler(
|
||||
&self,
|
||||
request_sender: Sender<WorkerRequest>,
|
||||
request_sender: Sender<WorkerRequestWithTime>,
|
||||
) -> CompactionScheduler {
|
||||
let scheduler = self.get_scheduler();
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ use common_runtime::JoinHandle;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use futures::future::try_join_all;
|
||||
use object_store::manager::ObjectStoreManagerRef;
|
||||
use prometheus::IntGauge;
|
||||
use prometheus::{Histogram, IntGauge};
|
||||
use rand::{rng, Rng};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::logstore::LogStore;
|
||||
@@ -58,11 +58,11 @@ use crate::error;
|
||||
use crate::error::{CreateDirSnafu, JoinSnafu, Result, WorkerStoppedSnafu};
|
||||
use crate::flush::{FlushScheduler, WriteBufferManagerImpl, WriteBufferManagerRef};
|
||||
use crate::memtable::MemtableBuilderProvider;
|
||||
use crate::metrics::{REGION_COUNT, WRITE_STALL_TOTAL};
|
||||
use crate::metrics::{REGION_COUNT, REQUEST_WAIT_TIME, WRITE_STALLING};
|
||||
use crate::region::{MitoRegionRef, OpeningRegions, OpeningRegionsRef, RegionMap, RegionMapRef};
|
||||
use crate::request::{
|
||||
BackgroundNotify, DdlRequest, SenderBulkRequest, SenderDdlRequest, SenderWriteRequest,
|
||||
WorkerRequest,
|
||||
WorkerRequest, WorkerRequestWithTime,
|
||||
};
|
||||
use crate::schedule::scheduler::{LocalScheduler, SchedulerRef};
|
||||
use crate::sst::file::FileId;
|
||||
@@ -469,8 +469,9 @@ impl<S: LogStore> WorkerStarter<S> {
|
||||
last_periodical_check_millis: now,
|
||||
flush_sender: self.flush_sender,
|
||||
flush_receiver: self.flush_receiver,
|
||||
stalled_count: WRITE_STALL_TOTAL.with_label_values(&[&id_string]),
|
||||
stalling_count: WRITE_STALLING.with_label_values(&[&id_string]),
|
||||
region_count: REGION_COUNT.with_label_values(&[&id_string]),
|
||||
request_wait_time: REQUEST_WAIT_TIME.with_label_values(&[&id_string]),
|
||||
region_edit_queues: RegionEditQueues::default(),
|
||||
schema_metadata_manager: self.schema_metadata_manager,
|
||||
};
|
||||
@@ -498,7 +499,7 @@ pub(crate) struct RegionWorker {
|
||||
/// The opening regions.
|
||||
opening_regions: OpeningRegionsRef,
|
||||
/// Request sender.
|
||||
sender: Sender<WorkerRequest>,
|
||||
sender: Sender<WorkerRequestWithTime>,
|
||||
/// Handle to the worker thread.
|
||||
handle: Mutex<Option<JoinHandle<()>>>,
|
||||
/// Whether to run the worker thread.
|
||||
@@ -509,7 +510,8 @@ impl RegionWorker {
|
||||
/// Submits request to background worker thread.
|
||||
async fn submit_request(&self, request: WorkerRequest) -> Result<()> {
|
||||
ensure!(self.is_running(), WorkerStoppedSnafu { id: self.id });
|
||||
if self.sender.send(request).await.is_err() {
|
||||
let request_with_time = WorkerRequestWithTime::new(request);
|
||||
if self.sender.send(request_with_time).await.is_err() {
|
||||
warn!(
|
||||
"Worker {} is already exited but the running flag is still true",
|
||||
self.id
|
||||
@@ -531,7 +533,12 @@ impl RegionWorker {
|
||||
info!("Stop region worker {}", self.id);
|
||||
|
||||
self.set_running(false);
|
||||
if self.sender.send(WorkerRequest::Stop).await.is_err() {
|
||||
if self
|
||||
.sender
|
||||
.send(WorkerRequestWithTime::new(WorkerRequest::Stop))
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
warn!("Worker {} is already exited before stop", self.id);
|
||||
}
|
||||
|
||||
@@ -669,9 +676,9 @@ struct RegionWorkerLoop<S> {
|
||||
/// Regions that are opening.
|
||||
opening_regions: OpeningRegionsRef,
|
||||
/// Request sender.
|
||||
sender: Sender<WorkerRequest>,
|
||||
sender: Sender<WorkerRequestWithTime>,
|
||||
/// Request receiver.
|
||||
receiver: Receiver<WorkerRequest>,
|
||||
receiver: Receiver<WorkerRequestWithTime>,
|
||||
/// WAL of the engine.
|
||||
wal: Wal<S>,
|
||||
/// Manages object stores for manifest and SSTs.
|
||||
@@ -706,10 +713,12 @@ struct RegionWorkerLoop<S> {
|
||||
flush_sender: watch::Sender<()>,
|
||||
/// Watch channel receiver to wait for background flush job.
|
||||
flush_receiver: watch::Receiver<()>,
|
||||
/// Gauge of stalled request count.
|
||||
stalled_count: IntGauge,
|
||||
/// Gauge of stalling request count.
|
||||
stalling_count: IntGauge,
|
||||
/// Gauge of regions in the worker.
|
||||
region_count: IntGauge,
|
||||
/// Histogram of request wait time for this worker.
|
||||
request_wait_time: Histogram,
|
||||
/// Queues for region edit requests.
|
||||
region_edit_queues: RegionEditQueues,
|
||||
/// Database level metadata manager.
|
||||
@@ -749,10 +758,16 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
tokio::select! {
|
||||
request_opt = self.receiver.recv() => {
|
||||
match request_opt {
|
||||
Some(request) => match request {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
_ => general_req_buffer.push(request),
|
||||
Some(request_with_time) => {
|
||||
// Observe the wait time
|
||||
let wait_time = request_with_time.created_at.elapsed();
|
||||
self.request_wait_time.observe(wait_time.as_secs_f64());
|
||||
|
||||
match request_with_time.request {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
req => general_req_buffer.push(req),
|
||||
}
|
||||
},
|
||||
// The channel is disconnected.
|
||||
None => break,
|
||||
@@ -791,11 +806,17 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
for _ in 1..self.config.worker_request_batch_size {
|
||||
// We have received one request so we start from 1.
|
||||
match self.receiver.try_recv() {
|
||||
Ok(req) => match req {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
_ => general_req_buffer.push(req),
|
||||
},
|
||||
Ok(request_with_time) => {
|
||||
// Observe the wait time
|
||||
let wait_time = request_with_time.created_at.elapsed();
|
||||
self.request_wait_time.observe(wait_time.as_secs_f64());
|
||||
|
||||
match request_with_time.request {
|
||||
WorkerRequest::Write(sender_req) => write_req_buffer.push(sender_req),
|
||||
WorkerRequest::Ddl(sender_req) => ddl_req_buffer.push(sender_req),
|
||||
req => general_req_buffer.push(req),
|
||||
}
|
||||
}
|
||||
// We still need to handle remaining requests.
|
||||
Err(_) => break,
|
||||
}
|
||||
|
||||
@@ -15,15 +15,11 @@
|
||||
//! Handles bulk insert requests.
|
||||
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{
|
||||
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
||||
TimestampSecondArray,
|
||||
};
|
||||
use datatypes::arrow::datatypes::{DataType, TimeUnit};
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_request::RegionBulkInsertsRequest;
|
||||
|
||||
use crate::error::InconsistentTimestampLengthSnafu;
|
||||
use crate::memtable::bulk::part::BulkPart;
|
||||
use crate::request::{OptionOutputTx, SenderBulkRequest};
|
||||
use crate::worker::RegionWorkerLoop;
|
||||
@@ -41,6 +37,10 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
.with_label_values(&["process_bulk_req"])
|
||||
.start_timer();
|
||||
let batch = request.payload;
|
||||
if batch.num_rows() == 0 {
|
||||
sender.send(Ok(0));
|
||||
return;
|
||||
}
|
||||
|
||||
let Some((ts_index, ts)) = batch
|
||||
.schema()
|
||||
@@ -60,55 +60,23 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
return;
|
||||
};
|
||||
|
||||
let DataType::Timestamp(unit, _) = ts.data_type() else {
|
||||
// safety: ts data type must be a timestamp type.
|
||||
unreachable!()
|
||||
};
|
||||
if batch.num_rows() != ts.len() {
|
||||
sender.send(
|
||||
InconsistentTimestampLengthSnafu {
|
||||
expected: batch.num_rows(),
|
||||
actual: ts.len(),
|
||||
}
|
||||
.fail(),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
let (min_ts, max_ts) = match unit {
|
||||
TimeUnit::Second => {
|
||||
let ts = ts.as_any().downcast_ref::<TimestampSecondArray>().unwrap();
|
||||
(
|
||||
//safety: ts array must contain at least one row so this won't return None.
|
||||
arrow::compute::min(ts).unwrap(),
|
||||
arrow::compute::max(ts).unwrap(),
|
||||
)
|
||||
}
|
||||
// safety: ts data type must be a timestamp type.
|
||||
let (ts_primitive, _) = datatypes::timestamp::timestamp_array_to_primitive(ts).unwrap();
|
||||
|
||||
TimeUnit::Millisecond => {
|
||||
let ts = ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
(
|
||||
//safety: ts array must contain at least one row so this won't return None.
|
||||
arrow::compute::min(ts).unwrap(),
|
||||
arrow::compute::max(ts).unwrap(),
|
||||
)
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
let ts = ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMicrosecondArray>()
|
||||
.unwrap();
|
||||
(
|
||||
//safety: ts array must contain at least one row so this won't return None.
|
||||
arrow::compute::min(ts).unwrap(),
|
||||
arrow::compute::max(ts).unwrap(),
|
||||
)
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
let ts = ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampNanosecondArray>()
|
||||
.unwrap();
|
||||
(
|
||||
//safety: ts array must contain at least one row so this won't return None.
|
||||
arrow::compute::min(ts).unwrap(),
|
||||
arrow::compute::max(ts).unwrap(),
|
||||
)
|
||||
}
|
||||
};
|
||||
// safety: we've checked ts.len() == batch.num_rows() and batch is not empty
|
||||
let min_ts = arrow::compute::min(&ts_primitive).unwrap();
|
||||
let max_ts = arrow::compute::max(&ts_primitive).unwrap();
|
||||
|
||||
let part = BulkPart {
|
||||
batch,
|
||||
|
||||
@@ -34,7 +34,7 @@ use crate::region::version::VersionBuilder;
|
||||
use crate::region::{MitoRegionRef, RegionLeaderState, RegionRoleState};
|
||||
use crate::request::{
|
||||
BackgroundNotify, OptionOutputTx, RegionChangeResult, RegionEditRequest, RegionEditResult,
|
||||
RegionSyncRequest, TruncateResult, WorkerRequest,
|
||||
RegionSyncRequest, TruncateResult, WorkerRequest, WorkerRequestWithTime,
|
||||
};
|
||||
use crate::sst::location;
|
||||
use crate::worker::{RegionWorkerLoop, WorkerListener};
|
||||
@@ -230,7 +230,10 @@ impl<S> RegionWorkerLoop<S> {
|
||||
}),
|
||||
};
|
||||
// We don't set state back as the worker loop is already exited.
|
||||
if let Err(res) = request_sender.send(notify).await {
|
||||
if let Err(res) = request_sender
|
||||
.send(WorkerRequestWithTime::new(notify))
|
||||
.await
|
||||
{
|
||||
warn!(
|
||||
"Failed to send region edit result back to the worker, region_id: {}, res: {:?}",
|
||||
region_id, res
|
||||
@@ -318,10 +321,10 @@ impl<S> RegionWorkerLoop<S> {
|
||||
truncated_sequence: truncate.truncated_sequence,
|
||||
};
|
||||
let _ = request_sender
|
||||
.send(WorkerRequest::Background {
|
||||
.send(WorkerRequestWithTime::new(WorkerRequest::Background {
|
||||
region_id: truncate.region_id,
|
||||
notify: BackgroundNotify::Truncate(truncate_result),
|
||||
})
|
||||
}))
|
||||
.await
|
||||
.inspect_err(|_| warn!("failed to send truncate result"));
|
||||
});
|
||||
@@ -364,7 +367,10 @@ impl<S> RegionWorkerLoop<S> {
|
||||
.on_notify_region_change_result_begin(region.region_id)
|
||||
.await;
|
||||
|
||||
if let Err(res) = request_sender.send(notify).await {
|
||||
if let Err(res) = request_sender
|
||||
.send(WorkerRequestWithTime::new(notify))
|
||||
.await
|
||||
{
|
||||
warn!(
|
||||
"Failed to send region change result back to the worker, region_id: {}, res: {:?}",
|
||||
region.region_id, res
|
||||
|
||||
@@ -27,7 +27,9 @@ use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{InvalidRequestSnafu, RegionStateSnafu, RejectWriteSnafu, Result};
|
||||
use crate::metrics;
|
||||
use crate::metrics::{WRITE_REJECT_TOTAL, WRITE_ROWS_TOTAL, WRITE_STAGE_ELAPSED};
|
||||
use crate::metrics::{
|
||||
WRITE_REJECT_TOTAL, WRITE_ROWS_TOTAL, WRITE_STAGE_ELAPSED, WRITE_STALL_TOTAL,
|
||||
};
|
||||
use crate::region::{RegionLeaderState, RegionRoleState};
|
||||
use crate::region_write_ctx::RegionWriteCtx;
|
||||
use crate::request::{SenderBulkRequest, SenderWriteRequest, WriteRequest};
|
||||
@@ -57,8 +59,9 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
}
|
||||
|
||||
if self.write_buffer_manager.should_stall() && allow_stall {
|
||||
self.stalled_count
|
||||
.add((write_requests.len() + bulk_requests.len()) as i64);
|
||||
let stalled_count = (write_requests.len() + bulk_requests.len()) as i64;
|
||||
self.stalling_count.add(stalled_count);
|
||||
WRITE_STALL_TOTAL.inc_by(stalled_count as u64);
|
||||
self.stalled_requests.append(write_requests, bulk_requests);
|
||||
self.listener.on_write_stall();
|
||||
return;
|
||||
@@ -161,7 +164,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
pub(crate) async fn handle_stalled_requests(&mut self) {
|
||||
// Handle stalled requests.
|
||||
let stalled = std::mem::take(&mut self.stalled_requests);
|
||||
self.stalled_count.sub(stalled.stalled_count() as i64);
|
||||
self.stalling_count.sub(stalled.stalled_count() as i64);
|
||||
// We already stalled these requests, don't stall them again.
|
||||
for (_, (_, mut requests, mut bulk)) in stalled.requests {
|
||||
self.handle_write_requests(&mut requests, &mut bulk, false)
|
||||
@@ -172,7 +175,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
/// Rejects all stalled requests.
|
||||
pub(crate) fn reject_stalled_requests(&mut self) {
|
||||
let stalled = std::mem::take(&mut self.stalled_requests);
|
||||
self.stalled_count.sub(stalled.stalled_count() as i64);
|
||||
self.stalling_count.sub(stalled.stalled_count() as i64);
|
||||
for (_, (_, mut requests, mut bulk)) in stalled.requests {
|
||||
reject_write_requests(&mut requests, &mut bulk);
|
||||
}
|
||||
@@ -182,7 +185,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
pub(crate) fn reject_region_stalled_requests(&mut self, region_id: &RegionId) {
|
||||
debug!("Rejects stalled requests for region {}", region_id);
|
||||
let (mut requests, mut bulk) = self.stalled_requests.remove(region_id);
|
||||
self.stalled_count.sub((requests.len() + bulk.len()) as i64);
|
||||
self.stalling_count
|
||||
.sub((requests.len() + bulk.len()) as i64);
|
||||
reject_write_requests(&mut requests, &mut bulk);
|
||||
}
|
||||
|
||||
@@ -190,7 +194,8 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
pub(crate) async fn handle_region_stalled_requests(&mut self, region_id: &RegionId) {
|
||||
debug!("Handles stalled requests for region {}", region_id);
|
||||
let (mut requests, mut bulk) = self.stalled_requests.remove(region_id);
|
||||
self.stalled_count.sub((requests.len() + bulk.len()) as i64);
|
||||
self.stalling_count
|
||||
.sub((requests.len() + bulk.len()) as i64);
|
||||
self.handle_write_requests(&mut requests, &mut bulk, true)
|
||||
.await;
|
||||
}
|
||||
@@ -251,7 +256,8 @@ impl<S> RegionWorkerLoop<S> {
                     "Region {} is altering, add request to pending writes",
                     region.region_id
                 );
-                self.stalled_count.add(1);
+                self.stalling_count.add(1);
+                WRITE_STALL_TOTAL.inc();
                 self.stalled_requests.push(sender_req);
                 continue;
             }
@@ -353,7 +359,8 @@ impl<S> RegionWorkerLoop<S> {
                     "Region {} is altering, add request to pending writes",
                     region.region_id
                 );
-                self.stalled_count.add(1);
+                self.stalling_count.add(1);
+                WRITE_STALL_TOTAL.inc();
                 self.stalled_requests.push_bulk(bulk_req);
                 continue;
             }

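The hunks above replace the worker's internal `stalled_count` gauge with `stalling_count` and pair it with a cumulative `WRITE_STALL_TOTAL` counter, so both the number of currently stalled requests and the total number ever stalled are visible. As a reference for that gauge-plus-counter pattern only, here is a minimal sketch using the `prometheus` crate; the metric names, help strings, and registry wiring are illustrative assumptions, not the definitions in `crate::metrics`.

use prometheus::{IntCounter, IntGauge, Registry};

/// Illustrative stall accounting: the gauge tracks requests currently stalled
/// (it goes up and down), the counter tracks how many were ever stalled.
struct StallMetrics {
    stalling: IntGauge,
    stall_total: IntCounter,
}

impl StallMetrics {
    fn new(registry: &Registry) -> prometheus::Result<Self> {
        let stalling = IntGauge::new("example_stalling_count", "requests currently stalled")?;
        let stall_total = IntCounter::new("example_write_stall_total", "total stalled requests")?;
        registry.register(Box::new(stalling.clone()))?;
        registry.register(Box::new(stall_total.clone()))?;
        Ok(Self { stalling, stall_total })
    }

    /// Called when `n` requests enter the stalled queue.
    fn on_stall(&self, n: usize) {
        self.stalling.add(n as i64);
        self.stall_total.inc_by(n as u64);
    }

    /// Called when `n` stalled requests are re-handled or rejected.
    fn on_drain(&self, n: usize) {
        self.stalling.sub(n as i64);
    }
}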
@@ -20,11 +20,7 @@ use api::v1::region::{
     bulk_insert_request, region_request, BulkInsertRequest, RegionRequest, RegionRequestHeader,
 };
 use api::v1::ArrowIpc;
-use arrow::array::{
-    Array, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
-    TimestampSecondArray,
-};
-use arrow::datatypes::{DataType, Int64Type, TimeUnit};
+use arrow::array::Array;
 use arrow::record_batch::RecordBatch;
 use common_base::AffectedRows;
 use common_grpc::flight::{FlightDecoder, FlightEncoder, FlightMessage};
@@ -62,6 +58,10 @@ impl Inserter {
         };
         decode_timer.observe_duration();

+        if record_batch.num_rows() == 0 {
+            return Ok(0);
+        }
+
         // notify flownode to update dirty timestamps if flow is configured.
         self.maybe_update_flow_dirty_window(table_info, record_batch.clone());

@@ -155,6 +155,9 @@ impl Inserter {
         let mut raw_data_bytes = None;
         for (peer, masks) in mask_per_datanode {
             for (region_id, mask) in masks {
+                if mask.select_none() {
+                    continue;
+                }
                 let rb = record_batch.clone();
                 let schema_bytes = schema_bytes.clone();
                 let node_manager = self.node_manager.clone();
@@ -304,32 +307,11 @@ fn extract_timestamps(rb: &RecordBatch, timestamp_index_name: &str) -> error::Re
     if rb.num_rows() == 0 {
         return Ok(vec![]);
     }
-    let primitive = match ts_col.data_type() {
-        DataType::Timestamp(unit, _) => match unit {
-            TimeUnit::Second => ts_col
-                .as_any()
-                .downcast_ref::<TimestampSecondArray>()
-                .unwrap()
-                .reinterpret_cast::<Int64Type>(),
-            TimeUnit::Millisecond => ts_col
-                .as_any()
-                .downcast_ref::<TimestampMillisecondArray>()
-                .unwrap()
-                .reinterpret_cast::<Int64Type>(),
-            TimeUnit::Microsecond => ts_col
-                .as_any()
-                .downcast_ref::<TimestampMicrosecondArray>()
-                .unwrap()
-                .reinterpret_cast::<Int64Type>(),
-            TimeUnit::Nanosecond => ts_col
-                .as_any()
-                .downcast_ref::<TimestampNanosecondArray>()
-                .unwrap()
-                .reinterpret_cast::<Int64Type>(),
-        },
-        t => {
-            return error::InvalidTimeIndexTypeSnafu { ty: t.clone() }.fail();
-        }
-    };
+    let (primitive, _) =
+        datatypes::timestamp::timestamp_array_to_primitive(ts_col).with_context(|| {
+            error::InvalidTimeIndexTypeSnafu {
+                ty: ts_col.data_type().clone(),
+            }
+        })?;
     Ok(primitive.iter().flatten().collect())
 }

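The per-unit match removed above is folded into a shared `timestamp_array_to_primitive` helper. As a reminder of the arrow mechanics such a helper wraps (downcast to the concrete timestamp array, then `reinterpret_cast` the same i64 storage as `Int64Type`), here is a trimmed, self-contained sketch; it covers only two units and is not the helper's actual implementation.

use std::sync::Arc;

use arrow::array::{Array, ArrayRef, TimestampMillisecondArray, TimestampSecondArray};
use arrow::datatypes::{DataType, Int64Type, TimeUnit};

/// Reinterpret a timestamp column as its underlying i64 values, keeping the unit.
/// Returns None for units this trimmed sketch does not cover.
fn timestamp_values(ts_col: &ArrayRef) -> Option<Vec<i64>> {
    let primitive = match ts_col.data_type() {
        DataType::Timestamp(TimeUnit::Second, _) => ts_col
            .as_any()
            .downcast_ref::<TimestampSecondArray>()?
            .reinterpret_cast::<Int64Type>(),
        DataType::Timestamp(TimeUnit::Millisecond, _) => ts_col
            .as_any()
            .downcast_ref::<TimestampMillisecondArray>()?
            .reinterpret_cast::<Int64Type>(),
        _ => return None,
    };
    // Null slots are skipped, mirroring the `iter().flatten()` collection above.
    Some(primitive.iter().flatten().collect())
}

fn main() {
    let col: ArrayRef = Arc::new(TimestampMillisecondArray::from(vec![Some(1_000), None, Some(3_000)]));
    assert_eq!(timestamp_values(&col), Some(vec![1_000, 3_000]));
}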
@@ -229,6 +229,7 @@ impl DispatchedTo {
 pub enum PipelineExecOutput {
     Transformed(TransformedOutput),
     DispatchedTo(DispatchedTo, Value),
+    Filtered,
 }

 #[derive(Debug)]
@@ -309,6 +310,10 @@ impl Pipeline {
         // process
         for processor in self.processors.iter() {
             val = processor.exec_mut(val)?;
+            if val.is_null() {
+                // line is filtered
+                return Ok(PipelineExecOutput::Filtered);
+            }
         }

         // dispatch, fast return if matched
@@ -333,9 +338,9 @@
                         table_suffix,
                     }));
                 }
-                // continue v2 process, check ts column and set the rest fields with auto-transform
+                // continue v2 process, and set the rest fields with auto-transform
+                // if transformer presents, then ts has been set
-                values_to_row(schema_info, val, pipeline_ctx, Some(values))?
+                values_to_row(schema_info, val, pipeline_ctx, Some(values), false)?
             }
             TransformerMode::AutoTransform(ts_name, time_unit) => {
                 // infer ts from the context
@@ -347,7 +352,7 @@
                 ));
                 let n_ctx =
                     PipelineContext::new(&def, pipeline_ctx.pipeline_param, pipeline_ctx.channel);
-                values_to_row(schema_info, val, &n_ctx, None)?
+                values_to_row(schema_info, val, &n_ctx, None, true)?
             }
         };

@@ -525,9 +530,6 @@ transform:
             .into_transformed()
             .unwrap();

-        // println!("[DEBUG]schema_info: {:?}", schema_info.schema);
-        // println!("[DEBUG]re: {:?}", result.0.values);
-
         assert_eq!(schema_info.schema.len(), result.0.values.len());
         let test = vec![
             (
@@ -19,6 +19,7 @@ pub mod decolorize;
 pub mod digest;
 pub mod dissect;
 pub mod epoch;
+pub mod filter;
 pub mod gsub;
 pub mod join;
 pub mod json_parse;
@@ -54,6 +55,7 @@ use crate::error::{
     Result, UnsupportedProcessorSnafu,
 };
 use crate::etl::field::{Field, Fields};
+use crate::etl::processor::filter::FilterProcessor;
 use crate::etl::processor::json_parse::JsonParseProcessor;
 use crate::etl::processor::select::SelectProcessor;
 use crate::etl::processor::simple_extract::SimpleExtractProcessor;
@@ -146,6 +148,7 @@ pub enum ProcessorKind {
     Digest(DigestProcessor),
     Select(SelectProcessor),
     Vrl(VrlProcessor),
+    Filter(FilterProcessor),
 }

 #[derive(Debug, Default)]
@@ -226,6 +229,7 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorKind> {
         }
         vrl::PROCESSOR_VRL => ProcessorKind::Vrl(VrlProcessor::try_from(value)?),
         select::PROCESSOR_SELECT => ProcessorKind::Select(SelectProcessor::try_from(value)?),
+        filter::PROCESSOR_FILTER => ProcessorKind::Filter(FilterProcessor::try_from(value)?),
         _ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
     };

src/pipeline/src/etl/processor/filter.rs (new file, 242 lines)
@@ -0,0 +1,242 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use ahash::{HashSet, HashSetExt};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{
|
||||
Error, KeyMustBeStringSnafu, ProcessorExpectStringSnafu, ProcessorMissingFieldSnafu, Result,
|
||||
ValueMustBeMapSnafu,
|
||||
};
|
||||
use crate::etl::field::Fields;
|
||||
use crate::etl::processor::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, yaml_strings, FIELDS_NAME, FIELD_NAME,
|
||||
};
|
||||
use crate::{Processor, Value};
|
||||
|
||||
pub(crate) const PROCESSOR_FILTER: &str = "filter";
|
||||
|
||||
const MATCH_MODE_NAME: &str = "mode";
|
||||
const MATCH_OP_NAME: &str = "match_op";
|
||||
const CASE_INSENSITIVE_NAME: &str = "case_insensitive";
|
||||
const TARGETS_NAME: &str = "targets";
|
||||
|
||||
#[derive(Debug)]
|
||||
enum MatchMode {
|
||||
SimpleMatch(MatchOp),
|
||||
}
|
||||
|
||||
impl Default for MatchMode {
|
||||
fn default() -> Self {
|
||||
Self::SimpleMatch(MatchOp::default())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
enum MatchOp {
|
||||
#[default]
|
||||
In,
|
||||
NotIn,
|
||||
}
|
||||
|
||||
/// Filter out the whole line if matches.
|
||||
/// Ultimately it's a condition check, maybe we can use VRL to do more complex check.
|
||||
/// Implement simple string match for now. Can be extended later.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct FilterProcessor {
|
||||
fields: Fields,
|
||||
mode: MatchMode,
|
||||
case_insensitive: bool,
|
||||
targets: HashSet<String>,
|
||||
}
|
||||
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for FilterProcessor {
|
||||
type Error = Error;
|
||||
|
||||
// match mode can be extended in the future
|
||||
#[allow(clippy::single_match)]
|
||||
fn try_from(value: &yaml_rust::yaml::Hash) -> std::result::Result<Self, Self::Error> {
|
||||
let mut fields = Fields::default();
|
||||
let mut mode = MatchMode::default();
|
||||
let mut op = MatchOp::default();
|
||||
let mut case_insensitive = true;
|
||||
let mut targets = HashSet::new();
|
||||
|
||||
for (k, v) in value.iter() {
|
||||
let key = k
|
||||
.as_str()
|
||||
.with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
|
||||
match key {
|
||||
FIELD_NAME => fields = Fields::one(yaml_new_field(v, FIELD_NAME)?),
|
||||
FIELDS_NAME => fields = yaml_new_fields(v, FIELDS_NAME)?,
|
||||
MATCH_MODE_NAME => match yaml_string(v, MATCH_MODE_NAME)?.as_str() {
|
||||
"simple" => mode = MatchMode::SimpleMatch(MatchOp::In),
|
||||
_ => {}
|
||||
},
|
||||
MATCH_OP_NAME => match yaml_string(v, MATCH_OP_NAME)?.as_str() {
|
||||
"in" => op = MatchOp::In,
|
||||
"not_in" => op = MatchOp::NotIn,
|
||||
_ => {}
|
||||
},
|
||||
CASE_INSENSITIVE_NAME => case_insensitive = yaml_bool(v, CASE_INSENSITIVE_NAME)?,
|
||||
TARGETS_NAME => {
|
||||
yaml_strings(v, TARGETS_NAME)?
|
||||
.into_iter()
|
||||
.filter(|s| !s.is_empty())
|
||||
.for_each(|s| {
|
||||
targets.insert(s);
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if matches!(mode, MatchMode::SimpleMatch(_)) {
|
||||
mode = MatchMode::SimpleMatch(op);
|
||||
}
|
||||
|
||||
if targets.is_empty() {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: PROCESSOR_FILTER,
|
||||
field: TARGETS_NAME.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
if case_insensitive {
|
||||
targets = targets.into_iter().map(|s| s.to_lowercase()).collect();
|
||||
}
|
||||
|
||||
Ok(FilterProcessor {
|
||||
fields,
|
||||
mode,
|
||||
case_insensitive,
|
||||
targets,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl FilterProcessor {
|
||||
fn match_target(&self, input: String) -> bool {
|
||||
let input = if self.case_insensitive {
|
||||
input.to_lowercase()
|
||||
} else {
|
||||
input
|
||||
};
|
||||
|
||||
match &self.mode {
|
||||
MatchMode::SimpleMatch(op) => match op {
|
||||
MatchOp::In => self.targets.contains(&input),
|
||||
MatchOp::NotIn => !self.targets.contains(&input),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Processor for FilterProcessor {
|
||||
fn kind(&self) -> &str {
|
||||
PROCESSOR_FILTER
|
||||
}
|
||||
|
||||
fn ignore_missing(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn exec_mut(&self, mut val: Value) -> Result<Value> {
|
||||
let v_map = val.as_map_mut().context(ValueMustBeMapSnafu)?;
|
||||
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_field();
|
||||
match v_map.get(index) {
|
||||
Some(Value::String(s)) => {
|
||||
if self.match_target(s.clone()) {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
}
|
||||
Some(v) => {
|
||||
return ProcessorExpectStringSnafu {
|
||||
processor: self.kind(),
|
||||
v: v.clone(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(val)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use ahash::HashSet;
|
||||
|
||||
use crate::etl::field::{Field, Fields};
|
||||
use crate::etl::processor::filter::{FilterProcessor, MatchMode, MatchOp};
|
||||
use crate::{Map, Processor, Value};
|
||||
|
||||
#[test]
|
||||
fn test_eq() {
|
||||
let processor = FilterProcessor {
|
||||
fields: Fields::one(Field::new("name", None)),
|
||||
mode: MatchMode::SimpleMatch(MatchOp::In),
|
||||
case_insensitive: false,
|
||||
targets: HashSet::from_iter(vec!["John".to_string()]),
|
||||
};
|
||||
|
||||
let val = Value::Map(Map::one("name", Value::String("John".to_string())));
|
||||
|
||||
let result = processor.exec_mut(val).unwrap();
|
||||
assert_eq!(result, Value::Null);
|
||||
|
||||
let val = Value::Map(Map::one("name", Value::String("Wick".to_string())));
|
||||
let expect = val.clone();
|
||||
let result = processor.exec_mut(val).unwrap();
|
||||
assert_eq!(result, expect);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ne() {
|
||||
let processor = FilterProcessor {
|
||||
fields: Fields::one(Field::new("name", None)),
|
||||
mode: MatchMode::SimpleMatch(MatchOp::NotIn),
|
||||
case_insensitive: false,
|
||||
targets: HashSet::from_iter(vec!["John".to_string()]),
|
||||
};
|
||||
|
||||
let val = Value::Map(Map::one("name", Value::String("John".to_string())));
|
||||
let expect = val.clone();
|
||||
let result = processor.exec_mut(val).unwrap();
|
||||
assert_eq!(result, expect);
|
||||
|
||||
let val = Value::Map(Map::one("name", Value::String("Wick".to_string())));
|
||||
let result = processor.exec_mut(val).unwrap();
|
||||
assert_eq!(result, Value::Null);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_case() {
|
||||
let processor = FilterProcessor {
|
||||
fields: Fields::one(Field::new("name", None)),
|
||||
mode: MatchMode::SimpleMatch(MatchOp::In),
|
||||
case_insensitive: true,
|
||||
targets: HashSet::from_iter(vec!["john".to_string()]),
|
||||
};
|
||||
|
||||
let val = Value::Map(Map::one("name", Value::String("JoHN".to_string())));
|
||||
let result = processor.exec_mut(val).unwrap();
|
||||
assert_eq!(result, Value::Null);
|
||||
}
|
||||
}
|
||||
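For orientation on how this processor is declared, the sketch below builds a `FilterProcessor` from a pipeline YAML fragment the same way `parse_processor` in `mod.rs` hands the value under the `filter` key to `TryFrom`. The YAML keys come from the constants defined above (`field`, `match_op`, `case_insensitive`, `targets`); the loader scaffolding around it is an assumption for illustration, not project code.

use yaml_rust::YamlLoader;

use crate::error::Error;
use crate::etl::processor::filter::FilterProcessor;

// Hypothetical pipeline fragment exercising the new processor.
const FILTER_YAML: &str = r#"
filter:
  field: name
  match_op: in
  case_insensitive: true
  targets:
    - john
"#;

fn build_filter() -> Result<FilterProcessor, Error> {
    let docs = YamlLoader::load_from_str(FILTER_YAML).expect("valid yaml");
    // The processor configuration is the map under the `filter` key.
    let hash = docs[0]["filter"].as_hash().expect("filter must be a map");
    FilterProcessor::try_from(hash)
}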
@@ -420,15 +420,17 @@ pub(crate) fn values_to_row(
     values: Value,
     pipeline_ctx: &PipelineContext<'_>,
     row: Option<Vec<GreptimeValue>>,
+    need_calc_ts: bool,
 ) -> Result<Row> {
     let mut row: Vec<GreptimeValue> =
         row.unwrap_or_else(|| Vec::with_capacity(schema_info.schema.len()));
     let custom_ts = pipeline_ctx.pipeline_definition.get_custom_ts();

-    // calculate timestamp value based on the channel
-    let ts = calc_ts(pipeline_ctx, &values)?;
-
-    row.push(GreptimeValue { value_data: ts });
+    if need_calc_ts {
+        // calculate timestamp value based on the channel
+        let ts = calc_ts(pipeline_ctx, &values)?;
+        row.push(GreptimeValue { value_data: ts });
+    }

     row.resize(schema_info.schema.len(), GreptimeValue { value_data: None });

@@ -608,7 +610,7 @@ fn identity_pipeline_inner(
         skip_error
     );
     let row = unwrap_or_continue_if_err!(
-        values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None),
+        values_to_row(&mut schema_info, pipeline_map, pipeline_ctx, None, true),
        skip_error
    );

@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod absent;
 mod empty_metric;
 mod histogram_fold;
 mod instant_manipulate;
@@ -24,6 +25,7 @@ mod series_divide;
 mod test_util;
 mod union_distinct_on;

+pub use absent::{Absent, AbsentExec, AbsentStream};
 use datafusion::arrow::datatypes::{ArrowPrimitiveType, TimestampMillisecondType};
 pub use empty_metric::{build_special_time_expr, EmptyMetric, EmptyMetricExec, EmptyMetricStream};
 pub use histogram_fold::{HistogramFold, HistogramFoldExec, HistogramFoldStream};

src/promql/src/extension_plan/absent.rs (new file, 654 lines)
@@ -0,0 +1,654 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use datafusion::arrow::array::Array;
|
||||
use datafusion::common::{DFSchemaRef, Result as DataFusionResult};
|
||||
use datafusion::execution::context::TaskContext;
|
||||
use datafusion::logical_expr::{Expr, LogicalPlan, UserDefinedLogicalNodeCore};
|
||||
use datafusion::physical_expr::{EquivalenceProperties, LexRequirement, PhysicalSortRequirement};
|
||||
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
|
||||
use datafusion::physical_plan::expressions::Column as ColumnExpr;
|
||||
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
|
||||
use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PlanProperties,
|
||||
RecordBatchStream, SendableRecordBatchStream,
|
||||
};
|
||||
use datafusion_common::DFSchema;
|
||||
use datafusion_expr::EmptyRelation;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{ArrayRef, Float64Array, TimestampMillisecondArray};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, SchemaRef, TimeUnit};
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::compute::SortOptions;
|
||||
use futures::{ready, Stream, StreamExt};
|
||||
use greptime_proto::substrait_extension as pb;
|
||||
use prost::Message;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::DeserializeSnafu;
|
||||
use crate::extension_plan::Millisecond;
|
||||
|
||||
/// Maximum number of rows per output batch
|
||||
const ABSENT_BATCH_SIZE: usize = 8192;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||
pub struct Absent {
|
||||
start: Millisecond,
|
||||
end: Millisecond,
|
||||
step: Millisecond,
|
||||
time_index_column: String,
|
||||
value_column: String,
|
||||
fake_labels: Vec<(String, String)>,
|
||||
input: LogicalPlan,
|
||||
output_schema: DFSchemaRef,
|
||||
}
|
||||
|
||||
impl PartialOrd for Absent {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
// compare on fields except schema and input
|
||||
(
|
||||
self.start,
|
||||
self.end,
|
||||
self.step,
|
||||
&self.time_index_column,
|
||||
&self.value_column,
|
||||
&self.fake_labels,
|
||||
)
|
||||
.partial_cmp(&(
|
||||
other.start,
|
||||
other.end,
|
||||
other.step,
|
||||
&other.time_index_column,
|
||||
&other.value_column,
|
||||
&other.fake_labels,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl UserDefinedLogicalNodeCore for Absent {
|
||||
fn name(&self) -> &str {
|
||||
Self::name()
|
||||
}
|
||||
|
||||
fn inputs(&self) -> Vec<&LogicalPlan> {
|
||||
vec![&self.input]
|
||||
}
|
||||
|
||||
fn schema(&self) -> &DFSchemaRef {
|
||||
&self.output_schema
|
||||
}
|
||||
|
||||
fn expressions(&self) -> Vec<Expr> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"PromAbsent: start={}, end={}, step={}",
|
||||
self.start, self.end, self.step
|
||||
)
|
||||
}
|
||||
|
||||
fn with_exprs_and_inputs(
|
||||
&self,
|
||||
_exprs: Vec<Expr>,
|
||||
inputs: Vec<LogicalPlan>,
|
||||
) -> DataFusionResult<Self> {
|
||||
if inputs.is_empty() {
|
||||
return Err(datafusion::error::DataFusionError::Internal(
|
||||
"Absent must have at least one input".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
start: self.start,
|
||||
end: self.end,
|
||||
step: self.step,
|
||||
time_index_column: self.time_index_column.clone(),
|
||||
value_column: self.value_column.clone(),
|
||||
fake_labels: self.fake_labels.clone(),
|
||||
input: inputs[0].clone(),
|
||||
output_schema: self.output_schema.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Absent {
|
||||
pub fn try_new(
|
||||
start: Millisecond,
|
||||
end: Millisecond,
|
||||
step: Millisecond,
|
||||
time_index_column: String,
|
||||
value_column: String,
|
||||
fake_labels: Vec<(String, String)>,
|
||||
input: LogicalPlan,
|
||||
) -> DataFusionResult<Self> {
|
||||
let mut fields = vec![
|
||||
Field::new(
|
||||
&time_index_column,
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
true,
|
||||
),
|
||||
Field::new(&value_column, DataType::Float64, true),
|
||||
];
|
||||
|
||||
// remove duplicate fake labels
|
||||
let mut fake_labels = fake_labels
|
||||
.into_iter()
|
||||
.collect::<HashMap<String, String>>()
|
||||
.into_iter()
|
||||
.collect::<Vec<_>>();
|
||||
fake_labels.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||
for (name, _) in fake_labels.iter() {
|
||||
fields.push(Field::new(name, DataType::Utf8, true));
|
||||
}
|
||||
|
||||
let output_schema = Arc::new(DFSchema::from_unqualified_fields(
|
||||
fields.into(),
|
||||
HashMap::new(),
|
||||
)?);
|
||||
|
||||
Ok(Self {
|
||||
start,
|
||||
end,
|
||||
step,
|
||||
time_index_column,
|
||||
value_column,
|
||||
fake_labels,
|
||||
input,
|
||||
output_schema,
|
||||
})
|
||||
}
|
||||
|
||||
pub const fn name() -> &'static str {
|
||||
"prom_absent"
|
||||
}
|
||||
|
||||
pub fn to_execution_plan(&self, exec_input: Arc<dyn ExecutionPlan>) -> Arc<dyn ExecutionPlan> {
|
||||
let output_schema = Arc::new(self.output_schema.as_arrow().clone());
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(output_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(1),
|
||||
EmissionType::Incremental,
|
||||
Boundedness::Bounded,
|
||||
);
|
||||
Arc::new(AbsentExec {
|
||||
start: self.start,
|
||||
end: self.end,
|
||||
step: self.step,
|
||||
time_index_column: self.time_index_column.clone(),
|
||||
value_column: self.value_column.clone(),
|
||||
fake_labels: self.fake_labels.clone(),
|
||||
output_schema: output_schema.clone(),
|
||||
input: exec_input,
|
||||
properties,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn serialize(&self) -> Vec<u8> {
|
||||
pb::Absent {
|
||||
start: self.start,
|
||||
end: self.end,
|
||||
step: self.step,
|
||||
time_index_column: self.time_index_column.clone(),
|
||||
value_column: self.value_column.clone(),
|
||||
fake_labels: self
|
||||
.fake_labels
|
||||
.iter()
|
||||
.map(|(name, value)| pb::LabelPair {
|
||||
key: name.clone(),
|
||||
value: value.clone(),
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
.encode_to_vec()
|
||||
}
|
||||
|
||||
pub fn deserialize(bytes: &[u8]) -> DataFusionResult<Self> {
|
||||
let pb_absent = pb::Absent::decode(bytes).context(DeserializeSnafu)?;
|
||||
let placeholder_plan = LogicalPlan::EmptyRelation(EmptyRelation {
|
||||
produce_one_row: false,
|
||||
schema: Arc::new(DFSchema::empty()),
|
||||
});
|
||||
Self::try_new(
|
||||
pb_absent.start,
|
||||
pb_absent.end,
|
||||
pb_absent.step,
|
||||
pb_absent.time_index_column,
|
||||
pb_absent.value_column,
|
||||
pb_absent
|
||||
.fake_labels
|
||||
.iter()
|
||||
.map(|label| (label.key.clone(), label.value.clone()))
|
||||
.collect(),
|
||||
placeholder_plan,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct AbsentExec {
|
||||
start: Millisecond,
|
||||
end: Millisecond,
|
||||
step: Millisecond,
|
||||
time_index_column: String,
|
||||
value_column: String,
|
||||
fake_labels: Vec<(String, String)>,
|
||||
output_schema: SchemaRef,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
properties: PlanProperties,
|
||||
metric: ExecutionPlanMetricsSet,
|
||||
}
|
||||
|
||||
impl ExecutionPlan for AbsentExec {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.output_schema.clone()
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
fn required_input_distribution(&self) -> Vec<Distribution> {
|
||||
vec![Distribution::SinglePartition]
|
||||
}
|
||||
|
||||
fn required_input_ordering(&self) -> Vec<Option<LexRequirement>> {
|
||||
vec![Some(LexRequirement::new(vec![PhysicalSortRequirement {
|
||||
expr: Arc::new(
|
||||
ColumnExpr::new_with_schema(&self.time_index_column, &self.input.schema()).unwrap(),
|
||||
),
|
||||
options: Some(SortOptions {
|
||||
descending: false,
|
||||
nulls_first: false,
|
||||
}),
|
||||
}]))]
|
||||
}
|
||||
|
||||
fn maintains_input_order(&self) -> Vec<bool> {
|
||||
vec![false]
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
|
||||
vec![&self.input]
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
self: Arc<Self>,
|
||||
children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
assert!(!children.is_empty());
|
||||
Ok(Arc::new(Self {
|
||||
start: self.start,
|
||||
end: self.end,
|
||||
step: self.step,
|
||||
time_index_column: self.time_index_column.clone(),
|
||||
value_column: self.value_column.clone(),
|
||||
fake_labels: self.fake_labels.clone(),
|
||||
output_schema: self.output_schema.clone(),
|
||||
input: children[0].clone(),
|
||||
properties: self.properties.clone(),
|
||||
metric: self.metric.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> DataFusionResult<SendableRecordBatchStream> {
|
||||
let baseline_metric = BaselineMetrics::new(&self.metric, partition);
|
||||
let input = self.input.execute(partition, context)?;
|
||||
|
||||
Ok(Box::pin(AbsentStream {
|
||||
end: self.end,
|
||||
step: self.step,
|
||||
time_index_column_index: self
|
||||
.input
|
||||
.schema()
|
||||
.column_with_name(&self.time_index_column)
|
||||
.unwrap() // Safety: we have checked the column name in `try_new`
|
||||
.0,
|
||||
output_schema: self.output_schema.clone(),
|
||||
fake_labels: self.fake_labels.clone(),
|
||||
input,
|
||||
metric: baseline_metric,
|
||||
// Buffer for streaming output timestamps
|
||||
output_timestamps: Vec::new(),
|
||||
// Current timestamp in the output range
|
||||
output_ts_cursor: self.start,
|
||||
input_finished: false,
|
||||
}))
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metric.clone_inner())
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"AbsentExec"
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for AbsentExec {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(
|
||||
f,
|
||||
"PromAbsentExec: start={}, end={}, step={}",
|
||||
self.start, self.end, self.step
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AbsentStream {
|
||||
end: Millisecond,
|
||||
step: Millisecond,
|
||||
time_index_column_index: usize,
|
||||
output_schema: SchemaRef,
|
||||
fake_labels: Vec<(String, String)>,
|
||||
input: SendableRecordBatchStream,
|
||||
metric: BaselineMetrics,
|
||||
// Buffer for streaming output timestamps
|
||||
output_timestamps: Vec<Millisecond>,
|
||||
// Current timestamp in the output range
|
||||
output_ts_cursor: Millisecond,
|
||||
input_finished: bool,
|
||||
}
|
||||
|
||||
impl RecordBatchStream for AbsentStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.output_schema.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for AbsentStream {
|
||||
type Item = DataFusionResult<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
loop {
|
||||
if !self.input_finished {
|
||||
match ready!(self.input.poll_next_unpin(cx)) {
|
||||
Some(Ok(batch)) => {
|
||||
let timer = std::time::Instant::now();
|
||||
if let Err(e) = self.process_input_batch(&batch) {
|
||||
return Poll::Ready(Some(Err(e)));
|
||||
}
|
||||
self.metric.elapsed_compute().add_elapsed(timer);
|
||||
|
||||
// If we have enough data for a batch, output it
|
||||
if self.output_timestamps.len() >= ABSENT_BATCH_SIZE {
|
||||
let timer = std::time::Instant::now();
|
||||
let result = self.flush_output_batch();
|
||||
self.metric.elapsed_compute().add_elapsed(timer);
|
||||
|
||||
match result {
|
||||
Ok(Some(batch)) => return Poll::Ready(Some(Ok(batch))),
|
||||
Ok(None) => continue,
|
||||
Err(e) => return Poll::Ready(Some(Err(e))),
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => return Poll::Ready(Some(Err(e))),
|
||||
None => {
|
||||
self.input_finished = true;
|
||||
|
||||
let timer = std::time::Instant::now();
|
||||
// Process any remaining absent timestamps
|
||||
if let Err(e) = self.process_remaining_absent_timestamps() {
|
||||
return Poll::Ready(Some(Err(e)));
|
||||
}
|
||||
let result = self.flush_output_batch();
|
||||
self.metric.elapsed_compute().add_elapsed(timer);
|
||||
return Poll::Ready(result.transpose());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AbsentStream {
|
||||
fn process_input_batch(&mut self, batch: &RecordBatch) -> DataFusionResult<()> {
|
||||
// Extract timestamps from this batch
|
||||
let timestamp_array = batch.column(self.time_index_column_index);
|
||||
let milli_ts_array = arrow::compute::cast(
|
||||
timestamp_array,
|
||||
&DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
)?;
|
||||
let timestamp_array = milli_ts_array
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
|
||||
// Process against current output cursor position
|
||||
for &input_ts in timestamp_array.values() {
|
||||
// Generate absent timestamps up to this input timestamp
|
||||
while self.output_ts_cursor < input_ts && self.output_ts_cursor <= self.end {
|
||||
self.output_timestamps.push(self.output_ts_cursor);
|
||||
self.output_ts_cursor += self.step;
|
||||
}
|
||||
|
||||
// Skip the input timestamp if it matches our cursor
|
||||
if self.output_ts_cursor == input_ts {
|
||||
self.output_ts_cursor += self.step;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn process_remaining_absent_timestamps(&mut self) -> DataFusionResult<()> {
|
||||
// Generate all remaining absent timestamps (input is finished)
|
||||
while self.output_ts_cursor <= self.end {
|
||||
self.output_timestamps.push(self.output_ts_cursor);
|
||||
self.output_ts_cursor += self.step;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn flush_output_batch(&mut self) -> DataFusionResult<Option<RecordBatch>> {
|
||||
if self.output_timestamps.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let mut columns: Vec<ArrayRef> = Vec::with_capacity(self.output_schema.fields().len());
|
||||
let num_rows = self.output_timestamps.len();
|
||||
columns.push(Arc::new(TimestampMillisecondArray::from(
|
||||
self.output_timestamps.clone(),
|
||||
)) as _);
|
||||
columns.push(Arc::new(Float64Array::from(vec![1.0; num_rows])) as _);
|
||||
|
||||
for (_, value) in self.fake_labels.iter() {
|
||||
columns.push(Arc::new(StringArray::from_iter(std::iter::repeat_n(
|
||||
Some(value.clone()),
|
||||
num_rows,
|
||||
))) as _);
|
||||
}
|
||||
|
||||
let batch = RecordBatch::try_new(self.output_schema.clone(), columns)?;
|
||||
|
||||
self.output_timestamps.clear();
|
||||
Ok(Some(batch))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit};
|
||||
use datafusion::arrow::record_batch::RecordBatch;
|
||||
use datafusion::physical_plan::memory::MemoryExec;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datatypes::arrow::array::{Float64Array, TimestampMillisecondArray};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_absent_basic() {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new(
|
||||
"timestamp",
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
true,
|
||||
),
|
||||
Field::new("value", DataType::Float64, true),
|
||||
]));
|
||||
|
||||
// Input has timestamps: 0, 2000, 4000
|
||||
let timestamp_array = Arc::new(TimestampMillisecondArray::from(vec![0, 2000, 4000]));
|
||||
let value_array = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
|
||||
let batch =
|
||||
RecordBatch::try_new(schema.clone(), vec![timestamp_array, value_array]).unwrap();
|
||||
|
||||
let memory_exec = MemoryExec::try_new(&[vec![batch]], schema, None).unwrap();
|
||||
|
||||
let output_schema = Arc::new(Schema::new(vec![
|
||||
Field::new(
|
||||
"timestamp",
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
true,
|
||||
),
|
||||
Field::new("value", DataType::Float64, true),
|
||||
]));
|
||||
|
||||
let absent_exec = AbsentExec {
|
||||
start: 0,
|
||||
end: 5000,
|
||||
step: 1000,
|
||||
time_index_column: "timestamp".to_string(),
|
||||
value_column: "value".to_string(),
|
||||
fake_labels: vec![],
|
||||
output_schema: output_schema.clone(),
|
||||
input: Arc::new(memory_exec),
|
||||
properties: PlanProperties::new(
|
||||
EquivalenceProperties::new(output_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(1),
|
||||
EmissionType::Incremental,
|
||||
Boundedness::Bounded,
|
||||
),
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
};
|
||||
|
||||
let session_ctx = SessionContext::new();
|
||||
let task_ctx = session_ctx.task_ctx();
|
||||
let mut stream = absent_exec.execute(0, task_ctx).unwrap();
|
||||
|
||||
// Collect all output batches
|
||||
let mut output_timestamps = Vec::new();
|
||||
while let Some(batch_result) = stream.next().await {
|
||||
let batch = batch_result.unwrap();
|
||||
let ts_array = batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
for i in 0..ts_array.len() {
|
||||
if !ts_array.is_null(i) {
|
||||
let ts = ts_array.value(i);
|
||||
output_timestamps.push(ts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Should output absent timestamps: 1000, 3000, 5000
|
||||
// (0, 2000, 4000 exist in input, so 1000, 3000, 5000 are absent)
|
||||
assert_eq!(output_timestamps, vec![1000, 3000, 5000]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_absent_empty_input() {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new(
|
||||
"timestamp",
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
true,
|
||||
),
|
||||
Field::new("value", DataType::Float64, true),
|
||||
]));
|
||||
|
||||
// Empty input
|
||||
let memory_exec = MemoryExec::try_new(&[vec![]], schema, None).unwrap();
|
||||
|
||||
let output_schema = Arc::new(Schema::new(vec![
|
||||
Field::new(
|
||||
"timestamp",
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
true,
|
||||
),
|
||||
Field::new("value", DataType::Float64, true),
|
||||
]));
|
||||
let absent_exec = AbsentExec {
|
||||
start: 0,
|
||||
end: 2000,
|
||||
step: 1000,
|
||||
time_index_column: "timestamp".to_string(),
|
||||
value_column: "value".to_string(),
|
||||
fake_labels: vec![],
|
||||
output_schema: output_schema.clone(),
|
||||
input: Arc::new(memory_exec),
|
||||
properties: PlanProperties::new(
|
||||
EquivalenceProperties::new(output_schema.clone()),
|
||||
Partitioning::UnknownPartitioning(1),
|
||||
EmissionType::Incremental,
|
||||
Boundedness::Bounded,
|
||||
),
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
};
|
||||
|
||||
let session_ctx = SessionContext::new();
|
||||
let task_ctx = session_ctx.task_ctx();
|
||||
let mut stream = absent_exec.execute(0, task_ctx).unwrap();
|
||||
|
||||
// Collect all output timestamps
|
||||
let mut output_timestamps = Vec::new();
|
||||
while let Some(batch_result) = stream.next().await {
|
||||
let batch = batch_result.unwrap();
|
||||
let ts_array = batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap();
|
||||
for i in 0..ts_array.len() {
|
||||
if !ts_array.is_null(i) {
|
||||
let ts = ts_array.value(i);
|
||||
output_timestamps.push(ts);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Should output all timestamps in range: 0, 1000, 2000
|
||||
assert_eq!(output_timestamps, vec![0, 1000, 2000]);
|
||||
}
|
||||
}
|
||||
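The heart of `AbsentStream` is the cursor walk in `process_input_batch` and `process_remaining_absent_timestamps`: advance a cursor along the aligned [start, end] grid and emit every step that no input timestamp covers. The stripped-down, stream-free sketch below mirrors that walk on plain i64 millisecond timestamps (sorted input assumed) and reproduces the expectations of the two unit tests above.

/// Return every timestamp on the [start, end] grid (spaced by `step`) that does
/// not appear in the sorted `present` timestamps, i.e. the points where the
/// series is absent. Mirrors the cursor logic of `AbsentStream` without batching.
fn absent_timestamps(start: i64, end: i64, step: i64, present: &[i64]) -> Vec<i64> {
    let mut absent = Vec::new();
    let mut cursor = start;
    for &input_ts in present {
        // Emit grid points strictly before this input timestamp.
        while cursor < input_ts && cursor <= end {
            absent.push(cursor);
            cursor += step;
        }
        // A grid point covered by an input timestamp is skipped, not emitted.
        if cursor == input_ts {
            cursor += step;
        }
    }
    // Input exhausted: everything left on the grid is absent.
    while cursor <= end {
        absent.push(cursor);
        cursor += step;
    }
    absent
}

fn main() {
    // Same data as `test_absent_basic`: input 0/2000/4000 over [0, 5000] step 1000.
    assert_eq!(absent_timestamps(0, 5000, 1000, &[0, 2000, 4000]), vec![1000, 3000, 5000]);
    // Same as `test_absent_empty_input`: with no input, every grid point is absent.
    assert_eq!(absent_timestamps(0, 2000, 1000, &[]), vec![0, 1000, 2000]);
}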
@@ -22,8 +22,8 @@ use datafusion::physical_plan::ExecutionPlan;
 use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner};

 use crate::extension_plan::{
-    EmptyMetric, HistogramFold, InstantManipulate, RangeManipulate, ScalarCalculate, SeriesDivide,
-    SeriesNormalize, UnionDistinctOn,
+    Absent, EmptyMetric, HistogramFold, InstantManipulate, RangeManipulate, ScalarCalculate,
+    SeriesDivide, SeriesNormalize, UnionDistinctOn,
 };

 pub struct PromExtensionPlanner;
@@ -57,6 +57,8 @@ impl ExtensionPlanner for PromExtensionPlanner {
                 physical_inputs[0].clone(),
                 physical_inputs[1].clone(),
             )))
+        } else if let Some(node) = node.as_any().downcast_ref::<Absent>() {
+            Ok(Some(node.to_execution_plan(physical_inputs[0].clone())))
         } else {
             Ok(None)
         }
@@ -340,7 +340,14 @@ impl ExecutionPlan for RangeManipulateExec {
     }

     fn required_input_distribution(&self) -> Vec<Distribution> {
-        self.input.required_input_distribution()
+        let input_requirement = self.input.required_input_distribution();
+        if input_requirement.is_empty() {
+            // if the input is EmptyMetric, its required_input_distribution() is empty so we can't
+            // use its input distribution.
+            vec![Distribution::UnspecifiedDistribution]
+        } else {
+            input_requirement
+        }
     }

     fn with_new_children(

@@ -237,7 +237,8 @@ fn create_output_batch(
             for (node, metric) in sub_stage_metrics.into_iter().enumerate() {
                 builder.append_metric(1, node as _, metrics_to_string(metric, format)?);
             }
-            return Ok(TreeNodeRecursion::Stop);
+            // might have multiple merge scans, so continue
+            return Ok(TreeNodeRecursion::Continue);
         }
         Ok(TreeNodeRecursion::Continue)
     })?;

@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use std::sync::Arc;

 use common_telemetry::debug;
@@ -38,6 +38,13 @@ use crate::dist_plan::merge_scan::MergeScanLogicalPlan;
 use crate::plan::ExtractExpr;
 use crate::query_engine::DefaultSerializer;

+#[cfg(test)]
+mod test;
+
+mod utils;
+
+pub(crate) use utils::{AliasMapping, AliasTracker};
+
 #[derive(Debug)]
 pub struct DistPlannerAnalyzer;

@@ -154,8 +161,50 @@ struct PlanRewriter {
     status: RewriterStatus,
     /// Partition columns of the table in current pass
     partition_cols: Option<Vec<String>>,
-    column_requirements: HashSet<Column>,
+    alias_tracker: Option<AliasTracker>,
+    /// use stack count as scope to determine column requirements is needed or not
+    /// i.e for a logical plan like:
+    /// ```ignore
+    /// 1: Projection: t.number
+    /// 2: Sort: t.pk1+t.pk2
+    /// 3. Projection: t.number, t.pk1, t.pk2
+    /// ```
+    /// `Sort` will make a column requirement for `t.pk1` at level 2.
+    /// Which making `Projection` at level 1 need to add a ref to `t.pk1` as well.
+    /// So that the expanded plan will be
+    /// ```ignore
+    /// Projection: t.number
+    /// MergeSort: t.pk1
+    /// MergeScan: remote_input=
+    /// Projection: t.number, "t.pk1+t.pk2" <--- the original `Projection` at level 1 get added with `t.pk1+t.pk2`
+    /// Sort: t.pk1+t.pk2
+    /// Projection: t.number, t.pk1, t.pk2
+    /// ```
+    /// Making `MergeSort` can have `t.pk1` as input.
+    /// Meanwhile `Projection` at level 3 doesn't need to add any new column because 3 > 2
+    /// and col requirements at level 2 is not applicable for level 3.
+    ///
+    /// see more details in test `expand_proj_step_aggr` and `expand_proj_sort_proj`
+    ///
+    /// TODO(discord9): a simpler solution to track column requirements for merge scan
+    column_requirements: Vec<(HashSet<Column>, usize)>,
     /// Whether to expand on next call
     /// This is used to handle the case where a plan is transformed, but need to be expanded from it's
     /// parent node. For example a Aggregate plan is split into two parts in frontend and datanode, and need
     /// to be expanded from the parent node of the Aggregate plan.
     expand_on_next_call: bool,
+    /// Expanding on next partial/conditional/transformed commutative plan
+    /// This is used to handle the case where a plan is transformed, but still
+    /// need to push down as many node as possible before next partial/conditional/transformed commutative
+    /// plan. I.e.
+    /// ```ignore
+    /// Limit:
+    ///     Sort:
+    /// ```
+    /// where `Limit` is partial commutative, and `Sort` is conditional commutative.
+    /// In this case, we need to expand the `Limit` plan,
+    /// so that we can push down the `Sort` plan as much as possible.
+    expand_on_next_part_cond_trans_commutative: bool,
     new_child_plan: Option<LogicalPlan>,
 }

@@ -171,21 +220,57 @@ impl PlanRewriter {
|
||||
|
||||
/// Return true if should stop and expand. The input plan is the parent node of current node
|
||||
fn should_expand(&mut self, plan: &LogicalPlan) -> bool {
|
||||
debug!(
|
||||
"Check should_expand at level: {} with Stack:\n{}, ",
|
||||
self.level,
|
||||
self.stack
|
||||
.iter()
|
||||
.map(|(p, l)| format!("{l}:{}{}", " ".repeat(l - 1), p.display()))
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
);
|
||||
if DFLogicalSubstraitConvertor
|
||||
.encode(plan, DefaultSerializer)
|
||||
.is_err()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if self.expand_on_next_call {
|
||||
self.expand_on_next_call = false;
|
||||
return true;
|
||||
}
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
|
||||
|
||||
if self.expand_on_next_part_cond_trans_commutative {
|
||||
let comm = Categorizer::check_plan(plan, self.get_aliased_partition_columns());
|
||||
match comm {
|
||||
Commutativity::PartialCommutative => {
|
||||
// a small difference is that for partial commutative, we still need to
|
||||
// push down it(so `Limit` can be pushed down)
|
||||
|
||||
// notice how limit needed to be expanded as well to make sure query is correct
|
||||
// i.e. `Limit fetch=10` need to be pushed down to the leaf node
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
self.expand_on_next_call = true;
|
||||
}
|
||||
Commutativity::ConditionalCommutative(_)
|
||||
| Commutativity::TransformedCommutative { .. } => {
|
||||
// again a new node that can be push down, we should just
|
||||
// do push down now and avoid further expansion
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
return true;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
match Categorizer::check_plan(plan, self.get_aliased_partition_columns()) {
|
||||
Commutativity::Commutative => {}
|
||||
Commutativity::PartialCommutative => {
|
||||
if let Some(plan) = partial_commutative_transformer(plan) {
|
||||
self.update_column_requirements(&plan);
|
||||
// notice this plan is parent of current node, so `self.level - 1` when updating column requirements
|
||||
self.update_column_requirements(&plan, self.level - 1);
|
||||
self.expand_on_next_part_cond_trans_commutative = true;
|
||||
self.stage.push(plan)
|
||||
}
|
||||
}
|
||||
@@ -193,7 +278,9 @@ impl PlanRewriter {
                 if let Some(transformer) = transformer
                     && let Some(plan) = transformer(plan)
                 {
-                    self.update_column_requirements(&plan);
+                    // notice this plan is parent of current node, so `self.level - 1` when updating column requirements
+                    self.update_column_requirements(&plan, self.level - 1);
+                    self.expand_on_next_part_cond_trans_commutative = true;
                     self.stage.push(plan)
                 }
             }
@@ -202,12 +289,22 @@ impl PlanRewriter {
|
||||
&& let Some(transformer_actions) = transformer(plan)
|
||||
{
|
||||
debug!(
|
||||
"PlanRewriter: transformed plan: {:#?}\n from {plan}",
|
||||
transformer_actions.extra_parent_plans
|
||||
"PlanRewriter: transformed plan: {}\n from {plan}",
|
||||
transformer_actions
|
||||
.extra_parent_plans
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, p)| format!(
|
||||
"Extra {i}-th parent plan from parent to child = {}",
|
||||
p.display()
|
||||
))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
);
|
||||
if let Some(last_stage) = transformer_actions.extra_parent_plans.last() {
|
||||
// update the column requirements from the last stage
|
||||
self.update_column_requirements(last_stage);
|
||||
// notice current plan's parent plan is where we need to apply the column requirements
|
||||
self.update_column_requirements(last_stage, self.level - 1);
|
||||
}
|
||||
self.stage
|
||||
.extend(transformer_actions.extra_parent_plans.into_iter().rev());
|
||||
@@ -225,16 +322,25 @@ impl PlanRewriter {
         false
     }

-    fn update_column_requirements(&mut self, plan: &LogicalPlan) {
+    /// Update the column requirements for the current plan, plan_level is the level of the plan
+    /// in the stack, which is used to determine if the column requirements are applicable
+    /// for other plans in the stack.
+    fn update_column_requirements(&mut self, plan: &LogicalPlan, plan_level: usize) {
+        debug!(
+            "PlanRewriter: update column requirements for plan: {plan}\n with old column_requirements: {:?}",
+            self.column_requirements
+        );
         let mut container = HashSet::new();
         for expr in plan.expressions() {
             // this method won't fail
             let _ = expr_to_columns(&expr, &mut container);
         }

-        for col in container {
-            self.column_requirements.insert(col);
-        }
+        self.column_requirements.push((container, plan_level));
+        debug!(
+            "PlanRewriter: updated column requirements: {:?}",
+            self.column_requirements
+        );
     }

     fn is_expanded(&self) -> bool {
@@ -249,6 +355,45 @@ impl PlanRewriter {
|
||||
self.status = RewriterStatus::Unexpanded;
|
||||
}
|
||||
|
||||
/// Maybe update alias for original table columns in the plan
|
||||
fn maybe_update_alias(&mut self, node: &LogicalPlan) {
|
||||
if let Some(alias_tracker) = &mut self.alias_tracker {
|
||||
alias_tracker.update_alias(node);
|
||||
debug!(
|
||||
"Current partition columns are: {:?}",
|
||||
self.get_aliased_partition_columns()
|
||||
);
|
||||
} else if let LogicalPlan::TableScan(table_scan) = node {
|
||||
self.alias_tracker = AliasTracker::new(table_scan);
|
||||
debug!(
|
||||
"Initialize partition columns: {:?} with table={}",
|
||||
self.get_aliased_partition_columns(),
|
||||
table_scan.table_name
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_aliased_partition_columns(&self) -> Option<AliasMapping> {
|
||||
if let Some(part_cols) = self.partition_cols.as_ref() {
|
||||
let Some(alias_tracker) = &self.alias_tracker else {
|
||||
// no alias tracker meaning no table scan encountered
|
||||
return None;
|
||||
};
|
||||
let mut aliased = HashMap::new();
|
||||
for part_col in part_cols {
|
||||
let all_alias = alias_tracker
|
||||
.get_all_alias_for_col(part_col)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
aliased.insert(part_col.clone(), all_alias);
|
||||
}
|
||||
Some(aliased)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_set_partitions(&mut self, plan: &LogicalPlan) {
|
||||
if self.partition_cols.is_some() {
|
||||
// only need to set once
|
||||
@@ -294,10 +439,15 @@ impl PlanRewriter {
         }
         // store schema before expand
         let schema = on_node.schema().clone();
-        let mut rewriter = EnforceDistRequirementRewriter {
-            column_requirements: std::mem::take(&mut self.column_requirements),
-        };
+        let mut rewriter = EnforceDistRequirementRewriter::new(
+            std::mem::take(&mut self.column_requirements),
+            self.level,
+        );
+        debug!("PlanRewriter: enforce column requirements for node: {on_node} with rewriter: {rewriter:?}");
         on_node = on_node.rewrite(&mut rewriter)?.data;
+        debug!(
+            "PlanRewriter: after enforced column requirements for node: {on_node} with rewriter: {rewriter:?}"
+        );

         // add merge scan as the new root
         let mut node = MergeScanLogicalPlan::new(
@@ -316,7 +466,8 @@ impl PlanRewriter {
         }
         self.set_expanded();

-        // recover the schema
+        // recover the schema, this make sure after expand the schema is the same as old node
+        // because after expand the raw top node might have extra columns i.e. sorting columns for `Sort` node
         let node = LogicalPlanBuilder::from(node)
             .project(schema.iter().map(|(qualifier, field)| {
                 Expr::Column(Column::new(qualifier.cloned(), field.name()))
@@ -333,42 +484,96 @@ impl PlanRewriter {
|
||||
/// Requirements enforced by this rewriter:
|
||||
/// - Enforce column requirements for `LogicalPlan::Projection` nodes. Makes sure the
|
||||
/// required columns are available in the sub plan.
|
||||
///
|
||||
#[derive(Debug)]
|
||||
struct EnforceDistRequirementRewriter {
|
||||
column_requirements: HashSet<Column>,
|
||||
/// only enforce column requirements after the expanding node in question,
|
||||
/// meaning only for node with `cur_level` <= `level` will consider adding those column requirements
|
||||
/// TODO(discord9): a simpler solution to track column requirements for merge scan
|
||||
column_requirements: Vec<(HashSet<Column>, usize)>,
|
||||
/// only apply column requirements >= `cur_level`
|
||||
/// this is used to avoid applying column requirements that are not needed
|
||||
/// for the current node, i.e. the node is not in the scope of the column requirements
|
||||
/// i.e, for this plan:
|
||||
/// ```ignore
|
||||
/// Aggregate: min(t.number)
|
||||
/// Projection: t.number
|
||||
/// ```
|
||||
/// when on `Projection` node, we don't need to apply the column requirements of `Aggregate` node
|
||||
/// because the `Projection` node is not in the scope of the `Aggregate` node
|
||||
cur_level: usize,
|
||||
}
|
||||
|
||||
impl EnforceDistRequirementRewriter {
|
||||
fn new(column_requirements: Vec<(HashSet<Column>, usize)>, cur_level: usize) -> Self {
|
||||
Self {
|
||||
column_requirements,
|
||||
cur_level,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TreeNodeRewriter for EnforceDistRequirementRewriter {
|
||||
type Node = LogicalPlan;
|
||||
|
||||
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
if let LogicalPlan::Projection(ref projection) = node {
|
||||
let mut column_requirements = std::mem::take(&mut self.column_requirements);
|
||||
if column_requirements.is_empty() {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
for expr in &projection.expr {
|
||||
let (qualifier, name) = expr.qualified_name();
|
||||
let column = Column::new(qualifier, name);
|
||||
column_requirements.remove(&column);
|
||||
}
|
||||
if column_requirements.is_empty() {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
let mut new_exprs = projection.expr.clone();
|
||||
for col in &column_requirements {
|
||||
new_exprs.push(Expr::Column(col.clone()));
|
||||
}
|
||||
let new_node =
|
||||
node.with_new_exprs(new_exprs, node.inputs().into_iter().cloned().collect())?;
|
||||
return Ok(Transformed::yes(new_node));
|
||||
// check that node doesn't have multiple children, i.e. join/subquery
|
||||
if node.inputs().len() > 1 {
|
||||
return Err(datafusion_common::DataFusionError::Internal(
|
||||
"EnforceDistRequirementRewriter: node with multiple inputs is not supported"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
self.cur_level += 1;
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
|
||||
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
|
||||
self.cur_level -= 1;
|
||||
// first get all applicable column requirements
|
||||
let mut applicable_column_requirements = self
|
||||
.column_requirements
|
||||
.iter()
|
||||
.filter(|(_, level)| *level >= self.cur_level)
|
||||
.map(|(cols, _)| cols.clone())
|
||||
.reduce(|mut acc, cols| {
|
||||
acc.extend(cols);
|
||||
acc
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: applicable column requirements at level {} = {:?} for node {}",
|
||||
self.cur_level,
|
||||
applicable_column_requirements,
|
||||
node.display()
|
||||
);
|
||||
|
||||
// make sure all projection applicable scope has the required columns
|
||||
if let LogicalPlan::Projection(ref projection) = node {
|
||||
for expr in &projection.expr {
|
||||
let (qualifier, name) = expr.qualified_name();
|
||||
let column = Column::new(qualifier, name);
|
||||
applicable_column_requirements.remove(&column);
|
||||
}
|
||||
if applicable_column_requirements.is_empty() {
|
||||
return Ok(Transformed::no(node));
|
||||
}
|
||||
|
||||
let mut new_exprs = projection.expr.clone();
|
||||
for col in &applicable_column_requirements {
|
||||
new_exprs.push(Expr::Column(col.clone()));
|
||||
}
|
||||
let new_node =
|
||||
node.with_new_exprs(new_exprs, node.inputs().into_iter().cloned().collect())?;
|
||||
debug!(
|
||||
"EnforceDistRequirementRewriter: added missing columns {:?} to projection node from old node: \n{node}\n Making new node: \n{new_node}",
|
||||
applicable_column_requirements
|
||||
);
|
||||
|
||||
// still need to continue for next projection if applicable
|
||||
return Ok(Transformed::yes(new_node));
|
||||
}
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
}
|
||||
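The rewriter above scopes column requirements by stack level: in `f_up`, only requirement sets recorded at a level greater than or equal to the level of the node being visited are merged and then enforced on a projection. The standalone sketch below mirrors that filter-and-merge step with plain strings standing in for DataFusion `Column`s; it illustrates the scoping rule only, not the rewriter itself.

use std::collections::HashSet;

/// Simplified version of the requirement merge in `EnforceDistRequirementRewriter::f_up`:
/// requirements recorded at or below the current node in the plan tree (level >= cur_level)
/// apply to it, requirements recorded above it do not.
fn applicable_requirements(
    column_requirements: &[(HashSet<String>, usize)],
    cur_level: usize,
) -> HashSet<String> {
    column_requirements
        .iter()
        .filter(|(_, level)| *level >= cur_level)
        .map(|(cols, _)| cols.clone())
        .reduce(|mut acc, cols| {
            acc.extend(cols);
            acc
        })
        .unwrap_or_default()
}

fn main() {
    // A `Sort: t.pk1` at level 2 requires `t.pk1`; nodes at level 3 are out of its scope.
    let reqs = vec![(HashSet::from(["t.pk1".to_string()]), 2)];
    assert!(applicable_requirements(&reqs, 1).contains("t.pk1"));
    assert!(applicable_requirements(&reqs, 2).contains("t.pk1"));
    assert!(applicable_requirements(&reqs, 3).is_empty());
}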
@@ -384,6 +589,7 @@ impl TreeNodeRewriter for PlanRewriter {
         self.stage.clear();
         self.set_unexpanded();
         self.partition_cols = None;
+        self.alias_tracker = None;
         Ok(Transformed::no(node))
     }

@@ -406,8 +612,19 @@ impl TreeNodeRewriter for PlanRewriter {

         self.maybe_set_partitions(&node);

+        self.maybe_update_alias(&node);
+
         let Some(parent) = self.get_parent() else {
-            let node = self.expand(node)?;
+            debug!("Plan Rewriter: expand now for no parent found for node: {node}");
+            let node = self.expand(node);
+            debug!(
+                "PlanRewriter: expanded plan: {}",
+                match &node {
+                    Ok(n) => n.to_string(),
+                    Err(e) => format!("Error expanding plan: {e}"),
+                }
+            );
+            let node = node?;
             self.pop_stack();
             return Ok(Transformed::yes(node));
         };
@@ -435,160 +652,3 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
Ok(Transformed::no(node))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::functions_aggregate::expr_fn::avg;
|
||||
use datafusion_common::JoinType;
|
||||
use datafusion_expr::{col, lit, Expr, LogicalPlanBuilder};
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
use table::table::numbers::NumbersTable;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[ignore = "Projection is disabled for https://github.com/apache/arrow-datafusion/issues/6489"]
|
||||
#[test]
|
||||
fn transform_simple_projection_filter() {
|
||||
let numbers_table = NumbersTable::table(0);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(numbers_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.filter(col("number").lt(lit(10)))
|
||||
.unwrap()
|
||||
.project(vec![col("number")])
|
||||
.unwrap()
|
||||
.distinct()
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Distinct:",
|
||||
" MergeScan [is_placeholder=false]",
|
||||
" Distinct:",
|
||||
" Projection: t.number",
|
||||
" Filter: t.number < Int32(10)",
|
||||
" TableScan: t",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_aggregator() {
|
||||
let numbers_table = NumbersTable::table(0);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(numbers_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(Vec::<Expr>::new(), vec![avg(col("number"))])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = "Projection: avg(t.number)\
|
||||
\n MergeScan [is_placeholder=false]";
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_distinct_order() {
|
||||
let numbers_table = NumbersTable::table(0);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(numbers_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.distinct()
|
||||
.unwrap()
|
||||
.sort(vec![col("number").sort(true, false)])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = ["Projection: t.number", " MergeScan [is_placeholder=false]"].join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_single_limit() {
|
||||
let numbers_table = NumbersTable::table(0);
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(numbers_table),
|
||||
)));
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.limit(0, Some(1))
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = "Projection: t.number\
|
||||
\n MergeScan [is_placeholder=false]";
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_unalighed_join_with_alias() {
|
||||
let left = NumbersTable::table(0);
|
||||
let right = NumbersTable::table(1);
|
||||
let left_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(left),
|
||||
)));
|
||||
let right_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(right),
|
||||
)));
|
||||
|
||||
let right_plan = LogicalPlanBuilder::scan_with_filters("t", right_source, None, vec![])
|
||||
.unwrap()
|
||||
.alias("right")
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", left_source, None, vec![])
|
||||
.unwrap()
|
||||
.join_on(
|
||||
right_plan,
|
||||
JoinType::LeftSemi,
|
||||
vec![col("t.number").eq(col("right.number"))],
|
||||
)
|
||||
.unwrap()
|
||||
.limit(0, Some(1))
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
let expected = [
|
||||
"Limit: skip=0, fetch=1",
|
||||
" LeftSemi Join: Filter: t.number = right.number",
|
||||
" Projection: t.number",
|
||||
" MergeScan [is_placeholder=false]",
|
||||
" SubqueryAlias: right",
|
||||
" Projection: t.number",
|
||||
" MergeScan [is_placeholder=false]",
|
||||
]
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
src/query/src/dist_plan/analyzer/test.rs (new file, 1309 lines): file diff suppressed because it is too large.

src/query/src/dist_plan/analyzer/utils.rs (new file, 318 lines):
@@ -0,0 +1,318 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion_common::Column;
|
||||
use datafusion_expr::{Expr, LogicalPlan, TableScan};
|
||||
use table::metadata::TableType;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
/// Mapping of an original column in the table to all its aliases at the current node
|
||||
pub type AliasMapping = HashMap<String, HashSet<Column>>;
|
||||
|
||||
/// Tracks aliases for the source table columns in the plan.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AliasTracker {
|
||||
/// mapping from the original column name to the aliases used in the plan
|
||||
/// notice how one column might have multiple aliases in the plan
|
||||
///
|
||||
pub mapping: AliasMapping,
|
||||
}
|
||||
|
||||
impl AliasTracker {
|
||||
pub fn new(table_scan: &TableScan) -> Option<Self> {
|
||||
if let Some(source) = table_scan
|
||||
.source
|
||||
.as_any()
|
||||
.downcast_ref::<DefaultTableSource>()
|
||||
{
|
||||
if let Some(provider) = source
|
||||
.table_provider
|
||||
.as_any()
|
||||
.downcast_ref::<DfTableProviderAdapter>()
|
||||
{
|
||||
if provider.table().table_type() == TableType::Base {
|
||||
let info = provider.table().table_info();
|
||||
let schema = info.meta.schema.clone();
|
||||
let col_schema = schema.column_schemas();
|
||||
let mapping = col_schema
|
||||
.iter()
|
||||
.map(|col| {
|
||||
(
|
||||
col.name.clone(),
|
||||
HashSet::from_iter(std::iter::once(Column::new_unqualified(
|
||||
col.name.clone(),
|
||||
))),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
return Some(Self { mapping });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// update alias for original columns
|
||||
///
|
||||
/// only handle `Alias` with column in `Projection` node
|
||||
pub fn update_alias(&mut self, node: &LogicalPlan) {
|
||||
if let LogicalPlan::Projection(projection) = node {
|
||||
// first collect all the alias mappings, i.e. col_a AS b AS c AS d becomes `a->d`
|
||||
// notice one column might have multiple aliases
|
||||
let mut alias_mapping: AliasMapping = HashMap::new();
|
||||
for expr in &projection.expr {
|
||||
if let Expr::Alias(alias) = expr {
|
||||
let outer_alias = alias.clone();
|
||||
let mut cur_alias = alias.clone();
|
||||
while let Expr::Alias(alias) = *cur_alias.expr {
|
||||
cur_alias = alias;
|
||||
}
|
||||
if let Expr::Column(column) = *cur_alias.expr {
|
||||
alias_mapping
|
||||
.entry(column.name.clone())
|
||||
.or_default()
|
||||
.insert(Column::new(outer_alias.relation, outer_alias.name));
|
||||
}
|
||||
} else if let Expr::Column(column) = expr {
|
||||
// identity mapping
|
||||
alias_mapping
|
||||
.entry(column.name.clone())
|
||||
.or_default()
|
||||
.insert(column.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// update mapping using `alias_mapping`
|
||||
let mut new_mapping = HashMap::new();
|
||||
for (table_col_name, cur_columns) in std::mem::take(&mut self.mapping) {
|
||||
let new_aliases = {
|
||||
let mut new_aliases = HashSet::new();
|
||||
for cur_column in &cur_columns {
|
||||
let new_alias_for_cur_column = alias_mapping
|
||||
.get(cur_column.name())
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
for new_alias in new_alias_for_cur_column {
|
||||
let is_table_ref_eq = match (&new_alias.relation, &cur_column.relation)
|
||||
{
|
||||
(Some(o), Some(c)) => o.resolved_eq(c),
|
||||
_ => true,
|
||||
};
|
||||
// it is the same column if both the name and the table ref are equal
|
||||
if is_table_ref_eq {
|
||||
new_aliases.insert(new_alias.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
new_aliases
|
||||
};
|
||||
|
||||
new_mapping.insert(table_col_name, new_aliases);
|
||||
}
|
||||
|
||||
self.mapping = new_mapping;
|
||||
common_telemetry::debug!(
|
||||
"Updating alias tracker to {:?} using node: \n{node}",
|
||||
self.mapping
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_all_alias_for_col(&self, col_name: &str) -> Option<&HashSet<Column>> {
|
||||
self.mapping.get(col_name)
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub fn is_alias_for(&self, original_col: &str, cur_col: &Column) -> bool {
|
||||
self.mapping
|
||||
.get(original_col)
|
||||
.map(|cols| cols.contains(cur_col))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::init_default_ut_logging;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor};
|
||||
use datafusion_expr::{col, LogicalPlanBuilder};
|
||||
|
||||
use super::*;
|
||||
use crate::dist_plan::analyzer::test::TestTable;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TrackerTester {
|
||||
alias_tracker: Option<AliasTracker>,
|
||||
mapping_at_each_level: Vec<AliasMapping>,
|
||||
}
|
||||
|
||||
impl TreeNodeVisitor<'_> for TrackerTester {
|
||||
type Node = LogicalPlan;
|
||||
|
||||
fn f_up(&mut self, node: &LogicalPlan) -> DfResult<TreeNodeRecursion> {
|
||||
if let Some(alias_tracker) = &mut self.alias_tracker {
|
||||
alias_tracker.update_alias(node);
|
||||
self.mapping_at_each_level.push(
|
||||
self.alias_tracker
|
||||
.as_ref()
|
||||
.map(|a| a.mapping.clone())
|
||||
.unwrap_or_default()
|
||||
.clone(),
|
||||
);
|
||||
} else if let LogicalPlan::TableScan(table_scan) = node {
|
||||
self.alias_tracker = AliasTracker::new(table_scan);
|
||||
self.mapping_at_each_level.push(
|
||||
self.alias_tracker
|
||||
.as_ref()
|
||||
.map(|a| a.mapping.clone())
|
||||
.unwrap_or_default()
|
||||
.clone(),
|
||||
);
|
||||
}
|
||||
Ok(TreeNodeRecursion::Continue)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn proj_alias_tracker() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk3").alias("pk1"),
|
||||
col("pk2").alias("pk3"),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk1").alias("pk2"),
|
||||
col("pk3").alias("pk1"),
|
||||
])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut tracker_tester = TrackerTester {
|
||||
alias_tracker: None,
|
||||
mapping_at_each_level: Vec::new(),
|
||||
};
|
||||
plan.visit(&mut tracker_tester).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tracker_tester.mapping_at_each_level,
|
||||
vec![
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["number".into()])),
|
||||
("pk1".to_string(), HashSet::from(["pk1".into()])),
|
||||
("pk2".to_string(), HashSet::from(["pk2".into()])),
|
||||
("pk3".to_string(), HashSet::from(["pk3".into()])),
|
||||
("ts".to_string(), HashSet::from(["ts".into()]))
|
||||
]),
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["t.number".into()])),
|
||||
("pk1".to_string(), HashSet::from([])),
|
||||
("pk2".to_string(), HashSet::from(["pk3".into()])),
|
||||
("pk3".to_string(), HashSet::from(["pk1".into()])),
|
||||
("ts".to_string(), HashSet::from([]))
|
||||
]),
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["t.number".into()])),
|
||||
("pk1".to_string(), HashSet::from([])),
|
||||
("pk2".to_string(), HashSet::from(["pk1".into()])),
|
||||
("pk3".to_string(), HashSet::from(["pk2".into()])),
|
||||
("ts".to_string(), HashSet::from([]))
|
||||
])
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn proj_multi_alias_tracker() {
|
||||
// use logging for better debugging
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "numbers".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk3").alias("pk1"),
|
||||
col("pk3").alias("pk2"),
|
||||
])
|
||||
.unwrap()
|
||||
.project(vec![
|
||||
col("number"),
|
||||
col("pk2").alias("pk4"),
|
||||
col("pk1").alias("pk5"),
|
||||
])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let mut tracker_tester = TrackerTester {
|
||||
alias_tracker: None,
|
||||
mapping_at_each_level: Vec::new(),
|
||||
};
|
||||
plan.visit(&mut tracker_tester).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tracker_tester.mapping_at_each_level,
|
||||
vec![
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["number".into()])),
|
||||
("pk1".to_string(), HashSet::from(["pk1".into()])),
|
||||
("pk2".to_string(), HashSet::from(["pk2".into()])),
|
||||
("pk3".to_string(), HashSet::from(["pk3".into()])),
|
||||
("ts".to_string(), HashSet::from(["ts".into()]))
|
||||
]),
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["t.number".into()])),
|
||||
("pk1".to_string(), HashSet::from([])),
|
||||
("pk2".to_string(), HashSet::from([])),
|
||||
(
|
||||
"pk3".to_string(),
|
||||
HashSet::from(["pk1".into(), "pk2".into()])
|
||||
),
|
||||
("ts".to_string(), HashSet::from([]))
|
||||
]),
|
||||
HashMap::from([
|
||||
("number".to_string(), HashSet::from(["t.number".into()])),
|
||||
("pk1".to_string(), HashSet::from([])),
|
||||
("pk2".to_string(), HashSet::from([])),
|
||||
(
|
||||
"pk3".to_string(),
|
||||
HashSet::from(["pk4".into(), "pk5".into()])
|
||||
),
|
||||
("ts".to_string(), HashSet::from([]))
|
||||
])
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -27,6 +27,7 @@ use promql::extension_plan::{
|
||||
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
|
||||
};
|
||||
|
||||
use crate::dist_plan::analyzer::AliasMapping;
|
||||
use crate::dist_plan::merge_sort::{merge_sort_transformer, MergeSortLogicalPlan};
|
||||
use crate::dist_plan::MergeScanLogicalPlan;
|
||||
|
||||
@@ -139,9 +140,7 @@ pub fn step_aggr_to_upper_aggr(
|
||||
new_projection_exprs.push(aliased_output_aggr_expr);
|
||||
}
|
||||
let upper_aggr_plan = LogicalPlan::Aggregate(new_aggr);
|
||||
debug!("Before recompute schema: {upper_aggr_plan:?}");
|
||||
let upper_aggr_plan = upper_aggr_plan.recompute_schema()?;
|
||||
debug!("After recompute schema: {upper_aggr_plan:?}");
|
||||
// create a projection on top of the new aggregate plan
|
||||
let new_projection =
|
||||
Projection::try_new(new_projection_exprs, Arc::new(upper_aggr_plan.clone()))?;
|
||||
@@ -222,7 +221,7 @@ pub enum Commutativity {
|
||||
pub struct Categorizer {}
|
||||
|
||||
impl Categorizer {
|
||||
pub fn check_plan(plan: &LogicalPlan, partition_cols: Option<Vec<String>>) -> Commutativity {
|
||||
pub fn check_plan(plan: &LogicalPlan, partition_cols: Option<AliasMapping>) -> Commutativity {
|
||||
let partition_cols = partition_cols.unwrap_or_default();
|
||||
|
||||
match plan {
|
||||
@@ -247,7 +246,6 @@ impl Categorizer {
|
||||
transformer: Some(Arc::new(|plan: &LogicalPlan| {
|
||||
debug!("Before Step optimize: {plan}");
|
||||
let ret = step_aggr_to_upper_aggr(plan);
|
||||
debug!("After Step Optimize: {ret:?}");
|
||||
ret.ok().map(|s| TransformerAction {
|
||||
extra_parent_plans: s.to_vec(),
|
||||
new_child_plan: None,
|
||||
@@ -264,7 +262,11 @@ impl Categorizer {
|
||||
                        return commutativity;
                    }
                }
                Commutativity::Commutative
                // if all group by expressions are partition columns, the plan can be pushed down, unless
                // another push down (including `Limit` or `Sort`) is already in progress (which will then prevent the next conditionally commutative node from being pushed down).
                // TODO(discord9): This is a temporary solution (that works); a better description of
                // commutativity is needed under this situation.
                Commutativity::ConditionalCommutative(None)
            }
LogicalPlan::Sort(_) => {
|
||||
if partition_cols.is_empty() {
|
||||
@@ -322,17 +324,20 @@ impl Categorizer {
|
||||
|
||||
pub fn check_extension_plan(
|
||||
plan: &dyn UserDefinedLogicalNode,
|
||||
partition_cols: &[String],
|
||||
partition_cols: &AliasMapping,
|
||||
) -> Commutativity {
|
||||
match plan.name() {
|
||||
name if name == SeriesDivide::name() => {
|
||||
let series_divide = plan.as_any().downcast_ref::<SeriesDivide>().unwrap();
|
||||
let tags = series_divide.tags().iter().collect::<HashSet<_>>();
|
||||
for partition_col in partition_cols {
|
||||
if !tags.contains(partition_col) {
|
||||
|
||||
for all_alias in partition_cols.values() {
|
||||
let all_alias = all_alias.iter().map(|c| &c.name).collect::<HashSet<_>>();
|
||||
if tags.intersection(&all_alias).count() == 0 {
|
||||
return Commutativity::NonCommutative;
|
||||
}
|
||||
}
|
||||
|
||||
Commutativity::Commutative
|
||||
}
|
||||
name if name == SeriesNormalize::name()
|
||||
@@ -396,7 +401,7 @@ impl Categorizer {
|
||||
|
||||
/// Return true if the given expr and partition cols satisfied the rule.
|
||||
/// In this case the plan can be treated as fully commutative.
|
||||
fn check_partition(exprs: &[Expr], partition_cols: &[String]) -> bool {
|
||||
fn check_partition(exprs: &[Expr], partition_cols: &AliasMapping) -> bool {
|
||||
let mut ref_cols = HashSet::new();
|
||||
for expr in exprs {
|
||||
expr.add_column_refs(&mut ref_cols);
|
||||
@@ -405,8 +410,14 @@ impl Categorizer {
|
||||
.into_iter()
|
||||
.map(|c| c.name.clone())
|
||||
.collect::<HashSet<_>>();
|
||||
for col in partition_cols {
|
||||
if !ref_cols.contains(col) {
|
||||
for all_alias in partition_cols.values() {
|
||||
let all_alias = all_alias
|
||||
.iter()
|
||||
.map(|c| c.name.clone())
|
||||
.collect::<HashSet<_>>();
|
||||
// check if the intersection of the ref columns with all aliases of the partition column
// is empty; if it is empty, not all partition columns show up in `exprs`
|
||||
if ref_cols.intersection(&all_alias).count() == 0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -424,7 +435,7 @@ pub type StageTransformer = Arc<dyn Fn(&LogicalPlan) -> Option<TransformerAction
|
||||
pub struct TransformerAction {
|
||||
/// list of plans that need to be applied to parent plans, in the order of parent to child.
|
||||
/// i.e. if this returns `[Projection, Aggregate]`, then the parent plan should be transformed to
|
||||
/// ```
|
||||
/// ```ignore
|
||||
/// Original Parent Plan:
|
||||
/// Projection:
|
||||
/// Aggregate:
|
||||
@@ -453,7 +464,7 @@ mod test {
|
||||
fetch: None,
|
||||
});
|
||||
assert!(matches!(
|
||||
Categorizer::check_plan(&plan, Some(vec![])),
|
||||
Categorizer::check_plan(&plan, Some(Default::default())),
|
||||
Commutativity::Commutative
|
||||
));
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::any::Any;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use ahash::HashSet;
|
||||
use ahash::{HashMap, HashSet};
|
||||
use arrow_schema::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, SortOptions};
|
||||
use async_stream::stream;
|
||||
use common_catalog::parse_catalog_and_schema_from_db_string;
|
||||
@@ -88,7 +88,11 @@ impl UserDefinedLogicalNodeCore for MergeScanLogicalPlan {
    }

    fn fmt_for_explain(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "MergeScan [is_placeholder={}]", self.is_placeholder)
        write!(
            f,
            "MergeScan [is_placeholder={}, remote_input=[\n{}\n]]",
            self.is_placeholder, self.input
        )
    }

    fn with_exprs_and_inputs(
@@ -143,7 +147,7 @@ pub struct MergeScanExec {
|
||||
metric: ExecutionPlanMetricsSet,
|
||||
properties: PlanProperties,
|
||||
/// Metrics from sub stages
|
||||
sub_stage_metrics: Arc<Mutex<Vec<RecordBatchMetrics>>>,
|
||||
sub_stage_metrics: Arc<Mutex<HashMap<RegionId, RecordBatchMetrics>>>,
|
||||
query_ctx: QueryContextRef,
|
||||
target_partition: usize,
|
||||
partition_cols: Vec<String>,
|
||||
@@ -155,6 +159,7 @@ impl std::fmt::Debug for MergeScanExec {
|
||||
.field("table", &self.table)
|
||||
.field("regions", &self.regions)
|
||||
.field("schema", &self.schema)
|
||||
.field("plan", &self.plan)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -317,6 +322,12 @@ impl MergeScanExec {
|
||||
if let Some(mut first_consume_timer) = first_consume_timer.take() {
|
||||
first_consume_timer.stop();
|
||||
}
|
||||
|
||||
if let Some(metrics) = stream.metrics() {
|
||||
let mut sub_stage_metrics = sub_stage_metrics_moved.lock().unwrap();
|
||||
sub_stage_metrics.insert(region_id, metrics);
|
||||
}
|
||||
|
||||
yield Ok(batch);
|
||||
// reset poll timer
|
||||
poll_timer = Instant::now();
|
||||
@@ -341,7 +352,8 @@ impl MergeScanExec {
|
||||
metric.record_greptime_exec_cost(value as usize);
|
||||
|
||||
// record metrics from sub stages
|
||||
sub_stage_metrics_moved.lock().unwrap().push(metrics);
|
||||
let mut sub_stage_metrics = sub_stage_metrics_moved.lock().unwrap();
|
||||
sub_stage_metrics.insert(region_id, metrics);
|
||||
}
|
||||
|
||||
MERGE_SCAN_POLL_ELAPSED.observe(poll_duration.as_secs_f64());
|
||||
@@ -409,7 +421,12 @@ impl MergeScanExec {
|
||||
}
|
||||
|
||||
pub fn sub_stage_metrics(&self) -> Vec<RecordBatchMetrics> {
|
||||
self.sub_stage_metrics.lock().unwrap().clone()
|
||||
self.sub_stage_metrics
|
||||
.lock()
|
||||
.unwrap()
|
||||
.values()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn partition_count(&self) -> usize {
|
||||
|
||||
@@ -181,6 +181,15 @@ fn fetch_partition_range(input: Arc<dyn ExecutionPlan>) -> DataFusionResult<Opti
|
||||
is_batch_coalesced = true;
|
||||
}
|
||||
|
||||
// only a very limited set of plans can exist between region scan and sort exec
|
||||
// other plans might make this optimization incorrect, so be safe here by limiting it
|
||||
if !(plan.as_any().is::<ProjectionExec>()
|
||||
|| plan.as_any().is::<FilterExec>()
|
||||
|| plan.as_any().is::<CoalesceBatchesExec>())
|
||||
{
|
||||
partition_ranges = None;
|
||||
}
|
||||
|
||||
// TODO(discord9): do this in the logical plan instead as it's less buggy there
|
||||
// Collects alias of the time index column.
|
||||
if let Some(projection) = plan.as_any().downcast_ref::<ProjectionExec>() {
|
||||
@@ -194,6 +203,14 @@ fn fetch_partition_range(input: Arc<dyn ExecutionPlan>) -> DataFusionResult<Opti
|
||||
}
|
||||
|
||||
if let Some(region_scan_exec) = plan.as_any().downcast_ref::<RegionScanExec>() {
|
||||
// `PerSeries` distribution is not supported in windowed sort.
|
||||
if region_scan_exec.distribution()
|
||||
== Some(store_api::storage::TimeSeriesDistribution::PerSeries)
|
||||
{
|
||||
partition_ranges = None;
|
||||
return Ok(Transformed::no(plan));
|
||||
}
|
||||
|
||||
partition_ranges = Some(region_scan_exec.get_uncollapsed_partition_ranges());
|
||||
// Reset time index column.
|
||||
time_index = HashSet::from([region_scan_exec.time_index()]);
|
||||
|
||||
@@ -96,9 +96,10 @@ impl PartSortExec {
|
||||
|
||||
if partition >= self.partition_ranges.len() {
|
||||
internal_err!(
|
||||
"Partition index out of range: {} >= {}",
|
||||
"Partition index out of range: {} >= {} at {}",
|
||||
partition,
|
||||
self.partition_ranges.len()
|
||||
self.partition_ranges.len(),
|
||||
snafu::location!()
|
||||
)?;
|
||||
}
|
||||
|
||||
@@ -322,9 +323,10 @@ impl PartSortStream {
|
||||
) -> datafusion_common::Result<()> {
|
||||
if self.cur_part_idx >= self.partition_ranges.len() {
|
||||
internal_err!(
|
||||
"Partition index out of range: {} >= {}",
|
||||
"Partition index out of range: {} >= {} at {}",
|
||||
self.cur_part_idx,
|
||||
self.partition_ranges.len()
|
||||
self.partition_ranges.len(),
|
||||
snafu::location!()
|
||||
)?;
|
||||
}
|
||||
let cur_range = self.partition_ranges[self.cur_part_idx];
|
||||
@@ -355,9 +357,10 @@ impl PartSortStream {
|
||||
// check if the current partition index is out of range
|
||||
if self.cur_part_idx >= self.partition_ranges.len() {
|
||||
internal_err!(
|
||||
"Partition index out of range: {} >= {}",
|
||||
"Partition index out of range: {} >= {} at {}",
|
||||
self.cur_part_idx,
|
||||
self.partition_ranges.len()
|
||||
self.partition_ranges.len(),
|
||||
snafu::location!()
|
||||
)?;
|
||||
}
|
||||
let cur_range = self.partition_ranges[self.cur_part_idx];
|
||||
|
||||
@@ -27,6 +27,7 @@ use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::context::SessionState;
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
use datafusion::functions_aggregate::count::count_udaf;
|
||||
use datafusion::functions_aggregate::expr_fn::first_value;
|
||||
use datafusion::functions_aggregate::grouping::grouping_udaf;
|
||||
use datafusion::functions_aggregate::min_max::{max_udaf, min_udaf};
|
||||
use datafusion::functions_aggregate::stddev::stddev_pop_udaf;
|
||||
@@ -50,7 +51,7 @@ use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTi
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use itertools::Itertools;
|
||||
use promql::extension_plan::{
|
||||
build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond,
|
||||
build_special_time_expr, Absent, EmptyMetric, HistogramFold, InstantManipulate, Millisecond,
|
||||
RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize, UnionDistinctOn,
|
||||
};
|
||||
use promql::functions::{
|
||||
@@ -86,6 +87,8 @@ use crate::promql::error::{
|
||||
const SPECIAL_TIME_FUNCTION: &str = "time";
|
||||
/// `scalar()` function in PromQL.
|
||||
const SCALAR_FUNCTION: &str = "scalar";
|
||||
/// `absent()` function in PromQL
|
||||
const SPECIAL_ABSENT_FUNCTION: &str = "absent";
|
||||
/// `histogram_quantile` function in PromQL
|
||||
const SPECIAL_HISTOGRAM_QUANTILE: &str = "histogram_quantile";
|
||||
/// `vector` function in PromQL
|
||||
@@ -124,7 +127,10 @@ struct PromPlannerContext {
|
||||
time_index_column: Option<String>,
|
||||
field_columns: Vec<String>,
|
||||
tag_columns: Vec<String>,
|
||||
/// The matcher for field columns `__field__`.
|
||||
field_column_matcher: Option<Vec<Matcher>>,
|
||||
/// The matcher for selectors (normal matchers).
|
||||
selector_matcher: Vec<Matcher>,
|
||||
schema_name: Option<String>,
|
||||
/// The range in millisecond of range selector. None if there is no range selector.
|
||||
range: Option<Millisecond>,
|
||||
@@ -148,6 +154,7 @@ impl PromPlannerContext {
|
||||
self.field_columns = vec![];
|
||||
self.tag_columns = vec![];
|
||||
self.field_column_matcher = None;
|
||||
self.selector_matcher.clear();
|
||||
self.schema_name = None;
|
||||
self.range = None;
|
||||
}
|
||||
@@ -191,18 +198,38 @@ impl PromPlanner {
|
||||
planner.prom_expr_to_plan(&stmt.expr, session_state).await
|
||||
}
|
||||
|
||||
#[async_recursion]
|
||||
pub async fn prom_expr_to_plan(
|
||||
&mut self,
|
||||
prom_expr: &PromExpr,
|
||||
session_state: &SessionState,
|
||||
) -> Result<LogicalPlan> {
|
||||
self.prom_expr_to_plan_inner(prom_expr, false, session_state)
|
||||
.await
|
||||
}
|
||||
|
||||
/**
|
||||
Converts a PromQL expression to a logical plan.
|
||||
|
||||
NOTE:
|
||||
The `timestamp_fn` indicates whether the PromQL `timestamp()` function is being evaluated in the current context.
|
||||
If `true`, the planner generates a logical plan that projects the timestamp (time index) column
|
||||
as the value column for each input row, implementing the PromQL `timestamp()` function semantics.
|
||||
If `false`, the planner generates the standard logical plan for the given PromQL expression.
|
||||
*/
|
||||
#[async_recursion]
|
||||
async fn prom_expr_to_plan_inner(
|
||||
&mut self,
|
||||
prom_expr: &PromExpr,
|
||||
timestamp_fn: bool,
|
||||
session_state: &SessionState,
|
||||
) -> Result<LogicalPlan> {
|
||||
let res = match prom_expr {
|
||||
PromExpr::Aggregate(expr) => self.prom_aggr_expr_to_plan(session_state, expr).await?,
|
||||
PromExpr::Unary(expr) => self.prom_unary_expr_to_plan(session_state, expr).await?,
|
||||
PromExpr::Binary(expr) => self.prom_binary_expr_to_plan(session_state, expr).await?,
|
||||
PromExpr::Paren(ParenExpr { expr }) => {
|
||||
self.prom_expr_to_plan(expr, session_state).await?
|
||||
self.prom_expr_to_plan_inner(expr, timestamp_fn, session_state)
|
||||
.await?
|
||||
}
|
||||
PromExpr::Subquery(expr) => {
|
||||
self.prom_subquery_expr_to_plan(session_state, expr).await?
|
||||
@@ -210,7 +237,8 @@ impl PromPlanner {
|
||||
PromExpr::NumberLiteral(lit) => self.prom_number_lit_to_plan(lit)?,
|
||||
PromExpr::StringLiteral(lit) => self.prom_string_lit_to_plan(lit)?,
|
||||
PromExpr::VectorSelector(selector) => {
|
||||
self.prom_vector_selector_to_plan(selector).await?
|
||||
self.prom_vector_selector_to_plan(selector, timestamp_fn)
|
||||
.await?
|
||||
}
|
||||
PromExpr::MatrixSelector(selector) => {
|
||||
self.prom_matrix_selector_to_plan(selector).await?
|
||||
@@ -673,6 +701,7 @@ impl PromPlanner {
|
||||
async fn prom_vector_selector_to_plan(
|
||||
&mut self,
|
||||
vector_selector: &VectorSelector,
|
||||
timestamp_fn: bool,
|
||||
) -> Result<LogicalPlan> {
|
||||
let VectorSelector {
|
||||
name,
|
||||
@@ -687,6 +716,15 @@ impl PromPlanner {
|
||||
let normalize = self
|
||||
.selector_to_series_normalize_plan(offset, matchers, false)
|
||||
.await?;
|
||||
|
||||
let normalize = if timestamp_fn {
|
||||
// If evaluating the PromQL `timestamp()` function, project the time index column as the value column
|
||||
// before wrapping with [`InstantManipulate`], so the output matches PromQL's `timestamp()` semantics.
|
||||
self.create_timestamp_func_plan(normalize)?
|
||||
} else {
|
||||
normalize
|
||||
};
|
||||
|
||||
let manipulate = InstantManipulate::new(
|
||||
self.ctx.start,
|
||||
self.ctx.end,
|
||||
@@ -704,6 +742,43 @@ impl PromPlanner {
|
||||
}))
|
||||
}
|
||||
|
||||
/// Builds a projection plan for the PromQL `timestamp()` function.
|
||||
/// Projects the time index column as the value column for each row.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `normalize` - Input [`LogicalPlan`] for the normalized series.
|
||||
///
|
||||
/// # Returns
|
||||
/// Returns a [`Result<LogicalPlan>`] where the resulting logical plan projects the timestamp
|
||||
/// column as the value column, along with the original tag and time index columns.
|
||||
///
|
||||
/// # Timestamp vs. Time Function
|
||||
///
|
||||
/// - **Timestamp Function (`timestamp()`)**: In PromQL, the `timestamp()` function returns the
|
||||
/// timestamp (time index) of each sample as the value column.
|
||||
///
|
||||
/// - **Time Function (`time()`)**: The `time()` function returns the evaluation time of the query
|
||||
/// as a scalar value.
|
||||
///
|
||||
/// # Side Effects
|
||||
/// Updates the planner context's field columns to the timestamp column name.
|
||||
///
|
||||
fn create_timestamp_func_plan(&mut self, normalize: LogicalPlan) -> Result<LogicalPlan> {
|
||||
let time_expr = build_special_time_expr(self.ctx.time_index_column.as_ref().unwrap())
|
||||
.alias(DEFAULT_FIELD_COLUMN);
|
||||
self.ctx.field_columns = vec![time_expr.schema_name().to_string()];
|
||||
let mut project_exprs = Vec::with_capacity(self.ctx.tag_columns.len() + 2);
|
||||
project_exprs.push(self.create_time_index_column_expr()?);
|
||||
project_exprs.push(time_expr);
|
||||
project_exprs.extend(self.create_tag_column_exprs()?);
|
||||
|
||||
LogicalPlanBuilder::from(normalize)
|
||||
.project(project_exprs)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)
|
||||
}
|
||||
|
||||
async fn prom_matrix_selector_to_plan(
|
||||
&mut self,
|
||||
matrix_selector: &MatrixSelector,
|
||||
@@ -716,17 +791,19 @@ impl PromPlanner {
|
||||
..
|
||||
} = vs;
|
||||
let matchers = self.preprocess_label_matchers(matchers, name)?;
|
||||
if let Some(empty_plan) = self.setup_context().await? {
|
||||
return Ok(empty_plan);
|
||||
}
|
||||
|
||||
ensure!(!range.is_zero(), ZeroRangeSelectorSnafu);
|
||||
let range_ms = range.as_millis() as _;
|
||||
self.ctx.range = Some(range_ms);
|
||||
|
||||
let normalize = self
|
||||
.selector_to_series_normalize_plan(offset, matchers, true)
|
||||
.await?;
|
||||
// Some functions like rate may require special fields in the RangeManipulate plan
|
||||
// so we can't skip RangeManipulate.
|
||||
let normalize = match self.setup_context().await? {
|
||||
Some(empty_plan) => empty_plan,
|
||||
None => {
|
||||
self.selector_to_series_normalize_plan(offset, matchers, true)
|
||||
.await?
|
||||
}
|
||||
};
|
||||
let manipulate = RangeManipulate::new(
|
||||
self.ctx.start,
|
||||
self.ctx.end,
|
||||
@@ -760,13 +837,15 @@ impl PromPlanner {
|
||||
}
|
||||
SPECIAL_VECTOR_FUNCTION => return self.create_vector_plan(args).await,
|
||||
SCALAR_FUNCTION => return self.create_scalar_plan(args, session_state).await,
|
||||
SPECIAL_ABSENT_FUNCTION => return self.create_absent_plan(args, session_state).await,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// transform function arguments
|
||||
let args = self.create_function_args(&args.args)?;
|
||||
let input = if let Some(prom_expr) = &args.input {
|
||||
self.prom_expr_to_plan(prom_expr, session_state).await?
|
||||
self.prom_expr_to_plan_inner(prom_expr, func.name == "timestamp", session_state)
|
||||
.await?
|
||||
} else {
|
||||
self.ctx.time_index_column = Some(SPECIAL_TIME_FUNCTION.to_string());
|
||||
self.ctx.reset_table_name_and_schema();
|
||||
@@ -930,6 +1009,7 @@ impl PromPlanner {
|
||||
);
|
||||
self.ctx.schema_name = Some(matcher.value.clone());
|
||||
} else if matcher.name != METRIC_NAME {
|
||||
self.ctx.selector_matcher.push(matcher.clone());
|
||||
let _ = matchers.insert(matcher.clone());
|
||||
}
|
||||
}
|
||||
@@ -1175,6 +1255,13 @@ impl PromPlanner {
|
||||
) -> Result<Vec<DfExpr>> {
|
||||
let mut exprs = Vec::with_capacity(label_matchers.matchers.len());
|
||||
for matcher in label_matchers.matchers {
|
||||
if matcher.name == SCHEMA_COLUMN_MATCHER
|
||||
|| matcher.name == DB_COLUMN_MATCHER
|
||||
|| matcher.name == FIELD_COLUMN_MATCHER
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
let col = if table_schema
|
||||
.field_with_unqualified_name(&matcher.name)
|
||||
.is_err()
|
||||
@@ -1652,7 +1739,7 @@ impl PromPlanner {
|
||||
|
||||
ScalarFunc::GeneratedExpr
|
||||
}
|
||||
"sort" | "sort_desc" | "sort_by_label" | "sort_by_label_desc" => {
|
||||
"sort" | "sort_desc" | "sort_by_label" | "sort_by_label_desc" | "timestamp" => {
|
||||
// These functions are not expression but a part of plan,
|
||||
// they are processed by `prom_call_expr_to_plan`.
|
||||
for value in &self.ctx.field_columns {
|
||||
@@ -2263,10 +2350,14 @@ impl PromPlanner {
|
||||
let input_plan = self.prom_expr_to_plan(&input, session_state).await?;
|
||||
|
||||
if !self.ctx.has_le_tag() {
|
||||
return ColumnNotFoundSnafu {
|
||||
col: LE_COLUMN_NAME.to_string(),
|
||||
}
|
||||
.fail();
|
||||
// Return empty result instead of error when 'le' column is not found
|
||||
// This handles the case when histogram metrics don't exist
|
||||
return Ok(LogicalPlan::EmptyRelation(
|
||||
datafusion::logical_expr::EmptyRelation {
|
||||
produce_one_row: false,
|
||||
schema: Arc::new(DFSchema::empty()),
|
||||
},
|
||||
));
|
||||
}
|
||||
let time_index_column =
|
||||
self.ctx
|
||||
@@ -2374,6 +2465,69 @@ impl PromPlanner {
|
||||
Ok(scalar_plan)
|
||||
}
|
||||
|
||||
/// Create a [SPECIAL_ABSENT_FUNCTION] plan
|
||||
async fn create_absent_plan(
|
||||
&mut self,
|
||||
args: &PromFunctionArgs,
|
||||
session_state: &SessionState,
|
||||
) -> Result<LogicalPlan> {
|
||||
if args.args.len() != 1 {
|
||||
return FunctionInvalidArgumentSnafu {
|
||||
fn_name: SPECIAL_ABSENT_FUNCTION.to_string(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
let input = self.prom_expr_to_plan(&args.args[0], session_state).await?;
|
||||
|
||||
let time_index_expr = self.create_time_index_column_expr()?;
|
||||
let first_field_expr =
|
||||
self.create_field_column_exprs()?
|
||||
.pop()
|
||||
.with_context(|| ValueNotFoundSnafu {
|
||||
table: self.ctx.table_name.clone().unwrap_or_default(),
|
||||
})?;
|
||||
let first_value_expr = first_value(first_field_expr, None);
|
||||
|
||||
let ordered_aggregated_input = LogicalPlanBuilder::from(input)
|
||||
.aggregate(
|
||||
vec![time_index_expr.clone()],
|
||||
vec![first_value_expr.clone()],
|
||||
)
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.sort(vec![time_index_expr.sort(true, false)])
|
||||
.context(DataFusionPlanningSnafu)?
|
||||
.build()
|
||||
.context(DataFusionPlanningSnafu)?;
|
||||
|
||||
let fake_labels = self
|
||||
.ctx
|
||||
.selector_matcher
|
||||
.iter()
|
||||
.filter_map(|matcher| match matcher.op {
|
||||
MatchOp::Equal => Some((matcher.name.clone(), matcher.value.clone())),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// Create the absent plan
|
||||
let absent_plan = LogicalPlan::Extension(Extension {
|
||||
node: Arc::new(
|
||||
Absent::try_new(
|
||||
self.ctx.start,
|
||||
self.ctx.end,
|
||||
self.ctx.interval,
|
||||
self.ctx.time_index_column.as_ref().unwrap().clone(),
|
||||
self.ctx.field_columns[0].clone(),
|
||||
fake_labels,
|
||||
ordered_aggregated_input,
|
||||
)
|
||||
.context(DataFusionPlanningSnafu)?,
|
||||
),
|
||||
});
|
||||
|
||||
Ok(absent_plan)
|
||||
}
|
||||
|
||||
/// Try to build a DataFusion Literal Expression from PromQL Expr, return
|
||||
/// `None` if the input is not a literal expression.
|
||||
fn try_build_literal_expr(expr: &PromExpr) -> Option<DfExpr> {
|
||||
@@ -4657,4 +4811,53 @@ Filter: up.field_0 IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:
|
||||
|
||||
assert_eq!(plan.display_indent_schema().to_string(), expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_histogram_quantile_missing_le_column() {
|
||||
let mut eval_stmt = EvalStmt {
|
||||
expr: PromExpr::NumberLiteral(NumberLiteral { val: 1.0 }),
|
||||
start: UNIX_EPOCH,
|
||||
end: UNIX_EPOCH
|
||||
.checked_add(Duration::from_secs(100_000))
|
||||
.unwrap(),
|
||||
interval: Duration::from_secs(5),
|
||||
lookback_delta: Duration::from_secs(1),
|
||||
};
|
||||
|
||||
// Test case: histogram_quantile with a table that doesn't have 'le' column
|
||||
let case = r#"histogram_quantile(0.99, sum by(pod,instance,le) (rate(non_existent_histogram_bucket{instance=~"xxx"}[1m])))"#;
|
||||
|
||||
let prom_expr = parser::parse(case).unwrap();
|
||||
eval_stmt.expr = prom_expr;
|
||||
|
||||
// Create a table provider with a table that doesn't have 'le' column
|
||||
let table_provider = build_test_table_provider_with_fields(
|
||||
&[(
|
||||
DEFAULT_SCHEMA_NAME.to_string(),
|
||||
"non_existent_histogram_bucket".to_string(),
|
||||
)],
|
||||
&["pod", "instance"], // Note: no 'le' column
|
||||
)
|
||||
.await;
|
||||
|
||||
// Should return empty result instead of error
|
||||
let result =
|
||||
PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()).await;
|
||||
|
||||
// This should succeed now (returning empty result) instead of failing with "Cannot find column le"
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Expected successful plan creation with empty result, but got error: {:?}",
|
||||
result.err()
|
||||
);
|
||||
|
||||
// Verify that the result is an EmptyRelation
|
||||
let plan = result.unwrap();
|
||||
match plan {
|
||||
LogicalPlan::EmptyRelation(_) => {
|
||||
// This is what we expect
|
||||
}
|
||||
_ => panic!("Expected EmptyRelation, but got: {:?}", plan),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ use common_telemetry::tracing_context::{FutureExt, TracingContext};
|
||||
use futures::{future, ready, Stream};
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use prost::Message;
|
||||
use session::context::{QueryContext, QueryContextRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use table::table_name::TableName;
|
||||
use tokio::sync::mpsc;
|
||||
@@ -188,6 +189,7 @@ impl FlightCraft for GreptimeRequestHandler {
|
||||
let ticket = request.into_inner().ticket;
|
||||
let request =
|
||||
GreptimeRequest::decode(ticket.as_ref()).context(error::InvalidFlightTicketSnafu)?;
|
||||
let query_ctx = QueryContext::arc();
|
||||
|
||||
// The gRPC protocol passes queries via Flight. It needs to be wrapped under a span in order to record the stream.
|
||||
let span = info_span!(
|
||||
@@ -202,6 +204,7 @@ impl FlightCraft for GreptimeRequestHandler {
|
||||
output,
|
||||
TracingContext::from_current_span(),
|
||||
flight_compression,
|
||||
query_ctx,
|
||||
);
|
||||
Ok(Response::new(stream))
|
||||
}
|
||||
@@ -371,15 +374,25 @@ fn to_flight_data_stream(
|
||||
output: Output,
|
||||
tracing_context: TracingContext,
|
||||
flight_compression: FlightCompression,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> TonicStream<FlightData> {
|
||||
match output.data {
|
||||
OutputData::Stream(stream) => {
|
||||
let stream = FlightRecordBatchStream::new(stream, tracing_context, flight_compression);
|
||||
let stream = FlightRecordBatchStream::new(
|
||||
stream,
|
||||
tracing_context,
|
||||
flight_compression,
|
||||
query_ctx,
|
||||
);
|
||||
Box::pin(stream) as _
|
||||
}
|
||||
OutputData::RecordBatches(x) => {
|
||||
let stream =
|
||||
FlightRecordBatchStream::new(x.as_stream(), tracing_context, flight_compression);
|
||||
let stream = FlightRecordBatchStream::new(
|
||||
x.as_stream(),
|
||||
tracing_context,
|
||||
flight_compression,
|
||||
query_ctx,
|
||||
);
|
||||
Box::pin(stream) as _
|
||||
}
|
||||
OutputData::AffectedRows(rows) => {
|
||||
|
||||
@@ -25,6 +25,7 @@ use futures::channel::mpsc;
|
||||
use futures::channel::mpsc::Sender;
|
||||
use futures::{SinkExt, Stream, StreamExt};
|
||||
use pin_project::{pin_project, pinned_drop};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
@@ -46,10 +47,12 @@ impl FlightRecordBatchStream {
|
||||
recordbatches: SendableRecordBatchStream,
|
||||
tracing_context: TracingContext,
|
||||
compression: FlightCompression,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Self {
|
||||
let should_send_partial_metrics = query_ctx.explain_verbose();
|
||||
let (tx, rx) = mpsc::channel::<TonicResult<FlightMessage>>(1);
|
||||
let join_handle = common_runtime::spawn_global(async move {
|
||||
Self::flight_data_stream(recordbatches, tx)
|
||||
Self::flight_data_stream(recordbatches, tx, should_send_partial_metrics)
|
||||
.trace(tracing_context.attach(info_span!("flight_data_stream")))
|
||||
.await
|
||||
});
|
||||
@@ -69,6 +72,7 @@ impl FlightRecordBatchStream {
|
||||
async fn flight_data_stream(
|
||||
mut recordbatches: SendableRecordBatchStream,
|
||||
mut tx: Sender<TonicResult<FlightMessage>>,
|
||||
should_send_partial_metrics: bool,
|
||||
) {
|
||||
let schema = recordbatches.schema().arrow_schema().clone();
|
||||
if let Err(e) = tx.send(Ok(FlightMessage::Schema(schema))).await {
|
||||
@@ -88,6 +92,17 @@ impl FlightRecordBatchStream {
|
||||
warn!(e; "stop sending Flight data");
|
||||
return;
|
||||
}
|
||||
if should_send_partial_metrics {
|
||||
if let Some(metrics) = recordbatches
|
||||
.metrics()
|
||||
.and_then(|m| serde_json::to_string(&m).ok())
|
||||
{
|
||||
if let Err(e) = tx.send(Ok(FlightMessage::Metrics(metrics))).await {
|
||||
warn!(e; "stop sending Flight data");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
let e = Err(e).context(error::CollectRecordbatchSnafu);
|
||||
@@ -154,6 +169,7 @@ mod test {
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::Int32Vector;
|
||||
use futures::StreamExt;
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -175,6 +191,7 @@ mod test {
|
||||
recordbatches,
|
||||
TracingContext::default(),
|
||||
FlightCompression::default(),
|
||||
QueryContext::arc(),
|
||||
);
|
||||
|
||||
let mut raw_data = Vec::with_capacity(2);
|
||||
|
||||
@@ -42,6 +42,7 @@ use session::hints::READ_PREFERENCE_HINT;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use table::TableRef;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::mpsc::error::TrySendError;
|
||||
|
||||
use crate::error::Error::UnsupportedAuthScheme;
|
||||
use crate::error::{
|
||||
@@ -176,8 +177,9 @@ impl GreptimeRequestHandler {
|
||||
let result = result
|
||||
.map(|x| DoPutResponse::new(request_id, x))
|
||||
.map_err(Into::into);
|
||||
if result_sender.try_send(result).is_err() {
|
||||
warn!(r#""DoPut" client maybe unreachable, abort handling its message"#);
|
||||
if let Err(e) = result_sender.try_send(result)
|
||||
&& let TrySendError::Closed(_) = e {
|
||||
warn!(r#""DoPut" client with request_id {} maybe unreachable, abort handling its message"#, request_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ impl PrometheusGatewayService {
|
||||
let result = self.handler.do_query(&query, ctx).await;
|
||||
let (metric_name, mut result_type) =
|
||||
match retrieve_metric_name_and_result_type(&query.query) {
|
||||
Ok((metric_name, result_type)) => (metric_name.unwrap_or_default(), result_type),
|
||||
Ok((metric_name, result_type)) => (metric_name, result_type),
|
||||
Err(err) => {
|
||||
return PrometheusJsonResponse::error(err.status_code(), err.output_msg())
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ use crate::error::{self, InternalSnafu, PipelineSnafu, Result};
|
||||
use crate::http::extractor::PipelineInfo;
|
||||
use crate::http::header::{write_cost_header_map, GREPTIME_DB_HEADER_METRICS};
|
||||
use crate::http::PromValidationMode;
|
||||
use crate::prom_store::{snappy_decompress, zstd_decompress};
|
||||
use crate::prom_store::{extract_schema_from_read_request, snappy_decompress, zstd_decompress};
|
||||
use crate::proto::{PromSeriesProcessor, PromWriteRequest};
|
||||
use crate::query_handler::{PipelineHandlerRef, PromStoreProtocolHandlerRef, PromStoreResponse};
|
||||
|
||||
@@ -117,6 +117,7 @@ pub async fn remote_write(
|
||||
let is_zstd = content_encoding.contains(VM_ENCODING);
|
||||
|
||||
let mut processor = PromSeriesProcessor::default_processor();
|
||||
|
||||
if let Some(pipeline_name) = pipeline_info.pipeline_name {
|
||||
let pipeline_def = PipelineDefinition::from_name(
|
||||
&pipeline_name,
|
||||
@@ -184,13 +185,19 @@ pub async fn remote_read(
|
||||
) -> Result<PromStoreResponse> {
|
||||
let db = params.db.clone().unwrap_or_default();
|
||||
query_ctx.set_channel(Channel::Prometheus);
|
||||
|
||||
let request = decode_remote_read_request(body).await?;
|
||||
|
||||
// Extract schema from special labels and set it in query context
|
||||
if let Some(schema) = extract_schema_from_read_request(&request) {
|
||||
query_ctx.set_current_schema(&schema);
|
||||
}
|
||||
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
let _timer = crate::metrics::METRIC_HTTP_PROM_STORE_READ_ELAPSED
|
||||
.with_label_values(&[db.as_str()])
|
||||
.start_timer();
|
||||
|
||||
let request = decode_remote_read_request(body).await?;
|
||||
|
||||
state.prom_store_handler.read(request, query_ctx).await
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
//! prom provides Prometheus HTTP API server compliance
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::extract::{Path, Query, State};
|
||||
@@ -56,13 +56,13 @@ use crate::error::{
|
||||
TableNotFoundSnafu, UnexpectedResultSnafu,
|
||||
};
|
||||
use crate::http::header::collect_plan_metrics;
|
||||
use crate::prom_store::{FIELD_NAME_LABEL, METRIC_NAME_LABEL};
|
||||
use crate::prom_store::{DATABASE_LABEL, FIELD_NAME_LABEL, METRIC_NAME_LABEL, SCHEMA_LABEL};
|
||||
use crate::prometheus_handler::PrometheusHandlerRef;
|
||||
|
||||
/// For [ValueType::Vector] result type
|
||||
#[derive(Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct PromSeriesVector {
|
||||
pub metric: HashMap<String, String>,
|
||||
pub metric: BTreeMap<String, String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub value: Option<(f64, String)>,
|
||||
}
|
||||
@@ -70,7 +70,7 @@ pub struct PromSeriesVector {
|
||||
/// For [ValueType::Matrix] result type
|
||||
#[derive(Debug, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct PromSeriesMatrix {
|
||||
pub metric: HashMap<String, String>,
|
||||
pub metric: BTreeMap<String, String>,
|
||||
pub values: Vec<(f64, String)>,
|
||||
}
|
||||
|
||||
@@ -318,7 +318,7 @@ async fn do_instant_query(
|
||||
) -> PrometheusJsonResponse {
|
||||
let result = handler.do_query(prom_query, query_ctx).await;
|
||||
let (metric_name, result_type) = match retrieve_metric_name_and_result_type(&prom_query.query) {
|
||||
Ok((metric_name, result_type)) => (metric_name.unwrap_or_default(), result_type),
|
||||
Ok((metric_name, result_type)) => (metric_name, result_type),
|
||||
Err(err) => return PrometheusJsonResponse::error(err.status_code(), err.output_msg()),
|
||||
};
|
||||
PrometheusJsonResponse::from_query_result(result, metric_name, result_type).await
|
||||
@@ -428,7 +428,7 @@ async fn do_range_query(
|
||||
let result = handler.do_query(prom_query, query_ctx).await;
|
||||
let metric_name = match retrieve_metric_name_and_result_type(&prom_query.query) {
|
||||
Err(err) => return PrometheusJsonResponse::error(err.status_code(), err.output_msg()),
|
||||
Ok((metric_name, _)) => metric_name.unwrap_or_default(),
|
||||
Ok((metric_name, _)) => metric_name,
|
||||
};
|
||||
PrometheusJsonResponse::from_query_result(result, metric_name, ValueType::Matrix).await
|
||||
}
|
||||
@@ -824,13 +824,52 @@ pub(crate) fn try_update_catalog_schema(ctx: &mut QueryContext, catalog: &str, s
|
||||
}
|
||||
|
||||
fn promql_expr_to_metric_name(expr: &PromqlExpr) -> Option<String> {
|
||||
find_metric_name_and_matchers(expr, |name, matchers| {
|
||||
name.clone().or(matchers
|
||||
.find_matchers(METRIC_NAME)
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|m| m.value))
|
||||
})
|
||||
let mut metric_names = HashSet::new();
|
||||
collect_metric_names(expr, &mut metric_names);
|
||||
|
||||
// Return the metric name only if there's exactly one unique metric name
|
||||
if metric_names.len() == 1 {
|
||||
metric_names.into_iter().next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Recursively collect all metric names from a PromQL expression
|
||||
fn collect_metric_names(expr: &PromqlExpr, metric_names: &mut HashSet<String>) {
|
||||
match expr {
|
||||
PromqlExpr::Aggregate(AggregateExpr { expr, .. }) => {
|
||||
collect_metric_names(expr, metric_names)
|
||||
}
|
||||
PromqlExpr::Unary(UnaryExpr { expr }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::Binary(BinaryExpr { lhs, rhs, .. }) => {
|
||||
collect_metric_names(lhs, metric_names);
|
||||
collect_metric_names(rhs, metric_names);
|
||||
}
|
||||
PromqlExpr::Paren(ParenExpr { expr }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::Subquery(SubqueryExpr { expr, .. }) => collect_metric_names(expr, metric_names),
|
||||
PromqlExpr::VectorSelector(VectorSelector { name, matchers, .. }) => {
|
||||
if let Some(name) = name {
|
||||
metric_names.insert(name.clone());
|
||||
} else if let Some(matcher) = matchers.find_matchers(METRIC_NAME).into_iter().next() {
|
||||
metric_names.insert(matcher.value);
|
||||
}
|
||||
}
|
||||
PromqlExpr::MatrixSelector(MatrixSelector { vs, .. }) => {
|
||||
let VectorSelector { name, matchers, .. } = vs;
|
||||
if let Some(name) = name {
|
||||
metric_names.insert(name.clone());
|
||||
} else if let Some(matcher) = matchers.find_matchers(METRIC_NAME).into_iter().next() {
|
||||
metric_names.insert(matcher.value);
|
||||
}
|
||||
}
|
||||
PromqlExpr::Call(Call { args, .. }) => {
|
||||
args.args
|
||||
.iter()
|
||||
.for_each(|e| collect_metric_names(e, metric_names));
|
||||
}
|
||||
PromqlExpr::NumberLiteral(_) | PromqlExpr::StringLiteral(_) | PromqlExpr::Extension(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_metric_name_and_matchers<E, F>(expr: &PromqlExpr, f: F) -> Option<E>
|
||||
@@ -995,6 +1034,19 @@ pub async fn label_values_query(
|
||||
let mut field_columns = field_columns.into_iter().collect::<Vec<_>>();
|
||||
field_columns.sort_unstable();
|
||||
return PrometheusJsonResponse::success(PrometheusResponse::LabelValues(field_columns));
|
||||
} else if label_name == SCHEMA_LABEL || label_name == DATABASE_LABEL {
let catalog_manager = handler.catalog_manager();

match retrieve_schema_names(&query_ctx, catalog_manager, params.matches.0).await {
Ok(schema_names) => {
return PrometheusJsonResponse::success(PrometheusResponse::LabelValues(
schema_names,
));
}
Err(e) => {
return PrometheusJsonResponse::error(e.status_code(), e.output_msg());
}
}
}

let queries = params.matches.0;
@@ -1112,53 +1164,51 @@ async fn retrieve_field_names(
Ok(field_columns)
}

/// Try to parse and extract the name of referenced metric from the promql query.
///
/// Returns the metric name if a single metric is referenced, otherwise None.
fn retrieve_metric_name_from_promql(query: &str) -> Option<String> {
let promql_expr = promql_parser::parser::parse(query).ok()?;
async fn retrieve_schema_names(
query_ctx: &QueryContext,
catalog_manager: CatalogManagerRef,
matches: Vec<String>,
) -> Result<Vec<String>> {
let mut schemas = Vec::new();
let catalog = query_ctx.current_catalog();

struct MetricNameVisitor {
metric_name: Option<String>,
}
let candidate_schemas = catalog_manager
.schema_names(catalog, Some(query_ctx))
.await
.context(CatalogSnafu)?;

impl promql_parser::util::ExprVisitor for MetricNameVisitor {
type Error = ();

fn pre_visit(&mut self, plan: &PromqlExpr) -> std::result::Result<bool, Self::Error> {
let query_metric_name = match plan {
PromqlExpr::VectorSelector(vs) => vs
.matchers
.find_matchers(METRIC_NAME)
.into_iter()
.next()
.map(|m| m.value)
.or_else(|| vs.name.clone()),
PromqlExpr::MatrixSelector(ms) => ms
.vs
.matchers
.find_matchers(METRIC_NAME)
.into_iter()
.next()
.map(|m| m.value)
.or_else(|| ms.vs.name.clone()),
_ => return Ok(true),
};

// set it to empty string if multiple metrics are referenced.
if self.metric_name.is_some() && query_metric_name.is_some() {
self.metric_name = Some(String::new());
} else {
self.metric_name = query_metric_name.or_else(|| self.metric_name.clone());
for schema in candidate_schemas {
let mut found = true;
for match_item in &matches {
if let Some(table_name) = retrieve_metric_name_from_promql(match_item) {
let exists = catalog_manager
.table_exists(catalog, &schema, &table_name, Some(query_ctx))
.await
.context(CatalogSnafu)?;
if !exists {
found = false;
break;
}
}
}

Ok(true)
if found {
schemas.push(schema);
}
}

let mut visitor = MetricNameVisitor { metric_name: None };
promql_parser::util::walk_expr(&mut visitor, &promql_expr).ok()?;
visitor.metric_name
schemas.sort_unstable();

Ok(schemas)
}

/// Try to parse and extract the name of referenced metric from the promql query.
///
/// Returns the metric name if exactly one unique metric is referenced, otherwise None.
/// Multiple references to the same metric are allowed.
fn retrieve_metric_name_from_promql(query: &str) -> Option<String> {
let promql_expr = promql_parser::parser::parse(query).ok()?;
promql_expr_to_metric_name(&promql_expr)
}
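As a reading aid for the hunk above: a minimal standalone sketch of the metric-name extraction, built only from the promql_parser calls that already appear in this diff (parser::parse, Matchers::find_matchers, the selector's name field). The "__name__" literal stands in for the METRIC_NAME constant; this is a sketch of the idea, not the handler's exact code, and it punts on operators and subqueries that the full promql_expr_to_metric_name walk handles.

use promql_parser::parser::{self, Expr};

// Illustrative sketch only: pull the metric referenced by a single `match[]`
// selector such as `cpu_usage{instance="localhost"}` or `cpu_usage[5m]`.
fn metric_name_of(selector: &str) -> Option<String> {
    let expr = parser::parse(selector).ok()?;
    match expr {
        Expr::VectorSelector(vs) => vs
            .matchers
            .find_matchers("__name__")
            .into_iter()
            .next()
            .map(|m| m.value)
            .or(vs.name),
        Expr::MatrixSelector(ms) => ms
            .vs
            .matchers
            .find_matchers("__name__")
            .into_iter()
            .next()
            .map(|m| m.value)
            .or(ms.vs.name),
        // Binary operators, subqueries and literals are left to the full
        // expression walk in the handler; the sketch simply gives up here.
        _ => None,
    }
}

fn main() {
    assert_eq!(
        metric_name_of(r#"cpu_usage{instance="localhost"}"#).as_deref(),
        Some("cpu_usage")
    );
    assert_eq!(metric_name_of("42"), None);
}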

#[derive(Debug, Default, Serialize, Deserialize)]
@@ -1275,3 +1325,205 @@ pub async fn parse_query(
PrometheusJsonResponse::error(StatusCode::InvalidArguments, "query is required")
}
}

#[cfg(test)]
mod tests {
use promql_parser::parser::value::ValueType;

use super::*;

struct TestCase {
name: &'static str,
promql: &'static str,
expected_metric: Option<&'static str>,
expected_type: ValueType,
should_error: bool,
}

#[test]
fn test_retrieve_metric_name_and_result_type() {
let test_cases = &[
// Single metric cases
TestCase {
name: "simple metric",
promql: "cpu_usage",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "metric with selector",
promql: r#"cpu_usage{instance="localhost"}"#,
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "metric with range selector",
promql: "cpu_usage[5m]",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Matrix,
should_error: false,
},
TestCase {
name: "metric with __name__ matcher",
promql: r#"{__name__="cpu_usage"}"#,
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "metric with unary operator",
promql: "-cpu_usage",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
// Aggregation and function cases
TestCase {
name: "metric with aggregation",
promql: "sum(cpu_usage)",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "complex aggregation",
promql: r#"sum by (instance) (cpu_usage{job="node"})"#,
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
// Same metric binary operations
TestCase {
name: "same metric addition",
promql: "cpu_usage + cpu_usage",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "metric with scalar addition",
promql: r#"sum(rate(cpu_usage{job="node"}[5m])) by (instance) + 100"#,
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
// Multiple metrics cases
TestCase {
name: "different metrics addition",
promql: "cpu_usage + memory_usage",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "different metrics subtraction",
promql: "network_in - network_out",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: false,
},
// Unless operator cases
TestCase {
name: "unless with different metrics",
promql: "cpu_usage unless memory_usage",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: false,
},
TestCase {
name: "unless with same metric",
promql: "cpu_usage unless cpu_usage",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Vector,
should_error: false,
},
// Subquery cases
TestCase {
name: "basic subquery",
promql: "cpu_usage[5m:1m]",
expected_metric: Some("cpu_usage"),
expected_type: ValueType::Matrix,
should_error: false,
},
TestCase {
name: "subquery with multiple metrics",
promql: "(cpu_usage + memory_usage)[5m:1m]",
expected_metric: None,
expected_type: ValueType::Matrix,
should_error: false,
},
// Literal values
TestCase {
name: "scalar value",
promql: "42",
expected_metric: None,
expected_type: ValueType::Scalar,
should_error: false,
},
TestCase {
name: "string literal",
promql: r#""hello world""#,
expected_metric: None,
expected_type: ValueType::String,
should_error: false,
},
// Error cases
TestCase {
name: "invalid syntax",
promql: "cpu_usage{invalid=",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: true,
},
TestCase {
name: "empty query",
promql: "",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: true,
},
TestCase {
name: "malformed brackets",
promql: "cpu_usage[5m",
expected_metric: None,
expected_type: ValueType::Vector,
should_error: true,
},
];

for test_case in test_cases {
let result = retrieve_metric_name_and_result_type(test_case.promql);

if test_case.should_error {
assert!(
result.is_err(),
"Test '{}' should have failed but succeeded with: {:?}",
test_case.name,
result
);
} else {
let (metric_name, value_type) = result.unwrap_or_else(|e| {
panic!(
"Test '{}' should have succeeded but failed with error: {}",
test_case.name, e
)
});

let expected_metric_name = test_case.expected_metric.map(|s| s.to_string());
assert_eq!(
metric_name, expected_metric_name,
"Test '{}': metric name mismatch. Expected: {:?}, Got: {:?}",
test_case.name, expected_metric_name, metric_name
);

assert_eq!(
value_type, test_case.expected_type,
"Test '{}': value type mismatch. Expected: {:?}, Got: {:?}",
test_case.name, test_case.expected_type, value_type
);
}
}
}
}

@@ -13,7 +13,8 @@
// limitations under the License.

//! prom supplies the Prometheus HTTP API server compliance
use std::collections::HashMap;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap};

use axum::http::HeaderValue;
use axum::response::{IntoResponse, Response};
@@ -117,7 +118,7 @@ impl PrometheusJsonResponse {
/// Convert from `Result<Output>`
pub async fn from_query_result(
result: Result<Output>,
metric_name: String,
metric_name: Option<String>,
result_type: ValueType,
) -> Self {
let response: Result<Self> = try {
@@ -181,7 +182,7 @@ impl PrometheusJsonResponse {
/// Convert [RecordBatches] to [PromData]
fn record_batches_to_data(
batches: RecordBatches,
metric_name: String,
metric_name: Option<String>,
result_type: ValueType,
) -> Result<PrometheusResponse> {
// infer semantic type of each column from schema.
@@ -229,7 +230,6 @@ impl PrometheusJsonResponse {
reason: "no value column found".to_string(),
})?;

let metric_name = (METRIC_NAME, metric_name.as_str());
// Preserves the order of output tags.
// Tag order matters, e.g., after sort and sort_desc, the output order must be kept.
let mut buffer = IndexMap::<Vec<(&str, &str)>, Vec<(f64, String)>>::new();
@@ -275,9 +275,10 @@ impl PrometheusJsonResponse {
}

// retrieve tags
// TODO(ruihang): push table name `__metric__`
let mut tags = Vec::with_capacity(num_label_columns + 1);
tags.push(metric_name);
if let Some(metric_name) = &metric_name {
tags.push((METRIC_NAME, metric_name.as_str()));
}
for (tag_column, tag_name) in tag_columns.iter().zip(tag_names.iter()) {
// TODO(ruihang): add test for NULL tag
if let Some(tag_value) = tag_column.get_data(row_index) {
@@ -311,7 +312,7 @@ impl PrometheusJsonResponse {
let metric = tags
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect::<HashMap<_, _>>();
.collect::<BTreeMap<_, _>>();
match result {
PromQueryResult::Vector(ref mut v) => {
v.push(PromSeriesVector {
@@ -320,6 +321,11 @@ impl PrometheusJsonResponse {
});
}
PromQueryResult::Matrix(ref mut v) => {
// sort values by timestamp
if !values.is_sorted_by(|a, b| a.0 <= b.0) {
values.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
}

v.push(PromSeriesMatrix { metric, values });
}
PromQueryResult::Scalar(ref mut v) => {
@@ -331,6 +337,12 @@ impl PrometheusJsonResponse {
}
});

// sort matrix by metric
// see: https://prometheus.io/docs/prometheus/3.5/querying/api/#range-vectors
if let PromQueryResult::Matrix(ref mut v) = result {
v.sort_by(|a, b| a.metric.cmp(&b.metric));
}

let result_type_string = result_type.to_string();
let data = PrometheusResponse::PromData(PromData {
result_type: result_type_string,
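The switch from HashMap to BTreeMap above is what makes the new matrix ordering well defined: BTreeMap<String, String> implements Ord, so two series can be compared by their label sets. A small self-contained sketch of that ordering idea, using illustrative stand-in types rather than the PromSeriesMatrix struct itself:

use std::cmp::Ordering;
use std::collections::BTreeMap;

// Stand-in for one matrix series: ordered labels plus (timestamp, value) samples.
struct Series {
    metric: BTreeMap<String, String>,
    values: Vec<(f64, String)>,
}

fn sort_matrix(series: &mut [Series]) {
    for s in series.iter_mut() {
        // Samples are ordered by timestamp inside each series.
        s.values
            .sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
    }
    // Series are then ordered by their label sets, as the Prometheus
    // range-vector format expects.
    series.sort_by(|a, b| a.metric.cmp(&b.metric));
}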

@@ -170,7 +170,7 @@ fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output

// skip the first "select"
for var in vars.iter().skip(1) {
let var = var.trim_matches(|c| c == ' ' || c == ',');
let var = var.trim_matches(|c| c == ' ' || c == ',' || c == ';');
let var_as: Vec<&str> = var
.split(" as ")
.map(|x| {
@@ -185,6 +185,9 @@ fn select_variable(query: &str, query_context: QueryContextRef) -> Option<Output
let value = match var_as[0] {
"session.time_zone" | "time_zone" => query_context.timezone().to_string(),
"system_time_zone" => system_timezone_name(),
"max_execution_time" | "session.max_execution_time" => {
query_context.query_timeout_as_millis().to_string()
}
_ => VAR_VALUES
.get(var_as[0])
.map(|v| v.to_string())
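The only behavioural change in the trim line above is the extra ';' in the trim set; without it, the last variable of a statement ending in "@@wait_timeout;" kept its trailing semicolon and surfaced as the "wait_timeout;" column seen in the old expected output further below. A tiny illustrative sketch (clean_var is a made-up helper name, not the server's API):

fn clean_var(raw: &str) -> &str {
    // Mirrors the new trim set: spaces, commas and trailing semicolons.
    raw.trim_matches(|c| c == ' ' || c == ',' || c == ';')
}

fn main() {
    assert_eq!(clean_var(" wait_timeout;"), "wait_timeout");
    assert_eq!(clean_var(" wait_timeout ,"), "wait_timeout");
}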
@@ -352,11 +355,11 @@ mod test {
// complex variables
let query = "/* mysql-connector-java-8.0.17 (Revision: 16a712ddb3f826a1933ab42b0039f7fb9eebc6ec) */SELECT @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_connection, @@character_set_results AS character_set_results, @@character_set_server AS character_set_server, @@collation_server AS collation_server, @@collation_connection AS collation_connection, @@init_connect AS init_connect, @@interactive_timeout AS interactive_timeout, @@license AS license, @@lower_case_table_names AS lower_case_table_names, @@max_allowed_packet AS max_allowed_packet, @@net_write_timeout AS net_write_timeout, @@performance_schema AS performance_schema, @@sql_mode AS sql_mode, @@system_time_zone AS system_time_zone, @@time_zone AS time_zone, @@transaction_isolation AS transaction_isolation, @@wait_timeout AS wait_timeout;";
let expected = "\
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+
| auto_increment_increment | character_set_client | character_set_connection | character_set_results | character_set_server | collation_server | collation_connection | init_connect | interactive_timeout | license | lower_case_table_names | max_allowed_packet | net_write_timeout | performance_schema | sql_mode | system_time_zone | time_zone | transaction_isolation | wait_timeout; |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 31536000 | 0 | 0 | 134217728 | 31536000 | 0 | 0 | Asia/Shanghai | Asia/Shanghai | REPEATABLE-READ | 31536000 |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+---------------+";
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+--------------+
| auto_increment_increment | character_set_client | character_set_connection | character_set_results | character_set_server | collation_server | collation_connection | init_connect | interactive_timeout | license | lower_case_table_names | max_allowed_packet | net_write_timeout | performance_schema | sql_mode | system_time_zone | time_zone | transaction_isolation | wait_timeout |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+--------------+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 31536000 | 0 | 0 | 134217728 | 31536000 | 0 | 0 | Asia/Shanghai | Asia/Shanghai | REPEATABLE-READ | 31536000 |
+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+---------------+-----------------------+--------------+";
test(query, expected);

let query = "show variables";

@@ -167,6 +167,9 @@ async fn run_custom_pipeline(
PipelineExecOutput::DispatchedTo(dispatched_to, val) => {
push_to_map!(dispatched, dispatched_to, val, arr_len);
}
PipelineExecOutput::Filtered => {
continue;
}
}
}

@@ -49,7 +49,7 @@ pub(crate) struct GreptimeDBStartupParameters {
impl GreptimeDBStartupParameters {
fn new() -> GreptimeDBStartupParameters {
GreptimeDBStartupParameters {
version: format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION")),
version: format!("16.3-greptimedb-{}", common_version::version()),
}
}
}

@@ -19,7 +19,7 @@ use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};

use api::prom_store::remote::label_matcher::Type as MatcherType;
use api::prom_store::remote::{Label, Query, Sample, TimeSeries, WriteRequest};
use api::prom_store::remote::{Label, Query, ReadRequest, Sample, TimeSeries, WriteRequest};
use api::v1::RowInsertRequests;
use common_grpc::precision::Precision;
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
@@ -44,6 +44,9 @@ pub const METRIC_NAME_LABEL_BYTES: &[u8] = b"__name__";
pub const DATABASE_LABEL: &str = "__database__";
pub const DATABASE_LABEL_BYTES: &[u8] = b"__database__";

pub const SCHEMA_LABEL: &str = "__schema__";
pub const SCHEMA_LABEL_BYTES: &[u8] = b"__schema__";

pub const PHYSICAL_TABLE_LABEL: &str = "__physical_table__";
pub const PHYSICAL_TABLE_LABEL_BYTES: &[u8] = b"__physical_table__";

@@ -73,6 +76,29 @@ pub fn table_name(q: &Query) -> Result<String> {
})
}

/// Extract schema from remote read request. Returns the first schema found from any query's matchers.
/// Prioritizes __schema__ over __database__ labels.
pub fn extract_schema_from_read_request(request: &ReadRequest) -> Option<String> {
for query in &request.queries {
for matcher in &query.matchers {
if matcher.name == SCHEMA_LABEL && matcher.r#type == MatcherType::Eq as i32 {
return Some(matcher.value.clone());
}
}
}

// If no __schema__ found, look for __database__
for query in &request.queries {
for matcher in &query.matchers {
if matcher.name == DATABASE_LABEL && matcher.r#type == MatcherType::Eq as i32 {
return Some(matcher.value.clone());
}
}
}

None
}
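A self-contained sketch of the precedence implemented above, using simplified stand-in types instead of the generated api::prom_store::remote ones so it compiles on its own: an equality matcher on __schema__ wins, and __database__ is only a fallback.

struct Matcher {
    name: String,
    value: String,
}

struct Query {
    matchers: Vec<Matcher>,
}

fn schema_of(queries: &[Query]) -> Option<String> {
    // First pass: an explicit __schema__ matcher wins.
    for q in queries {
        for m in &q.matchers {
            if m.name == "__schema__" {
                return Some(m.value.clone());
            }
        }
    }
    // Second pass: fall back to __database__.
    for q in queries {
        for m in &q.matchers {
            if m.name == "__database__" {
                return Some(m.value.clone());
            }
        }
    }
    None
}

fn main() {
    let q = Query {
        matchers: vec![
            Matcher { name: "__database__".to_string(), value: "public".to_string() },
            Matcher { name: "__schema__".to_string(), value: "test_schema_1".to_string() },
        ],
    };
    assert_eq!(schema_of(&[q]).as_deref(), Some("test_schema_1"));
}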

/// Create a DataFrame from a remote Query
#[tracing::instrument(skip_all)]
pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result<LogicalPlan> {
@@ -91,7 +117,7 @@ pub fn query_to_plan(dataframe: DataFrame, q: &Query) -> Result<LogicalPlan> {
for m in label_matches {
let name = &m.name;

if name == METRIC_NAME_LABEL {
if name == METRIC_NAME_LABEL || name == SCHEMA_LABEL || name == DATABASE_LABEL {
continue;
}

@@ -34,7 +34,7 @@ use crate::http::PromValidationMode;
use crate::pipeline::run_pipeline;
use crate::prom_row_builder::{PromCtx, TablesBuilder};
use crate::prom_store::{
DATABASE_LABEL_BYTES, METRIC_NAME_LABEL_BYTES, PHYSICAL_TABLE_LABEL_BYTES,
DATABASE_LABEL_BYTES, METRIC_NAME_LABEL_BYTES, PHYSICAL_TABLE_LABEL_BYTES, SCHEMA_LABEL_BYTES,
};
use crate::query_handler::PipelineHandlerRef;
use crate::repeated_field::{Clear, RepeatedField};
@@ -199,10 +199,17 @@ impl PromTimeSeries {
self.table_name = decode_string(&label.value, prom_validation_mode)?;
self.labels.truncate(self.labels.len() - 1); // remove last label
}
DATABASE_LABEL_BYTES => {
SCHEMA_LABEL_BYTES => {
self.schema = Some(decode_string(&label.value, prom_validation_mode)?);
self.labels.truncate(self.labels.len() - 1); // remove last label
}
DATABASE_LABEL_BYTES => {
// Only set schema from __database__ if __schema__ hasn't been set yet
if self.schema.is_none() {
self.schema = Some(decode_string(&label.value, prom_validation_mode)?);
}
self.labels.truncate(self.labels.len() - 1); // remove last label
}
PHYSICAL_TABLE_LABEL_BYTES => {
self.physical_table =
Some(decode_string(&label.value, prom_validation_mode)?);
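On the write path the same precedence shows up as label routing: special labels are peeled off into routing fields and dropped from the stored label set, and __database__ only applies when __schema__ has not been seen. A simplified sketch with plain string labels (SeriesTarget and route_labels are illustrative names, not the row builder's API):

struct SeriesTarget {
    schema: Option<String>,
    physical_table: Option<String>,
    labels: Vec<(String, String)>,
}

fn route_labels(raw: Vec<(String, String)>) -> SeriesTarget {
    let mut target = SeriesTarget { schema: None, physical_table: None, labels: Vec::new() };
    for (name, value) in raw {
        match name.as_str() {
            // __schema__ always wins, even if __database__ was seen first.
            "__schema__" => target.schema = Some(value),
            "__database__" => {
                if target.schema.is_none() {
                    target.schema = Some(value);
                }
            }
            "__physical_table__" => target.physical_table = Some(value),
            // Ordinary labels are kept as series tags.
            _ => target.labels.push((name, value)),
        }
    }
    target
}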
@@ -412,6 +419,10 @@ impl PromSeriesProcessor {
let one_sample = series.samples.len() == 1;

for s in series.samples.iter() {
// skip NaN value
if s.value.is_nan() {
continue;
}
let timestamp = s.timestamp;
pipeline_map.insert(GREPTIME_TIMESTAMP.to_string(), Value::Int64(timestamp));
pipeline_map.insert(GREPTIME_VALUE.to_string(), Value::Float64(s.value));

@@ -95,6 +95,18 @@ pub enum Error {
location: Location,
},

#[snafu(display(
"Not allowed to remove partition column {} from table {}",
column_name,
table_name
))]
RemovePartitionColumn {
column_name: String,
table_name: String,
#[snafu(implicit)]
location: Location,
},

#[snafu(display(
"Failed to build column descriptor for table: {}, column: {}",
table_name,
@@ -193,6 +205,7 @@ impl ErrorExt for Error {
StatusCode::EngineExecuteQuery
}
Error::RemoveColumnInIndex { .. }
| Error::RemovePartitionColumn { .. }
| Error::BuildColumnDescriptor { .. }
| Error::InvalidAlterRequest { .. } => StatusCode::InvalidArguments,
Error::CastDefaultValue { source, .. } => source.status_code(),

@@ -645,10 +645,19 @@ impl TableMeta {
msg: format!("Table {table_name} cannot add new columns {column_names:?}"),
})?;

let partition_key_indices = self
.partition_key_indices
.iter()
.map(|idx| table_schema.column_name_by_index(*idx))
// This unwrap is safe since we only add new columns.
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();

// value_indices would be generated automatically.
let _ = meta_builder
.schema(Arc::new(new_schema))
.primary_key_indices(primary_key_indices);
.primary_key_indices(primary_key_indices)
.partition_key_indices(partition_key_indices);

Ok(meta_builder)
}
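The new partition_key_indices block follows the same pattern already used for primary_key_indices: positions valid in the old schema are translated by name into positions in the new schema. A standalone sketch of that remapping with plain name slices (remap_indices is an illustrative helper, not TableMeta API):

fn remap_indices(indices: &[usize], old_names: &[&str], new_names: &[&str]) -> Vec<usize> {
    indices
        .iter()
        .map(|&idx| old_names[idx])
        // The unwrap mirrors the one above: it is only safe because this path
        // never drops a column that the indices refer to.
        .map(|name| new_names.iter().position(|n| *n == name).unwrap())
        .collect()
}

fn main() {
    // col1 and col2 are partition columns; a new field inserted after ts
    // shifts col2 from index 2 to index 3.
    let old = ["col1", "ts", "col2"];
    let new = ["col1", "ts", "new_field_after_ts", "col2"];
    assert_eq!(remap_indices(&[0, 2], &old, &new), vec![0, 3]);
}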
@@ -676,6 +685,14 @@ impl TableMeta {
}
);

ensure!(
!self.partition_key_indices.contains(&index),
error::RemovePartitionColumnSnafu {
column_name: *column_name,
table_name,
}
);

if let Some(ts_index) = timestamp_index {
// Not allowed to remove column in timestamp index.
ensure!(
@@ -725,9 +742,18 @@ impl TableMeta {
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();

let partition_key_indices = self
.partition_key_indices
.iter()
.map(|idx| table_schema.column_name_by_index(*idx))
// This unwrap is safe since we don't allow removing a partition key column.
.map(|name| new_schema.column_index_by_name(name).unwrap())
.collect();

let _ = meta_builder
.schema(Arc::new(new_schema))
.primary_key_indices(primary_key_indices);
.primary_key_indices(primary_key_indices)
.partition_key_indices(partition_key_indices);

Ok(meta_builder)
}
@@ -1300,6 +1326,8 @@ fn unset_column_skipping_index_options(

#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;

use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use datatypes::data_type::ConcreteDataType;
@@ -1308,6 +1336,7 @@ mod tests {
};

use super::*;
use crate::Error;

/// Create a test schema with 3 columns: `[col1 int32, ts timestampmills, col2 int32]`.
fn new_test_schema() -> Schema {
@@ -1385,6 +1414,11 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
);
let yet_another_field = ColumnSchema::new(
"yet_another_field_after_ts",
ConcreteDataType::int64_datatype(),
true,
);
let alter_kind = AlterKind::AddColumns {
columns: vec![
AddColumnRequest {
@@ -1401,6 +1435,14 @@ mod tests {
}),
add_if_not_exists: false,
},
AddColumnRequest {
column_schema: yet_another_field,
is_key: true,
location: Some(AddColumnLocation::After {
column_name: "ts".to_string(),
}),
add_if_not_exists: false,
},
],
};

@@ -1756,6 +1798,29 @@ mod tests {
assert_eq!(StatusCode::InvalidArguments, err.status_code());
}

#[test]
fn test_remove_partition_column() {
let schema = Arc::new(new_test_schema());
let meta = TableMetaBuilder::empty()
.schema(schema)
.primary_key_indices(vec![])
.partition_key_indices(vec![0])
.engine("engine")
.next_column_id(3)
.build()
.unwrap();
// Remove column in primary key.
let alter_kind = AlterKind::DropColumns {
names: vec![String::from("col1")],
};

let err = meta
.builder_with_alter_kind("my_table", &alter_kind)
.err()
.unwrap();
assert_matches!(err, Error::RemovePartitionColumn { .. });
}

#[test]
fn test_change_key_column_data_type() {
let schema = Arc::new(new_test_schema());
@@ -1821,6 +1886,8 @@ mod tests {
let meta = TableMetaBuilder::empty()
.schema(schema)
.primary_key_indices(vec![0])
// partition col: col1, col2
.partition_key_indices(vec![0, 2])
.engine("engine")
.next_column_id(3)
.build()
@@ -1836,11 +1903,19 @@ mod tests {
.map(|column_schema| column_schema.name.clone())
.collect();
assert_eq!(
&["my_tag_first", "col1", "ts", "my_field_after_ts", "col2"],
&[
"my_tag_first", // primary key column
"col1", // partition column
"ts", // timestamp column
"yet_another_field_after_ts", // primary key column
"my_field_after_ts", // value column
"col2", // partition column
],
&names[..]
);
assert_eq!(&[0, 1], &new_meta.primary_key_indices[..]);
assert_eq!(&[2, 3, 4], &new_meta.value_indices[..]);
assert_eq!(&[0, 1, 3], &new_meta.primary_key_indices[..]);
assert_eq!(&[2, 4, 5], &new_meta.value_indices[..]);
assert_eq!(&[1, 5], &new_meta.partition_key_indices[..]);
}

#[test]
@@ -882,11 +882,14 @@ CREATE TABLE {table_name} (
let region_id = RegionId::new(table_id, *region);

let stream = region_server
.handle_remote_read(RegionQueryRequest {
region_id: region_id.as_u64(),
plan: plan.to_vec(),
..Default::default()
})
.handle_remote_read(
RegionQueryRequest {
region_id: region_id.as_u64(),
plan: plan.to_vec(),
..Default::default()
},
QueryContext::arc(),
)
.await
.unwrap();

@@ -249,11 +249,14 @@ mod tests {
let region_id = RegionId::new(table_id, *region);

let stream = region_server
.handle_remote_read(QueryRequest {
region_id: region_id.as_u64(),
plan: plan.to_vec(),
..Default::default()
})
.handle_remote_read(
QueryRequest {
region_id: region_id.as_u64(),
plan: plan.to_vec(),
..Default::default()
},
QueryContext::arc(),
)
.await
.unwrap();

@@ -16,7 +16,10 @@ use std::collections::BTreeMap;
use std::io::Write;
use std::str::FromStr;

use api::prom_store::remote::WriteRequest;
use api::prom_store::remote::label_matcher::Type as MatcherType;
use api::prom_store::remote::{
Label, LabelMatcher, Query, ReadRequest, ReadResponse, Sample, TimeSeries, WriteRequest,
};
use auth::user_provider_from_option;
use axum::http::{HeaderName, HeaderValue, StatusCode};
use chrono::Utc;
@@ -94,6 +97,7 @@ macro_rules! http_tests {
test_dashboard_path,
test_prometheus_remote_write,
test_prometheus_remote_special_labels,
test_prometheus_remote_schema_labels,
test_prometheus_remote_write_with_pipeline,
test_vm_proto_remote_write,

@@ -112,6 +116,7 @@ macro_rules! http_tests {
test_pipeline_with_hint_vrl,
test_pipeline_2,
test_pipeline_skip_error,
test_pipeline_filter,

test_otlp_metrics,
test_otlp_traces_v0,
@@ -780,6 +785,89 @@ pub async fn test_prom_http_api(store_type: StorageType) {
serde_json::from_value::<PrometheusResponse>(json!(["host1", "host2"])).unwrap()
);

// special labels
let res = client
.get("/v1/prometheus/api/v1/label/__schema__/values?start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!([
"greptime_private",
"information_schema",
"public"
]))
.unwrap()
);

// special labels
let res = client
.get("/v1/prometheus/api/v1/label/__schema__/values?match[]=demo&start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!(["public"])).unwrap()
);

// special labels
let res = client
.get("/v1/prometheus/api/v1/label/__database__/values?match[]=demo&start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!(["public"])).unwrap()
);

// special labels
let res = client
.get("/v1/prometheus/api/v1/label/__database__/values?match[]=multi_labels{idc=\"idc1\", env=\"dev\"}&start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!(["public"])).unwrap()
);

// match special labels.
let res = client
.get("/v1/prometheus/api/v1/label/host/values?match[]=multi_labels{__schema__=\"public\", idc=\"idc1\", env=\"dev\"}&start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!(["host1", "host2"])).unwrap()
);

// match special labels.
let res = client
.get("/v1/prometheus/api/v1/label/host/values?match[]=multi_labels{__schema__=\"information_schema\", idc=\"idc1\", env=\"dev\"}&start=0&end=600")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);
let body = serde_json::from_str::<PrometheusJsonResponse>(&res.text().await).unwrap();
assert_eq!(body.status, "success");
assert_eq!(
body.data,
serde_json::from_value::<PrometheusResponse>(json!([])).unwrap()
);

// search field name
let res = client
.get("/v1/prometheus/api/v1/label/__field__/values?match[]=demo")
@@ -1137,6 +1225,7 @@ write_cache_path = ""
write_cache_size = "5GiB"
sst_write_buffer_size = "8MiB"
parallel_scan_channel_size = 32
max_concurrent_scan_files = 128
allow_stale_entries = false
min_compaction_interval = "0s"

@@ -1464,6 +1553,188 @@ pub async fn test_prometheus_remote_write_with_pipeline(store_type: StorageType)
guard.remove_all().await;
}

pub async fn test_prometheus_remote_schema_labels(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_prom_app_with_frontend(store_type, "test_prometheus_remote_schema_labels").await;
let client = TestClient::new(app).await;

// Create test schemas
let res = client
.post("/v1/sql?sql=create database test_schema_1")
.header("Content-Type", "application/x-www-form-urlencoded")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);

let res = client
.post("/v1/sql?sql=create database test_schema_2")
.header("Content-Type", "application/x-www-form-urlencoded")
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);

// Write data with __schema__ label
let schema_series = TimeSeries {
labels: vec![
Label {
name: "__name__".to_string(),
value: "metric_with_schema".to_string(),
},
Label {
name: "__schema__".to_string(),
value: "test_schema_1".to_string(),
},
Label {
name: "instance".to_string(),
value: "host1".to_string(),
},
],
samples: vec![Sample {
value: 100.0,
timestamp: 1000,
}],
..Default::default()
};

let write_request = WriteRequest {
timeseries: vec![schema_series],
..Default::default()
};
let serialized_request = write_request.encode_to_vec();
let compressed_request =
prom_store::snappy_compress(&serialized_request).expect("failed to encode snappy");

let res = client
.post("/v1/prometheus/write")
.header("Content-Encoding", "snappy")
.body(compressed_request)
.send()
.await;
assert_eq!(res.status(), StatusCode::NO_CONTENT);

// Read data from test_schema_1 using __schema__ matcher
let read_request = ReadRequest {
queries: vec![Query {
start_timestamp_ms: 500,
end_timestamp_ms: 1500,
matchers: vec![
LabelMatcher {
name: "__name__".to_string(),
value: "metric_with_schema".to_string(),
r#type: MatcherType::Eq as i32,
},
LabelMatcher {
name: "__schema__".to_string(),
value: "test_schema_1".to_string(),
r#type: MatcherType::Eq as i32,
},
],
..Default::default()
}],
..Default::default()
};

let serialized_read_request = read_request.encode_to_vec();
let compressed_read_request =
prom_store::snappy_compress(&serialized_read_request).expect("failed to encode snappy");

let mut result = client
.post("/v1/prometheus/read")
.body(compressed_read_request)
.send()
.await;
assert_eq!(result.status(), StatusCode::OK);

let response_body = result.chunk().await.unwrap();
let decompressed_response = prom_store::snappy_decompress(&response_body).unwrap();
let read_response = ReadResponse::decode(&decompressed_response[..]).unwrap();

assert_eq!(read_response.results.len(), 1);
assert_eq!(read_response.results[0].timeseries.len(), 1);

let timeseries = &read_response.results[0].timeseries[0];
assert_eq!(timeseries.samples.len(), 1);
assert_eq!(timeseries.samples[0].value, 100.0);
assert_eq!(timeseries.samples[0].timestamp, 1000);

// write data to unknown schema
let unknown_schema_series = TimeSeries {
labels: vec![
Label {
name: "__name__".to_string(),
value: "metric_unknown_schema".to_string(),
},
Label {
name: "__schema__".to_string(),
value: "unknown_schema".to_string(),
},
Label {
name: "instance".to_string(),
value: "host2".to_string(),
},
],
samples: vec![Sample {
value: 200.0,
timestamp: 2000,
}],
..Default::default()
};

let unknown_write_request = WriteRequest {
timeseries: vec![unknown_schema_series],
..Default::default()
};
let serialized_unknown_request = unknown_write_request.encode_to_vec();
let compressed_unknown_request =
prom_store::snappy_compress(&serialized_unknown_request).expect("failed to encode snappy");

// Write data to unknown schema
let res = client
.post("/v1/prometheus/write")
.header("Content-Encoding", "snappy")
.body(compressed_unknown_request)
.send()
.await;
assert_eq!(res.status(), StatusCode::BAD_REQUEST);

// Read data from unknown schema
let unknown_read_request = ReadRequest {
queries: vec![Query {
start_timestamp_ms: 1500,
end_timestamp_ms: 2500,
matchers: vec![
LabelMatcher {
name: "__name__".to_string(),
value: "metric_unknown_schema".to_string(),
r#type: MatcherType::Eq as i32,
},
LabelMatcher {
name: "__schema__".to_string(),
value: "unknown_schema".to_string(),
r#type: MatcherType::Eq as i32,
},
],
..Default::default()
}],
..Default::default()
};

let serialized_unknown_read_request = unknown_read_request.encode_to_vec();
let compressed_unknown_read_request =
prom_store::snappy_compress(&serialized_unknown_read_request)
.expect("failed to encode snappy");

let unknown_result = client
.post("/v1/prometheus/read")
.body(compressed_unknown_read_request)
.send()
.await;
assert_eq!(unknown_result.status(), StatusCode::BAD_REQUEST);

guard.remove_all().await;
}

pub async fn test_vm_proto_remote_write(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
@@ -1945,6 +2216,78 @@ transform:
guard.remove_all().await;
}

pub async fn test_pipeline_filter(store_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
setup_test_http_app_with_frontend(store_type, "test_pipeline_filter").await;

// handshake
let client = TestClient::new(app).await;

let pipeline_body = r#"
processors:
- date:
field: time
formats:
- "%Y-%m-%d %H:%M:%S%.3f"
- filter:
field: name
targets:
- John
transform:
- field: name
type: string
- field: time
type: time
index: timestamp
"#;

// 1. create pipeline
let res = client
.post("/v1/events/pipelines/test")
.header("Content-Type", "application/x-yaml")
.body(pipeline_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);

// 2. write data
let data_body = r#"
[
{
"time": "2024-05-25 20:16:37.217",
"name": "John"
},
{
"time": "2024-05-25 20:16:37.218",
"name": "JoHN"
},
{
"time": "2024-05-25 20:16:37.328",
"name": "Jane"
}
]
"#;

let res = client
.post("/v1/events/logs?db=public&table=logs1&pipeline_name=test")
.header("Content-Type", "application/json")
.body(data_body)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);

validate_data(
"pipeline_filter",
&client,
"select * from logs1",
"[[\"Jane\",1716668197328000000]]",
)
.await;

guard.remove_all().await;
}

pub async fn test_pipeline_dispatcher(storage_type: StorageType) {
common_telemetry::init_default_ut_logging();
let (app, mut guard) =
@@ -2405,14 +2748,19 @@ processors:
ignore_missing: true
- vrl:
source: |
.log_id = .id
del(.id)
.from_source = "channel_2"
cond, err = .id1 > .id2
if (cond) {
.from_source = "channel_1"
}
del(.id1)
del(.id2)
.

transform:
- fields:
- log_id
type: int32
- from_source
type: string
- field: time
type: time
index: timestamp
@@ -2432,7 +2780,8 @@ transform:
let data_body = r#"
[
{
"id": "2436",
"id1": 2436,
"id2": 123,
"time": "2024-05-25 20:16:37.217"
}
]
@@ -2449,7 +2798,7 @@ transform:
"test_pipeline_with_vrl",
&client,
"select * from d_table",
"[[2436,1716668197217000000]]",
"[[\"channel_1\",1716668197217000000]]",
)
.await;

@@ -152,6 +152,16 @@ pub async fn test_mysql_stmts(store_type: StorageType) {

conn.execute("SET TRANSACTION READ ONLY").await.unwrap();

// empty statements
let err = conn.execute(" ------- ;").await.unwrap_err();
assert!(err.to_string().contains("empty statements"));
let err = conn.execute("----------\n;").await.unwrap_err();
assert!(err.to_string().contains("empty statements"));
let err = conn.execute(" ;").await.unwrap_err();
assert!(err.to_string().contains("empty statements"));
let err = conn.execute(" \n ;").await.unwrap_err();
assert!(err.to_string().contains("empty statements"));

let _ = fe_mysql_server.shutdown().await;
guard.remove_all().await;
}

@@ -84,17 +84,37 @@ limit 1;
|_|_Inner Join: t_2.ts = t_3.ts, t_2.vin = t_3.vin_|
|_|_Inner Join: t_1.ts = t_2.ts, t_1.vin = t_2.vin_|
|_|_Filter: t_1.vin IS NOT NULL_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_1_|
|_| ]]_|
|_|_Filter: t_2.vin IS NOT NULL_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_2_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_3_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_4_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_5_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_6_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_7_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_8_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_9_|
|_| ]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: t_10_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [ts@0 DESC], fetch=1_|
|_|_SortExec: TopK(fetch=1), expr=[ts@0 DESC], preserve_partitioning=[true]_|
|_|_CoalesceBatchesExec: target_batch_size=8192_|

@@ -26,7 +26,12 @@ explain SELECT * FROM demo WHERE ts > cast(1000000000 as timestamp) ORDER BY hos
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeSort: demo.host ASC NULLS LAST_|
|_|_MergeScan [is_placeholder=false]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: demo.host ASC NULLS LAST_|
|_|_Projection: demo.host, demo.ts, demo.cpu, demo.memory, demo.disk_util_|
|_|_Filter: demo.ts > arrow_cast(Int64(1000000000), Utf8("Timestamp(Millisecond, None)"))_|
|_|_TableScan: demo_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [host@0 ASC NULLS LAST]_|
|_|_MergeScanExec: REDACTED
|_|_|

@@ -12,7 +12,12 @@ EXPLAIN SELECT DISTINCT i%2 FROM integers ORDER BY 1;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeScan [is_placeholder=false]_|
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[ |
|_| Sort: integers.i % Int64(2) ASC NULLS LAST_|
|_|_Distinct:_|
|_|_Projection: integers.i % Int64(2)_|
|_|_TableScan: integers_|
|_| ]]_|
| physical_plan | MergeScanExec: REDACTED
|_|_|
+-+-+
@@ -35,7 +40,11 @@ EXPLAIN SELECT a, b FROM test ORDER BY a, b;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeScan [is_placeholder=false]_|
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: test.a ASC NULLS LAST, test.b ASC NULLS LAST |
|_|_Projection: test.a, test.b_|
|_|_TableScan: test_|
|_| ]]_|
| physical_plan | MergeScanExec: REDACTED
|_|_|
+-+-+
@@ -50,7 +59,12 @@ EXPLAIN SELECT DISTINCT a, b FROM test ORDER BY a, b;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeScan [is_placeholder=false]_|
| logical_plan_| MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: test.a ASC NULLS LAST, test.b ASC NULLS LAST |
|_|_Distinct:_|
|_|_Projection: test.a, test.b_|
|_|_TableScan: test_|
|_| ]]_|
| physical_plan | MergeScanExec: REDACTED
|_|_|
+-+-+
Some files were not shown because too many files have changed in this diff.