mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-14 12:00:40 +00:00
Merge branch 'main' into under-utilized-2
This commit is contained in:
165
Cargo.lock
generated
165
Cargo.lock
generated
@@ -212,7 +212,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
|
||||
|
||||
[[package]]
|
||||
name = "api"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arrow-schema 57.3.0",
|
||||
"common-base",
|
||||
@@ -933,7 +933,7 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "auth"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -1523,7 +1523,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cache"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"catalog",
|
||||
"common-error",
|
||||
@@ -1559,7 +1559,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "catalog"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow 57.3.0",
|
||||
@@ -1894,7 +1894,7 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
|
||||
|
||||
[[package]]
|
||||
name = "cli"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -1951,7 +1951,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -1983,7 +1983,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"snafu 0.8.6",
|
||||
"store-api",
|
||||
"substrait 1.0.0-rc.2",
|
||||
"substrait 1.0.0",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.14.2",
|
||||
@@ -2023,7 +2023,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cmd"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2155,24 +2155,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"anymap2",
|
||||
"async-trait",
|
||||
"bitvec",
|
||||
"bytes",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-test-util",
|
||||
"futures",
|
||||
"lazy_static",
|
||||
"paste",
|
||||
"pin-project",
|
||||
"rand 0.9.1",
|
||||
"regex",
|
||||
"serde",
|
||||
"snafu 0.8.6",
|
||||
"tokio",
|
||||
"toml 0.8.23",
|
||||
"zeroize",
|
||||
@@ -2180,14 +2175,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-catalog"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"const_format",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-config"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2203,7 +2198,6 @@ dependencies = [
|
||||
"object-store",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"snafu 0.8.6",
|
||||
"temp-env",
|
||||
"tempfile",
|
||||
@@ -2212,7 +2206,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-datasource"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arrow 57.3.0",
|
||||
"arrow-schema 57.3.0",
|
||||
@@ -2248,7 +2242,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-decimal"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"bigdecimal 0.4.8",
|
||||
"common-error",
|
||||
@@ -2261,7 +2255,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-error"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-macro",
|
||||
"http 1.3.1",
|
||||
@@ -2272,7 +2266,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-event-recorder"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2295,7 +2289,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-frontend"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2316,7 +2310,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-function"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -2379,7 +2373,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-greptimedb-telemetry"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-runtime",
|
||||
@@ -2396,7 +2390,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -2431,7 +2425,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-grpc-expr"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"common-base",
|
||||
@@ -2451,7 +2445,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-macro"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"greptime-proto",
|
||||
"once_cell",
|
||||
@@ -2462,7 +2456,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-mem-prof"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"common-error",
|
||||
@@ -2478,7 +2472,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-memory-manager"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2490,7 +2484,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-meta"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"anymap2",
|
||||
"api",
|
||||
@@ -2561,7 +2555,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-options"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-grpc",
|
||||
"humantime-serde",
|
||||
@@ -2571,11 +2565,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-plugins"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "common-pprof"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-error",
|
||||
"common-macro",
|
||||
@@ -2586,7 +2580,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-stream",
|
||||
@@ -2615,7 +2609,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-procedure-test"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"common-procedure",
|
||||
@@ -2625,7 +2619,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-query"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -2651,7 +2645,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-recordbatch"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"common-base",
|
||||
@@ -2676,7 +2670,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-runtime"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap",
|
||||
@@ -2705,7 +2699,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-session"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"strum 0.27.1",
|
||||
@@ -2713,7 +2707,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-sql"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arrow-schema 57.3.0",
|
||||
"common-base",
|
||||
@@ -2733,7 +2727,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-stat"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-runtime",
|
||||
@@ -2748,7 +2742,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-telemetry"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"common-base",
|
||||
@@ -2777,7 +2771,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-test-util"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"client",
|
||||
"common-grpc",
|
||||
@@ -2790,7 +2784,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-time"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arrow 57.3.0",
|
||||
"chrono",
|
||||
@@ -2808,7 +2802,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-version"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"cargo-manifest",
|
||||
"const_format",
|
||||
@@ -2818,7 +2812,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-wal"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -2841,7 +2835,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "common-workload"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"common-telemetry",
|
||||
"serde",
|
||||
@@ -4203,7 +4197,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datanode"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -4271,7 +4265,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arrow 57.3.0",
|
||||
"arrow-array 57.3.0",
|
||||
@@ -4949,7 +4943,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "file-engine"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -5081,7 +5075,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
|
||||
|
||||
[[package]]
|
||||
name = "flow"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow 57.3.0",
|
||||
@@ -5150,7 +5144,7 @@ dependencies = [
|
||||
"sql",
|
||||
"store-api",
|
||||
"strum 0.27.1",
|
||||
"substrait 1.0.0-rc.2",
|
||||
"substrait 1.0.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tonic 0.14.2",
|
||||
@@ -5211,7 +5205,7 @@ checksum = "28dd6caf6059519a65843af8fe2a3ae298b14b80179855aeb4adc2c1934ee619"
|
||||
|
||||
[[package]]
|
||||
name = "frontend"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -6459,7 +6453,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"asynchronous-codec",
|
||||
@@ -7296,7 +7290,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7427,7 +7421,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||
|
||||
[[package]]
|
||||
name = "log-query"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"chrono",
|
||||
"common-error",
|
||||
@@ -7439,7 +7433,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "log-store"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@@ -7730,7 +7724,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-client"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7761,7 +7755,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meta-srv"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -7861,7 +7855,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "metric-engine"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -7962,7 +7956,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito-codec"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"bytes",
|
||||
@@ -7987,7 +7981,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mito2"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -8711,7 +8705,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -8724,7 +8718,6 @@ dependencies = [
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"moka",
|
||||
"opendal",
|
||||
"prometheus 0.14.0",
|
||||
"reqwest",
|
||||
@@ -9039,7 +9032,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "operator"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -9099,7 +9092,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser",
|
||||
"store-api",
|
||||
"substrait 1.0.0-rc.2",
|
||||
"substrait 1.0.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
@@ -9375,7 +9368,7 @@ checksum = "e3c406c9e2aa74554e662d2c2ee11cd3e73756988800be7e6f5eddb16fed4699"
|
||||
|
||||
[[package]]
|
||||
name = "partition"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
@@ -9593,9 +9586,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pgwire"
|
||||
version = "0.38.2"
|
||||
version = "0.38.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a1bdf05fc8231cc5024572fe056e3ce34eb6b9b755ba7aba110e1c64119cec3"
|
||||
checksum = "24bd4e6b1bfddc5c6420dee6602ec80946700b4c31ddcb64ee190ad6d979c210"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
@@ -9731,7 +9724,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pipeline"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -9888,7 +9881,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "plugins"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"auth",
|
||||
"catalog",
|
||||
@@ -10206,7 +10199,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "promql"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"async-trait",
|
||||
@@ -10558,7 +10551,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "puffin"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-compression",
|
||||
"async-trait",
|
||||
@@ -10620,7 +10613,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "query"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -10687,7 +10680,7 @@ dependencies = [
|
||||
"sql",
|
||||
"sqlparser",
|
||||
"store-api",
|
||||
"substrait 1.0.0-rc.2",
|
||||
"substrait 1.0.0",
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
@@ -11991,7 +11984,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "servers"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -12125,7 +12118,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "session"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.12",
|
||||
"api",
|
||||
@@ -12457,7 +12450,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sql"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-buffer 57.3.0",
|
||||
@@ -12518,7 +12511,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "sqlness-runner"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"clap",
|
||||
@@ -12798,7 +12791,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "standalone"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"catalog",
|
||||
@@ -12842,7 +12835,7 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "store-api"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"aquamarine",
|
||||
@@ -13034,7 +13027,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "substrait"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
@@ -13156,7 +13149,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arc-swap",
|
||||
@@ -13426,7 +13419,7 @@ checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
|
||||
|
||||
[[package]]
|
||||
name = "tests-fuzz"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"async-trait",
|
||||
@@ -13470,7 +13463,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tests-integration"
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"arrow-flight",
|
||||
@@ -13547,7 +13540,7 @@ dependencies = [
|
||||
"sqlx",
|
||||
"standalone",
|
||||
"store-api",
|
||||
"substrait 1.0.0-rc.2",
|
||||
"substrait 1.0.0",
|
||||
"table",
|
||||
"tempfile",
|
||||
"time",
|
||||
|
||||
@@ -75,7 +75,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "1.0.0-rc.2"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
license = "Apache-2.0"
|
||||
|
||||
|
||||
@@ -69,6 +69,11 @@
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `prom_store.pending_rows_flush_interval` | String | `0s` | Interval at which the pending rows batcher is flushed.<br/>Set to "0s" to disable batching mode in the Prometheus Remote Write endpoint. |
|
||||
| `prom_store.max_batch_rows` | Integer | `100000` | Max rows per pending batch before triggering a flush. |
|
||||
| `prom_store.max_concurrent_flushes` | Integer | `256` | Max number of concurrent batch flushes. |
|
||||
| `prom_store.worker_channel_capacity` | Integer | `65526` | Capacity of the pending batch worker channel. |
|
||||
| `prom_store.max_inflight_requests` | Integer | `3000` | Max inflight write requests before backpressure. |
|
||||
| `wal` | -- | -- | The WAL options. |
|
||||
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
|
||||
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
|
||||
@@ -139,7 +144,7 @@
|
||||
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
|
||||
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
|
||||
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
|
||||
| `region_engine.mito.experimental_compaction_memory_limit` | String | 0 | Memory budget for compaction tasks. Setting it to 0 or "unlimited" disables the limit. |
|
||||
| `region_engine.mito.experimental_compaction_memory_limit` | String | 0 | Memory budget for compaction tasks.<br/>Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").<br/>Setting it to 0 or "unlimited" disables the limit. |
|
||||
| `region_engine.mito.experimental_compaction_on_exhausted` | String | wait | Behavior when compaction cannot acquire memory from the budget.<br/>Options: "wait" (default, 10s), "wait(<duration>)", "fail" |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory, capped at 1GB. |
|
||||
@@ -157,13 +162,12 @@
|
||||
| `region_engine.mito.enable_refill_cache_on_read` | Bool | `true` | Enable refilling cache on read operations (default: true).<br/>When disabled, cache refilling on read won't happen. |
|
||||
| `region_engine.mito.manifest_cache_size` | String | `256MB` | Capacity for manifest cache (default: 256MB). |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.scan_memory_limit` | String | `50%` | Memory limit for table scans across all queries.<br/>Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit. |
|
||||
| `region_engine.mito.scan_memory_on_exhausted` | String | `fail` | Controls what happens when a scan cannot get memory immediately.<br/>"fail" (default) fails fast and is the recommended option for most users.<br/>"wait" / "wait(<duration>)" waits for memory to become available. This is mainly<br/>for advanced tuning in bursty workloads where temporary contention is common and<br/>higher latency is acceptable.<br/>"wait" means "wait(10s)", not unlimited waiting. |
|
||||
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
|
||||
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
|
||||
| `region_engine.mito.default_flat_format` | Bool | `true` | Whether to enable flat format as the default SST format. |
|
||||
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
|
||||
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
|
||||
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
|
||||
@@ -293,6 +297,11 @@
|
||||
| `prom_store` | -- | -- | Prometheus remote storage options |
|
||||
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
|
||||
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
|
||||
| `prom_store.pending_rows_flush_interval` | String | `0s` | Interval at which the pending rows batcher is flushed.<br/>Set to "0s" to disable batching mode in the Prometheus Remote Write endpoint. |
|
||||
| `prom_store.max_batch_rows` | Integer | `100000` | Max rows per pending batch before triggering a flush. |
|
||||
| `prom_store.max_concurrent_flushes` | Integer | `256` | Max number of concurrent batch flushes. |
|
||||
| `prom_store.worker_channel_capacity` | Integer | `65526` | Capacity of the pending batch worker channel. |
|
||||
| `prom_store.max_inflight_requests` | Integer | `3000` | Max inflight write requests before backpressure. |
|
||||
| `meta_client` | -- | -- | The metasrv client options. |
|
||||
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
|
||||
| `meta_client.timeout` | String | `3s` | Operation timeout. |
|
||||
@@ -532,7 +541,7 @@
|
||||
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
|
||||
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
|
||||
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
|
||||
| `region_engine.mito.experimental_compaction_memory_limit` | String | 0 | Memory budget for compaction tasks. Setting it to 0 or "unlimited" disables the limit. |
|
||||
| `region_engine.mito.experimental_compaction_memory_limit` | String | 0 | Memory budget for compaction tasks.<br/>Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").<br/>Setting it to 0 or "unlimited" disables the limit. |
|
||||
| `region_engine.mito.experimental_compaction_on_exhausted` | String | wait | Behavior when compaction cannot acquire memory from the budget.<br/>Options: "wait" (default, 10s), "wait(<duration>)", "fail" |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory, capped at 1GB. |
|
||||
@@ -550,13 +559,12 @@
|
||||
| `region_engine.mito.enable_refill_cache_on_read` | Bool | `true` | Enable refilling cache on read operations (default: true).<br/>When disabled, cache refilling on read won't happen. |
|
||||
| `region_engine.mito.manifest_cache_size` | String | `256MB` | Capacity for manifest cache (default: 256MB). |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.scan_memory_limit` | String | `50%` | Memory limit for table scans across all queries.<br/>Supports absolute size (e.g., "2GB") or percentage of system memory (e.g., "20%").<br/>Setting it to 0 disables the limit. |
|
||||
| `region_engine.mito.scan_memory_on_exhausted` | String | `fail` | Controls what happens when a scan cannot get memory immediately.<br/>"fail" (default) fails fast and is the recommended option for most users.<br/>"wait" / "wait(<duration>)" waits for memory to become available. This is mainly<br/>for advanced tuning in bursty workloads where temporary contention is common and<br/>higher latency is acceptable.<br/>"wait" means "wait(10s)", not unlimited waiting. |
|
||||
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
|
||||
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
|
||||
| `region_engine.mito.default_flat_format` | Bool | `true` | Whether to enable flat format as the default SST format. |
|
||||
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
|
||||
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
|
||||
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
|
||||
|
||||
@@ -446,7 +446,9 @@ compress_manifest = false
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_purges = 8
|
||||
|
||||
## Memory budget for compaction tasks. Setting it to 0 or "unlimited" disables the limit.
|
||||
## Memory budget for compaction tasks.
|
||||
## Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").
|
||||
## Setting it to 0 or "unlimited" disables the limit.
|
||||
## @toml2docs:none-default="0"
|
||||
#+ experimental_compaction_memory_limit = "0"
|
||||
|
||||
@@ -520,9 +522,6 @@ manifest_cache_size = "256MB"
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
## Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files = 384
|
||||
|
||||
@@ -545,8 +544,8 @@ scan_memory_on_exhausted = "fail"
|
||||
## To align with the old behavior, the default value is 0 (no restrictions).
|
||||
min_compaction_interval = "0m"
|
||||
|
||||
## Whether to enable experimental flat format as the default format.
|
||||
default_experimental_flat_format = false
|
||||
## Whether to enable flat format as the default SST format.
|
||||
default_flat_format = true
|
||||
|
||||
## The options for index in Mito engine.
|
||||
[region_engine.mito.index]
|
||||
|
||||
@@ -214,6 +214,17 @@ enable = true
|
||||
enable = true
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
## Interval to flush pending rows batcher.
|
||||
## Set to "0s" to disable batching mode in the Prometheus Remote Write endpoint.
|
||||
#+pending_rows_flush_interval = "0s"
|
||||
## Max rows per pending batch before triggering a flush.
|
||||
#+max_batch_rows = 100000
|
||||
## Max number of concurrent batch flushes.
|
||||
#+max_concurrent_flushes = 256
|
||||
## Capacity of the pending batch worker channel.
|
||||
#+worker_channel_capacity = 65526
|
||||
## Max inflight write requests before backpressure.
|
||||
#+max_inflight_requests = 3000
|
||||
|
||||
## The metasrv client options.
|
||||
[meta_client]
|
||||
|
||||
@@ -173,6 +173,17 @@ enable = true
|
||||
enable = true
|
||||
## Whether to store the data from Prometheus remote write in metric engine.
|
||||
with_metric_engine = true
|
||||
## Interval to flush pending rows batcher.
|
||||
## Set to "0s" to disable batching mode in the Prometheus Remote Write endpoint.
|
||||
#+pending_rows_flush_interval = "0s"
|
||||
## Max rows per pending batch before triggering a flush.
|
||||
#+max_batch_rows = 100000
|
||||
## Max number of concurrent batch flushes.
|
||||
#+max_concurrent_flushes = 256
|
||||
## Capacity of the pending batch worker channel.
|
||||
#+worker_channel_capacity = 65526
|
||||
## Max inflight write requests before backpressure.
|
||||
#+max_inflight_requests = 3000
|
||||
|
||||
## The WAL options.
|
||||
[wal]
|
||||
@@ -538,7 +549,9 @@ compress_manifest = false
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_purges = 8
|
||||
|
||||
## Memory budget for compaction tasks. Setting it to 0 or "unlimited" disables the limit.
|
||||
## Memory budget for compaction tasks.
|
||||
## Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").
|
||||
## Setting it to 0 or "unlimited" disables the limit.
|
||||
## @toml2docs:none-default="0"
|
||||
#+ experimental_compaction_memory_limit = "0"
|
||||
|
||||
@@ -612,9 +625,6 @@ manifest_cache_size = "256MB"
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
## Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files = 384
|
||||
|
||||
@@ -637,8 +647,8 @@ scan_memory_on_exhausted = "fail"
|
||||
## To align with the old behavior, the default value is 0 (no restrictions).
|
||||
min_compaction_interval = "0m"
|
||||
|
||||
## Whether to enable experimental flat format as the default format.
|
||||
default_experimental_flat_format = false
|
||||
## Whether to enable flat format as the default SST format.
|
||||
default_flat_format = true
|
||||
|
||||
## The options for index in Mito engine.
|
||||
[region_engine.mito.index]
|
||||
|
||||
@@ -337,6 +337,7 @@ fn mask_secrets(sql: &str, secrets: &[Option<String>]) -> String {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::secrets::SecretString;
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
|
||||
use super::*;
|
||||
use crate::common::{PrefixedAzblobConnection, PrefixedGcsConnection, PrefixedOssConnection};
|
||||
@@ -432,9 +433,21 @@ mod tests {
|
||||
#[test]
|
||||
fn test_build_copy_target_decodes_file_uri_path() {
|
||||
let storage = ObjectStoreConfig::default();
|
||||
let target = build_copy_target("file:///tmp/my%20backup", &storage, "public", 7)
|
||||
let snapshot_root = create_temp_dir("my backup");
|
||||
let snapshot_uri = Url::from_file_path(snapshot_root.path())
|
||||
.expect("absolute platform path should convert to file:// URI")
|
||||
.to_string();
|
||||
let expected = normalize_path(&format!(
|
||||
"{}/{}",
|
||||
snapshot_root.path().to_string_lossy(),
|
||||
data_dir_for_schema_chunk("public", 7)
|
||||
));
|
||||
let target = build_copy_target(&snapshot_uri, &storage, "public", 7)
|
||||
.expect("file:// copy target should be built");
|
||||
|
||||
assert_eq!(target.location, "/tmp/my backup/data/public/7/");
|
||||
assert!(snapshot_uri.contains("%20"));
|
||||
assert!(!target.location.contains("%20"));
|
||||
assert!(target.location.contains("my backup"));
|
||||
assert_eq!(target.location, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -86,7 +86,6 @@ serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
servers.workspace = true
|
||||
session.workspace = true
|
||||
similar-asserts.workspace = true
|
||||
snafu.workspace = true
|
||||
sqlparser.workspace = true
|
||||
standalone.workspace = true
|
||||
@@ -113,5 +112,6 @@ common-version.workspace = true
|
||||
file-engine.workspace = true
|
||||
mito2.workspace = true
|
||||
serde.workspace = true
|
||||
similar-asserts.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
|
||||
@@ -211,7 +211,6 @@ impl ObjbenchCommand {
|
||||
object_store.clone(),
|
||||
)
|
||||
.expected_metadata(Some(region_meta.clone()))
|
||||
.flat_format(true)
|
||||
.build()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
|
||||
@@ -102,10 +102,6 @@ pub struct ScanbenchCommand {
|
||||
#[clap(long, value_name = "FILE")]
|
||||
pprof_file: Option<PathBuf>,
|
||||
|
||||
/// Force reading the region in flat format.
|
||||
#[clap(long, default_value_t = false)]
|
||||
force_flat_format: bool,
|
||||
|
||||
/// Enable WAL replay when opening the region.
|
||||
#[clap(long, default_value_t = false)]
|
||||
enable_wal: bool,
|
||||
@@ -580,12 +576,11 @@ impl ScanbenchCommand {
|
||||
};
|
||||
|
||||
println!(
|
||||
"{} Scanner: {}, Parallelism: {}, Iterations: {}, Force flat format: {}",
|
||||
"{} Scanner: {}, Parallelism: {}, Iterations: {}",
|
||||
"ℹ".blue(),
|
||||
self.scanner,
|
||||
self.parallelism,
|
||||
self.iterations,
|
||||
self.force_flat_format,
|
||||
);
|
||||
|
||||
// Start profiling if pprof_file is specified (unless pprof_after_warmup is set)
|
||||
@@ -626,7 +621,6 @@ impl ScanbenchCommand {
|
||||
filters: filters.clone(),
|
||||
series_row_selector,
|
||||
distribution,
|
||||
force_flat_format: self.force_flat_format,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -16,16 +16,11 @@ anymap2 = "0.13"
|
||||
async-trait.workspace = true
|
||||
bitvec = "1.0"
|
||||
bytes.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
paste.workspace = true
|
||||
pin-project.workspace = true
|
||||
rand.workspace = true
|
||||
regex.workspace = true
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
zeroize = { version = "1.6", default-features = false, features = ["alloc"] }
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ notify.workspace = true
|
||||
object-store.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
snafu.workspace = true
|
||||
toml.workspace = true
|
||||
|
||||
|
||||
@@ -28,7 +28,6 @@ common-runtime.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
datafusion.workspace = true
|
||||
datafusion-datasource.workspace = true
|
||||
datafusion-orc.workspace = true
|
||||
datatypes.workspace = true
|
||||
futures.workspace = true
|
||||
lazy_static.workspace = true
|
||||
@@ -47,3 +46,4 @@ url.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util.workspace = true
|
||||
datafusion-orc.workspace = true
|
||||
|
||||
@@ -37,7 +37,6 @@ common-error.workspace = true
|
||||
common-grpc-expr.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-procedure.workspace = true
|
||||
common-procedure-test.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
@@ -92,6 +91,7 @@ typetag.workspace = true
|
||||
[dev-dependencies]
|
||||
chrono.workspace = true
|
||||
common-procedure = { workspace = true, features = ["testing"] }
|
||||
common-procedure-test.workspace = true
|
||||
common-test-util.workspace = true
|
||||
common-wal = { workspace = true, features = ["testing"] }
|
||||
datatypes.workspace = true
|
||||
|
||||
@@ -15,14 +15,17 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::{ColumnDataType, RowInsertRequests};
|
||||
use api::v1::alter_table_expr::Kind;
|
||||
use api::v1::{
|
||||
AlterTableExpr, ColumnDataType, ModifyColumnType, ModifyColumnTypes, RowInsertRequests,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use client::Output;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
|
||||
use common_telemetry::tracing;
|
||||
use common_telemetry::{tracing, warn};
|
||||
use itertools::Itertools;
|
||||
use opentelemetry_proto::tonic::collector::logs::v1::ExportLogsServiceRequest;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
@@ -33,23 +36,26 @@ use servers::http::prom_store::PHYSICAL_TABLE_PARAM;
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
use servers::otlp::trace::TraceAuxData;
|
||||
use servers::otlp::trace::coerce::{
|
||||
coerce_value_data, is_supported_trace_coercion, resolve_new_trace_column_type,
|
||||
trace_value_datatype,
|
||||
};
|
||||
use servers::otlp::trace::coerce::{coerce_value_data, trace_value_datatype};
|
||||
use servers::otlp::trace::span::{TraceSpan, TraceSpanGroup};
|
||||
use servers::query_handler::{
|
||||
OpenTelemetryProtocolHandler, PipelineHandlerRef, TraceIngestOutcome,
|
||||
};
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{IntoError, ResultExt};
|
||||
use table::requests::{OTLP_METRIC_COMPAT_KEY, OTLP_METRIC_COMPAT_PROM};
|
||||
|
||||
use crate::instance::Instance;
|
||||
use crate::instance::otlp::trace_types::{
|
||||
PendingTraceColumnRewrite, choose_trace_reconcile_decision, enrich_trace_reconcile_error,
|
||||
is_trace_reconcile_candidate_type, push_observed_trace_type, validate_trace_column_rewrites,
|
||||
};
|
||||
use crate::metrics::{
|
||||
OTLP_LOGS_ROWS, OTLP_METRICS_ROWS, OTLP_TRACES_FAILURE_COUNT, OTLP_TRACES_ROWS,
|
||||
};
|
||||
|
||||
pub mod trace_types;
|
||||
|
||||
const TRACE_INGEST_CHUNK_SIZE: usize = 64;
|
||||
const TRACE_FAILURE_MESSAGE_LIMIT: usize = 4;
|
||||
|
||||
@@ -546,34 +552,72 @@ impl Instance {
|
||||
Some(summary)
|
||||
}
|
||||
|
||||
/// Picks the final datatype for one trace column.
|
||||
///
|
||||
/// Existing table schema is authoritative when present. Otherwise we resolve the
|
||||
/// request-local observed types using the shared trace coercion rules.
|
||||
fn choose_trace_target_type(
|
||||
observed_types: &[ColumnDataType],
|
||||
existing_type: Option<ColumnDataType>,
|
||||
) -> ServerResult<Option<ColumnDataType>> {
|
||||
let Some(existing_type) = existing_type else {
|
||||
return resolve_new_trace_column_type(observed_types.iter().copied()).map_err(|_| {
|
||||
error::InvalidParameterSnafu {
|
||||
reason: "unsupported trace type mix".to_string(),
|
||||
}
|
||||
.build()
|
||||
});
|
||||
/// Widen existing trace table columns to Float64 before request rewrite.
|
||||
async fn alter_trace_table_columns_to_float64(
|
||||
&self,
|
||||
ctx: &QueryContextRef,
|
||||
table_name: &str,
|
||||
column_names: &[String],
|
||||
) -> ServerResult<()> {
|
||||
let catalog_name = ctx.current_catalog().to_string();
|
||||
let schema_name = ctx.current_schema();
|
||||
let alter_expr = AlterTableExpr {
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::ModifyColumnTypes(ModifyColumnTypes {
|
||||
modify_column_types: column_names
|
||||
.iter()
|
||||
.map(|column_name| ModifyColumnType {
|
||||
column_name: column_name.clone(),
|
||||
target_type: ColumnDataType::Float64 as i32,
|
||||
target_type_extension: None,
|
||||
})
|
||||
.collect(),
|
||||
})),
|
||||
};
|
||||
|
||||
if observed_types.iter().copied().all(|request_type| {
|
||||
request_type == existing_type
|
||||
|| is_supported_trace_coercion(request_type, existing_type)
|
||||
}) {
|
||||
Ok(Some(existing_type))
|
||||
} else {
|
||||
error::InvalidParameterSnafu {
|
||||
reason: "unsupported trace type mix".to_string(),
|
||||
if let Err(err) = self
|
||||
.statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
.await
|
||||
{
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(&catalog_name, &schema_name, table_name, None)
|
||||
.await
|
||||
.map_err(servers::error::Error::from)?;
|
||||
let alter_already_applied = table
|
||||
.map(|table| {
|
||||
let table_schema = table.schema();
|
||||
column_names.iter().all(|column_name| {
|
||||
table_schema
|
||||
.column_schema_by_name(column_name)
|
||||
.and_then(|table_col| {
|
||||
ColumnDataTypeWrapper::try_from(table_col.data_type.clone())
|
||||
.ok()
|
||||
.map(|wrapper| wrapper.datatype())
|
||||
})
|
||||
== Some(ColumnDataType::Float64)
|
||||
})
|
||||
})
|
||||
.unwrap_or(false);
|
||||
|
||||
if alter_already_applied {
|
||||
return Ok(());
|
||||
}
|
||||
.fail()
|
||||
|
||||
warn!(
|
||||
table_name,
|
||||
columns = ?column_names,
|
||||
error = %err,
|
||||
"failed to widen trace columns before insert"
|
||||
);
|
||||
|
||||
return Err(wrap_trace_alter_failure(err));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Coerce request column types and values to match the existing table schema
|
||||
@@ -598,7 +642,8 @@ impl Instance {
|
||||
};
|
||||
|
||||
let table_schema = table.map(|table| table.schema());
|
||||
let mut pending_coercions = Vec::new();
|
||||
let mut pending_rewrites = Vec::new();
|
||||
let mut pending_alter_columns = Vec::new();
|
||||
|
||||
for (col_idx, col_schema) in rows.schema.iter().enumerate() {
|
||||
let Some(current_type) = ColumnDataType::try_from(col_schema.datatype).ok() else {
|
||||
@@ -647,8 +692,8 @@ impl Instance {
|
||||
|
||||
// Decide the final type once per column, then rewrite all affected cells
|
||||
// together in one row pass below.
|
||||
let Some(target_type) =
|
||||
Self::choose_trace_target_type(&observed_types, existing_type).map_err(
|
||||
let Some(decision) =
|
||||
choose_trace_reconcile_decision(&observed_types, existing_type).map_err(
|
||||
|_| {
|
||||
enrich_trace_reconcile_error(
|
||||
&req.table_name,
|
||||
@@ -661,31 +706,54 @@ impl Instance {
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let target_type = decision.target_type();
|
||||
|
||||
if observed_types
|
||||
.iter()
|
||||
.all(|observed| *observed == target_type)
|
||||
if !decision.requires_alter()
|
||||
&& observed_types
|
||||
.iter()
|
||||
.all(|observed| *observed == target_type)
|
||||
&& col_schema.datatype == target_type as i32
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
pending_coercions.push((col_idx, target_type, col_schema.column_name.clone()));
|
||||
if decision.requires_alter()
|
||||
&& !pending_alter_columns.contains(&col_schema.column_name)
|
||||
{
|
||||
pending_alter_columns.push(col_schema.column_name.clone());
|
||||
}
|
||||
|
||||
pending_rewrites.push(PendingTraceColumnRewrite {
|
||||
col_idx,
|
||||
target_type,
|
||||
column_name: col_schema.column_name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if pending_coercions.is_empty() {
|
||||
if pending_rewrites.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
validate_trace_column_rewrites(&rows.rows, &pending_rewrites, &req.table_name)?;
|
||||
|
||||
if !pending_alter_columns.is_empty() {
|
||||
self.alter_trace_table_columns_to_float64(
|
||||
ctx,
|
||||
&req.table_name,
|
||||
&pending_alter_columns,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Update schema metadata before mutating row values so both stay in sync.
|
||||
for (col_idx, target_type, ..) in &pending_coercions {
|
||||
rows.schema[*col_idx].datatype = *target_type as i32;
|
||||
for pending_rewrite in &pending_rewrites {
|
||||
rows.schema[pending_rewrite.col_idx].datatype = pending_rewrite.target_type as i32;
|
||||
}
|
||||
|
||||
// Apply all pending column rewrites in one row pass.
|
||||
for row in &mut rows.rows {
|
||||
for (col_idx, target_type, column_name) in &pending_coercions {
|
||||
let Some(value) = row.values.get_mut(*col_idx) else {
|
||||
for pending_rewrite in &pending_rewrites {
|
||||
let Some(value) = row.values.get_mut(pending_rewrite.col_idx) else {
|
||||
continue;
|
||||
};
|
||||
let Some(request_type) =
|
||||
@@ -693,20 +761,23 @@ impl Instance {
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if request_type == *target_type {
|
||||
if request_type == pending_rewrite.target_type {
|
||||
continue;
|
||||
}
|
||||
|
||||
value.value_data = coerce_value_data(
|
||||
&value.value_data,
|
||||
*target_type,
|
||||
pending_rewrite.target_type,
|
||||
request_type,
|
||||
)
|
||||
.map_err(|_| {
|
||||
error::InvalidParameterSnafu {
|
||||
reason: format!(
|
||||
"failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
|
||||
column_name, req.table_name, request_type, target_type
|
||||
pending_rewrite.column_name,
|
||||
req.table_name,
|
||||
request_type,
|
||||
pending_rewrite.target_type
|
||||
),
|
||||
}
|
||||
.build()
|
||||
@@ -719,58 +790,21 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
fn enrich_trace_reconcile_error(
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
observed_types: &[ColumnDataType],
|
||||
existing_type: Option<ColumnDataType>,
|
||||
) -> servers::error::Error {
|
||||
let observed_types = observed_types
|
||||
.iter()
|
||||
.map(|datatype| format!("{datatype:?}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
error::InvalidParameterSnafu {
|
||||
reason: match existing_type {
|
||||
Some(existing_type) => format!(
|
||||
"failed to reconcile trace column '{}' in table '{}' with observed types [{}] against existing {:?}",
|
||||
column_name, table_name, observed_types, existing_type
|
||||
),
|
||||
None => format!(
|
||||
"failed to reconcile trace column '{}' in table '{}' with observed types [{}]",
|
||||
column_name, table_name, observed_types
|
||||
),
|
||||
},
|
||||
}
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Only these trace scalar types participate in reconciliation. Other column kinds
|
||||
/// such as JSON and binary keep their original write path and schema checks.
|
||||
fn is_trace_reconcile_candidate_type(datatype: ColumnDataType) -> bool {
|
||||
matches!(
|
||||
datatype,
|
||||
ColumnDataType::String
|
||||
| ColumnDataType::Boolean
|
||||
| ColumnDataType::Int64
|
||||
| ColumnDataType::Float64
|
||||
)
|
||||
}
|
||||
|
||||
/// Keeps the observed type list small without depending on enum ordering.
|
||||
fn push_observed_trace_type(observed_types: &mut Vec<ColumnDataType>, datatype: ColumnDataType) {
|
||||
if !observed_types.contains(&datatype) {
|
||||
observed_types.push(datatype);
|
||||
}
|
||||
/// Preserve the original alter failure status so chunk retry behavior stays correct.
|
||||
fn wrap_trace_alter_failure<E>(err: E) -> servers::error::Error
|
||||
where
|
||||
E: ErrorExt + Send + Sync + 'static,
|
||||
{
|
||||
error::ExecuteGrpcQuerySnafu.into_error(BoxedError::new(err))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use servers::query_handler::TraceIngestOutcome;
|
||||
|
||||
use super::{ChunkFailureReaction, Instance};
|
||||
use super::{ChunkFailureReaction, Instance, wrap_trace_alter_failure};
|
||||
use crate::metrics::OTLP_TRACES_FAILURE_COUNT;
|
||||
|
||||
#[test]
|
||||
@@ -923,4 +957,18 @@ mod tests {
|
||||
ChunkFailureReaction::DiscardChunk
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_wrap_trace_alter_failure_preserves_status_code() {
    // Build a source error with a well-known status code, then wrap it.
    let missing_table = servers::error::TableNotFoundSnafu {
        catalog: "greptime".to_string(),
        schema: "public".to_string(),
        table: "trace_type_missing".to_string(),
    }
    .build();

    let wrapped = wrap_trace_alter_failure(missing_table);

    // The wrapper must surface the status code of the wrapped error.
    assert_eq!(wrapped.status_code(), StatusCode::TableNotFound);
}
|
||||
}
|
||||
|
||||
308
src/frontend/src/instance/otlp/trace_types.rs
Normal file
308
src/frontend/src/instance/otlp/trace_types.rs
Normal file
@@ -0,0 +1,308 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::{ColumnDataType, Row};
|
||||
use servers::error::{self, Result as ServerResult};
|
||||
use servers::otlp::trace::coerce::{
|
||||
coerce_value_data, is_supported_trace_coercion, resolve_new_trace_column_type,
|
||||
trace_value_datatype,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub(super) enum TraceReconcileDecision {
|
||||
UseExisting(ColumnDataType),
|
||||
UseRequestLocal(ColumnDataType),
|
||||
AlterExistingTo(ColumnDataType),
|
||||
}
|
||||
|
||||
impl TraceReconcileDecision {
|
||||
pub(super) fn target_type(self) -> ColumnDataType {
|
||||
match self {
|
||||
Self::UseExisting(target_type)
|
||||
| Self::UseRequestLocal(target_type)
|
||||
| Self::AlterExistingTo(target_type) => target_type,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn requires_alter(self) -> bool {
|
||||
matches!(self, Self::AlterExistingTo(_))
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct PendingTraceColumnRewrite {
|
||||
pub(super) col_idx: usize,
|
||||
pub(super) target_type: ColumnDataType,
|
||||
pub(super) column_name: String,
|
||||
}
|
||||
|
||||
/// Picks the reconciliation action for one trace column.
|
||||
///
|
||||
/// Existing table schema is authoritative unless the only incompatible case is
|
||||
/// widening an existing Int64 column to Float64 for incoming Int64/Float64 data.
|
||||
pub(super) fn choose_trace_reconcile_decision(
|
||||
observed_types: &[ColumnDataType],
|
||||
existing_type: Option<ColumnDataType>,
|
||||
) -> ServerResult<Option<TraceReconcileDecision>> {
|
||||
let Some(existing_type) = existing_type else {
|
||||
return resolve_new_trace_column_type(observed_types.iter().copied())
|
||||
.map(|target_type| target_type.map(TraceReconcileDecision::UseRequestLocal))
|
||||
.map_err(|_| {
|
||||
error::InvalidParameterSnafu {
|
||||
reason: "unsupported trace type mix".to_string(),
|
||||
}
|
||||
.build()
|
||||
});
|
||||
};
|
||||
|
||||
if observed_types.iter().all(|&request_type| {
|
||||
request_type == existing_type || is_supported_trace_coercion(request_type, existing_type)
|
||||
}) {
|
||||
return Ok(Some(TraceReconcileDecision::UseExisting(existing_type)));
|
||||
}
|
||||
|
||||
if existing_type == ColumnDataType::Int64
|
||||
&& observed_types.contains(&ColumnDataType::Float64)
|
||||
&& observed_types.iter().all(|observed_type| {
|
||||
matches!(
|
||||
observed_type,
|
||||
ColumnDataType::Int64 | ColumnDataType::Float64
|
||||
)
|
||||
})
|
||||
{
|
||||
return Ok(Some(TraceReconcileDecision::AlterExistingTo(
|
||||
ColumnDataType::Float64,
|
||||
)));
|
||||
}
|
||||
|
||||
error::InvalidParameterSnafu {
|
||||
reason: "unsupported trace type mix".to_string(),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
/// Validate all pending trace column rewrites before any schema mutation happens.
|
||||
pub(super) fn validate_trace_column_rewrites(
|
||||
rows: &[Row],
|
||||
pending_rewrites: &[PendingTraceColumnRewrite],
|
||||
table_name: &str,
|
||||
) -> ServerResult<()> {
|
||||
for row in rows {
|
||||
for pending_rewrite in pending_rewrites {
|
||||
let Some(value) = row.values.get(pending_rewrite.col_idx) else {
|
||||
continue;
|
||||
};
|
||||
let Some(request_type) = value.value_data.as_ref().and_then(trace_value_datatype)
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if request_type == pending_rewrite.target_type {
|
||||
continue;
|
||||
}
|
||||
|
||||
coerce_value_data(&value.value_data, pending_rewrite.target_type, request_type)
|
||||
.map_err(|_| {
|
||||
error::InvalidParameterSnafu {
|
||||
reason: format!(
|
||||
"failed to coerce trace column '{}' in table '{}' from {:?} to {:?}",
|
||||
pending_rewrite.column_name,
|
||||
table_name,
|
||||
request_type,
|
||||
pending_rewrite.target_type
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn enrich_trace_reconcile_error(
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
observed_types: &[ColumnDataType],
|
||||
existing_type: Option<ColumnDataType>,
|
||||
) -> servers::error::Error {
|
||||
let observed_types = observed_types
|
||||
.iter()
|
||||
.map(|datatype| format!("{datatype:?}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
|
||||
error::InvalidParameterSnafu {
|
||||
reason: match existing_type {
|
||||
Some(existing_type) => format!(
|
||||
"failed to reconcile trace column '{}' in table '{}' with observed types [{}] against existing {:?}",
|
||||
column_name, table_name, observed_types, existing_type
|
||||
),
|
||||
None => format!(
|
||||
"failed to reconcile trace column '{}' in table '{}' with observed types [{}]",
|
||||
column_name, table_name, observed_types
|
||||
),
|
||||
},
|
||||
}
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Only these trace scalar types participate in reconciliation. Other column kinds
|
||||
/// such as JSON and binary keep their original write path and schema checks.
|
||||
pub(super) fn is_trace_reconcile_candidate_type(datatype: ColumnDataType) -> bool {
|
||||
matches!(
|
||||
datatype,
|
||||
ColumnDataType::String
|
||||
| ColumnDataType::Boolean
|
||||
| ColumnDataType::Int64
|
||||
| ColumnDataType::Float64
|
||||
)
|
||||
}
|
||||
|
||||
/// Keeps the observed type list small without depending on enum ordering.
|
||||
pub(super) fn push_observed_trace_type(
|
||||
observed_types: &mut Vec<ColumnDataType>,
|
||||
datatype: ColumnDataType,
|
||||
) {
|
||||
if !observed_types.contains(&datatype) {
|
||||
observed_types.push(datatype);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use api::v1::value::ValueData;
    use api::v1::{ColumnDataType, Row, Value};
    use common_error::ext::ErrorExt;
    use common_error::status_code::StatusCode;

    use super::{
        PendingTraceColumnRewrite, TraceReconcileDecision, choose_trace_reconcile_decision,
        enrich_trace_reconcile_error, is_trace_reconcile_candidate_type, push_observed_trace_type,
        validate_trace_column_rewrites,
    };

    /// Observed types shared by the mixed Int64/Float64 cases below.
    const INT64_AND_FLOAT64: [ColumnDataType; 2] =
        [ColumnDataType::Int64, ColumnDataType::Float64];

    #[test]
    fn test_choose_trace_reconcile_decision_existing_int64_keeps_int64() {
        let decision =
            choose_trace_reconcile_decision(&[ColumnDataType::Int64], Some(ColumnDataType::Int64))
                .unwrap();

        assert_eq!(
            decision,
            Some(TraceReconcileDecision::UseExisting(ColumnDataType::Int64))
        );
    }

    #[test]
    fn test_choose_trace_reconcile_decision_existing_int64_widens_to_float64() {
        let decision =
            choose_trace_reconcile_decision(&INT64_AND_FLOAT64, Some(ColumnDataType::Int64))
                .unwrap();

        assert_eq!(
            decision,
            Some(TraceReconcileDecision::AlterExistingTo(
                ColumnDataType::Float64
            ))
        );
    }

    #[test]
    fn test_choose_trace_reconcile_decision_existing_float64_stays_authoritative() {
        let decision =
            choose_trace_reconcile_decision(&INT64_AND_FLOAT64, Some(ColumnDataType::Float64))
                .unwrap();

        assert_eq!(
            decision,
            Some(TraceReconcileDecision::UseExisting(ColumnDataType::Float64))
        );
    }

    #[test]
    fn test_choose_trace_reconcile_decision_existing_int64_with_boolean_is_error() {
        let observed = [ColumnDataType::Boolean, ColumnDataType::Int64];

        let err =
            choose_trace_reconcile_decision(&observed, Some(ColumnDataType::Int64)).unwrap_err();

        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_choose_trace_reconcile_decision_request_local_prefers_float64() {
        let decision = choose_trace_reconcile_decision(&INT64_AND_FLOAT64, None).unwrap();

        assert_eq!(
            decision,
            Some(TraceReconcileDecision::UseRequestLocal(
                ColumnDataType::Float64
            ))
        );
    }

    #[test]
    fn test_validate_trace_column_rewrites_rejects_invalid_string_parse() {
        // A string cell that cannot be parsed as a number, targeted at an Int64 column.
        let unparsable_cell = Value {
            value_data: Some(ValueData::StringValue("not_a_number".to_string())),
        };
        let rows = vec![Row {
            values: vec![unparsable_cell],
        }];
        let pending_rewrites = vec![PendingTraceColumnRewrite {
            col_idx: 0,
            target_type: ColumnDataType::Int64,
            column_name: "span_attributes.attr_int".to_string(),
        }];

        let err = validate_trace_column_rewrites(&rows, &pending_rewrites, "trace_type_atomicity")
            .unwrap_err();

        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
    }

    #[test]
    fn test_enrich_trace_reconcile_error_includes_existing_type() {
        let err = enrich_trace_reconcile_error(
            "trace_type_atomicity",
            "span_attributes.attr_int",
            &[ColumnDataType::String, ColumnDataType::Int64],
            Some(ColumnDataType::Boolean),
        );
        let message = err.to_string();

        assert_eq!(err.status_code(), StatusCode::InvalidArguments);
        assert!(message.contains("span_attributes.attr_int"));
        assert!(message.contains("Boolean"));
    }

    #[test]
    fn test_is_trace_reconcile_candidate_type_filters_non_scalar_types() {
        for accepted in [ColumnDataType::String, ColumnDataType::Boolean] {
            assert!(is_trace_reconcile_candidate_type(accepted));
        }
        for rejected in [
            ColumnDataType::Binary,
            ColumnDataType::TimestampMillisecond,
        ] {
            assert!(!is_trace_reconcile_candidate_type(rejected));
        }
    }

    #[test]
    fn test_push_observed_trace_type_deduplicates_types() {
        let mut observed_types = Vec::new();

        // Int64 is pushed twice on purpose; only the first push may be recorded.
        for datatype in [
            ColumnDataType::Int64,
            ColumnDataType::Int64,
            ColumnDataType::Float64,
        ] {
            push_observed_trace_type(&mut observed_types, datatype);
        }

        assert_eq!(
            observed_types,
            vec![ColumnDataType::Int64, ColumnDataType::Float64]
        );
    }
}
|
||||
@@ -31,7 +31,7 @@ use snafu::{OptionExt, ResultExt};
|
||||
use crate::error::{
|
||||
CatalogSnafu, CollectRecordbatchSnafu, ExecLogicalPlanSnafu,
|
||||
PrometheusLabelValuesQueryPlanSnafu, PrometheusMetricNamesQueryPlanSnafu, ReadTableSnafu,
|
||||
Result, TableNotFoundSnafu,
|
||||
Result, TableNotFoundSnafu, TableSnafu,
|
||||
};
|
||||
use crate::instance::Instance;
|
||||
|
||||
@@ -120,20 +120,32 @@ impl Instance {
|
||||
})
|
||||
.unwrap_or_else(|| ctx.current_schema());
|
||||
|
||||
let full_table_name = format_full_table_name(ctx.current_catalog(), &table_schema, &metric);
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(ctx.current_catalog(), &table_schema, &metric, Some(ctx))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
|
||||
table_name: full_table_name.clone(),
|
||||
})?;
|
||||
|
||||
// Check label column existence before building the query plan so a missing label can be
|
||||
// reported as `TableColumnNotFound` and handled like Prometheus expects.
|
||||
if table.schema().column_schema_by_name(&label_name).is_none() {
|
||||
return table::error::ColumnNotExistsSnafu {
|
||||
column_name: label_name,
|
||||
table_name: full_table_name,
|
||||
}
|
||||
.fail()
|
||||
.context(TableSnafu);
|
||||
}
|
||||
|
||||
let dataframe = self
|
||||
.query_engine
|
||||
.read_table(table.clone())
|
||||
.with_context(|_| ReadTableSnafu {
|
||||
table_name: format_full_table_name(ctx.current_catalog(), &table_schema, &metric),
|
||||
table_name: full_table_name,
|
||||
})?;
|
||||
|
||||
let scan_plan = dataframe.into_unoptimized_plan();
|
||||
|
||||
@@ -24,7 +24,6 @@ use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
|
||||
use common_meta::distributed_time_constants::META_LEASE_SECS;
|
||||
use common_meta::election::CANDIDATE_LEASE_SECS;
|
||||
use common_meta::election::etcd::EtcdElection;
|
||||
use common_meta::kv_backend::chroot::ChrootKvBackend;
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
@@ -290,6 +289,7 @@ pub async fn metasrv_builder(
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::distributed_time_constants::POSTGRES_KEEP_ALIVE_SECS;
|
||||
use common_meta::election::CANDIDATE_LEASE_SECS;
|
||||
use common_meta::election::rds::postgres::{ElectionPgClient, PgElection};
|
||||
use common_meta::kv_backend::rds::PgStore;
|
||||
use deadpool_postgres::{Config, ManagerConfig, RecyclingMethod};
|
||||
@@ -354,6 +354,7 @@ pub async fn metasrv_builder(
|
||||
(None, BackendImpl::MysqlStore) => {
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::election::CANDIDATE_LEASE_SECS;
|
||||
use common_meta::election::rds::mysql::{ElectionMysqlClient, MySqlElection};
|
||||
use common_meta::kv_backend::rds::MySqlStore;
|
||||
|
||||
|
||||
@@ -1136,6 +1136,12 @@ impl Error {
|
||||
Error::RetryLater { .. }
|
||||
| Error::RetryLaterWithSource { .. }
|
||||
| Error::MailboxTimeout { .. }
|
||||
) || matches!(
|
||||
self,
|
||||
Error::AllocateRegions { source, .. } if source.is_retry_later()
|
||||
) || matches!(
|
||||
self,
|
||||
Error::DeallocateRegions { source, .. } if source.is_retry_later()
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1324,3 +1330,35 @@ pub(crate) fn match_for_io_error(err_status: &tonic::Status) -> Option<&std::io:
|
||||
err = err.source()?;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use common_error::mock::MockError;
    use common_error::status_code::StatusCode;
    use snafu::ResultExt;

    use super::DeallocateRegionsSnafu;

    /// A `DeallocateRegions` error wrapping a retry-later source must be retryable.
    #[test]
    fn test_deallocate_regions_is_retryable_when_source_is_retry_later() {
        let retry_later_source =
            common_meta::error::Error::retry_later(MockError::new(StatusCode::Internal));

        let wrapped = Err::<(), _>(retry_later_source)
            .context(DeallocateRegionsSnafu { table_id: 1024_u32 });

        assert!(wrapped.unwrap_err().is_retryable());
    }

    /// A `DeallocateRegions` error wrapping any other source must not be retryable.
    #[test]
    fn test_deallocate_regions_is_not_retryable_when_source_is_not_retry_later() {
        let unexpected_source = common_meta::error::UnexpectedSnafu {
            err_msg: "mock error",
        }
        .build();

        let wrapped = Err::<(), _>(unexpected_source)
            .context(DeallocateRegionsSnafu { table_id: 1024_u32 });

        assert!(!wrapped.unwrap_err().is_retryable());
    }
}
|
||||
|
||||
@@ -23,7 +23,7 @@ pub mod repartition_start;
|
||||
pub mod utils;
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@@ -40,15 +40,15 @@ use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
|
||||
use common_meta::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::region_keeper::MemoryRegionKeeperRef;
|
||||
use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
|
||||
use common_meta::region_registry::LeaderRegionRegistryRef;
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_meta::rpc::router::{RegionRoute, operating_leader_regions};
|
||||
use common_procedure::error::{FromJsonSnafu, ToJsonSnafu};
|
||||
use common_procedure::{
|
||||
BoxedProcedure, Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
ProcedureManagerRef, Result as ProcedureResult, Status, StringKey, UserMetadata,
|
||||
};
|
||||
use common_telemetry::{error, info};
|
||||
use common_telemetry::{error, info, warn};
|
||||
use partition::expr::PartitionExpr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -56,6 +56,8 @@ use store_api::storage::{RegionNumber, TableId};
|
||||
use table::table_name::TableName;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::repartition::collect::ProcedureMeta;
|
||||
use crate::procedure::repartition::deallocate_region::DeallocateRegion;
|
||||
use crate::procedure::repartition::group::{
|
||||
Context as RepartitionGroupContext, RepartitionGroupProcedure,
|
||||
};
|
||||
@@ -74,6 +76,12 @@ pub struct PersistentContext {
|
||||
pub table_name: String,
|
||||
pub table_id: TableId,
|
||||
pub plans: Vec<RepartitionPlanEntry>,
|
||||
/// Records failed sub-procedures for metadata rollback.
|
||||
#[serde(default)]
|
||||
pub failed_procedures: Vec<ProcedureMeta>,
|
||||
#[serde(default)]
|
||||
/// Records unknown sub-procedures for metadata rollback.
|
||||
pub unknown_procedures: Vec<ProcedureMeta>,
|
||||
/// The timeout for repartition operations.
|
||||
#[serde(with = "humantime_serde", default = "default_timeout")]
|
||||
pub timeout: Duration,
|
||||
@@ -102,6 +110,8 @@ impl PersistentContext {
|
||||
table_name,
|
||||
table_id,
|
||||
plans: vec![],
|
||||
failed_procedures: vec![],
|
||||
unknown_procedures: vec![],
|
||||
timeout: timeout.unwrap_or_else(default_timeout),
|
||||
}
|
||||
}
|
||||
@@ -393,6 +403,23 @@ impl Context {
|
||||
.await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn register_operating_regions(
|
||||
memory_region_keeper: &MemoryRegionKeeperRef,
|
||||
region_routes: &[RegionRoute],
|
||||
) -> Result<Vec<OperatingRegionGuard>> {
|
||||
let mut operating_guards = Vec::with_capacity(region_routes.len());
|
||||
for (region_id, datanode_id) in operating_leader_regions(region_routes) {
|
||||
let guard = memory_region_keeper
|
||||
.register(datanode_id, region_id)
|
||||
.context(error::RegionOperatingRaceSnafu {
|
||||
peer_id: datanode_id,
|
||||
region_id,
|
||||
})?;
|
||||
operating_guards.push(guard);
|
||||
}
|
||||
Ok(operating_guards)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -456,6 +483,131 @@ impl RepartitionProcedure {
|
||||
|
||||
Ok(Self { state, context })
|
||||
}
|
||||
|
||||
/// Returns whether parent rollback should remove this repartition's allocated regions.
|
||||
///
|
||||
/// This uses an "after AllocateRegion" semantic: once execution reaches
|
||||
/// `AllocateRegion` or any later state, rollback must try to remove this round's
|
||||
/// `allocated_region_ids` from table-route metadata when they exist.
|
||||
///
|
||||
/// State flow:
|
||||
/// `RepartitionStart -> AllocateRegion -> Dispatch -> Collect -> DeallocateRegion -> RepartitionEnd`
|
||||
/// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
/// rollback allocated regions in metadata
|
||||
///
|
||||
/// Notes:
|
||||
/// - `RepartitionStart`: no-op, because allocation has not happened yet.
|
||||
/// - `AllocateRegion` / `Dispatch` / `Collect` rollback-active.
|
||||
/// - `DeallocateRegion`: is not rollback-active.
|
||||
/// - `RepartitionEnd`: no-op.
|
||||
fn should_rollback_allocated_regions(&self) -> bool {
|
||||
self.state.as_any().is::<allocate_region::AllocateRegion>()
|
||||
|| self.state.as_any().is::<dispatch::Dispatch>()
|
||||
|| self.state.as_any().is::<collect::Collect>()
|
||||
}
|
||||
|
||||
fn rollback_allocated_region_ids(&self) -> HashSet<store_api::storage::RegionId> {
|
||||
if self.state.as_any().is::<allocate_region::AllocateRegion>()
|
||||
|| self.state.as_any().is::<dispatch::Dispatch>()
|
||||
{
|
||||
return self
|
||||
.context
|
||||
.persistent_ctx
|
||||
.plans
|
||||
.iter()
|
||||
.flat_map(|plan| plan.allocated_region_ids.iter().copied())
|
||||
.collect();
|
||||
}
|
||||
|
||||
self.context
|
||||
.persistent_ctx
|
||||
.failed_procedures
|
||||
.iter()
|
||||
.chain(self.context.persistent_ctx.unknown_procedures.iter())
|
||||
.flat_map(|procedure_meta| {
|
||||
let plan_index = procedure_meta.plan_index;
|
||||
self.context.persistent_ctx.plans[plan_index]
|
||||
.allocated_region_ids
|
||||
.iter()
|
||||
.copied()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn filter_allocated_region_routes(
|
||||
region_routes: &[RegionRoute],
|
||||
allocated_region_ids: &HashSet<store_api::storage::RegionId>,
|
||||
) -> Vec<RegionRoute> {
|
||||
region_routes
|
||||
.iter()
|
||||
.filter(|route| !allocated_region_ids.contains(&route.region.id))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn rollback_inner(&mut self, procedure_ctx: &ProcedureContext) -> Result<()> {
|
||||
if !self.should_rollback_allocated_regions() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_id = self.context.persistent_ctx.table_id;
|
||||
let allocated_region_ids = self.rollback_allocated_region_ids();
|
||||
if allocated_region_ids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_lock = TableLock::Write(table_id).into();
|
||||
let _guard = procedure_ctx.provider.acquire_lock(&table_lock).await;
|
||||
let table_route_value = self.context.get_table_route_value().await?;
|
||||
let current_region_routes = table_route_value.region_routes().unwrap();
|
||||
let allocated_region_routes = DeallocateRegion::filter_deallocatable_region_routes(
|
||||
table_id,
|
||||
current_region_routes,
|
||||
&allocated_region_ids,
|
||||
);
|
||||
if !allocated_region_routes.is_empty() {
|
||||
let table = TableName {
|
||||
catalog_name: self.context.persistent_ctx.catalog_name.clone(),
|
||||
schema_name: self.context.persistent_ctx.schema_name.clone(),
|
||||
table_name: self.context.persistent_ctx.table_name.clone(),
|
||||
};
|
||||
// Memory guards are not required here,
|
||||
// because the table metadata still contains routes for the deallocating regions.
|
||||
if let Err(err) = DeallocateRegion::deallocate_regions(
|
||||
&self.context.node_manager,
|
||||
&self.context.leader_region_registry,
|
||||
table,
|
||||
table_id,
|
||||
&allocated_region_routes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
warn!(err; "Failed to drop allocated regions during repartition rollback, table_id: {}, regions: {:?}", table_id, allocated_region_ids);
|
||||
}
|
||||
}
|
||||
|
||||
let new_region_routes =
|
||||
Self::filter_allocated_region_routes(current_region_routes, &allocated_region_ids);
|
||||
|
||||
if new_region_routes.len() != current_region_routes.len() {
|
||||
self.context
|
||||
.update_table_route(&table_route_value, new_region_routes, HashMap::new())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.with_context(|_| error::RetryLaterWithSourceSnafu {
|
||||
reason: format!(
|
||||
"Failed to rollback allocated region routes for repartition table: {}",
|
||||
table_id
|
||||
),
|
||||
})?;
|
||||
}
|
||||
|
||||
if let Err(err) = self.context.invalidate_table_cache().await {
|
||||
warn!(err; "Failed to invalidate table cache during repartition rollback, table_id: {}", table_id);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -497,9 +649,14 @@ impl Procedure for RepartitionProcedure {
|
||||
}
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, ctx: &ProcedureContext) -> ProcedureResult<()> {
|
||||
self.rollback_inner(ctx)
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
// TODO(weny): support rollback.
|
||||
false
|
||||
true
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
@@ -624,3 +781,642 @@ impl RepartitionProcedureFactory for DefaultRepartitionProcedureFactory {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_error::mock::MockError;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_meta::ddl::test_util::datanode_handler::{
|
||||
DatanodeWatcher, NaiveDatanodeHandler, UnexpectedErrorDatanodeHandler,
|
||||
};
|
||||
use common_meta::error;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_meta::test_util::MockDatanodeManager;
|
||||
use common_procedure::{Error as ProcedureError, Procedure, ProcedureId, ProcedureState};
|
||||
use store_api::storage::RegionId;
|
||||
use table::table_name::TableName;
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::procedure::repartition::allocate_region::AllocateRegion;
|
||||
use crate::procedure::repartition::collect::Collect;
|
||||
use crate::procedure::repartition::deallocate_region::DeallocateRegion;
|
||||
use crate::procedure::repartition::dispatch::Dispatch;
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
use crate::procedure::repartition::repartition_end::RepartitionEnd;
|
||||
use crate::procedure::repartition::test_util::{
|
||||
TestingEnv, assert_parent_state, current_parent_region_routes, extract_subprocedure_ids,
|
||||
new_parent_context, procedure_context_with_receivers, procedure_state_receiver, range_expr,
|
||||
test_region_route, test_region_wal_options,
|
||||
};
|
||||
|
||||
fn test_plan(table_id: TableId) -> RepartitionPlanEntry {
|
||||
RepartitionPlanEntry {
|
||||
group_id: uuid::Uuid::new_v4(),
|
||||
source_regions: vec![RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 100),
|
||||
}],
|
||||
target_regions: vec![
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 50),
|
||||
},
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 3),
|
||||
partition_expr: range_expr("x", 50, 100),
|
||||
},
|
||||
],
|
||||
allocated_region_ids: vec![RegionId::new(table_id, 3)],
|
||||
pending_deallocate_region_ids: vec![],
|
||||
transition_map: vec![vec![0, 1]],
|
||||
}
|
||||
}
|
||||
|
||||
fn test_procedure(state: Box<dyn State>, context: Context) -> RepartitionProcedure {
|
||||
RepartitionProcedure { state, context }
|
||||
}
|
||||
|
||||
fn test_context(env: &TestingEnv, table_id: TableId) -> Context {
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let persistent_ctx = PersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
|
||||
Context::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
)
|
||||
}
|
||||
|
||||
/// Filtering must drop the route whose region id is in the allocated set and keep the
/// other one.
#[test]
fn test_filter_allocated_region_routes() {
    let table_id = 1024;
    let kept_region_id = RegionId::new(table_id, 1);
    let allocated_region_id = RegionId::new(table_id, 2);

    let region_routes = vec![
        test_region_route(kept_region_id, "a"),
        test_region_route(allocated_region_id, "b"),
    ];
    let allocated_region_ids = HashSet::from([allocated_region_id]);

    let new_region_routes = RepartitionProcedure::filter_allocated_region_routes(
        &region_routes,
        &allocated_region_ids,
    );

    assert_eq!(new_region_routes.len(), 1);
    assert_eq!(new_region_routes[0].region.id, kept_region_id);
}
|
||||
|
||||
/// Only the `AllocateRegion`, `Dispatch` and `Collect` states are rollback-active.
#[test]
fn test_should_rollback_allocated_regions() {
    let env = TestingEnv::new();
    let table_id = 1024;

    // Builds a procedure in the given state with a fresh context and asks it whether
    // allocated regions should be rolled back.
    let rollback_active = |state: Box<dyn State>| {
        test_procedure(state, test_context(&env, table_id)).should_rollback_allocated_regions()
    };

    assert!(!rollback_active(Box::new(RepartitionStart::new(
        vec![],
        vec![]
    ))));
    assert!(rollback_active(Box::new(AllocateRegion::new(vec![]))));
    assert!(rollback_active(Box::new(Dispatch)));
    assert!(rollback_active(Box::new(Collect::new(vec![]))));
    assert!(!rollback_active(Box::new(DeallocateRegion)));
    assert!(!rollback_active(Box::new(RepartitionEnd)));
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_rollback_removes_allocated_routes_from_dispatch() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let original_region_routes = vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 50, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(RegionId::new(table_id, 3), ""),
|
||||
];
|
||||
env.create_physical_table_metadata_with_wal_options(
|
||||
table_id,
|
||||
original_region_routes,
|
||||
test_region_wal_options(&[1, 2]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut persistent_ctx = PersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
persistent_ctx.plans = vec![test_plan(table_id)];
|
||||
persistent_ctx.failed_procedures = vec![ProcedureMeta {
|
||||
plan_index: 0,
|
||||
group_id: Uuid::new_v4(),
|
||||
procedure_id: ProcedureId::random(),
|
||||
}];
|
||||
let context = Context::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
);
|
||||
let mut procedure = RepartitionProcedure {
|
||||
state: Box::new(Dispatch),
|
||||
context,
|
||||
};
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let region_routes = current_parent_region_routes(&procedure.context).await;
|
||||
assert_eq!(region_routes.len(), 2);
|
||||
assert_eq!(region_routes[0].region.id, RegionId::new(table_id, 1));
|
||||
assert_eq!(region_routes[1].region.id, RegionId::new(table_id, 2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_rollback_removes_allocated_routes_from_allocate() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let original_region_routes = vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 50, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(RegionId::new(table_id, 3), ""),
|
||||
];
|
||||
env.create_physical_table_metadata_with_wal_options(
|
||||
table_id,
|
||||
original_region_routes,
|
||||
test_region_wal_options(&[1, 2]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut persistent_ctx = PersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
persistent_ctx.plans = vec![test_plan(table_id)];
|
||||
let context = Context::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
);
|
||||
let mut procedure = RepartitionProcedure {
|
||||
state: Box::new(AllocateRegion::new(vec![])),
|
||||
context,
|
||||
};
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let region_routes = current_parent_region_routes(&procedure.context).await;
|
||||
assert_eq!(region_routes.len(), 2);
|
||||
assert_eq!(region_routes[0].region.id, RegionId::new(table_id, 1));
|
||||
assert_eq!(region_routes[1].region.id, RegionId::new(table_id, 2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_rollback_from_collect_only_removes_failed_allocated_routes() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let original_region_routes = vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(RegionId::new(table_id, 3), ""),
|
||||
test_region_route(RegionId::new(table_id, 4), ""),
|
||||
];
|
||||
env.create_physical_table_metadata_with_wal_options(
|
||||
table_id,
|
||||
original_region_routes,
|
||||
test_region_wal_options(&[1, 2, 3, 4]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut persistent_ctx = PersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
let failed_plan = test_plan(table_id);
|
||||
let succeeded_plan = RepartitionPlanEntry {
|
||||
group_id: Uuid::new_v4(),
|
||||
source_regions: vec![RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 2),
|
||||
partition_expr: range_expr("x", 100, 200),
|
||||
}],
|
||||
target_regions: vec![
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 2),
|
||||
partition_expr: range_expr("x", 100, 150),
|
||||
},
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 4),
|
||||
partition_expr: range_expr("x", 150, 200),
|
||||
},
|
||||
],
|
||||
allocated_region_ids: vec![RegionId::new(table_id, 4)],
|
||||
pending_deallocate_region_ids: vec![],
|
||||
transition_map: vec![vec![0]],
|
||||
};
|
||||
persistent_ctx.plans = vec![failed_plan, succeeded_plan];
|
||||
persistent_ctx.failed_procedures = vec![ProcedureMeta {
|
||||
plan_index: 0,
|
||||
group_id: persistent_ctx.plans[0].group_id,
|
||||
procedure_id: ProcedureId::random(),
|
||||
}];
|
||||
|
||||
let context = Context::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
);
|
||||
let mut procedure = RepartitionProcedure {
|
||||
state: Box::new(Collect::new(vec![])),
|
||||
context,
|
||||
};
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let region_routes = current_parent_region_routes(&procedure.context).await;
|
||||
assert_eq!(region_routes.len(), 3);
|
||||
assert_eq!(region_routes[0].region.id, RegionId::new(table_id, 1));
|
||||
assert_eq!(region_routes[1].region.id, RegionId::new(table_id, 2));
|
||||
assert_eq!(region_routes[2].region.id, RegionId::new(table_id, 4));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_rollback_is_idempotent() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let original_region_routes = vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 50, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(RegionId::new(table_id, 3), ""),
|
||||
];
|
||||
env.create_physical_table_metadata_with_wal_options(
|
||||
table_id,
|
||||
original_region_routes,
|
||||
test_region_wal_options(&[1, 2]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut persistent_ctx = PersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
persistent_ctx.plans = vec![test_plan(table_id)];
|
||||
persistent_ctx.failed_procedures = vec![ProcedureMeta {
|
||||
plan_index: 0,
|
||||
group_id: Uuid::new_v4(),
|
||||
procedure_id: ProcedureId::random(),
|
||||
}];
|
||||
let context = Context::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
);
|
||||
let mut procedure = RepartitionProcedure {
|
||||
state: Box::new(Dispatch),
|
||||
context,
|
||||
};
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
let once = current_parent_region_routes(&procedure.context).await;
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
let twice = current_parent_region_routes(&procedure.context).await;
|
||||
|
||||
assert_eq!(once, twice);
|
||||
assert_eq!(once.len(), 2);
|
||||
assert_eq!(once[0].region.id, RegionId::new(table_id, 1));
|
||||
assert_eq!(once[1].region.id, RegionId::new(table_id, 2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_procedure_flow_split_failed_and_full_rollback() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
|
||||
|
||||
env.create_physical_table_metadata_for_repartition(
|
||||
table_id,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
],
|
||||
test_region_wal_options(&[1, 2]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let context = new_parent_context(&env, node_manager, table_id);
|
||||
let mut procedure = RepartitionProcedure::new(
|
||||
vec![range_expr("x", 0, 100)],
|
||||
vec![range_expr("x", 0, 50), range_expr("x", 50, 100)],
|
||||
context,
|
||||
);
|
||||
|
||||
let start_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!start_status.need_persist());
|
||||
let start_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(start_status.need_persist());
|
||||
assert_parent_state::<AllocateRegion>(&procedure);
|
||||
|
||||
let allocate_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(allocate_status.need_persist());
|
||||
assert_parent_state::<Dispatch>(&procedure);
|
||||
assert_eq!(procedure.context.persistent_ctx.plans.len(), 1);
|
||||
let plan = &procedure.context.persistent_ctx.plans[0];
|
||||
let expected_plan = test_plan(table_id);
|
||||
assert_eq!(plan.source_regions, expected_plan.source_regions);
|
||||
assert_eq!(plan.target_regions, expected_plan.target_regions);
|
||||
assert_eq!(
|
||||
plan.allocated_region_ids,
|
||||
expected_plan.allocated_region_ids
|
||||
);
|
||||
assert_eq!(
|
||||
plan.pending_deallocate_region_ids,
|
||||
expected_plan.pending_deallocate_region_ids
|
||||
);
|
||||
assert_eq!(plan.transition_map, expected_plan.transition_map);
|
||||
assert_eq!(
|
||||
current_parent_region_routes(&procedure.context).await,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 3),
|
||||
partition_expr: range_expr("x", 50, 100).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(0)),
|
||||
..Default::default()
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let dispatch_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!dispatch_status.need_persist());
|
||||
let subprocedure_ids = extract_subprocedure_ids(dispatch_status);
|
||||
assert_eq!(subprocedure_ids.len(), 1);
|
||||
assert_parent_state::<Collect>(&procedure);
|
||||
|
||||
let failed_state = ProcedureState::failed(Arc::new(ProcedureError::external(
|
||||
MockError::new(StatusCode::Internal),
|
||||
)));
|
||||
let collect_ctx = procedure_context_with_receivers(HashMap::from([(
|
||||
subprocedure_ids[0],
|
||||
procedure_state_receiver(failed_state),
|
||||
)]));
|
||||
|
||||
let err = procedure.execute(&collect_ctx).await.unwrap_err();
|
||||
assert!(!err.is_retry_later());
|
||||
assert_parent_state::<Collect>(&procedure);
|
||||
|
||||
procedure
|
||||
.rollback(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let region_routes = current_parent_region_routes(&procedure.context).await;
|
||||
assert_eq!(
|
||||
region_routes,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repartition_procedure_flow_split_allocate_retryable_then_resume() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let (tx, _rx) = mpsc::channel(8);
|
||||
let should_retry = Arc::new(AtomicBool::new(true));
|
||||
let datanode_handler = DatanodeWatcher::new(tx).with_handler(move |_, _| {
|
||||
if should_retry.swap(false, Ordering::SeqCst) {
|
||||
return Err(error::Error::RetryLater {
|
||||
source: BoxedError::new(
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "retry later",
|
||||
}
|
||||
.build(),
|
||||
),
|
||||
clean_poisons: false,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(api::region::RegionResponse::new(0))
|
||||
});
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
|
||||
|
||||
env.create_physical_table_metadata_for_repartition(
|
||||
table_id,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
],
|
||||
test_region_wal_options(&[1, 2]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let context = new_parent_context(&env, node_manager, table_id);
|
||||
let mut procedure = RepartitionProcedure::new(
|
||||
vec![range_expr("x", 0, 100)],
|
||||
vec![range_expr("x", 0, 50), range_expr("x", 50, 100)],
|
||||
context,
|
||||
);
|
||||
|
||||
let start_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!start_status.need_persist());
|
||||
let start_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(start_status.need_persist());
|
||||
assert_parent_state::<AllocateRegion>(&procedure);
|
||||
|
||||
let err = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(err.is_retry_later());
|
||||
assert_parent_state::<AllocateRegion>(&procedure);
|
||||
assert!(!procedure.context.persistent_ctx.plans.is_empty());
|
||||
assert_eq!(
|
||||
current_parent_region_routes(&procedure.context).await,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
]
|
||||
);
|
||||
|
||||
let allocate_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(allocate_status.need_persist());
|
||||
assert_parent_state::<Dispatch>(&procedure);
|
||||
|
||||
assert_eq!(procedure.context.persistent_ctx.plans.len(), 1);
|
||||
let plan = &procedure.context.persistent_ctx.plans[0];
|
||||
let expected_plan = test_plan(table_id);
|
||||
assert_eq!(plan.source_regions, expected_plan.source_regions);
|
||||
assert_eq!(plan.target_regions, expected_plan.target_regions);
|
||||
assert_eq!(
|
||||
plan.allocated_region_ids,
|
||||
expected_plan.allocated_region_ids
|
||||
);
|
||||
assert_eq!(plan.transition_map, expected_plan.transition_map);
|
||||
assert_eq!(
|
||||
current_parent_region_routes(&procedure.context).await,
|
||||
vec![
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
),
|
||||
test_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
),
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 3),
|
||||
partition_expr: range_expr("x", 50, 100).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(0)),
|
||||
..Default::default()
|
||||
},
|
||||
]
|
||||
);
|
||||
|
||||
let dispatch_status = procedure
|
||||
.execute(&TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!dispatch_status.need_persist());
|
||||
let subprocedure_ids = extract_subprocedure_ids(dispatch_status);
|
||||
assert_eq!(subprocedure_ids.len(), 1);
|
||||
assert_parent_state::<Collect>(&procedure);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,12 +21,11 @@ use common_meta::ddl::create_table::template::{
|
||||
};
|
||||
use common_meta::lock_key::TableLock;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::region_keeper::{MemoryRegionKeeperRef, OperatingRegionGuard};
|
||||
use common_meta::rpc::router::{RegionRoute, operating_leader_regions};
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_procedure::{Context as ProcedureContext, Status};
|
||||
use common_telemetry::info;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use common_telemetry::{debug, info};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::{RegionNumber, TableId};
|
||||
use table::metadata::TableInfo;
|
||||
use table::table_reference::TableReference;
|
||||
@@ -40,14 +39,103 @@ use crate::procedure::repartition::plan::{
|
||||
};
|
||||
use crate::procedure::repartition::{Context, State};
|
||||
|
||||
/// The region-allocation state of the repartition procedure, split into two phases.
///
/// `Deserialize` is implemented by hand (not derived) so that the older
/// serialized form, a bare `plan_entries` list, can still be read.
#[derive(Debug, Clone, Serialize)]
|
||||
pub enum AllocateRegion {
|
||||
/// First phase: converts the allocation plan entries into repartition plans.
Build(BuildPlan),
|
||||
/// Second phase: allocates regions for the plans stored in the persistent context.
Execute(ExecutePlan),
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for AllocateRegion {
|
||||
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
#[derive(Deserialize)]
|
||||
enum CurrentAllocateRegion {
|
||||
Build(BuildPlan),
|
||||
Execute(ExecutePlan),
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LegacyAllocateRegion {
|
||||
plan_entries: Vec<AllocationPlanEntry>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum AllocateRegionRepr {
|
||||
Current(CurrentAllocateRegion),
|
||||
Legacy(LegacyAllocateRegion),
|
||||
}
|
||||
|
||||
match AllocateRegionRepr::deserialize(deserializer)? {
|
||||
AllocateRegionRepr::Current(CurrentAllocateRegion::Build(build_plan)) => {
|
||||
Ok(Self::Build(build_plan))
|
||||
}
|
||||
AllocateRegionRepr::Current(CurrentAllocateRegion::Execute(execute_plan)) => {
|
||||
Ok(Self::Execute(execute_plan))
|
||||
}
|
||||
AllocateRegionRepr::Legacy(legacy) => Ok(Self::Build(BuildPlan {
|
||||
plan_entries: legacy.plan_entries,
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AllocateRegion {
|
||||
pub struct BuildPlan {
|
||||
plan_entries: Vec<AllocationPlanEntry>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for AllocateRegion {
|
||||
impl BuildPlan {
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
_procedure_ctx: &ProcedureContext,
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
let timer = Instant::now();
|
||||
let table_id = ctx.persistent_ctx.table_id;
|
||||
let table_route_value = ctx.get_table_route_value().await?;
|
||||
let mut next_region_number =
|
||||
AllocateRegion::get_next_region_number(table_route_value.max_region_number().unwrap());
|
||||
|
||||
// Converts allocation plan to repartition plan.
|
||||
let repartition_plan_entries = AllocateRegion::convert_to_repartition_plans(
|
||||
table_id,
|
||||
&mut next_region_number,
|
||||
&self.plan_entries,
|
||||
);
|
||||
let plan_count = repartition_plan_entries.len();
|
||||
let to_allocate = AllocateRegion::count_regions_to_allocate(&repartition_plan_entries);
|
||||
info!(
|
||||
"Repartition allocate regions start, table_id: {}, groups: {}, regions_to_allocate: {}",
|
||||
table_id, plan_count, to_allocate
|
||||
);
|
||||
|
||||
// If no region to allocate, directly dispatch the plan.
|
||||
if AllocateRegion::count_regions_to_allocate(&repartition_plan_entries) == 0 {
|
||||
ctx.persistent_ctx.plans = repartition_plan_entries;
|
||||
ctx.update_allocate_region_elapsed(timer.elapsed());
|
||||
return Ok((Box::new(Dispatch), Status::executing(true)));
|
||||
}
|
||||
|
||||
ctx.persistent_ctx.plans = repartition_plan_entries;
|
||||
debug!(
|
||||
"Repartition allocate regions build plan completed, table_id: {}, elapsed: {:?}",
|
||||
table_id,
|
||||
timer.elapsed()
|
||||
);
|
||||
Ok((
|
||||
Box::new(AllocateRegion::Execute(ExecutePlan)),
|
||||
Status::executing(true),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Second phase of `AllocateRegion`. It carries no data of its own; its
/// `next` step reads the plans from `ctx.persistent_ctx.plans`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExecutePlan;
|
||||
|
||||
impl ExecutePlan {
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
@@ -55,36 +143,13 @@ impl State for AllocateRegion {
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
let timer = Instant::now();
|
||||
let table_id = ctx.persistent_ctx.table_id;
|
||||
let allocate_regions = AllocateRegion::collect_allocate_regions(&ctx.persistent_ctx.plans);
|
||||
let region_number_and_partition_exprs =
|
||||
AllocateRegion::prepare_region_allocation_data(&allocate_regions)?;
|
||||
let table_info_value = ctx.get_table_info_value().await?;
|
||||
let table_route_value = ctx.get_table_route_value().await?;
|
||||
// Safety: it is physical table route value.
|
||||
let region_routes = table_route_value.region_routes().unwrap();
|
||||
let mut next_region_number =
|
||||
Self::get_next_region_number(table_route_value.max_region_number().unwrap());
|
||||
|
||||
// Converts allocation plan to repartition plan.
|
||||
let repartition_plan_entries = Self::convert_to_repartition_plans(
|
||||
table_id,
|
||||
&mut next_region_number,
|
||||
&self.plan_entries,
|
||||
);
|
||||
let plan_count = repartition_plan_entries.len();
|
||||
let to_allocate = Self::count_regions_to_allocate(&repartition_plan_entries);
|
||||
info!(
|
||||
"Repartition allocate regions start, table_id: {}, groups: {}, regions_to_allocate: {}",
|
||||
table_id, plan_count, to_allocate
|
||||
);
|
||||
|
||||
// If no region to allocate, directly dispatch the plan.
|
||||
if Self::count_regions_to_allocate(&repartition_plan_entries) == 0 {
|
||||
ctx.persistent_ctx.plans = repartition_plan_entries;
|
||||
ctx.update_allocate_region_elapsed(timer.elapsed());
|
||||
return Ok((Box::new(Dispatch), Status::executing(true)));
|
||||
}
|
||||
|
||||
let allocate_regions = Self::collect_allocate_regions(&repartition_plan_entries);
|
||||
let region_number_and_partition_exprs =
|
||||
Self::prepare_region_allocation_data(&allocate_regions)?;
|
||||
let table_info_value = ctx.get_table_info_value().await?;
|
||||
let new_allocated_region_routes = ctx
|
||||
.region_routes_allocator
|
||||
.allocate(
|
||||
@@ -122,12 +187,13 @@ impl State for AllocateRegion {
|
||||
table_id, new_region_count, new_regions_brief
|
||||
);
|
||||
|
||||
let _operating_guards = Self::register_operating_regions(
|
||||
// The table route metadata is not updated yet; register it in memory for region lease renewal.
|
||||
let _operating_guards = Context::register_operating_regions(
|
||||
&ctx.memory_region_keeper,
|
||||
&new_allocated_region_routes,
|
||||
)?;
|
||||
// Allocates the regions on datanodes.
|
||||
Self::allocate_regions(
|
||||
AllocateRegion::allocate_regions(
|
||||
&ctx.node_manager,
|
||||
&table_info_value.table_info,
|
||||
&new_allocated_region_routes,
|
||||
@@ -135,21 +201,33 @@ impl State for AllocateRegion {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// TODO(weny): for metric engine, sync logical regions from the the central region.
|
||||
|
||||
// Updates the table routes.
|
||||
let table_lock = TableLock::Write(table_id).into();
|
||||
let _guard = procedure_ctx.provider.acquire_lock(&table_lock).await;
|
||||
let new_region_routes =
|
||||
Self::generate_region_routes(region_routes, &new_allocated_region_routes);
|
||||
AllocateRegion::generate_region_routes(region_routes, &new_allocated_region_routes);
|
||||
ctx.update_table_route(&table_route_value, new_region_routes, wal_options)
|
||||
.await?;
|
||||
ctx.invalidate_table_cache().await?;
|
||||
|
||||
ctx.persistent_ctx.plans = repartition_plan_entries;
|
||||
ctx.update_allocate_region_elapsed(timer.elapsed());
|
||||
Ok((Box::new(Dispatch), Status::executing(true)))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[typetag::serde]
|
||||
impl State for AllocateRegion {
|
||||
async fn next(
|
||||
&mut self,
|
||||
ctx: &mut Context,
|
||||
procedure_ctx: &ProcedureContext,
|
||||
) -> Result<(Box<dyn State>, Status)> {
|
||||
match self {
|
||||
AllocateRegion::Build(build_plan) => build_plan.next(ctx, procedure_ctx).await,
|
||||
AllocateRegion::Execute(execute_plan) => execute_plan.next(ctx, procedure_ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -158,24 +236,7 @@ impl State for AllocateRegion {
|
||||
|
||||
impl AllocateRegion {
|
||||
pub fn new(plan_entries: Vec<AllocationPlanEntry>) -> Self {
|
||||
Self { plan_entries }
|
||||
}
|
||||
|
||||
fn register_operating_regions(
|
||||
memory_region_keeper: &MemoryRegionKeeperRef,
|
||||
region_routes: &[RegionRoute],
|
||||
) -> Result<Vec<OperatingRegionGuard>> {
|
||||
let mut operating_guards = Vec::with_capacity(region_routes.len());
|
||||
for (region_id, datanode_id) in operating_leader_regions(region_routes) {
|
||||
let guard = memory_region_keeper
|
||||
.register(datanode_id, region_id)
|
||||
.context(error::RegionOperatingRaceSnafu {
|
||||
peer_id: datanode_id,
|
||||
region_id,
|
||||
})?;
|
||||
operating_guards.push(guard);
|
||||
}
|
||||
Ok(operating_guards)
|
||||
AllocateRegion::Build(BuildPlan { plan_entries })
|
||||
}
|
||||
|
||||
fn generate_region_routes(
|
||||
@@ -300,6 +361,7 @@ mod tests {
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::*;
|
||||
use crate::procedure::repartition::State;
|
||||
use crate::procedure::repartition::test_util::range_expr;
|
||||
|
||||
fn create_region_descriptor(
|
||||
@@ -488,4 +550,71 @@ mod tests {
|
||||
assert!(!result[0].1.is_empty());
|
||||
assert!(!result[1].1.is_empty());
|
||||
}
|
||||
|
||||
/// A state serialized in the legacy layout (a bare `plan_entries` list) must
/// still deserialize, and must come back as the `Build` phase.
#[test]
fn test_allocate_region_state_backward_compatibility() {
    let legacy_json = r#"{"repartition_state":"AllocateRegion","plan_entries":[]}"#;

    let restored: Box<dyn State> = serde_json::from_str(legacy_json).unwrap();

    let allocate_region = restored
        .as_any()
        .downcast_ref::<AllocateRegion>()
        .expect("expected AllocateRegion state");
    let AllocateRegion::Build(build_plan) = allocate_region else {
        panic!("expected build plan")
    };
    assert!(build_plan.plan_entries.is_empty());
}
|
||||
|
||||
/// The `Build` phase serializes to the expected JSON and deserializes back
/// into a `Build` phase with the same (empty) plan entries.
#[test]
fn test_allocate_region_state_round_trip() {
    let original: Box<dyn State> = Box::new(AllocateRegion::new(vec![]));

    let json = serde_json::to_string(&original).unwrap();
    let restored: Box<dyn State> = serde_json::from_str(&json).unwrap();

    assert_eq!(
        json,
        r#"{"repartition_state":"AllocateRegion","Build":{"plan_entries":[]}}"#
    );
    let allocate_region = restored
        .as_any()
        .downcast_ref::<AllocateRegion>()
        .expect("expected AllocateRegion state");
    let AllocateRegion::Build(build_plan) = allocate_region else {
        panic!("expected build plan")
    };
    assert!(build_plan.plan_entries.is_empty());
}
|
||||
|
||||
/// The `Execute` phase serializes to the expected JSON and deserializes back
/// into an `Execute` phase.
#[test]
fn test_allocate_region_execute_state_round_trip() {
    let original: Box<dyn State> = Box::new(AllocateRegion::Execute(ExecutePlan));

    let json = serde_json::to_string(&original).unwrap();
    let restored: Box<dyn State> = serde_json::from_str(&json).unwrap();

    assert_eq!(
        json,
        r#"{"repartition_state":"AllocateRegion","Execute":null}"#
    );
    let allocate_region = restored
        .as_any()
        .downcast_ref::<AllocateRegion>()
        .expect("expected AllocateRegion state");
    if let AllocateRegion::Build(_) = allocate_region {
        panic!("expected execute plan");
    }
}
|
||||
}
|
||||
|
||||
@@ -94,17 +94,28 @@ impl State for Collect {
|
||||
}
|
||||
}
|
||||
|
||||
let inflight = self.inflight_procedures.len();
|
||||
let succeeded = self.succeeded_procedures.len();
|
||||
let failed = self.failed_procedures.len();
|
||||
let unknown = self.unknown_procedures.len();
|
||||
info!(
|
||||
"Collected repartition group results for table_id: {}, inflight: {}, succeeded: {}, failed: {}, unknown: {}",
|
||||
table_id, inflight, succeeded, failed, unknown
|
||||
"Collected repartition group results for table_id: {}, succeeded: {}, failed: {}, unknown: {}",
|
||||
table_id, succeeded, failed, unknown
|
||||
);
|
||||
|
||||
if failed > 0 || unknown > 0 {
|
||||
// TODO(weny): retry the failed or unknown procedures.
|
||||
ctx.persistent_ctx
|
||||
.failed_procedures
|
||||
.extend(self.failed_procedures.iter());
|
||||
ctx.persistent_ctx
|
||||
.unknown_procedures
|
||||
.extend(self.unknown_procedures.iter());
|
||||
return crate::error::UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Repartition groups failed or became unknown, table_id: {}, failed: {}, unknown: {}",
|
||||
table_id, failed, unknown
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
if let Some(start_time) = ctx.volatile_ctx.dispatch_start_time.take() {
|
||||
@@ -118,3 +129,139 @@ impl State for Collect {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_error::mock::MockError;
    use common_error::status_code::StatusCode;
    use common_meta::test_util::MockDatanodeManager;
    use common_procedure::{
        Context as ProcedureContext, ContextProvider, Error as ProcedureError, ProcedureId,
        ProcedureState,
    };
    use common_procedure_test::MockContextProvider;
    use tokio::sync::watch;

    use super::*;
    use crate::procedure::repartition::PersistentContext;
    use crate::procedure::repartition::test_util::TestingEnv;

    /// Context provider that hands out a clone of one fixed state receiver
    /// for every procedure id and delegates everything else to the mock.
    struct FailedProcedureContextProvider {
        receiver: watch::Receiver<ProcedureState>,
        inner: MockContextProvider,
    }

    #[async_trait::async_trait]
    impl ContextProvider for FailedProcedureContextProvider {
        /// Delegates to the wrapped mock provider.
        async fn procedure_state(
            &self,
            procedure_id: ProcedureId,
        ) -> common_procedure::Result<Option<ProcedureState>> {
            self.inner.procedure_state(procedure_id).await
        }

        /// Ignores the id and always returns a clone of the stored receiver.
        async fn procedure_state_receiver(
            &self,
            _procedure_id: ProcedureId,
        ) -> common_procedure::Result<Option<watch::Receiver<ProcedureState>>> {
            Ok(Some(self.receiver.clone()))
        }

        /// Delegates to the wrapped mock provider.
        async fn try_put_poison(
            &self,
            key: &common_procedure::PoisonKey,
            procedure_id: ProcedureId,
        ) -> common_procedure::Result<()> {
            self.inner.try_put_poison(key, procedure_id).await
        }

        /// Delegates to the wrapped mock provider.
        async fn acquire_lock(
            &self,
            key: &common_procedure::StringKey,
        ) -> common_procedure::local::DynamicKeyLockGuard {
            self.inner.acquire_lock(key).await
        }
    }

    /// A `Collect` state that already holds an unknown procedure must fail
    /// with a non-retryable error.
    #[tokio::test]
    async fn test_collect_returns_error_when_unknown_exists() {
        let env = TestingEnv::new();
        let ddl_ctx = env.ddl_context(Arc::new(MockDatanodeManager::new(())));
        let table_name =
            table::table_name::TableName::new("test_catalog", "test_schema", "test_table");
        let persistent_ctx = PersistentContext::new(table_name, 1024, None);
        let mut ctx = crate::procedure::repartition::Context::new(
            &ddl_ctx,
            env.mailbox_ctx.mailbox().clone(),
            env.server_addr.clone(),
            persistent_ctx,
        );

        let unknown = ProcedureMeta {
            plan_index: 0,
            group_id: uuid::Uuid::new_v4(),
            procedure_id: ProcedureId::random(),
        };
        let mut collect = Collect {
            inflight_procedures: vec![],
            succeeded_procedures: vec![],
            failed_procedures: vec![],
            unknown_procedures: vec![unknown],
        };

        let result = collect
            .next(&mut ctx, &TestingEnv::procedure_context())
            .await;

        assert!(!result.unwrap_err().is_retryable());
    }

    /// An in-flight procedure whose watched state is `failed` must be counted
    /// as failed (not unknown) and produce a non-retryable error.
    #[tokio::test]
    async fn test_collect_returns_error_when_failed_exists() {
        let env = TestingEnv::new();
        let ddl_ctx = env.ddl_context(Arc::new(MockDatanodeManager::new(())));
        let table_name =
            table::table_name::TableName::new("test_catalog", "test_schema", "test_table");
        let persistent_ctx = PersistentContext::new(table_name, 1024, None);
        let mut ctx = crate::procedure::repartition::Context::new(
            &ddl_ctx,
            env.mailbox_ctx.mailbox().clone(),
            env.server_addr.clone(),
            persistent_ctx,
        );

        let procedure_id = ProcedureId::random();
        // Keep the sender alive for the whole test so the channel stays open.
        let (state_tx, state_rx) = watch::channel(ProcedureState::Running);
        let failure = ProcedureError::external(MockError::new(StatusCode::Internal));
        state_tx
            .send(ProcedureState::failed(Arc::new(failure)))
            .unwrap();

        let procedure_ctx = ProcedureContext {
            procedure_id: ProcedureId::random(),
            provider: Arc::new(FailedProcedureContextProvider {
                receiver: state_rx,
                inner: MockContextProvider::default(),
            }),
        };
        let inflight = ProcedureMeta {
            plan_index: 0,
            group_id: uuid::Uuid::new_v4(),
            procedure_id,
        };
        let mut collect = Collect {
            inflight_procedures: vec![inflight],
            succeeded_procedures: vec![],
            failed_procedures: vec![],
            unknown_procedures: vec![],
        };

        let err = collect.next(&mut ctx, &procedure_ctx).await.unwrap_err();

        assert_eq!(collect.failed_procedures.len(), 1);
        assert_eq!(collect.unknown_procedures.len(), 0);
        assert!(!err.is_retryable());
    }
}
|
||||
|
||||
@@ -88,7 +88,8 @@ impl State for DeallocateRegion {
|
||||
&ctx.persistent_ctx.schema_name,
|
||||
&ctx.persistent_ctx.table_name,
|
||||
);
|
||||
// Deallocates the regions on datanodes.
|
||||
// Memory guards are not required here,
|
||||
// because the table metadata still contains routes for the deallocating regions.
|
||||
Self::deallocate_regions(
|
||||
&ctx.node_manager,
|
||||
&ctx.leader_region_registry,
|
||||
@@ -116,7 +117,7 @@ impl State for DeallocateRegion {
|
||||
}
|
||||
|
||||
impl DeallocateRegion {
|
||||
async fn deallocate_regions(
|
||||
pub(crate) async fn deallocate_regions(
|
||||
node_manager: &NodeManagerRef,
|
||||
leader_region_registry: &LeaderRegionRegistryRef,
|
||||
table: TableName,
|
||||
@@ -141,7 +142,7 @@ impl DeallocateRegion {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn filter_deallocatable_region_routes(
|
||||
pub(crate) fn filter_deallocatable_region_routes(
|
||||
table_id: TableId,
|
||||
region_routes: &[RegionRoute],
|
||||
pending_deallocate_region_ids: &HashSet<RegionId>,
|
||||
@@ -165,7 +166,7 @@ impl DeallocateRegion {
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn generate_region_routes(
|
||||
pub(crate) fn generate_region_routes(
|
||||
region_routes: &[RegionRoute],
|
||||
pending_deallocate_region_ids: &HashSet<RegionId>,
|
||||
) -> Vec<RegionRoute> {
|
||||
@@ -181,12 +182,21 @@ impl DeallocateRegion {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_meta::ddl::test_util::datanode_handler::RetryErrorDatanodeHandler;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_meta::test_util::MockDatanodeManager;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::procedure::repartition::State;
|
||||
use crate::procedure::repartition::deallocate_region::DeallocateRegion;
|
||||
use crate::procedure::repartition::plan::RepartitionPlanEntry;
|
||||
use crate::procedure::repartition::test_util::{
|
||||
TestingEnv, current_parent_region_routes, new_parent_context,
|
||||
};
|
||||
|
||||
fn test_region_routes(table_id: TableId) -> Vec<RegionRoute> {
|
||||
vec![
|
||||
@@ -238,4 +248,36 @@ mod tests {
|
||||
assert_eq!(new_region_routes.len(), 1);
|
||||
assert_eq!(new_region_routes[0].region.id, RegionId::new(table_id, 2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_next_retryable_when_deallocate_regions_retry_later() {
|
||||
let env = TestingEnv::new();
|
||||
let table_id = 1024;
|
||||
let original_routes = test_region_routes(table_id);
|
||||
|
||||
env.create_physical_table_metadata(table_id, original_routes.clone())
|
||||
.await;
|
||||
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
|
||||
let mut ctx = new_parent_context(&env, node_manager, table_id);
|
||||
ctx.persistent_ctx.plans = vec![RepartitionPlanEntry {
|
||||
group_id: uuid::Uuid::new_v4(),
|
||||
source_regions: vec![],
|
||||
target_regions: vec![],
|
||||
allocated_region_ids: vec![],
|
||||
pending_deallocate_region_ids: vec![RegionId::new(table_id, 1)],
|
||||
transition_map: vec![],
|
||||
}];
|
||||
|
||||
let mut state = DeallocateRegion;
|
||||
|
||||
let err = state
|
||||
.next(&mut ctx, &TestingEnv::procedure_context())
|
||||
.await
|
||||
.unwrap_err();
|
||||
|
||||
assert!(matches!(err, Error::DeallocateRegions { .. }));
|
||||
assert!(err.is_retryable());
|
||||
assert_eq!(current_parent_region_routes(&ctx).await, original_routes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ use crate::procedure::repartition::{self, Context, State};
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Dispatch;
|
||||
|
||||
fn build_region_mapping(
|
||||
pub(crate) fn build_region_mapping(
|
||||
source_regions: &[RegionDescriptor],
|
||||
target_regions: &[RegionDescriptor],
|
||||
transition_map: &[Vec<usize>],
|
||||
@@ -106,7 +106,11 @@ impl State for Dispatch {
|
||||
|
||||
Ok((
|
||||
Box::new(Collect::new(procedure_metas)),
|
||||
Status::suspended(procedures, true),
|
||||
// The state is not persisted after sub-procedures are spawned.
|
||||
// If metasrv restarts before all sub-procedures complete,
|
||||
// it restores from the `Dispatch` state and re-dispatches them.
|
||||
// This is safe because the sub-procedures are idempotent.
|
||||
Status::suspended(procedures, false),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
@@ -41,14 +41,18 @@ use common_procedure::{
|
||||
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure,
|
||||
Result as ProcedureResult, Status, StringKey, UserMetadata,
|
||||
};
|
||||
use common_telemetry::{error, info};
|
||||
use common_telemetry::{error, info, warn};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::repartition::group::apply_staging_manifest::ApplyStagingManifest;
|
||||
use crate::procedure::repartition::group::enter_staging_region::EnterStagingRegion;
|
||||
use crate::procedure::repartition::group::remap_manifest::RemapManifest;
|
||||
use crate::procedure::repartition::group::repartition_start::RepartitionStart;
|
||||
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
use crate::procedure::repartition::utils::get_datanode_table_value;
|
||||
use crate::procedure::repartition::{self};
|
||||
@@ -192,6 +196,62 @@ impl RepartitionGroupProcedure {
|
||||
|
||||
Ok(Self { state, context })
|
||||
}
|
||||
|
||||
async fn rollback_inner(&mut self, procedure_ctx: &ProcedureContext) -> Result<()> {
|
||||
if !self.should_rollback_metadata() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_lock =
|
||||
common_meta::lock_key::TableLock::Write(self.context.persistent_ctx.table_id).into();
|
||||
let _guard = procedure_ctx.provider.acquire_lock(&table_lock).await;
|
||||
UpdateMetadata::RollbackStaging
|
||||
.rollback_staging_regions(&mut self.context)
|
||||
.await?;
|
||||
|
||||
if let Err(err) = self.context.invalidate_table_cache().await {
|
||||
warn!(
|
||||
err;
|
||||
"Failed to broadcast the invalidate table cache message during repartition group rollback"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns whether group rollback should revert staging metadata.
|
||||
///
|
||||
/// This uses an "after metadata apply, before exit staging" semantic.
|
||||
/// Once execution reaches `UpdateMetadata::ApplyStaging` or any later staging state,
|
||||
/// rollback must restore table-route metadata back to the pre-apply view.
|
||||
///
|
||||
/// State flow:
|
||||
/// `RepartitionStart -> SyncRegion -> UpdateMetadata::ApplyStaging -> EnterStagingRegion`
|
||||
/// ` -> RemapManifest -> ApplyStagingManifest -> UpdateMetadata::ExitStaging -> RepartitionEnd`
|
||||
/// ` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^`
|
||||
/// ` rollback staging metadata`
|
||||
///
|
||||
/// Notes:
|
||||
/// - `RepartitionStart` / `SyncRegion`: no-op, metadata has not been staged yet.
|
||||
/// - `UpdateMetadata::ApplyStaging` / `EnterStagingRegion` / `RemapManifest` /
|
||||
/// `ApplyStagingManifest` / `UpdateMetadata::RollbackStaging`: rollback-active.
|
||||
/// - `UpdateMetadata::ExitStaging` / `RepartitionEnd`: excluded, because metadata has
|
||||
/// already moved into the post-commit exit path.
|
||||
fn should_rollback_metadata(&self) -> bool {
|
||||
self.state.as_any().is::<EnterStagingRegion>()
|
||||
|| self.state.as_any().is::<RemapManifest>()
|
||||
|| self.state.as_any().is::<ApplyStagingManifest>()
|
||||
|| self
|
||||
.state
|
||||
.as_any()
|
||||
.downcast_ref::<UpdateMetadata>()
|
||||
.is_some_and(|state| {
|
||||
matches!(
|
||||
state,
|
||||
UpdateMetadata::ApplyStaging | UpdateMetadata::RollbackStaging
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -200,6 +260,12 @@ impl Procedure for RepartitionGroupProcedure {
|
||||
Self::TYPE_NAME
|
||||
}
|
||||
|
||||
async fn rollback(&mut self, ctx: &ProcedureContext) -> ProcedureResult<()> {
|
||||
self.rollback_inner(ctx)
|
||||
.await
|
||||
.map_err(ProcedureError::external)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all, fields(
|
||||
state = %self.state.name(),
|
||||
table_id = self.context.persistent_ctx.table_id,
|
||||
@@ -238,7 +304,7 @@ impl Procedure for RepartitionGroupProcedure {
|
||||
}
|
||||
|
||||
fn rollback_supported(&self) -> bool {
|
||||
false
|
||||
true
|
||||
}
|
||||
|
||||
fn dump(&self) -> ProcedureResult<String> {
|
||||
@@ -304,7 +370,7 @@ impl Context {
|
||||
pub struct GroupPrepareResult {
|
||||
/// The validated source region routes.
|
||||
pub source_routes: Vec<RegionRoute>,
|
||||
/// The validated target region routes.
|
||||
/// Validated target region routes used for metadata rollback (logical rollback).
|
||||
pub target_routes: Vec<RegionRoute>,
|
||||
/// The primary source region id (first source region), used for retrieving region options.
|
||||
pub central_region: RegionId,
|
||||
@@ -599,12 +665,149 @@ pub(crate) trait State: Sync + Send + Debug {
|
||||
mod tests {
|
||||
use std::assert_matches;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::key::TableMetadataManager;
|
||||
use common_meta::kv_backend::test_util::MockKvBackendBuilder;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use partition::expr::PartitionExpr;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::{
|
||||
Context, PersistentContext, RepartitionGroupProcedure, RepartitionStart, State,
|
||||
region_routes,
|
||||
};
|
||||
use crate::error::Error;
|
||||
use crate::procedure::repartition::test_util::{TestingEnv, new_persistent_context};
|
||||
use crate::procedure::repartition::dispatch::build_region_mapping;
|
||||
use crate::procedure::repartition::group::apply_staging_manifest::ApplyStagingManifest;
|
||||
use crate::procedure::repartition::group::enter_staging_region::EnterStagingRegion;
|
||||
use crate::procedure::repartition::group::remap_manifest::RemapManifest;
|
||||
use crate::procedure::repartition::group::repartition_start::RepartitionStart as GroupRepartitionStart;
|
||||
use crate::procedure::repartition::group::sync_region::SyncRegion;
|
||||
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::repartition::plan;
|
||||
use crate::procedure::repartition::repartition_start::RepartitionStart as ParentRepartitionStart;
|
||||
use crate::procedure::repartition::test_util::{
|
||||
TestingEnv, new_persistent_context, range_expr,
|
||||
};
|
||||
|
||||
struct GroupRollbackFixture {
|
||||
context: Context,
|
||||
original_region_routes: Vec<RegionRoute>,
|
||||
next_state: Option<Box<dyn State>>,
|
||||
}
|
||||
|
||||
async fn new_group_rollback_fixture(
|
||||
original_region_routes: Vec<RegionRoute>,
|
||||
from_exprs: Vec<PartitionExpr>,
|
||||
to_exprs: Vec<PartitionExpr>,
|
||||
sync_region: bool,
|
||||
) -> GroupRollbackFixture {
|
||||
let env = TestingEnv::new();
|
||||
let procedure_ctx = TestingEnv::procedure_context();
|
||||
let table_id = 1024;
|
||||
let mut next_region_number = 10;
|
||||
|
||||
env.create_physical_table_metadata(table_id, original_region_routes.clone())
|
||||
.await;
|
||||
|
||||
let (_, physical_route) = env
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_route(table_id)
|
||||
.await
|
||||
.unwrap();
|
||||
let allocation_plans =
|
||||
ParentRepartitionStart::build_plan(&physical_route, &from_exprs, &to_exprs).unwrap();
|
||||
assert_eq!(allocation_plans.len(), 1);
|
||||
|
||||
let repartition_plan = plan::convert_allocation_plan_to_repartition_plan(
|
||||
table_id,
|
||||
&mut next_region_number,
|
||||
&allocation_plans[0],
|
||||
);
|
||||
let region_mapping = build_region_mapping(
|
||||
&repartition_plan.source_regions,
|
||||
&repartition_plan.target_regions,
|
||||
&repartition_plan.transition_map,
|
||||
);
|
||||
let persistent_context = PersistentContext::new(
|
||||
repartition_plan.group_id,
|
||||
table_id,
|
||||
"test_catalog".to_string(),
|
||||
"test_schema".to_string(),
|
||||
repartition_plan.source_regions,
|
||||
repartition_plan.target_regions,
|
||||
region_mapping,
|
||||
sync_region,
|
||||
repartition_plan.allocated_region_ids,
|
||||
repartition_plan.pending_deallocate_region_ids,
|
||||
Duration::from_secs(120),
|
||||
);
|
||||
let mut context = env.create_context(persistent_context);
|
||||
let (next_state, _) = GroupRepartitionStart
|
||||
.next(&mut context, &procedure_ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
GroupRollbackFixture {
|
||||
context,
|
||||
original_region_routes,
|
||||
next_state: Some(next_state),
|
||||
}
|
||||
}
|
||||
|
||||
async fn new_split_group_rollback_fixture(sync_region: bool) -> GroupRollbackFixture {
|
||||
new_group_rollback_fixture(
|
||||
vec![
|
||||
new_region_route(RegionId::new(1024, 1), Some(range_expr("x", 0, 100))),
|
||||
new_region_route(RegionId::new(1024, 2), Some(range_expr("x", 100, 200))),
|
||||
new_region_route(RegionId::new(1024, 10), None),
|
||||
],
|
||||
vec![range_expr("x", 0, 100)],
|
||||
vec![range_expr("x", 0, 50), range_expr("x", 50, 100)],
|
||||
sync_region,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn new_merge_group_rollback_fixture(sync_region: bool) -> GroupRollbackFixture {
|
||||
new_group_rollback_fixture(
|
||||
vec![
|
||||
new_region_route(RegionId::new(1024, 1), Some(range_expr("x", 0, 100))),
|
||||
new_region_route(RegionId::new(1024, 2), Some(range_expr("x", 100, 200))),
|
||||
new_region_route(RegionId::new(1024, 3), Some(range_expr("x", 200, 300))),
|
||||
],
|
||||
vec![range_expr("x", 0, 100), range_expr("x", 100, 200)],
|
||||
vec![range_expr("x", 0, 200)],
|
||||
sync_region,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn stage_metadata(context: &mut Context) {
|
||||
UpdateMetadata::ApplyStaging
|
||||
.apply_staging_regions(context)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn new_region_route(region_id: RegionId, partition_expr: Option<PartitionExpr>) -> RegionRoute {
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: region_id,
|
||||
partition_expr: partition_expr
|
||||
.map(|expr| expr.as_json_str().unwrap())
|
||||
.unwrap_or_default(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_get_table_route_value_not_found_error() {
|
||||
@@ -653,4 +856,198 @@ mod tests {
|
||||
let err = ctx.get_datanode_table_value(1024, 1).await.unwrap_err();
|
||||
assert!(err.is_retryable());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_supported() {
|
||||
let env = TestingEnv::new();
|
||||
let persistent_context = new_persistent_context(1024, vec![], vec![]);
|
||||
let procedure = RepartitionGroupProcedure {
|
||||
state: Box::new(RepartitionStart),
|
||||
context: env.create_context(persistent_context),
|
||||
};
|
||||
|
||||
assert!(procedure.rollback_supported());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_is_noop_before_apply_staging() {
|
||||
let env = TestingEnv::new();
|
||||
let persistent_context = new_persistent_context(1024, vec![], vec![]);
|
||||
let ctx = env.create_context(persistent_context.clone());
|
||||
let mut procedure = RepartitionGroupProcedure {
|
||||
state: Box::new(RepartitionStart),
|
||||
context: ctx,
|
||||
};
|
||||
let provider = Arc::new(MockContextProvider::new(Default::default()));
|
||||
let procedure_ctx = ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider,
|
||||
};
|
||||
|
||||
procedure.rollback(&procedure_ctx).await.unwrap();
|
||||
|
||||
assert!(procedure.state.as_any().is::<RepartitionStart>());
|
||||
assert_eq!(procedure.context.persistent_ctx, persistent_context);
|
||||
}
|
||||
|
||||
async fn assert_noop_rollback(
|
||||
fixture: GroupRollbackFixture,
|
||||
state: Box<dyn State>,
|
||||
assert_state: impl FnOnce(&dyn State),
|
||||
) {
|
||||
let original_region_routes = fixture.original_region_routes.clone();
|
||||
let procedure_ctx = TestingEnv::procedure_context();
|
||||
let mut procedure = RepartitionGroupProcedure {
|
||||
state,
|
||||
context: fixture.context,
|
||||
};
|
||||
|
||||
procedure.rollback(&procedure_ctx).await.unwrap();
|
||||
|
||||
assert_state(&*procedure.state);
|
||||
let table_route_value = procedure
|
||||
.context
|
||||
.get_table_route_value()
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
let region_routes = region_routes(
|
||||
procedure.context.persistent_ctx.table_id,
|
||||
&table_route_value,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(region_routes.clone(), original_region_routes);
|
||||
}
|
||||
|
||||
async fn assert_metadata_rollback_restores_table_route(
|
||||
mut fixture: GroupRollbackFixture,
|
||||
state: Box<dyn State>,
|
||||
) {
|
||||
let original_region_routes = fixture.original_region_routes.clone();
|
||||
let procedure_ctx = TestingEnv::procedure_context();
|
||||
stage_metadata(&mut fixture.context).await;
|
||||
let mut procedure = RepartitionGroupProcedure {
|
||||
state,
|
||||
context: fixture.context,
|
||||
};
|
||||
|
||||
procedure.rollback(&procedure_ctx).await.unwrap();
|
||||
|
||||
let table_route_value = procedure
|
||||
.context
|
||||
.get_table_route_value()
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
let region_routes = region_routes(
|
||||
procedure.context.persistent_ctx.table_id,
|
||||
&table_route_value,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(region_routes.clone(), original_region_routes);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_is_noop_in_sync_region() {
|
||||
let mut fixture = new_split_group_rollback_fixture(true).await;
|
||||
assert!(
|
||||
fixture
|
||||
.next_state
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.is::<SyncRegion>()
|
||||
);
|
||||
let state = fixture.next_state.take().unwrap();
|
||||
|
||||
assert_noop_rollback(fixture, state, |state| {
|
||||
assert!(state.as_any().is::<SyncRegion>());
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_is_noop_in_exit_staging() {
|
||||
let fixture = new_split_group_rollback_fixture(false).await;
|
||||
|
||||
assert_noop_rollback(fixture, Box::new(UpdateMetadata::ExitStaging), |state| {
|
||||
assert!(state.as_any().is::<UpdateMetadata>());
|
||||
assert!(matches!(
|
||||
state.as_any().downcast_ref::<UpdateMetadata>(),
|
||||
Some(UpdateMetadata::ExitStaging)
|
||||
));
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_restores_split_routes_from_apply_staging() {
|
||||
let fixture = new_split_group_rollback_fixture(false).await;
|
||||
assert_metadata_rollback_restores_table_route(
|
||||
fixture,
|
||||
Box::new(UpdateMetadata::ApplyStaging),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_restores_split_routes_from_enter_staging_region() {
|
||||
let fixture = new_split_group_rollback_fixture(false).await;
|
||||
assert_metadata_rollback_restores_table_route(fixture, Box::new(EnterStagingRegion)).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_restores_split_routes_from_remap_manifest() {
|
||||
let fixture = new_split_group_rollback_fixture(false).await;
|
||||
assert_metadata_rollback_restores_table_route(fixture, Box::new(RemapManifest)).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_restores_split_routes_from_apply_staging_manifest() {
|
||||
let fixture = new_split_group_rollback_fixture(false).await;
|
||||
assert_metadata_rollback_restores_table_route(fixture, Box::new(ApplyStagingManifest))
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_group_rollback_restores_merge_routes_and_is_idempotent() {
|
||||
let mut fixture = new_merge_group_rollback_fixture(false).await;
|
||||
let original_region_routes = fixture.original_region_routes.clone();
|
||||
let procedure_ctx = TestingEnv::procedure_context();
|
||||
stage_metadata(&mut fixture.context).await;
|
||||
let mut procedure = RepartitionGroupProcedure {
|
||||
state: Box::new(UpdateMetadata::ApplyStaging),
|
||||
context: fixture.context,
|
||||
};
|
||||
|
||||
procedure.rollback(&procedure_ctx).await.unwrap();
|
||||
let table_route_value = procedure
|
||||
.context
|
||||
.get_table_route_value()
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
let once = region_routes(
|
||||
procedure.context.persistent_ctx.table_id,
|
||||
&table_route_value,
|
||||
)
|
||||
.unwrap()
|
||||
.clone();
|
||||
procedure.rollback(&procedure_ctx).await.unwrap();
|
||||
let table_route_value = procedure
|
||||
.context
|
||||
.get_table_route_value()
|
||||
.await
|
||||
.unwrap()
|
||||
.into_inner();
|
||||
let twice = region_routes(
|
||||
procedure.context.persistent_ctx.table_id,
|
||||
&table_route_value,
|
||||
)
|
||||
.unwrap()
|
||||
.clone();
|
||||
|
||||
assert_eq!(once, original_region_routes);
|
||||
assert_eq!(once, twice);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -332,7 +332,14 @@ impl ApplyStagingManifest {
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
Err(error::Error::MailboxChannelClosed {..})=> error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Mailbox closed when sending apply staging manifests to datanode {:?}, elapsed: {:?}",
|
||||
peer,
|
||||
now.elapsed()
|
||||
),
|
||||
}.fail()?,
|
||||
Err(error::Error::MailboxTimeout { .. }) => {
|
||||
let reason = format!(
|
||||
"Mailbox received timeout for apply staging manifests on datanode {:?}, elapsed: {:?}",
|
||||
|
||||
@@ -315,7 +315,14 @@ impl EnterStagingRegion {
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
Err(error::Error::MailboxChannelClosed {..})=> error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Mailbox closed when sending enter staging regions to datanode {:?}, elapsed: {:?}",
|
||||
peer,
|
||||
now.elapsed()
|
||||
),
|
||||
}.fail()?,
|
||||
Err(error::Error::MailboxTimeout { .. }) => {
|
||||
let reason = format!(
|
||||
"Mailbox received timeout for enter staging regions on datanode {:?}, elapsed: {:?}",
|
||||
|
||||
@@ -184,6 +184,14 @@ impl RemapManifest {
|
||||
|
||||
Self::handle_remap_manifest_reply(remap.region_id, reply, &now, peer)
|
||||
}
|
||||
Err(error::Error::MailboxChannelClosed { .. }) => error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Mailbox closed when sending remap manifests to datanode {:?}, elapsed: {:?}",
|
||||
peer,
|
||||
now.elapsed()
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
Err(error::Error::MailboxTimeout { .. }) => {
|
||||
let reason = format!(
|
||||
"Mailbox received timeout for remap manifests on datanode {:?}, elapsed: {:?}",
|
||||
|
||||
@@ -273,6 +273,14 @@ impl SyncRegion {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(error::Error::MailboxChannelClosed { .. }) => error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Mailbox closed when sending sync region to datanode {:?}, elapsed: {:?}",
|
||||
peer,
|
||||
now.elapsed()
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
Err(error::Error::MailboxTimeout { .. }) => {
|
||||
let reason = format!(
|
||||
"Mailbox received timeout for sync regions on datanode {:?}, elapsed: {:?}",
|
||||
|
||||
@@ -30,7 +30,7 @@ impl UpdateMetadata {
|
||||
/// Abort:
|
||||
/// - Target region not found.
|
||||
/// - Source region not found.
|
||||
fn apply_staging_region_routes(
|
||||
pub(crate) fn apply_staging_region_routes(
|
||||
group_id: GroupId,
|
||||
sources: &[RegionDescriptor],
|
||||
targets: &[RegionDescriptor],
|
||||
@@ -50,10 +50,12 @@ impl UpdateMetadata {
|
||||
region_id: target.region_id,
|
||||
},
|
||||
)?;
|
||||
// Set the new partition expression for the target region route.
|
||||
region_route.region.partition_expr = target
|
||||
.partition_expr
|
||||
.as_json_str()
|
||||
.context(error::SerializePartitionExprSnafu)?;
|
||||
// Set leader staging state and write route policy for the target region route.
|
||||
region_route.set_leader_staging();
|
||||
region_route.clear_ignore_all_writes();
|
||||
}
|
||||
@@ -65,6 +67,7 @@ impl UpdateMetadata {
|
||||
region_id: source.region_id,
|
||||
},
|
||||
)?;
|
||||
// Set leader staging state for the source region route.
|
||||
region_route.set_leader_staging();
|
||||
if pending_deallocate_region_ids.contains(&source.region_id) {
|
||||
// When a region is pending deallocation, it should ignore all writes.
|
||||
|
||||
@@ -18,10 +18,12 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_telemetry::{error, info};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::repartition::group::{Context, GroupId, region_routes};
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
|
||||
impl UpdateMetadata {
|
||||
/// Rolls back the staging regions.
|
||||
@@ -31,8 +33,9 @@ impl UpdateMetadata {
|
||||
/// - Target region not found.
|
||||
fn rollback_staging_region_routes(
|
||||
group_id: GroupId,
|
||||
source_routes: &[RegionRoute],
|
||||
target_routes: &[RegionRoute],
|
||||
sources: &[RegionDescriptor],
|
||||
original_target_routes: &[RegionRoute],
|
||||
pending_deallocate_region_ids: &[RegionId],
|
||||
current_region_routes: &[RegionRoute],
|
||||
) -> Result<Vec<RegionRoute>> {
|
||||
let mut region_routes = current_region_routes.to_vec();
|
||||
@@ -40,26 +43,35 @@ impl UpdateMetadata {
|
||||
.iter_mut()
|
||||
.map(|route| (route.region.id, route))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
for source in source_routes {
|
||||
let region_route = region_routes_map.get_mut(&source.region.id).context(
|
||||
for source in sources {
|
||||
let region_route = region_routes_map.get_mut(&source.region_id).context(
|
||||
error::RepartitionSourceRegionMissingSnafu {
|
||||
group_id,
|
||||
region_id: source.region.id,
|
||||
region_id: source.region_id,
|
||||
},
|
||||
)?;
|
||||
region_route.region.partition_expr = source.region.partition_expr.clone();
|
||||
// Clean leader staging state for source regions.
|
||||
region_route.clear_leader_staging();
|
||||
region_route.clear_ignore_all_writes();
|
||||
if pending_deallocate_region_ids.contains(&source.region_id) {
|
||||
// Clean ignore all writes state for source regions if it's pending to be deallocated,
|
||||
// which means the source region is merged into the target region.
|
||||
region_route.clear_ignore_all_writes();
|
||||
}
|
||||
}
|
||||
|
||||
for target in target_routes {
|
||||
for target in original_target_routes {
|
||||
let region_route = region_routes_map.get_mut(&target.region.id).context(
|
||||
error::RepartitionTargetRegionMissingSnafu {
|
||||
group_id,
|
||||
region_id: target.region.id,
|
||||
},
|
||||
)?;
|
||||
|
||||
// Revert the partition expression and write route policy to the original value for the target region.
|
||||
region_route.region.partition_expr = target.region.partition_expr.clone();
|
||||
region_route.write_route_policy = target.write_route_policy;
|
||||
|
||||
// Clean leader staging state for target regions.
|
||||
region_route.clear_leader_staging();
|
||||
}
|
||||
|
||||
@@ -83,8 +95,9 @@ impl UpdateMetadata {
|
||||
let prepare_result = ctx.persistent_ctx.group_prepare_result.as_ref().unwrap();
|
||||
let new_region_routes = Self::rollback_staging_region_routes(
|
||||
group_id,
|
||||
&prepare_result.source_routes,
|
||||
&ctx.persistent_ctx.sources,
|
||||
&prepare_result.target_routes,
|
||||
&ctx.persistent_ctx.pending_deallocate_region_ids,
|
||||
region_routes,
|
||||
)?;
|
||||
|
||||
@@ -113,87 +126,176 @@ impl UpdateMetadata {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{LeaderState, Region, RegionRoute};
|
||||
use store_api::storage::RegionId;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::procedure::repartition::group::update_metadata::UpdateMetadata;
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
use crate::procedure::repartition::test_util::range_expr;
|
||||
|
||||
fn new_region_route(
|
||||
region_id: RegionId,
|
||||
partition_expr: &str,
|
||||
leader_state: Option<LeaderState>,
|
||||
ignore_all_writes: bool,
|
||||
) -> RegionRoute {
|
||||
let mut route = RegionRoute {
|
||||
region: Region {
|
||||
id: region_id,
|
||||
partition_expr: partition_expr.to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
leader_state,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
if ignore_all_writes {
|
||||
route.set_ignore_all_writes();
|
||||
}
|
||||
|
||||
route
|
||||
}
|
||||
|
||||
fn original_target_routes(
|
||||
region_routes: &[RegionRoute],
|
||||
targets: &[RegionDescriptor],
|
||||
) -> Vec<RegionRoute> {
|
||||
let target_ids = targets
|
||||
.iter()
|
||||
.map(|target| target.region_id)
|
||||
.collect::<HashSet<_>>();
|
||||
region_routes
|
||||
.iter()
|
||||
.filter(|route| target_ids.contains(&route.region.id))
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_rollback_staging_region_routes() {
|
||||
fn test_rollback_staging_region_routes_split_case() {
|
||||
let group_id = Uuid::new_v4();
|
||||
let table_id = 1024;
|
||||
let region_routes = vec![
|
||||
{
|
||||
let mut route = RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
leader_state: Some(LeaderState::Staging),
|
||||
..Default::default()
|
||||
};
|
||||
route.set_ignore_all_writes();
|
||||
route
|
||||
let original_region_routes = vec![
|
||||
new_region_route(
|
||||
RegionId::new(table_id, 1),
|
||||
&range_expr("x", 0, 100).as_json_str().unwrap(),
|
||||
None,
|
||||
false,
|
||||
),
|
||||
new_region_route(
|
||||
RegionId::new(table_id, 2),
|
||||
&range_expr("x", 100, 200).as_json_str().unwrap(),
|
||||
None,
|
||||
false,
|
||||
),
|
||||
new_region_route(RegionId::new(table_id, 3), "", None, false),
|
||||
];
|
||||
let sources = vec![RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 100),
|
||||
}];
|
||||
let targets = vec![
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 50),
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 2),
|
||||
partition_expr: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
leader_state: Some(LeaderState::Staging),
|
||||
..Default::default()
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 3),
|
||||
partition_expr: String::new(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
leader_state: Some(LeaderState::Downgrading),
|
||||
..Default::default()
|
||||
RegionDescriptor {
|
||||
region_id: RegionId::new(table_id, 3),
|
||||
partition_expr: range_expr("x", 50, 100),
|
||||
},
|
||||
];
|
||||
let source_routes = vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 1),
|
||||
partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}];
|
||||
let target_routes = vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 2),
|
||||
partition_expr: range_expr("x", 0, 20).as_json_str().unwrap(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}];
|
||||
let new_region_routes = UpdateMetadata::rollback_staging_region_routes(
|
||||
let applied_region_routes = UpdateMetadata::apply_staging_region_routes(
|
||||
group_id,
|
||||
&source_routes,
|
||||
&target_routes,
|
||||
®ion_routes,
|
||||
&sources,
|
||||
&targets,
|
||||
&[],
|
||||
&original_region_routes,
|
||||
)
|
||||
.unwrap();
|
||||
assert!(!new_region_routes[0].is_leader_staging());
|
||||
assert!(!new_region_routes[0].is_ignore_all_writes());
|
||||
assert_eq!(
|
||||
new_region_routes[0].region.partition_expr,
|
||||
range_expr("x", 0, 20).as_json_str().unwrap(),
|
||||
);
|
||||
assert!(!new_region_routes[1].is_leader_staging());
|
||||
assert!(!new_region_routes[1].is_ignore_all_writes());
|
||||
assert!(new_region_routes[2].is_leader_downgrading());
|
||||
let target_routes = original_target_routes(&original_region_routes, &targets);
|
||||
let new_region_routes = UpdateMetadata::rollback_staging_region_routes(
|
||||
group_id,
|
||||
&sources,
|
||||
&target_routes,
|
||||
&[],
|
||||
&applied_region_routes,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(new_region_routes, original_region_routes);
|
||||
}
|
||||
|
||||
/// Merge case: regions 1 and 2 are merged into region 1, with region 2 pending deallocation.
/// Rolling back the staged routes must restore the original routes, and rolling back
/// the already rolled-back routes must not change them.
#[test]
fn test_rollback_staging_region_routes_merge_case_is_idempotent() {
    let group_id = Uuid::new_v4();
    let table_id = 1024;

    let first_expr = range_expr("x", 0, 100).as_json_str().unwrap();
    let second_expr = range_expr("x", 100, 200).as_json_str().unwrap();
    let third_expr = range_expr("x", 200, 300).as_json_str().unwrap();
    let original_region_routes = vec![
        new_region_route(RegionId::new(table_id, 1), &first_expr, None, false),
        new_region_route(RegionId::new(table_id, 2), &second_expr, None, false),
        new_region_route(RegionId::new(table_id, 3), &third_expr, None, false),
    ];

    let sources = vec![
        RegionDescriptor {
            region_id: RegionId::new(table_id, 1),
            partition_expr: range_expr("x", 0, 100),
        },
        RegionDescriptor {
            region_id: RegionId::new(table_id, 2),
            partition_expr: range_expr("x", 100, 200),
        },
    ];
    let targets = vec![RegionDescriptor {
        region_id: RegionId::new(table_id, 1),
        partition_expr: range_expr("x", 0, 200),
    }];
    let pending_deallocate = [RegionId::new(table_id, 2)];

    let target_routes = original_target_routes(&original_region_routes, &targets);
    let applied_region_routes = UpdateMetadata::apply_staging_region_routes(
        group_id,
        &sources,
        &targets,
        &pending_deallocate,
        &original_region_routes,
    )
    .unwrap();

    // Roll back the staged routes, then roll back the result once more.
    let once = UpdateMetadata::rollback_staging_region_routes(
        group_id,
        &sources,
        &target_routes,
        &pending_deallocate,
        &applied_region_routes,
    )
    .unwrap();
    let twice = UpdateMetadata::rollback_staging_region_routes(
        group_id,
        &sources,
        &target_routes,
        &pending_deallocate,
        &once,
    )
    .unwrap();

    assert_eq!(once, original_region_routes);
    assert_eq!(once, twice);
}
|
||||
}
|
||||
|
||||
@@ -102,7 +102,7 @@ impl State for RepartitionStart {
|
||||
}
|
||||
|
||||
impl RepartitionStart {
|
||||
fn build_plan(
|
||||
pub(crate) fn build_plan(
|
||||
physical_route: &PhysicalTableRouteValue,
|
||||
from_exprs: &[PartitionExpr],
|
||||
to_exprs: &[PartitionExpr],
|
||||
|
||||
@@ -16,22 +16,41 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use common_meta::ddl::DdlContext;
|
||||
use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::test_utils::{new_test_table_info, new_test_table_info_with_name};
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_meta::sequence::SequenceBuilder;
|
||||
use common_meta::test_util::new_ddl_context_with_kv_backend;
|
||||
use common_procedure::{
|
||||
Context as ProcedureContext, ContextProvider, ProcedureId, ProcedureState, Status,
|
||||
};
|
||||
use common_procedure_test::MockContextProvider;
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions};
|
||||
use datatypes::value::Value;
|
||||
use partition::expr::{PartitionExpr, col};
|
||||
use store_api::storage::TableId;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
use table::table_name::TableName;
|
||||
use tokio::sync::watch;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::cache_invalidator::MetasrvCacheInvalidator;
|
||||
use crate::metasrv::MetasrvInfo;
|
||||
use crate::procedure::repartition::group::{Context, PersistentContext, VolatileContext};
|
||||
use crate::procedure::repartition::plan::RegionDescriptor;
|
||||
use crate::procedure::repartition::{
|
||||
Context as ParentContext, PersistentContext as ParentPersistentContext, RepartitionProcedure,
|
||||
};
|
||||
use crate::procedure::test_util::MailboxContext;
|
||||
|
||||
/// `TestingEnv` provides components during the tests.
|
||||
pub struct TestingEnv {
|
||||
pub kv_backend: KvBackendRef,
|
||||
pub table_metadata_manager: TableMetadataManagerRef,
|
||||
pub mailbox_ctx: MailboxContext,
|
||||
pub server_addr: String,
|
||||
@@ -45,13 +64,14 @@ impl Default for TestingEnv {
|
||||
|
||||
impl TestingEnv {
|
||||
pub fn new() -> Self {
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new());
|
||||
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
|
||||
let mailbox_sequence =
|
||||
SequenceBuilder::new("test_heartbeat_mailbox", kv_backend.clone()).build();
|
||||
let mailbox_ctx = MailboxContext::new(mailbox_sequence);
|
||||
|
||||
Self {
|
||||
kv_backend,
|
||||
table_metadata_manager,
|
||||
mailbox_ctx,
|
||||
server_addr: "localhost".to_string(),
|
||||
@@ -76,6 +96,65 @@ impl TestingEnv {
|
||||
volatile_ctx: VolatileContext::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn procedure_context() -> ProcedureContext {
|
||||
ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(MockContextProvider::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_physical_table_metadata(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
) {
|
||||
self.create_physical_table_metadata_with_wal_options(
|
||||
table_id,
|
||||
region_routes,
|
||||
HashMap::default(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn create_physical_table_metadata_with_wal_options(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) {
|
||||
self.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
new_test_table_info(table_id),
|
||||
TableRouteValue::physical(region_routes),
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub async fn create_physical_table_metadata_for_repartition(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_routes: Vec<RegionRoute>,
|
||||
region_wal_options: HashMap<RegionNumber, String>,
|
||||
) {
|
||||
let mut table_info = new_test_table_info_with_name(table_id, "test_table");
|
||||
table_info.meta.column_ids = vec![0, 1, 2];
|
||||
|
||||
self.table_metadata_manager
|
||||
.create_table_metadata(
|
||||
table_info,
|
||||
TableRouteValue::physical(region_routes),
|
||||
region_wal_options,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub fn ddl_context(&self, node_manager: NodeManagerRef) -> DdlContext {
|
||||
new_ddl_context_with_kv_backend(node_manager, self.kv_backend.clone())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn range_expr(col_name: &str, start: i64, end: i64) -> PartitionExpr {
|
||||
@@ -84,6 +163,18 @@ pub fn range_expr(col_name: &str, start: i64, end: i64) -> PartitionExpr {
|
||||
.and(col(col_name).lt(Value::Int64(end)))
|
||||
}
|
||||
|
||||
pub fn test_region_wal_options(region_numbers: &[RegionNumber]) -> HashMap<RegionNumber, String> {
|
||||
let wal_options = serde_json::to_string(&WalOptions::Kafka(KafkaWalOptions {
|
||||
topic: "test_topic".to_string(),
|
||||
}))
|
||||
.unwrap();
|
||||
|
||||
region_numbers
|
||||
.iter()
|
||||
.map(|region_number| (*region_number, wal_options.clone()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn new_persistent_context(
|
||||
table_id: TableId,
|
||||
sources: Vec<RegionDescriptor>,
|
||||
@@ -105,3 +196,110 @@ pub fn new_persistent_context(
|
||||
timeout: Duration::from_secs(120),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_region_route(region_id: RegionId, partition_expr: &str) -> RegionRoute {
|
||||
RegionRoute {
|
||||
region: Region {
|
||||
id: region_id,
|
||||
partition_expr: partition_expr.to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches the table route value through `ctx` and returns a copy of its region routes.
/// Panics if the value cannot be fetched or has no region routes.
pub async fn current_parent_region_routes(ctx: &ParentContext) -> Vec<RegionRoute> {
    let fetched = ctx.get_table_route_value().await.unwrap();
    let table_route_value = fetched.into_inner();
    let routes = table_route_value.region_routes().unwrap();
    routes.clone()
}
|
||||
|
||||
pub fn new_parent_context(
|
||||
env: &TestingEnv,
|
||||
node_manager: NodeManagerRef,
|
||||
table_id: TableId,
|
||||
) -> ParentContext {
|
||||
let ddl_ctx = env.ddl_context(node_manager);
|
||||
let persistent_ctx = ParentPersistentContext::new(
|
||||
TableName::new("test_catalog", "test_schema", "test_table"),
|
||||
table_id,
|
||||
None,
|
||||
);
|
||||
|
||||
ParentContext::new(
|
||||
&ddl_ctx,
|
||||
env.mailbox_ctx.mailbox().clone(),
|
||||
env.server_addr.clone(),
|
||||
persistent_ctx,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn assert_parent_state<T: 'static>(procedure: &RepartitionProcedure) {
|
||||
assert!(procedure.state.as_any().is::<T>());
|
||||
}
|
||||
|
||||
pub fn extract_subprocedure_ids(status: Status) -> Vec<ProcedureId> {
|
||||
let Status::Suspended { subprocedures, .. } = status else {
|
||||
panic!("expected suspended status");
|
||||
};
|
||||
|
||||
subprocedures
|
||||
.into_iter()
|
||||
.map(|procedure| procedure.id)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn procedure_state_receiver(state: ProcedureState) -> watch::Receiver<ProcedureState> {
|
||||
let (tx, rx) = watch::channel(ProcedureState::Running);
|
||||
tx.send(state).unwrap();
|
||||
rx
|
||||
}
|
||||
|
||||
pub fn procedure_context_with_receivers(
|
||||
receivers: HashMap<ProcedureId, watch::Receiver<ProcedureState>>,
|
||||
) -> ProcedureContext {
|
||||
ProcedureContext {
|
||||
procedure_id: ProcedureId::random(),
|
||||
provider: Arc::new(ProcedureStateReceiverProvider {
|
||||
receivers,
|
||||
inner: MockContextProvider::default(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
struct ProcedureStateReceiverProvider {
|
||||
receivers: HashMap<ProcedureId, watch::Receiver<ProcedureState>>,
|
||||
inner: MockContextProvider,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl ContextProvider for ProcedureStateReceiverProvider {
|
||||
async fn procedure_state(
|
||||
&self,
|
||||
procedure_id: ProcedureId,
|
||||
) -> common_procedure::Result<Option<ProcedureState>> {
|
||||
self.inner.procedure_state(procedure_id).await
|
||||
}
|
||||
|
||||
async fn procedure_state_receiver(
|
||||
&self,
|
||||
procedure_id: ProcedureId,
|
||||
) -> common_procedure::Result<Option<watch::Receiver<ProcedureState>>> {
|
||||
Ok(self.receivers.get(&procedure_id).cloned())
|
||||
}
|
||||
|
||||
async fn try_put_poison(
|
||||
&self,
|
||||
key: &common_procedure::PoisonKey,
|
||||
procedure_id: ProcedureId,
|
||||
) -> common_procedure::Result<()> {
|
||||
self.inner.try_put_poison(key, procedure_id).await
|
||||
}
|
||||
|
||||
async fn acquire_lock(
|
||||
&self,
|
||||
key: &common_procedure::StringKey,
|
||||
) -> common_procedure::local::DynamicKeyLockGuard {
|
||||
self.inner.acquire_lock(key).await
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,6 +190,23 @@ pub(crate) async fn flush_region(
|
||||
operation: "Flush regions",
|
||||
}
|
||||
.fail(),
|
||||
Err(error::Error::MailboxChannelClosed { .. }) => match error_strategy {
|
||||
ErrorStrategy::Ignore => {
|
||||
warn!(
|
||||
"Failed to flush regions({:?}), the datanode({}) is unreachable(MailboxChannelClosed). Skip flush operation.",
|
||||
region_ids, datanode
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
ErrorStrategy::Retry => error::RetryLaterSnafu {
|
||||
reason: format!(
|
||||
"Mailbox closed when sending flush region to datanode {:?}, elapsed: {:?}",
|
||||
datanode,
|
||||
now.elapsed()
|
||||
),
|
||||
}
|
||||
.fail()?,
|
||||
},
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -528,7 +528,7 @@ mod tests {
|
||||
async fn test_bulk_insert_physical_region_passthrough() {
|
||||
// Use flat format so that BulkMemtable is used (supports write_bulk).
|
||||
let mito_config = MitoConfig {
|
||||
default_experimental_flat_format: true,
|
||||
default_flat_format: true,
|
||||
..Default::default()
|
||||
};
|
||||
let env = TestEnv::with_mito_config("", mito_config, Default::default()).await;
|
||||
@@ -585,7 +585,7 @@ mod tests {
|
||||
async fn test_bulk_insert_physical_region_empty_batch() {
|
||||
// Use flat format so that BulkMemtable is used (supports write_bulk).
|
||||
let mito_config = MitoConfig {
|
||||
default_experimental_flat_format: true,
|
||||
default_flat_format: true,
|
||||
..Default::default()
|
||||
};
|
||||
let env = TestEnv::with_mito_config("", mito_config, Default::default()).await;
|
||||
|
||||
@@ -121,6 +121,10 @@ mod tests {
|
||||
.map(|path| path.replace(&e.file_id, "<file_id>"));
|
||||
e.file_id = "<file_id>".to_string();
|
||||
e.index_version = 0;
|
||||
// Round down sizes to nearest 1000 to avoid exact size
|
||||
// comparisons that break when the SST format changes.
|
||||
e.file_size = e.file_size / 1000 * 1000;
|
||||
e.index_file_size = e.index_file_size.map(|s| s / 1000 * 1000);
|
||||
format!("\n{:?}", e)
|
||||
})
|
||||
.sorted()
|
||||
@@ -129,12 +133,12 @@ mod tests {
|
||||
assert_eq!(
|
||||
debug_format,
|
||||
r#"
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 3487, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3217, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(235), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3471, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#,
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640257(11, 1), table_id: 11, region_number: 1, region_group: 0, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/data/<file_id>.parquet", file_size: 3000, index_file_path: Some("test_metric_region/11_0000000001/data/index/<file_id>.puffin"), index_file_size: Some(0), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(20), origin_region_id: 47244640257(11, 1), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47244640258(11, 2), table_id: 11, region_number: 2, region_group: 0, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/data/<file_id>.parquet", file_size: 3000, index_file_path: Some("test_metric_region/11_0000000002/data/index/<file_id>.puffin"), index_file_size: Some(0), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 47244640258(11, 2), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417473(11, 16777217), table_id: 11, region_number: 16777217, region_group: 1, region_sequence: 1, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000001/metadata/<file_id>.parquet", file_size: 4000, index_file_path: None, index_file_size: None, num_rows: 8, num_row_groups: 1, num_series: Some(8), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(8), origin_region_id: 47261417473(11, 16777217), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 47261417474(11, 16777218), table_id: 11, region_number: 16777218, region_group: 1, region_sequence: 2, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/11_0000000002/metadata/<file_id>.parquet", file_size: 3000, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 47261417474(11, 16777218), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94489280554(22, 42), table_id: 22, region_number: 42, region_group: 0, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/data/<file_id>.parquet", file_size: 3000, index_file_path: Some("test_metric_region/22_0000000042/data/index/<file_id>.puffin"), index_file_size: Some(0), num_rows: 10, num_row_groups: 1, num_series: Some(1), min_ts: 0::Millisecond, max_ts: 9::Millisecond, sequence: Some(10), origin_region_id: 94489280554(22, 42), node_id: None, visible: true }
|
||||
ManifestSstEntry { table_dir: "test_metric_region/", region_id: 94506057770(22, 16777258), table_id: 22, region_number: 16777258, region_group: 1, region_sequence: 42, file_id: "<file_id>", index_version: 0, level: 0, file_path: "test_metric_region/22_0000000042/metadata/<file_id>.parquet", file_size: 3000, index_file_path: None, index_file_size: None, num_rows: 4, num_row_groups: 1, num_series: Some(4), min_ts: 0::Millisecond, max_ts: 0::Millisecond, sequence: Some(4), origin_region_id: 94506057770(22, 16777258), node_id: None, visible: true }"#,
|
||||
);
|
||||
// list from storage
|
||||
let storage_entries = mito
|
||||
|
||||
@@ -15,7 +15,6 @@ common-base.workspace = true
|
||||
common-decimal.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
@@ -27,6 +26,7 @@ snafu.workspace = true
|
||||
store-api.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-query.workspace = true
|
||||
criterion = "0.7"
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
|
||||
@@ -848,7 +848,7 @@ impl CompactionSstReaderBuilder<'_> {
|
||||
}
|
||||
|
||||
fn build_scan_input(self) -> Result<ScanInput> {
|
||||
let mapper = ProjectionMapper::all(&self.metadata, true)?;
|
||||
let mapper = ProjectionMapper::all(&self.metadata)?;
|
||||
let mut scan_input = ScanInput::new(self.sst_layer, mapper)
|
||||
.with_files(self.inputs.to_vec())
|
||||
.with_append_mode(self.append_mode)
|
||||
@@ -857,8 +857,7 @@ impl CompactionSstReaderBuilder<'_> {
|
||||
.with_filter_deleted(self.filter_deleted)
|
||||
// We ignore file not found error during compaction.
|
||||
.with_ignore_file_not_found(true)
|
||||
.with_merge_mode(self.merge_mode)
|
||||
.with_flat_format(true);
|
||||
.with_merge_mode(self.merge_mode);
|
||||
|
||||
// This serves as a workaround for https://github.com/GreptimeTeam/greptimedb/issues/3944
|
||||
// by converting time ranges into predicate.
|
||||
|
||||
@@ -322,11 +322,7 @@ impl DefaultCompactor {
|
||||
.region_options
|
||||
.sst_format
|
||||
.map(|format| format == FormatType::Flat)
|
||||
.unwrap_or(
|
||||
compaction_region
|
||||
.engine_config
|
||||
.default_experimental_flat_format,
|
||||
);
|
||||
.unwrap_or(compaction_region.engine_config.default_flat_format);
|
||||
|
||||
let index_config = compaction_region.engine_config.index.clone();
|
||||
let inverted_index_config = compaction_region.engine_config.inverted_index.clone();
|
||||
|
||||
@@ -33,8 +33,6 @@ use crate::memtable::MemtableConfig;
|
||||
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
|
||||
|
||||
const MULTIPART_UPLOAD_MINIMUM_SIZE: ReadableSize = ReadableSize::mb(5);
|
||||
/// Default channel size for parallel scan task.
|
||||
pub(crate) const DEFAULT_SCAN_CHANNEL_SIZE: usize = 32;
|
||||
/// Default maximum number of SST files to scan concurrently.
|
||||
pub(crate) const DEFAULT_MAX_CONCURRENT_SCAN_FILES: usize = 384;
|
||||
|
||||
@@ -93,7 +91,9 @@ pub struct MitoConfig {
|
||||
pub max_background_compactions: usize,
|
||||
/// Max number of running background purge jobs (default: number of cpu cores).
|
||||
pub max_background_purges: usize,
|
||||
/// Memory budget for compaction tasks. Setting it to 0 or "unlimited" disables the limit.
|
||||
/// Memory budget for compaction tasks.
|
||||
/// Supports absolute size (e.g., "2GiB", "512MB") or percentage of system memory (e.g., "50%").
|
||||
/// Setting it to 0 or "unlimited" disables the limit.
|
||||
pub experimental_compaction_memory_limit: MemoryLimit,
|
||||
/// Behavior when compaction cannot acquire memory from the budget.
|
||||
pub experimental_compaction_on_exhausted: OnExhaustedPolicy,
|
||||
@@ -142,8 +142,6 @@ pub struct MitoConfig {
|
||||
// Other configs:
|
||||
/// Buffer size for SST writing.
|
||||
pub sst_write_buffer_size: ReadableSize,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
|
||||
pub parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently (default 384).
|
||||
pub max_concurrent_scan_files: usize,
|
||||
/// Whether to allow stale entries read during replay.
|
||||
@@ -177,9 +175,9 @@ pub struct MitoConfig {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub min_compaction_interval: Duration,
|
||||
|
||||
/// Whether to enable experimental flat format as the default format.
|
||||
/// Whether to enable flat format as the default SST format.
|
||||
/// When enabled, forces using BulkMemtable and BulkMemtableBuilder.
|
||||
pub default_experimental_flat_format: bool,
|
||||
pub default_flat_format: bool,
|
||||
|
||||
pub gc: GcConfig,
|
||||
}
|
||||
@@ -217,7 +215,6 @@ impl Default for MitoConfig {
|
||||
enable_refill_cache_on_read: true,
|
||||
manifest_cache_size: ReadableSize::mb(256),
|
||||
sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
allow_stale_entries: false,
|
||||
scan_memory_limit: MemoryLimit::default(),
|
||||
@@ -230,7 +227,7 @@ impl Default for MitoConfig {
|
||||
vector_index: VectorIndexConfig::default(),
|
||||
memtable: MemtableConfig::default(),
|
||||
min_compaction_interval: Duration::from_secs(0),
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: true,
|
||||
gc: GcConfig::default(),
|
||||
};
|
||||
|
||||
@@ -295,14 +292,6 @@ impl MitoConfig {
|
||||
);
|
||||
}
|
||||
|
||||
if self.parallel_scan_channel_size < 1 {
|
||||
self.parallel_scan_channel_size = DEFAULT_SCAN_CHANNEL_SIZE;
|
||||
warn!(
|
||||
"Sanitize scan channel size to {}",
|
||||
self.parallel_scan_channel_size
|
||||
);
|
||||
}
|
||||
|
||||
// Sets write cache path if it is empty.
|
||||
if self.write_cache_path.trim().is_empty() {
|
||||
self.write_cache_path = data_home.to_string();
|
||||
|
||||
@@ -1027,7 +1027,6 @@ impl EngineInner {
|
||||
request,
|
||||
CacheStrategy::EnableAll(cache_manager),
|
||||
)
|
||||
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
|
||||
.with_max_concurrent_scan_files(self.config.max_concurrent_scan_files)
|
||||
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
|
||||
.with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
|
||||
|
||||
@@ -141,7 +141,7 @@ async fn test_alter_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -213,7 +213,7 @@ async fn test_alter_region_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -267,7 +267,7 @@ async fn test_put_after_alter_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -318,7 +318,7 @@ async fn test_put_after_alter_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -387,7 +387,7 @@ async fn test_alter_region_retry_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -457,7 +457,7 @@ async fn test_alter_on_flushing_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -574,7 +574,7 @@ async fn test_alter_column_fulltext_options_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -681,7 +681,7 @@ async fn test_alter_column_fulltext_options_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -718,7 +718,7 @@ async fn test_alter_column_set_inverted_index_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -816,7 +816,7 @@ async fn test_alter_column_set_inverted_index_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -853,7 +853,7 @@ async fn test_alter_region_ttl_options_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -916,7 +916,7 @@ async fn test_write_stall_on_altering_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -994,7 +994,7 @@ async fn test_alter_region_sst_format_with_flush() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -1085,7 +1085,7 @@ async fn test_alter_region_sst_format_with_flush() {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -1118,7 +1118,7 @@ async fn test_alter_region_sst_format_without_flush() {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -1203,7 +1203,7 @@ async fn test_alter_region_sst_format_without_flush() {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -1231,6 +1231,250 @@ async fn test_alter_region_sst_format_without_flush() {
|
||||
assert_eq!(expected_all_data, batches.pretty_print().unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alter_region_sst_format_flat_to_pk_with_flush() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_flat_format: true,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
env.get_kv_backend(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
let table_dir = request.table_dir.clone();
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Inserts some data with flat format
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(0, 3),
|
||||
};
|
||||
put_rows(&engine, region_id, rows).await;
|
||||
|
||||
// Flushes to create SST files with flat format
|
||||
flush_region(&engine, region_id, None).await;
|
||||
|
||||
let expected_data = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
+-------+---------+---------------------+";
|
||||
let request = ScanRequest::default();
|
||||
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
assert_eq!(expected_data, batches.pretty_print().unwrap());
|
||||
|
||||
// Alters sst_format from flat to primary_key
|
||||
let alter_format_request = RegionAlterRequest {
|
||||
kind: AlterKind::SetRegionOptions {
|
||||
options: vec![SetRegionOption::Format("primary_key".to_string())],
|
||||
},
|
||||
};
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Alter(alter_format_request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Inserts more data after alter
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(3, 6),
|
||||
};
|
||||
put_rows(&engine, region_id, rows).await;
|
||||
|
||||
// Flushes to create SST files with primary_key format
|
||||
flush_region(&engine, region_id, None).await;
|
||||
|
||||
let expected_all_data = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
| 3 | 3.0 | 1970-01-01T00:00:03 |
|
||||
| 4 | 4.0 | 1970-01-01T00:00:04 |
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
+-------+---------+---------------------+";
|
||||
let request = ScanRequest::default();
|
||||
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
assert_eq!(expected_all_data, batches.pretty_print().unwrap());
|
||||
|
||||
// Reopens region to verify format persists
|
||||
let engine = env
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await;
|
||||
engine
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Open(RegionOpenRequest {
|
||||
engine: String::new(),
|
||||
table_dir,
|
||||
path_type: PathType::Bare,
|
||||
options: HashMap::default(),
|
||||
skip_wal_replay: false,
|
||||
checkpoint: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let request = ScanRequest::default();
|
||||
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
assert_eq!(expected_all_data, batches.pretty_print().unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alter_region_sst_format_flat_to_pk_without_flush() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_flat_format: true,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
env.get_kv_backend(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
let table_dir = request.table_dir.clone();
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let check_format = |engine: &MitoEngine, expected: Option<FormatType>| {
|
||||
let current_format = engine
|
||||
.get_region(region_id)
|
||||
.unwrap()
|
||||
.version()
|
||||
.options
|
||||
.sst_format;
|
||||
assert_eq!(current_format, expected);
|
||||
};
|
||||
check_format(&engine, Some(FormatType::Flat));
|
||||
|
||||
// Inserts some data with flat format
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(0, 3),
|
||||
};
|
||||
put_rows(&engine, region_id, rows).await;
|
||||
|
||||
// Alters sst_format from flat to primary_key
|
||||
let alter_format_request = RegionAlterRequest {
|
||||
kind: AlterKind::SetRegionOptions {
|
||||
options: vec![SetRegionOption::Format("primary_key".to_string())],
|
||||
},
|
||||
};
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Alter(alter_format_request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
check_format(&engine, Some(FormatType::PrimaryKey));
|
||||
|
||||
// Inserts more data after alter
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: build_rows(3, 6),
|
||||
};
|
||||
put_rows(&engine, region_id, rows).await;
|
||||
|
||||
let expected_all_data = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
| 3 | 3.0 | 1970-01-01T00:00:03 |
|
||||
| 4 | 4.0 | 1970-01-01T00:00:04 |
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
+-------+---------+---------------------+";
|
||||
let request = ScanRequest::default();
|
||||
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
assert_eq!(expected_all_data, batches.pretty_print().unwrap());
|
||||
|
||||
// Reopens region to verify format persists
|
||||
let engine = env
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_flat_format: false,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.await;
|
||||
engine
|
||||
.handle_request(
|
||||
region_id,
|
||||
RegionRequest::Open(RegionOpenRequest {
|
||||
engine: String::new(),
|
||||
table_dir,
|
||||
path_type: PathType::Bare,
|
||||
options: HashMap::default(),
|
||||
skip_wal_replay: false,
|
||||
checkpoint: None,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
check_format(&engine, Some(FormatType::PrimaryKey));
|
||||
|
||||
let request = ScanRequest::default();
|
||||
let stream = engine.scan_to_stream(region_id, request).await.unwrap();
|
||||
let batches = RecordBatches::try_collect(stream).await.unwrap();
|
||||
assert_eq!(expected_all_data, batches.pretty_print().unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alter_region_append_mode_with_flush() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
@@ -44,7 +44,7 @@ async fn test_append_mode_write_query_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -112,7 +112,7 @@ async fn test_append_mode_compaction_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -211,7 +211,7 @@ async fn test_append_mode_compaction_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -238,7 +238,7 @@ async fn test_alter_append_mode_clears_merge_mode_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -329,7 +329,7 @@ async fn test_alter_append_mode_clears_merge_mode_with_format(flat_format: bool)
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -376,7 +376,7 @@ async fn test_put_single_range_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -474,7 +474,7 @@ async fn test_put_single_range_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
||||
@@ -62,7 +62,7 @@ async fn test_apply_staging_manifest_invalid_region_state_with_format(flat_forma
|
||||
let mut env = TestEnv::with_prefix("invalid-region-state").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -125,7 +125,7 @@ async fn test_apply_staging_manifest_mismatched_partition_expr_with_format(flat_
|
||||
let mut env = TestEnv::with_prefix("mismatched-partition-expr").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -205,7 +205,7 @@ async fn test_apply_staging_manifest_success_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("success").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -406,7 +406,7 @@ async fn test_apply_staging_manifest_invalid_files_to_add_with_format(flat_forma
|
||||
let mut env = TestEnv::with_prefix("invalid-files-to-add").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -483,7 +483,7 @@ async fn test_apply_staging_manifest_change_edit_different_columns_fails_with_fo
|
||||
let mut env = TestEnv::with_prefix("apply-change-edit-different-columns").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -599,7 +599,7 @@ async fn test_apply_staging_manifest_preserves_unflushed_memtable_with_format(fl
|
||||
let mut env = TestEnv::with_prefix("apply-preserve-memtable").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -56,7 +56,7 @@ async fn test_engine_new_stop_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("engine-stop").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -93,7 +93,7 @@ async fn test_write_to_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("write-to-region").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -134,7 +134,7 @@ async fn test_region_replay_with_format(factory: Option<LogStoreFactory>, flat_f
|
||||
.with_log_store_factory(factory.clone());
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -169,7 +169,7 @@ async fn test_region_replay_with_format(factory: Option<LogStoreFactory>, flat_f
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -234,7 +234,7 @@ async fn test_write_query_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -278,7 +278,7 @@ async fn test_different_order_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -339,7 +339,7 @@ async fn test_different_order_and_type_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -403,7 +403,7 @@ async fn test_put_delete_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -465,7 +465,7 @@ async fn test_delete_not_null_fields_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -524,7 +524,7 @@ async fn test_put_overwrite_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -594,7 +594,7 @@ async fn test_absent_and_invalid_columns_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -650,7 +650,7 @@ async fn test_region_usage_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("region_usage").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -716,7 +716,7 @@ async fn test_engine_with_write_cache_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let path = env.data_home().to_str().unwrap().to_string();
|
||||
let mito_config = MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
}
|
||||
.enable_write_cache(path, ReadableSize::mb(512), None);
|
||||
@@ -765,7 +765,7 @@ async fn test_cache_null_primary_key_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
vector_cache_size: ReadableSize::mb(32),
|
||||
..Default::default()
|
||||
})
|
||||
@@ -896,7 +896,7 @@ async fn test_list_ssts_with_format(
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -1002,7 +1002,7 @@ async fn test_all_index_metas_list_all_types_with_format(flat_format: bool, expe
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -49,7 +49,7 @@ async fn test_batch_catchup_with_format(factory: Option<LogStoreFactory>, flat_f
|
||||
.with_log_store_factory(factory.clone());
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -135,7 +135,7 @@ async fn test_batch_catchup_with_format(factory: Option<LogStoreFactory>, flat_f
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -216,7 +216,7 @@ async fn test_batch_catchup_err_with_format(factory: Option<LogStoreFactory>, fl
|
||||
.with_log_store_factory(factory.clone());
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -49,7 +49,7 @@ async fn test_batch_open_with_format(factory: Option<LogStoreFactory>, flat_form
|
||||
.with_log_store_factory(factory.clone());
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -157,7 +157,7 @@ async fn test_batch_open_with_format(factory: Option<LogStoreFactory>, flat_form
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -193,7 +193,7 @@ async fn test_batch_open_err_with_format(factory: Option<LogStoreFactory>, flat_
|
||||
.with_log_store_factory(factory.clone());
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -35,7 +35,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -97,7 +97,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -136,7 +136,7 @@ async fn test_bump_committed_sequence_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
||||
@@ -701,7 +701,7 @@ async fn test_catchup_not_exist_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -29,7 +29,7 @@ async fn test_engine_close_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("close").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -147,7 +147,7 @@ async fn test_compaction_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -223,7 +223,7 @@ async fn test_infer_compaction_time_window_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -374,7 +374,7 @@ async fn test_compaction_overlapping_files_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -445,7 +445,7 @@ async fn test_compaction_region_with_overlapping_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -503,7 +503,7 @@ async fn test_compaction_region_with_overlapping_delete_all_with_format(flat_for
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -571,7 +571,7 @@ async fn test_readonly_during_compaction_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
// Ensure there is only one background worker for purge task.
|
||||
max_background_purges: 1,
|
||||
..Default::default()
|
||||
@@ -730,7 +730,7 @@ async fn test_compaction_update_time_window_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -836,7 +836,7 @@ async fn test_change_region_compaction_window_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -938,7 +938,7 @@ async fn test_change_region_compaction_window_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -981,7 +981,7 @@ async fn test_open_overwrite_compaction_window_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -1040,7 +1040,7 @@ async fn test_open_overwrite_compaction_window_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
||||
@@ -41,7 +41,7 @@ async fn test_engine_copy_region_from_with_format(flat_format: bool, with_index:
|
||||
let mut env = TestEnv::with_prefix("copy-region-from").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -156,7 +156,7 @@ async fn test_engine_copy_region_failure_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await.with_mock_layer(mock_layer);
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -283,7 +283,7 @@ async fn test_engine_copy_region_invalid_args_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -328,7 +328,7 @@ async fn test_engine_copy_region_unexpected_state_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -36,7 +36,7 @@ async fn test_engine_create_new_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("new-region").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -61,7 +61,7 @@ async fn test_engine_create_existing_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("create-existing").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -91,7 +91,7 @@ async fn test_engine_create_close_create_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("create-close-create").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -131,7 +131,7 @@ async fn test_engine_create_with_different_id_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -160,7 +160,7 @@ async fn test_engine_create_with_different_schema_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -190,7 +190,7 @@ async fn test_engine_create_with_different_primary_key_with_format(flat_format:
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -220,7 +220,7 @@ async fn test_engine_create_with_options_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -253,7 +253,7 @@ async fn test_engine_create_with_custom_store_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with_multiple_object_stores(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -301,7 +301,7 @@ async fn test_engine_create_with_memtable_opts_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -353,7 +353,7 @@ async fn create_with_partition_expr_persists_manifest_with_format(flat_format: b
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -401,7 +401,7 @@ async fn test_engine_create_with_format_one_case(create_format: &str, default_fl
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: default_flat_format,
|
||||
default_flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -45,7 +45,7 @@ async fn test_engine_drop_region_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -175,7 +175,7 @@ async fn test_engine_drop_region_for_custom_store_with_format(flat_format: bool)
|
||||
let engine = env
|
||||
.create_engine_with_multiple_object_stores(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
|
||||
@@ -54,7 +54,7 @@ async fn test_edit_region_schedule_compaction_with_format(flat_format: bool) {
|
||||
let (tx, mut rx) = oneshot::channel();
|
||||
let config = MitoConfig {
|
||||
min_compaction_interval: Duration::from_secs(60 * 60),
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
};
|
||||
let time_provider = Arc::new(MockTimeProvider::new(current_time_millis()));
|
||||
@@ -154,7 +154,7 @@ async fn test_edit_region_fill_cache_with_format(flat_format: bool) {
|
||||
MitoConfig {
|
||||
// Write cache must be enabled to download the ingested SST file.
|
||||
enable_write_cache: true,
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -268,7 +268,7 @@ async fn test_edit_region_concurrently_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
// Suppress the compaction to not impede the speed of this kinda stress testing.
|
||||
min_compaction_interval: Duration::from_secs(60 * 60),
|
||||
..Default::default()
|
||||
|
||||
@@ -36,7 +36,7 @@ async fn test_scan_without_filtering_deleted_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -49,7 +49,7 @@ async fn test_manual_flush_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -112,7 +112,7 @@ async fn test_flush_engine_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager.clone()),
|
||||
@@ -191,7 +191,7 @@ async fn test_write_stall_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager.clone()),
|
||||
@@ -274,7 +274,7 @@ async fn test_flush_empty_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager.clone()),
|
||||
@@ -447,7 +447,7 @@ async fn test_auto_flush_engine_with_format(flat_format: bool) {
|
||||
.create_engine_with_time(
|
||||
MitoConfig {
|
||||
auto_flush_interval: Duration::from_secs(60 * 5),
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager.clone()),
|
||||
@@ -523,7 +523,7 @@ async fn test_flush_workers_with_format(flat_format: bool) {
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
num_workers: 2,
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager.clone()),
|
||||
|
||||
@@ -39,7 +39,7 @@ async fn test_merge_mode_write_query_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -107,7 +107,7 @@ async fn test_merge_mode_compaction_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -220,7 +220,7 @@ async fn test_merge_mode_compaction_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
||||
@@ -48,7 +48,7 @@ async fn test_engine_open_empty_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("open-empty").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -87,7 +87,7 @@ async fn test_engine_open_existing_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("open-exiting").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -126,7 +126,7 @@ async fn test_engine_reopen_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("reopen-region").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -153,7 +153,7 @@ async fn test_engine_open_readonly_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -207,7 +207,7 @@ async fn test_engine_region_open_with_options_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -260,7 +260,7 @@ async fn test_engine_region_open_with_custom_store_with_format(flat_format: bool
|
||||
let engine = env
|
||||
.create_engine_with_multiple_object_stores(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -332,7 +332,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -376,7 +376,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -415,7 +415,7 @@ async fn test_open_region_skip_wal_replay_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -462,7 +462,7 @@ async fn test_open_region_wait_for_opening_region_ok_with_format(flat_format: bo
|
||||
let mut env = TestEnv::with_prefix("wait-for-opening-region-ok").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -513,7 +513,7 @@ async fn test_open_region_wait_for_opening_region_err_with_format(flat_format: b
|
||||
let mut env = TestEnv::with_prefix("wait-for-opening-region-err").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -569,7 +569,7 @@ async fn test_open_compaction_region() {
|
||||
async fn test_open_compaction_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let mut mito_config = MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
};
|
||||
mito_config
|
||||
|
||||
@@ -33,13 +33,11 @@ async fn scan_in_parallel(
|
||||
region_id: RegionId,
|
||||
table_dir: &str,
|
||||
parallelism: usize,
|
||||
channel_size: usize,
|
||||
flat_format: bool,
|
||||
) {
|
||||
let engine = env
|
||||
.open_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
parallel_scan_channel_size: channel_size,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -85,7 +83,7 @@ async fn test_parallel_scan_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -146,15 +144,13 @@ async fn test_parallel_scan_with_format(flat_format: bool) {
|
||||
|
||||
engine.stop().await.unwrap();
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 0, 1, flat_format).await;
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 0, flat_format).await;
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 1, 1, flat_format).await;
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 1, flat_format).await;
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 2, 1, flat_format).await;
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 2, flat_format).await;
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 2, 8, flat_format).await;
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 4, flat_format).await;
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 4, 8, flat_format).await;
|
||||
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 8, 2, flat_format).await;
|
||||
scan_in_parallel(&mut env, region_id, &table_dir, 8, flat_format).await;
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ async fn test_partition_filter_basic_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -84,7 +84,7 @@ async fn test_scan_projection_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -141,7 +141,7 @@ async fn test_scan_projection_without_primary_key_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -32,7 +32,7 @@ async fn check_prune_row_groups(exprs: Vec<Expr>, expected: &str, flat_format: b
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -180,7 +180,7 @@ async fn test_prune_memtable_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -264,7 +264,7 @@ async fn test_prune_memtable_complex_expr_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -327,7 +327,7 @@ async fn test_mem_range_prune_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -392,7 +392,7 @@ async fn test_scan_filter_field_after_delete_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -37,7 +37,7 @@ async fn test_remap_manifests_invalid_partition_expr_with_format(flat_format: bo
|
||||
let mut env = TestEnv::with_prefix("invalid-partition-expr").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -83,7 +83,7 @@ async fn test_remap_manifests_invalid_region_state_with_format(flat_format: bool
|
||||
let mut env = TestEnv::with_prefix("invalid-region-state").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -123,7 +123,7 @@ async fn test_remap_manifests_invalid_input_regions_with_format(flat_format: boo
|
||||
let mut env = TestEnv::with_prefix("invalid-input-regions").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -166,7 +166,7 @@ async fn test_remap_manifests_success_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("engine-stop").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -12,11 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use api::v1::Rows;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datatypes::arrow::array::AsArray;
|
||||
use datatypes::arrow::datatypes::{Float64Type, TimestampMillisecondType};
|
||||
use futures::TryStreamExt;
|
||||
use store_api::region_engine::{PrepareRequest, RegionEngine, RegionScanner};
|
||||
use store_api::region_request::RegionRequest;
|
||||
@@ -37,7 +41,7 @@ async fn test_scan_with_min_sst_sequence_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("test_scan_with_min_sst_sequence").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -172,7 +176,7 @@ async fn test_max_concurrent_scan_files() {
|
||||
async fn test_max_concurrent_scan_files_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("test_max_concurrent_scan_files").await;
|
||||
let config = MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
max_concurrent_scan_files: 2,
|
||||
..Default::default()
|
||||
};
|
||||
@@ -222,11 +226,16 @@ async fn test_max_concurrent_scan_files_with_format(flat_format: bool) {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_series_scan_primarykey() {
|
||||
async fn test_series_scan() {
|
||||
test_series_scan_with_format(false).await;
|
||||
test_series_scan_with_format(true).await;
|
||||
}
|
||||
|
||||
async fn test_series_scan_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("test_series_scan").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: false,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -295,10 +304,27 @@ async fn test_series_scan_primarykey() {
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let actual_rows = collect_partition_rows_round_robin(&scanner, 3).await;
|
||||
|
||||
let mut expected_rows = Vec::new();
|
||||
for value in [0_i64, 1, 2, 3, 4, 5, 3600, 3601, 3602, 7200, 7201, 7202] {
|
||||
expected_rows.push((value.to_string(), value as f64, value * 1000));
|
||||
}
|
||||
expected_rows.sort_by(|a, b| a.0.cmp(&b.0).then(a.2.cmp(&b.2)));
|
||||
|
||||
assert_eq!(expected_rows, actual_rows);
|
||||
}
|
||||
|
||||
/// Scans all partitions in round-robin fashion and returns rows sorted by (tag, ts).
|
||||
/// Also asserts that each series appears in only one partition.
|
||||
async fn collect_partition_rows_round_robin(
|
||||
scanner: &dyn RegionScanner,
|
||||
num_partitions: usize,
|
||||
) -> Vec<(String, f64, i64)> {
|
||||
let metrics_set = ExecutionPlanMetricsSet::default();
|
||||
|
||||
let mut partition_batches = vec![vec![]; 3];
|
||||
let mut streams: Vec<_> = (0..3)
|
||||
let mut partition_batches = vec![vec![]; num_partitions];
|
||||
let mut streams: Vec<_> = (0..num_partitions)
|
||||
.map(|partition| {
|
||||
let stream = scanner
|
||||
.scan_partition(&Default::default(), &metrics_set, partition)
|
||||
@@ -309,11 +335,11 @@ async fn test_series_scan_primarykey() {
|
||||
let mut num_done = 0;
|
||||
let mut schema = None;
|
||||
// Pull streams in round-robin fashion to get the consistent output from the sender.
|
||||
while num_done < 3 {
|
||||
while num_done < num_partitions {
|
||||
if schema.is_none() {
|
||||
schema = Some(streams[0].as_ref().unwrap().schema().clone());
|
||||
}
|
||||
for i in 0..3 {
|
||||
for i in 0..num_partitions {
|
||||
let Some(mut stream) = streams[i].take() else {
|
||||
continue;
|
||||
};
|
||||
@@ -326,189 +352,54 @@ async fn test_series_scan_primarykey() {
|
||||
}
|
||||
}
|
||||
|
||||
let mut check_result = |expected| {
|
||||
let batches =
|
||||
RecordBatches::try_new(schema.clone().unwrap(), partition_batches.remove(0)).unwrap();
|
||||
assert_eq!(expected, batches.pretty_print().unwrap());
|
||||
};
|
||||
|
||||
// Output series order is 0, 1, 2, 3, 3600, 3601, 3602, 4, 5, 7200, 7201, 7202
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 3 | 3.0 | 1970-01-01T00:00:03 |
|
||||
| 3602 | 3602.0 | 1970-01-01T01:00:02 |
|
||||
| 7200 | 7200.0 | 1970-01-01T02:00:00 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 3600 | 3600.0 | 1970-01-01T01:00:00 |
|
||||
| 4 | 4.0 | 1970-01-01T00:00:04 |
|
||||
| 7201 | 7201.0 | 1970-01-01T02:00:01 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
| 3601 | 3601.0 | 1970-01-01T01:00:01 |
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
| 7202 | 7202.0 | 1970-01-01T02:00:02 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
let schema = schema.unwrap();
|
||||
collect_and_assert_partition_rows(schema, partition_batches)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_series_scan_flat() {
|
||||
let mut env = TestEnv::with_prefix("test_series_scan").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: true,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
/// Collects rows sorted by (tag, ts) from partition batches.
|
||||
/// Also asserts that each series appears in only one partition.
|
||||
fn collect_and_assert_partition_rows(
|
||||
schema: datatypes::schema::SchemaRef,
|
||||
partition_batches: Vec<Vec<common_recordbatch::RecordBatch>>,
|
||||
) -> Vec<(String, f64, i64)> {
|
||||
let mut series_to_partition = BTreeMap::new();
|
||||
let mut actual_rows = Vec::new();
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.time_window", "1h")
|
||||
.build();
|
||||
let column_schemas = test_util::rows_schema(&request);
|
||||
for (partition, batches) in partition_batches.into_iter().enumerate() {
|
||||
let batches = RecordBatches::try_new(schema.clone(), batches).unwrap();
|
||||
let mut partition_series = Vec::new();
|
||||
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
for batch in batches.iter() {
|
||||
let tags = batch.column_by_name("tag_0").unwrap().as_string::<i32>();
|
||||
let fields = batch
|
||||
.column_by_name("field_0")
|
||||
.unwrap()
|
||||
.as_primitive::<Float64Type>();
|
||||
let ts = batch
|
||||
.column_by_name("ts")
|
||||
.unwrap()
|
||||
.as_primitive::<TimestampMillisecondType>();
|
||||
|
||||
let put_flush_rows = async |start, end| {
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: test_util::build_rows(start, end),
|
||||
};
|
||||
test_util::put_rows(&engine, region_id, rows).await;
|
||||
test_util::flush_region(&engine, region_id, None).await;
|
||||
};
|
||||
// generates 3 SST files
|
||||
put_flush_rows(0, 3).await;
|
||||
put_flush_rows(2, 6).await;
|
||||
put_flush_rows(3600, 3603).await;
|
||||
// Put to memtable.
|
||||
let rows = Rows {
|
||||
schema: column_schemas.clone(),
|
||||
rows: test_util::build_rows(7200, 7203),
|
||||
};
|
||||
test_util::put_rows(&engine, region_id, rows).await;
|
||||
|
||||
let request = ScanRequest {
|
||||
distribution: Some(TimeSeriesDistribution::PerSeries),
|
||||
..Default::default()
|
||||
};
|
||||
let scanner = engine.scanner(region_id, request).await.unwrap();
|
||||
let Scanner::Series(mut scanner) = scanner else {
|
||||
panic!("Scanner should be series scan");
|
||||
};
|
||||
// 3 partition ranges for 3 time window.
|
||||
assert_eq!(
|
||||
3,
|
||||
scanner.properties().partitions[0].len(),
|
||||
"unexpected ranges: {:?}",
|
||||
scanner.properties().partitions
|
||||
);
|
||||
let raw_ranges: Vec<_> = scanner
|
||||
.properties()
|
||||
.partitions
|
||||
.iter()
|
||||
.flatten()
|
||||
.cloned()
|
||||
.collect();
|
||||
let mut new_ranges = Vec::with_capacity(3);
|
||||
for range in raw_ranges {
|
||||
new_ranges.push(vec![range]);
|
||||
}
|
||||
scanner
|
||||
.prepare(PrepareRequest {
|
||||
ranges: Some(new_ranges),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let metrics_set = ExecutionPlanMetricsSet::default();
|
||||
|
||||
let mut partition_batches = vec![vec![]; 3];
|
||||
let mut streams: Vec<_> = (0..3)
|
||||
.map(|partition| {
|
||||
let stream = scanner
|
||||
.scan_partition(&Default::default(), &metrics_set, partition)
|
||||
.unwrap();
|
||||
Some(stream)
|
||||
})
|
||||
.collect();
|
||||
let mut num_done = 0;
|
||||
let mut schema = None;
|
||||
// Pull streams in round-robin fashion to get the consistent output from the sender.
|
||||
while num_done < 3 {
|
||||
if schema.is_none() {
|
||||
schema = Some(streams[0].as_ref().unwrap().schema().clone());
|
||||
for row in 0..batch.num_rows() {
|
||||
let tag = tags.value(row).to_string();
|
||||
let field = fields.value(row);
|
||||
let ts = ts.value(row);
|
||||
partition_series.push(tag.clone());
|
||||
actual_rows.push((tag, field, ts));
|
||||
}
|
||||
}
|
||||
for i in 0..3 {
|
||||
let Some(mut stream) = streams[i].take() else {
|
||||
continue;
|
||||
};
|
||||
let Some(rb) = stream.try_next().await.unwrap() else {
|
||||
num_done += 1;
|
||||
continue;
|
||||
};
|
||||
partition_batches[i].push(rb);
|
||||
streams[i] = Some(stream);
|
||||
|
||||
partition_series.sort();
|
||||
partition_series.dedup();
|
||||
for tag in partition_series {
|
||||
let prev = series_to_partition.insert(tag.clone(), partition);
|
||||
assert_eq!(
|
||||
None, prev,
|
||||
"series {tag} appears in multiple partitions: {prev:?} and {partition}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mut check_result = |expected| {
|
||||
let batches =
|
||||
RecordBatches::try_new(schema.clone().unwrap(), partition_batches.remove(0)).unwrap();
|
||||
assert_eq!(expected, batches.pretty_print().unwrap());
|
||||
};
|
||||
|
||||
// Output series order is 0, 1, 2, 3, 3600, 3601, 3602, 4, 5, 7200, 7201, 7202
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 0 | 0.0 | 1970-01-01T00:00:00 |
|
||||
| 1 | 1.0 | 1970-01-01T00:00:01 |
|
||||
| 2 | 2.0 | 1970-01-01T00:00:02 |
|
||||
| 3 | 3.0 | 1970-01-01T00:00:03 |
|
||||
| 7200 | 7200.0 | 1970-01-01T02:00:00 |
|
||||
| 7201 | 7201.0 | 1970-01-01T02:00:01 |
|
||||
| 7202 | 7202.0 | 1970-01-01T02:00:02 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 3600 | 3600.0 | 1970-01-01T01:00:00 |
|
||||
| 3601 | 3601.0 | 1970-01-01T01:00:01 |
|
||||
| 3602 | 3602.0 | 1970-01-01T01:00:02 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
|
||||
let expected = "\
|
||||
+-------+---------+---------------------+
|
||||
| tag_0 | field_0 | ts |
|
||||
+-------+---------+---------------------+
|
||||
| 4 | 4.0 | 1970-01-01T00:00:04 |
|
||||
| 5 | 5.0 | 1970-01-01T00:00:05 |
|
||||
+-------+---------+---------------------+";
|
||||
check_result(expected);
|
||||
actual_rows.sort_by(|a, b| a.0.cmp(&b.0).then(a.2.cmp(&b.2)));
|
||||
actual_rows
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ async fn test_set_role_state_gracefully_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -141,7 +141,7 @@ async fn test_set_role_state_gracefully_not_exist_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -166,7 +166,7 @@ async fn test_write_downgrading_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("write-to-downgrading-region").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -220,7 +220,7 @@ async fn test_unified_state_transitions_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -329,7 +329,7 @@ async fn test_restricted_state_transitions_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -72,7 +72,7 @@ async fn test_staging_state_integration_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -130,7 +130,7 @@ async fn test_staging_blocks_alter_operations_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -171,7 +171,7 @@ async fn test_staging_blocks_truncate_operations_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -308,7 +308,7 @@ async fn test_staging_write_partition_expr_version_with_format(flat_format: bool
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -505,7 +505,7 @@ async fn test_staging_manifest_directory_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -657,7 +657,7 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool)
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -883,7 +883,7 @@ async fn test_enter_staging_writes_partition_expr_change_action_with_format(flat
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -947,7 +947,7 @@ async fn test_staging_exit_conflict_partition_expr_change_and_change_with_format
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -1032,7 +1032,7 @@ async fn test_write_stall_on_enter_staging_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
None,
|
||||
@@ -1156,7 +1156,7 @@ async fn test_enter_staging_error(env: &mut TestEnv, flat_format: bool) {
|
||||
let partition_expr = default_partition_expr();
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -80,7 +80,7 @@ async fn test_sync_after_flush_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -112,7 +112,7 @@ async fn test_sync_after_flush_region_with_format(flat_format: bool) {
|
||||
// Open the region on the follower engine
|
||||
let follower_engine = env
|
||||
.create_follower_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -189,7 +189,7 @@ async fn test_sync_after_alter_region_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::new().await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -224,7 +224,7 @@ async fn test_sync_after_alter_region_with_format(flat_format: bool) {
|
||||
// Open the region on the follower engine
|
||||
let follower_engine = env
|
||||
.create_follower_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
|
||||
@@ -41,7 +41,7 @@ async fn test_engine_truncate_region_basic_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("truncate-basic").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -104,7 +104,7 @@ async fn test_engine_put_data_after_truncate_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("truncate-put").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -180,7 +180,7 @@ async fn test_engine_truncate_after_flush_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("truncate-flush").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -270,7 +270,7 @@ async fn test_engine_truncate_reopen_with_format(flat_format: bool) {
|
||||
let mut env = TestEnv::with_prefix("truncate-reopen").await;
|
||||
let engine = env
|
||||
.create_engine(MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
})
|
||||
.await;
|
||||
@@ -310,7 +310,7 @@ async fn test_engine_truncate_reopen_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
@@ -355,7 +355,7 @@ async fn test_engine_truncate_during_flush_with_format(flat_format: bool) {
|
||||
let engine = env
|
||||
.create_engine_with(
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
Some(write_buffer_manager),
|
||||
@@ -436,7 +436,7 @@ async fn test_engine_truncate_during_flush_with_format(flat_format: bool) {
|
||||
.reopen_engine(
|
||||
engine,
|
||||
MitoConfig {
|
||||
default_experimental_flat_format: flat_format,
|
||||
default_flat_format: flat_format,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
|
||||
@@ -634,7 +634,7 @@ impl RegionFlushTask {
|
||||
.options
|
||||
.sst_format
|
||||
.map(|f| f == FormatType::Flat)
|
||||
.unwrap_or(self.engine_config.default_experimental_flat_format);
|
||||
.unwrap_or(self.engine_config.default_flat_format);
|
||||
SstWriteRequest {
|
||||
op_type: OperationType::Flush,
|
||||
metadata: version.metadata.clone(),
|
||||
|
||||
@@ -421,7 +421,7 @@ impl MemtableBuilderProvider {
|
||||
let flat_format = options
|
||||
.sst_format
|
||||
.map(|format| format == FormatType::Flat)
|
||||
.unwrap_or(self.config.default_experimental_flat_format);
|
||||
.unwrap_or(self.config.default_flat_format);
|
||||
if flat_format {
|
||||
if options.memtable.is_some() {
|
||||
common_telemetry::info!(
|
||||
|
||||
@@ -175,6 +175,7 @@ impl Batch {
|
||||
}
|
||||
|
||||
/// Create an empty [`Batch`].
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
primary_key: vec![],
|
||||
@@ -677,6 +678,7 @@ impl Batch {
|
||||
|
||||
/// Checks the batch is monotonic by timestamps.
|
||||
#[cfg(debug_assertions)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn check_monotonic(&self) -> Result<(), String> {
|
||||
use std::cmp::Ordering;
|
||||
if self.timestamps_native().is_none() {
|
||||
@@ -719,6 +721,7 @@ impl Batch {
|
||||
|
||||
/// Returns Ok if the given batch is behind the current batch.
|
||||
#[cfg(debug_assertions)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn check_next_batch(&self, other: &Batch) -> Result<(), String> {
|
||||
// Checks the primary key
|
||||
if self.primary_key() < other.primary_key() {
|
||||
@@ -798,6 +801,7 @@ impl Batch {
|
||||
/// A struct to check the batch is monotonic.
|
||||
#[cfg(debug_assertions)]
|
||||
#[derive(Default)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct BatchChecker {
|
||||
last_batch: Option<Batch>,
|
||||
start: Option<Timestamp>,
|
||||
@@ -805,6 +809,7 @@ pub(crate) struct BatchChecker {
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
#[allow(dead_code)]
|
||||
impl BatchChecker {
|
||||
/// Attaches the given start timestamp to the checker.
|
||||
pub(crate) fn with_start(mut self, start: Option<Timestamp>) -> Self {
|
||||
|
||||
@@ -98,6 +98,7 @@ pub(crate) enum CompatBatch {
|
||||
|
||||
impl CompatBatch {
|
||||
/// Returns the inner primary key batch adapter if this is a PrimaryKey format.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn as_primary_key(&self) -> Option<&PrimaryKeyCompatBatch> {
|
||||
match self {
|
||||
CompatBatch::PrimaryKey(batch) => Some(batch),
|
||||
@@ -980,7 +981,6 @@ mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use datatypes::value::ValueRef;
|
||||
use datatypes::vectors::{Int64Vector, TimestampMillisecondVector, UInt8Vector, UInt64Vector};
|
||||
use mito_codec::row_converter::{
|
||||
DensePrimaryKeyCodec, PrimaryKeyCodecExt, SparsePrimaryKeyCodec,
|
||||
};
|
||||
@@ -992,7 +992,6 @@ mod tests {
|
||||
use crate::read::flat_projection::FlatProjectionMapper;
|
||||
use crate::sst::parquet::flat_format::FlatReadFormat;
|
||||
use crate::sst::{FlatSchemaOptions, to_flat_sst_arrow_schema};
|
||||
use crate::test_util::{VecBatchReader, check_reader_result};
|
||||
|
||||
/// Creates a new [RegionMetadata].
|
||||
fn new_metadata(
|
||||
@@ -1053,44 +1052,6 @@ mod tests {
|
||||
buffer
|
||||
}
|
||||
|
||||
/// Creates a batch for specific primary `key`.
|
||||
///
|
||||
/// `fields`: [(column_id of the field, is null)]
|
||||
fn new_batch(
|
||||
primary_key: &[u8],
|
||||
fields: &[(ColumnId, bool)],
|
||||
start_ts: i64,
|
||||
num_rows: usize,
|
||||
) -> Batch {
|
||||
let timestamps = Arc::new(TimestampMillisecondVector::from_values(
|
||||
start_ts..start_ts + num_rows as i64,
|
||||
));
|
||||
let sequences = Arc::new(UInt64Vector::from_values(0..num_rows as u64));
|
||||
let op_types = Arc::new(UInt8Vector::from_vec(vec![OpType::Put as u8; num_rows]));
|
||||
let field_columns = fields
|
||||
.iter()
|
||||
.map(|(id, is_null)| {
|
||||
let data = if *is_null {
|
||||
Arc::new(Int64Vector::from(vec![None; num_rows]))
|
||||
} else {
|
||||
Arc::new(Int64Vector::from_vec(vec![*id as i64; num_rows]))
|
||||
};
|
||||
BatchColumn {
|
||||
column_id: *id,
|
||||
data,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Batch::new(
|
||||
primary_key.to_vec(),
|
||||
timestamps,
|
||||
sequences,
|
||||
op_types,
|
||||
field_columns,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_pk_len() {
|
||||
let reader_meta = new_metadata(
|
||||
@@ -1213,311 +1174,6 @@ mod tests {
|
||||
assert!(may_compat_fields(&mapper, &reader_meta).unwrap().is_none())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader() {
|
||||
let reader_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let expect_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(3, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1, 3],
|
||||
));
|
||||
let mapper = ProjectionMapper::all(&expect_meta, false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let k2 = encode_key(&[Some("b")]);
|
||||
let source_reader = VecBatchReader::new(&[
|
||||
new_batch(&k1, &[(2, false)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false)], 1000, 3),
|
||||
]);
|
||||
|
||||
let mut compat_reader = CompatReader::new(&mapper, reader_meta, source_reader).unwrap();
|
||||
let k1 = encode_key(&[Some("a"), None]);
|
||||
let k2 = encode_key(&[Some("b"), None]);
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[
|
||||
new_batch(&k1, &[(2, false), (4, true)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false), (4, true)], 1000, 3),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader_different_order() {
|
||||
let reader_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let expect_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let mapper = ProjectionMapper::all(&expect_meta, false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let k2 = encode_key(&[Some("b")]);
|
||||
let source_reader = VecBatchReader::new(&[
|
||||
new_batch(&k1, &[(2, false)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false)], 1000, 3),
|
||||
]);
|
||||
|
||||
let mut compat_reader = CompatReader::new(&mapper, reader_meta, source_reader).unwrap();
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[
|
||||
new_batch(&k1, &[(3, true), (2, false), (4, true)], 1000, 3),
|
||||
new_batch(&k2, &[(3, true), (2, false), (4, true)], 1000, 3),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader_different_types() {
|
||||
let actual_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let expect_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::string_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let mapper = ProjectionMapper::all(&expect_meta, false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let k2 = encode_key(&[Some("b")]);
|
||||
let source_reader = VecBatchReader::new(&[
|
||||
new_batch(&k1, &[(2, false)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false)], 1000, 3),
|
||||
]);
|
||||
|
||||
let fn_batch_cast = |batch: Batch| {
|
||||
let mut new_fields = batch.fields.clone();
|
||||
new_fields[0].data = new_fields[0]
|
||||
.data
|
||||
.cast(&ConcreteDataType::string_datatype())
|
||||
.unwrap();
|
||||
|
||||
batch.with_fields(new_fields).unwrap()
|
||||
};
|
||||
let mut compat_reader = CompatReader::new(&mapper, actual_meta, source_reader).unwrap();
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[
|
||||
fn_batch_cast(new_batch(&k1, &[(2, false)], 1000, 3)),
|
||||
fn_batch_cast(new_batch(&k2, &[(2, false)], 1000, 3)),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader_projection() {
|
||||
let reader_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let expect_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
// tag_1, field_2, field_3
|
||||
let mapper = ProjectionMapper::new(&expect_meta, [1, 3, 2].into_iter(), false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let source_reader = VecBatchReader::new(&[new_batch(&k1, &[(2, false)], 1000, 3)]);
|
||||
|
||||
let mut compat_reader =
|
||||
CompatReader::new(&mapper, reader_meta.clone(), source_reader).unwrap();
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[new_batch(&k1, &[(3, true), (2, false)], 1000, 3)],
|
||||
)
|
||||
.await;
|
||||
|
||||
// tag_1, field_4, field_3
|
||||
let mapper = ProjectionMapper::new(&expect_meta, [1, 4, 2].into_iter(), false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let source_reader = VecBatchReader::new(&[new_batch(&k1, &[], 1000, 3)]);
|
||||
|
||||
let mut compat_reader = CompatReader::new(&mapper, reader_meta, source_reader).unwrap();
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[new_batch(&k1, &[(3, true), (4, true)], 1000, 3)],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader_projection_read_superset() {
|
||||
let reader_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
let expect_meta = Arc::new(new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(3, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
));
|
||||
// Output: tag_1, field_3, field_2. Read also includes field_4.
|
||||
let mapper = ProjectionMapper::new_with_read_columns(
|
||||
&expect_meta,
|
||||
[1, 3, 2].into_iter(),
|
||||
false,
|
||||
vec![1, 3, 2, 4],
|
||||
)
|
||||
.unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let source_reader = VecBatchReader::new(&[new_batch(&k1, &[(2, false)], 1000, 3)]);
|
||||
|
||||
let mut compat_reader = CompatReader::new(&mapper, reader_meta, source_reader).unwrap();
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[new_batch(&k1, &[(3, true), (2, false), (4, true)], 1000, 3)],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_compat_reader_different_pk_encoding() {
|
||||
let mut reader_meta = new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1],
|
||||
);
|
||||
reader_meta.primary_key_encoding = PrimaryKeyEncoding::Dense;
|
||||
let reader_meta = Arc::new(reader_meta);
|
||||
let mut expect_meta = new_metadata(
|
||||
&[
|
||||
(
|
||||
0,
|
||||
SemanticType::Timestamp,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
),
|
||||
(1, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(2, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
(3, SemanticType::Tag, ConcreteDataType::string_datatype()),
|
||||
(4, SemanticType::Field, ConcreteDataType::int64_datatype()),
|
||||
],
|
||||
&[1, 3],
|
||||
);
|
||||
expect_meta.primary_key_encoding = PrimaryKeyEncoding::Sparse;
|
||||
let expect_meta = Arc::new(expect_meta);
|
||||
|
||||
let mapper = ProjectionMapper::all(&expect_meta, false).unwrap();
|
||||
let k1 = encode_key(&[Some("a")]);
|
||||
let k2 = encode_key(&[Some("b")]);
|
||||
let source_reader = VecBatchReader::new(&[
|
||||
new_batch(&k1, &[(2, false)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false)], 1000, 3),
|
||||
]);
|
||||
|
||||
let mut compat_reader = CompatReader::new(&mapper, reader_meta, source_reader).unwrap();
|
||||
let k1 = encode_sparse_key(&[(1, Some("a")), (3, None)]);
|
||||
let k2 = encode_sparse_key(&[(1, Some("b")), (3, None)]);
|
||||
check_reader_result(
|
||||
&mut compat_reader,
|
||||
&[
|
||||
new_batch(&k1, &[(2, false), (4, true)], 1000, 3),
|
||||
new_batch(&k2, &[(2, false), (4, true)], 1000, 3),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Creates a primary key array for flat format testing.
|
||||
fn build_flat_test_pk_array(primary_keys: &[&[u8]]) -> ArrayRef {
|
||||
let mut builder = BinaryDictionaryBuilder::<UInt32Type>::new();
|
||||
|
||||
@@ -19,9 +19,10 @@ use std::time::Instant;
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_telemetry::debug;
|
||||
use datatypes::arrow::array::{Int64Array, UInt64Array};
|
||||
use datatypes::arrow::array::{Array, AsArray, Int64Array, UInt64Array};
|
||||
use datatypes::arrow::compute::interleave;
|
||||
use datatypes::arrow::datatypes::SchemaRef;
|
||||
use datatypes::arrow::datatypes::{ArrowNativeType, BinaryType, DataType, SchemaRef, Utf8Type};
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::arrow_array::BinaryArray;
|
||||
use datatypes::timestamp::timestamp_array_to_primitive;
|
||||
@@ -39,6 +40,62 @@ use crate::sst::parquet::flat_format::{
|
||||
};
|
||||
use crate::sst::parquet::format::PrimaryKeyArray;
|
||||
|
||||
/// Checks whether interleaving the selected rows from byte columns would overflow
|
||||
/// i32 offsets. Similar to arrow-rs `interleave_bytes()`, accumulates offsets and
|
||||
/// returns an error if the capacity exceeds `i32::MAX`.
|
||||
///
|
||||
/// TODO(yingwen): Remove this after upgrading to arrow >= 58.1.0, which handles
|
||||
/// offset overflow in `interleave_bytes()` natively.
|
||||
///
|
||||
/// See: <https://github.com/apache/arrow-rs/blob/65ad652f2410fc51ad77da1805e85c0a76d9a7ea/arrow-select/src/interleave.rs#L208-L225>
|
||||
fn check_interleave_bytes_overflow<T: datatypes::arrow::datatypes::ByteArrayType>(
|
||||
batches: &[(usize, RecordBatch)],
|
||||
col_idx: usize,
|
||||
indices: &[(usize, usize)],
|
||||
) -> std::result::Result<(), ArrowError> {
|
||||
// Quick check: if concatenating all value data won't overflow, interleaving
|
||||
// a subset of rows definitely won't either.
|
||||
let total: usize = batches
|
||||
.iter()
|
||||
.map(|(_, batch)| batch.column(col_idx).as_bytes::<T>().value_data().len())
|
||||
.sum();
|
||||
if T::Offset::from_usize(total).is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
// Total exceeds the offset limit, do the precise per-row check.
|
||||
let mut capacity: usize = 0;
|
||||
for &(a, b) in indices {
|
||||
let array = batches[a].1.column(col_idx).as_bytes::<T>();
|
||||
let o = array.value_offsets();
|
||||
let element_len = o[b + 1].as_usize() - o[b].as_usize();
|
||||
capacity += element_len;
|
||||
T::Offset::from_usize(capacity).ok_or(ArrowError::OffsetOverflowError(capacity))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks whether `interleave()` would overflow i32 offsets for `Utf8` or `Binary` columns.
|
||||
fn check_interleave_overflow(
|
||||
batches: &[(usize, RecordBatch)],
|
||||
schema: &SchemaRef,
|
||||
indices: &[(usize, usize)],
|
||||
) -> Result<()> {
|
||||
for (col_idx, field) in schema.fields.iter().enumerate() {
|
||||
match field.data_type() {
|
||||
DataType::Utf8 => {
|
||||
check_interleave_bytes_overflow::<Utf8Type>(batches, col_idx, indices)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
}
|
||||
DataType::Binary => {
|
||||
check_interleave_bytes_overflow::<BinaryType>(batches, col_idx, indices)
|
||||
.context(ComputeArrowSnafu)?;
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Keeps track of the current position in a batch
|
||||
#[derive(Debug, Copy, Clone, Default)]
|
||||
struct BatchCursor {
|
||||
@@ -121,6 +178,8 @@ impl BatchBuilder {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
check_interleave_overflow(&self.batches, &self.schema, &self.indices)?;
|
||||
|
||||
let columns = (0..self.schema.fields.len())
|
||||
.map(|column_idx| {
|
||||
let arrays: Vec<_> = self
|
||||
|
||||
@@ -45,6 +45,7 @@ use crate::sst::parquet::reader::{FlatRowGroupReader, ReaderMetrics, RowGroupRea
|
||||
///
|
||||
/// This reader is different from the [MergeMode](crate::region::options::MergeMode) as
|
||||
/// it focus on time series (the same key).
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct LastRowReader {
|
||||
/// Inner reader.
|
||||
reader: BoxedBatchReader,
|
||||
@@ -52,6 +53,7 @@ pub(crate) struct LastRowReader {
|
||||
selector: LastRowSelector,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl LastRowReader {
|
||||
/// Creates a new `LastRowReader`.
|
||||
pub(crate) fn new(reader: BoxedBatchReader) -> Self {
|
||||
|
||||
@@ -52,51 +52,27 @@ impl ProjectionMapper {
|
||||
pub fn new(
|
||||
metadata: &RegionMetadataRef,
|
||||
projection: impl Iterator<Item = usize> + Clone,
|
||||
flat_format: bool,
|
||||
) -> Result<Self> {
|
||||
if flat_format {
|
||||
Ok(ProjectionMapper::Flat(FlatProjectionMapper::new(
|
||||
metadata, projection,
|
||||
)?))
|
||||
} else {
|
||||
Ok(ProjectionMapper::PrimaryKey(
|
||||
PrimaryKeyProjectionMapper::new(metadata, projection)?,
|
||||
))
|
||||
}
|
||||
Ok(ProjectionMapper::Flat(FlatProjectionMapper::new(
|
||||
metadata, projection,
|
||||
)?))
|
||||
}
|
||||
|
||||
/// Returns a new mapper with output projection and explicit read columns.
|
||||
pub fn new_with_read_columns(
|
||||
metadata: &RegionMetadataRef,
|
||||
projection: impl Iterator<Item = usize>,
|
||||
flat_format: bool,
|
||||
read_column_ids: Vec<ColumnId>,
|
||||
) -> Result<Self> {
|
||||
let projection: Vec<_> = projection.collect();
|
||||
if flat_format {
|
||||
Ok(ProjectionMapper::Flat(
|
||||
FlatProjectionMapper::new_with_read_columns(metadata, projection, read_column_ids)?,
|
||||
))
|
||||
} else {
|
||||
Ok(ProjectionMapper::PrimaryKey(
|
||||
PrimaryKeyProjectionMapper::new_with_read_columns(
|
||||
metadata,
|
||||
projection,
|
||||
read_column_ids,
|
||||
)?,
|
||||
))
|
||||
}
|
||||
Ok(ProjectionMapper::Flat(
|
||||
FlatProjectionMapper::new_with_read_columns(metadata, projection, read_column_ids)?,
|
||||
))
|
||||
}
|
||||
|
||||
/// Returns a new mapper without projection.
|
||||
pub fn all(metadata: &RegionMetadataRef, flat_format: bool) -> Result<Self> {
|
||||
if flat_format {
|
||||
Ok(ProjectionMapper::Flat(FlatProjectionMapper::all(metadata)?))
|
||||
} else {
|
||||
Ok(ProjectionMapper::PrimaryKey(
|
||||
PrimaryKeyProjectionMapper::all(metadata)?,
|
||||
))
|
||||
}
|
||||
pub fn all(metadata: &RegionMetadataRef) -> Result<Self> {
|
||||
Ok(ProjectionMapper::Flat(FlatProjectionMapper::all(metadata)?))
|
||||
}
|
||||
|
||||
/// Returns the metadata that created the mapper.
|
||||
@@ -159,6 +135,7 @@ impl ProjectionMapper {
|
||||
}
|
||||
|
||||
/// Handles projection and converts a projected [Batch] to a projected [RecordBatch].
|
||||
#[allow(dead_code)]
|
||||
pub struct PrimaryKeyProjectionMapper {
|
||||
/// Metadata of the region.
|
||||
metadata: RegionMetadataRef,
|
||||
@@ -178,6 +155,7 @@ pub struct PrimaryKeyProjectionMapper {
|
||||
is_empty_projection: bool,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl PrimaryKeyProjectionMapper {
|
||||
/// Returns a new mapper with projection.
|
||||
/// If `projection` is empty, it outputs [RecordBatch] without any column but only a row count.
|
||||
@@ -413,6 +391,7 @@ pub(crate) fn read_column_ids_from_projection(
|
||||
|
||||
/// Index of a vector in a [Batch].
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[allow(dead_code)]
|
||||
enum BatchIndex {
|
||||
/// Index in primary keys.
|
||||
Tag((usize, ColumnId)),
|
||||
@@ -480,53 +459,6 @@ mod tests {
|
||||
};
|
||||
|
||||
use super::*;
|
||||
use crate::cache::CacheManager;
|
||||
use crate::read::BatchBuilder;
|
||||
|
||||
fn new_batch(
|
||||
ts_start: i64,
|
||||
tags: &[i64],
|
||||
fields: &[(ColumnId, i64)],
|
||||
num_rows: usize,
|
||||
) -> Batch {
|
||||
let converter = DensePrimaryKeyCodec::with_fields(
|
||||
(0..tags.len())
|
||||
.map(|idx| {
|
||||
(
|
||||
idx as u32,
|
||||
SortField::new(ConcreteDataType::int64_datatype()),
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
);
|
||||
let primary_key = converter
|
||||
.encode(tags.iter().map(|v| ValueRef::Int64(*v)))
|
||||
.unwrap();
|
||||
|
||||
let mut builder = BatchBuilder::new(primary_key);
|
||||
builder
|
||||
.timestamps_array(Arc::new(TimestampMillisecondArray::from_iter_values(
|
||||
(0..num_rows).map(|i| ts_start + i as i64 * 1000),
|
||||
)))
|
||||
.unwrap()
|
||||
.sequences_array(Arc::new(UInt64Array::from_iter_values(0..num_rows as u64)))
|
||||
.unwrap()
|
||||
.op_types_array(Arc::new(UInt8Array::from_iter_values(
|
||||
(0..num_rows).map(|_| OpType::Put as u8),
|
||||
)))
|
||||
.unwrap();
|
||||
for (column_id, field) in fields {
|
||||
builder
|
||||
.push_field_array(
|
||||
*column_id,
|
||||
Arc::new(Int64Array::from_iter_values(std::iter::repeat_n(
|
||||
*field, num_rows,
|
||||
))),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn print_record_batch(record_batch: RecordBatch) -> String {
|
||||
pretty::pretty_format_batches(&[record_batch.into_df_record_batch()])
|
||||
@@ -534,166 +466,6 @@ mod tests {
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Converts a batch with a primary-key mapper that projects every column and checks
/// the printed output as well as which repeated vectors end up in the cache.
#[test]
fn test_projection_mapper_all() {
    let metadata = Arc::new(
        TestRegionMetadataBuilder::default()
            .num_tags(2)
            .num_fields(2)
            .build(),
    );
    // Create the enum wrapper with default format (primary key)
    let mapper = ProjectionMapper::all(&metadata, false).unwrap();
    let pk_mapper = mapper.as_primary_key().unwrap();
    assert_eq!([0, 1, 2, 3, 4], mapper.column_ids());
    let expected_fields = [
        (3, ConcreteDataType::int64_datatype()),
        (4, ConcreteDataType::int64_datatype()),
    ];
    assert_eq!(expected_fields, pk_mapper.batch_fields());

    // With vector cache.
    let cache = CacheStrategy::EnableAll(Arc::new(
        CacheManager::builder().vector_cache_size(1024).build(),
    ));
    let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
    let expect = "\
+---------------------+----+----+----+----+
| ts | k0 | k1 | v0 | v1 |
+---------------------+----+----+----+----+
| 1970-01-01T00:00:00 | 1 | 2 | 3 | 4 |
| 1970-01-01T00:00:01 | 1 | 2 | 3 | 4 |
| 1970-01-01T00:00:02 | 1 | 2 | 3 | 4 |
+---------------------+----+----+----+----+";
    let first = pk_mapper.convert(&batch, &cache).unwrap();
    assert_eq!(expect, print_record_batch(first));

    // The tag values 1 and 2 are cached as repeated vectors, the field value 3 is not.
    let is_cached = |value: i64| {
        cache
            .get_repeated_vector(&ConcreteDataType::int64_datatype(), &Value::Int64(value))
            .is_some()
    };
    assert!(is_cached(1));
    assert!(is_cached(2));
    assert!(!is_cached(3));

    // Converting again with the populated cache gives the same output.
    let second = pk_mapper.convert(&batch, &cache).unwrap();
    assert_eq!(expect, print_record_batch(second));
}
|
||||
|
||||
/// Converts a batch with a primary-key mapper that projects two columns and checks
/// the column ids, the batch fields and the printed output.
#[test]
fn test_projection_mapper_with_projection() {
    let metadata = Arc::new(
        TestRegionMetadataBuilder::default()
            .num_tags(2)
            .num_fields(2)
            .build(),
    );
    // Columns v1, k0
    let projection = [4, 1];
    let mapper = ProjectionMapper::new(&metadata, projection.into_iter(), false).unwrap();
    let pk_mapper = mapper.as_primary_key().unwrap();
    assert_eq!([4, 1], mapper.column_ids());
    assert_eq!(
        [(4, ConcreteDataType::int64_datatype())],
        pk_mapper.batch_fields()
    );

    let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
    let cache = CacheStrategy::EnableAll(Arc::new(
        CacheManager::builder().vector_cache_size(1024).build(),
    ));
    let record_batch = pk_mapper.convert(&batch, &cache).unwrap();
    let expect = "\
+----+----+
| v1 | k0 |
+----+----+
| 4 | 1 |
| 4 | 1 |
| 4 | 1 |
+----+----+";
    assert_eq!(expect, print_record_batch(record_batch));
}
|
||||
|
||||
/// Converts a batch with a primary-key mapper whose read columns include one column
/// more than its output projection and checks that only the projected columns are printed.
#[test]
fn test_projection_mapper_read_superset() {
    let metadata = Arc::new(
        TestRegionMetadataBuilder::default()
            .num_tags(2)
            .num_fields(2)
            .build(),
    );
    // Output columns v1, k0. Read also includes v0.
    let output_projection = [4, 1];
    let read_column_ids = vec![4, 1, 3];
    let mapper = ProjectionMapper::new_with_read_columns(
        &metadata,
        output_projection.into_iter(),
        false,
        read_column_ids,
    )
    .unwrap();
    assert_eq!([4, 1, 3], mapper.column_ids());

    let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
    let cache = CacheStrategy::EnableAll(Arc::new(
        CacheManager::builder().vector_cache_size(1024).build(),
    ));
    let pk_mapper = mapper.as_primary_key().unwrap();
    let record_batch = pk_mapper.convert(&batch, &cache).unwrap();
    let expect = "\
+----+----+
| v1 | k0 |
+----+----+
| 4 | 1 |
| 4 | 1 |
| 4 | 1 |
+----+----+";
    assert_eq!(expect, print_record_batch(record_batch));
}
|
||||
|
||||
/// Builds a primary-key mapper from an empty projection and checks that it still reads
/// column 0 while converting a batch into a record batch with rows but no columns.
#[test]
fn test_projection_mapper_empty_projection() {
    let metadata = Arc::new(
        TestRegionMetadataBuilder::default()
            .num_tags(2)
            .num_fields(2)
            .build(),
    );
    // Empty projection
    let mapper = ProjectionMapper::new(&metadata, std::iter::empty(), false).unwrap();
    assert_eq!([0], mapper.column_ids()); // Should still read the time index column
    assert!(mapper.output_schema().is_empty());

    let pk_mapper = mapper.as_primary_key().unwrap();
    assert!(pk_mapper.batch_fields().is_empty());
    assert!(!pk_mapper.has_tags);
    assert!(pk_mapper.batch_indices.is_empty());
    assert!(pk_mapper.is_empty_projection);

    let cache = CacheStrategy::EnableAll(Arc::new(
        CacheManager::builder().vector_cache_size(1024).build(),
    ));
    let batch = new_batch(0, &[1, 2], &[], 3);
    let record_batch = pk_mapper.convert(&batch, &cache).unwrap();
    assert_eq!(3, record_batch.num_rows());
    assert_eq!(0, record_batch.num_columns());
    assert!(record_batch.schema.is_empty());
}
|
||||
|
||||
fn new_flat_batch(
|
||||
ts_start: Option<i64>,
|
||||
idx_tags: &[(usize, i64)],
|
||||
@@ -809,7 +581,7 @@ mod tests {
|
||||
.build(),
|
||||
);
|
||||
let cache = CacheStrategy::Disabled;
|
||||
let mapper = ProjectionMapper::all(&metadata, true).unwrap();
|
||||
let mapper = ProjectionMapper::all(&metadata).unwrap();
|
||||
assert_eq!([0, 1, 2, 3, 4], mapper.column_ids());
|
||||
assert_eq!(
|
||||
[
|
||||
@@ -845,7 +617,7 @@ mod tests {
|
||||
);
|
||||
let cache = CacheStrategy::Disabled;
|
||||
// Columns v1, k0
|
||||
let mapper = ProjectionMapper::new(&metadata, [4, 1].into_iter(), true).unwrap();
|
||||
let mapper = ProjectionMapper::new(&metadata, [4, 1].into_iter()).unwrap();
|
||||
assert_eq!([4, 1], mapper.column_ids());
|
||||
assert_eq!(
|
||||
[
|
||||
@@ -879,13 +651,9 @@ mod tests {
|
||||
);
|
||||
let cache = CacheStrategy::Disabled;
|
||||
// Output columns v1, k0. Read also includes v0.
|
||||
let mapper = ProjectionMapper::new_with_read_columns(
|
||||
&metadata,
|
||||
[4, 1].into_iter(),
|
||||
true,
|
||||
vec![4, 1, 3],
|
||||
)
|
||||
.unwrap();
|
||||
let mapper =
|
||||
ProjectionMapper::new_with_read_columns(&metadata, [4, 1].into_iter(), vec![4, 1, 3])
|
||||
.unwrap();
|
||||
assert_eq!([4, 1, 3], mapper.column_ids());
|
||||
|
||||
let batch = new_flat_batch(None, &[(1, 1)], &[(3, 3), (4, 4)], 3);
|
||||
@@ -911,7 +679,7 @@ mod tests {
|
||||
);
|
||||
let cache = CacheStrategy::Disabled;
|
||||
// Empty projection
|
||||
let mapper = ProjectionMapper::new(&metadata, [].into_iter(), true).unwrap();
|
||||
let mapper = ProjectionMapper::new(&metadata, [].into_iter()).unwrap();
|
||||
assert_eq!([0], mapper.column_ids()); // Should still read the time index column
|
||||
assert!(mapper.output_schema().is_empty());
|
||||
let flat_mapper = mapper.as_flat().unwrap();
|
||||
|
||||
@@ -30,11 +30,13 @@ use crate::sst::file::FileTimeRange;
|
||||
use crate::sst::parquet::file_range::FileRangeContextRef;
|
||||
use crate::sst::parquet::reader::{FlatRowGroupReader, ReaderMetrics, RowGroupReader};
|
||||
|
||||
/// A source of [Batch]es that wraps one of two reader kinds.
///
/// `next_batch` matches on the variant to fetch the next batch.
// NOTE(review): the dead-code lint is silenced without a stated reason; presumably this
// type lost its callers when the primary-key read path was removed — confirm.
#[allow(dead_code)]
|
||||
pub enum Source {
|
||||
/// Batches come from a [RowGroupReader].
RowGroup(RowGroupReader),
|
||||
/// Batches come from a [RowGroupLastRowCachedReader].
LastRow(RowGroupLastRowCachedReader),
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl Source {
|
||||
async fn next_batch(&mut self) -> Result<Option<Batch>> {
|
||||
match self {
|
||||
@@ -44,6 +46,7 @@ impl Source {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub struct PruneReader {
|
||||
/// Context for file ranges.
|
||||
context: FileRangeContextRef,
|
||||
@@ -53,6 +56,7 @@ pub struct PruneReader {
|
||||
skip_fields: bool,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl PruneReader {
|
||||
pub(crate) fn new_with_row_group_reader(
|
||||
ctx: FileRangeContextRef,
|
||||
|
||||
@@ -515,7 +515,7 @@ mod tests {
|
||||
) -> (StreamContext, PartitionRange) {
|
||||
let env = SchedulerEnv::new().await;
|
||||
let metadata = Arc::new(metadata_with_primary_key(vec![0, 1], false));
|
||||
let mapper = ProjectionMapper::new(&metadata, [0, 2, 3].into_iter(), true).unwrap();
|
||||
let mapper = ProjectionMapper::new(&metadata, [0, 2, 3].into_iter()).unwrap();
|
||||
let predicate = PredicateGroup::new(metadata.as_ref(), &filters).unwrap();
|
||||
let file_id = FileId::random();
|
||||
let file = sst_file_handle_with_file_id(
|
||||
@@ -527,8 +527,7 @@ mod tests {
|
||||
.with_predicate(predicate)
|
||||
.with_time_range(query_time_range)
|
||||
.with_files(vec![file])
|
||||
.with_cache(test_cache_strategy())
|
||||
.with_flat_format(true);
|
||||
.with_cache(test_cache_strategy());
|
||||
let range_meta = RangeMeta {
|
||||
time_range: partition_time_range,
|
||||
indices: smallvec![SourceIndex {
|
||||
|
||||
@@ -46,7 +46,7 @@ use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
use crate::access_layer::AccessLayerRef;
|
||||
use crate::cache::CacheStrategy;
|
||||
use crate::config::{DEFAULT_MAX_CONCURRENT_SCAN_FILES, DEFAULT_SCAN_CHANNEL_SIZE};
|
||||
use crate::config::DEFAULT_MAX_CONCURRENT_SCAN_FILES;
|
||||
use crate::error::{InvalidPartitionExprSnafu, InvalidRequestSnafu, Result};
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::extension::{BoxedExtensionRange, BoxedExtensionRangeProvider};
|
||||
@@ -63,7 +63,6 @@ use crate::read::unordered_scan::UnorderedScan;
|
||||
use crate::read::{Batch, BoxedRecordBatchStream, RecordBatch, Source};
|
||||
use crate::region::options::MergeMode;
|
||||
use crate::region::version::VersionRef;
|
||||
use crate::sst::FormatType;
|
||||
use crate::sst::file::FileHandle;
|
||||
use crate::sst::index::bloom_filter::applier::{
|
||||
BloomFilterIndexApplierBuilder, BloomFilterIndexApplierRef,
|
||||
@@ -77,8 +76,6 @@ use crate::sst::index::vector_index::applier::{VectorIndexApplier, VectorIndexAp
|
||||
use crate::sst::parquet::file_range::PreFilterMode;
|
||||
use crate::sst::parquet::reader::ReaderMetrics;
|
||||
|
||||
/// Parallel scan channel size for flat format.
|
||||
const FLAT_SCAN_CHANNEL_SIZE: usize = 2;
|
||||
#[cfg(feature = "vector_index")]
|
||||
const VECTOR_INDEX_OVERFETCH_MULTIPLIER: usize = 2;
|
||||
|
||||
@@ -222,8 +219,6 @@ pub(crate) struct ScanRegion {
|
||||
request: ScanRequest,
|
||||
/// Cache.
|
||||
cache_strategy: CacheStrategy,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently.
|
||||
max_concurrent_scan_files: usize,
|
||||
/// Whether to ignore inverted index.
|
||||
@@ -254,7 +249,6 @@ impl ScanRegion {
|
||||
access_layer,
|
||||
request,
|
||||
cache_strategy,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
ignore_inverted_index: false,
|
||||
ignore_fulltext_index: false,
|
||||
@@ -266,16 +260,6 @@ impl ScanRegion {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets parallel scan task channel size.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallel_scan_channel_size(
|
||||
mut self,
|
||||
parallel_scan_channel_size: usize,
|
||||
) -> Self {
|
||||
self.parallel_scan_channel_size = parallel_scan_channel_size;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets maximum number of SST files to scan concurrently.
|
||||
#[must_use]
|
||||
pub(crate) fn with_max_concurrent_scan_files(
|
||||
@@ -399,19 +383,12 @@ impl ScanRegion {
|
||||
self.request.distribution == Some(TimeSeriesDistribution::PerSeries)
|
||||
}
|
||||
|
||||
/// Returns true if the region use flat format.
|
||||
fn use_flat_format(&self) -> bool {
|
||||
self.request.force_flat_format
|
||||
|| self.version.options.sst_format.unwrap_or_default() == FormatType::Flat
|
||||
}
|
||||
|
||||
/// Creates a scan input.
|
||||
#[tracing::instrument(skip_all, fields(region_id = %self.region_id()))]
|
||||
async fn scan_input(mut self) -> Result<ScanInput> {
|
||||
async fn scan_input(self) -> Result<ScanInput> {
|
||||
let sst_min_sequence = self.request.sst_min_sequence.and_then(NonZeroU64::new);
|
||||
let time_range = self.build_time_range_predicate();
|
||||
let predicate = PredicateGroup::new(&self.version.metadata, &self.request.filters)?;
|
||||
let flat_format = self.use_flat_format();
|
||||
|
||||
let read_column_ids = match &self.request.projection {
|
||||
Some(p) => self.build_read_column_ids(p, &predicate)?,
|
||||
@@ -429,10 +406,9 @@ impl ScanRegion {
|
||||
Some(p) => ProjectionMapper::new_with_read_columns(
|
||||
&self.version.metadata,
|
||||
p.iter().copied(),
|
||||
flat_format,
|
||||
read_column_ids.clone(),
|
||||
)?,
|
||||
None => ProjectionMapper::all(&self.version.metadata, flat_format)?,
|
||||
None => ProjectionMapper::all(&self.version.metadata)?,
|
||||
};
|
||||
|
||||
let ssts = &self.version.ssts;
|
||||
@@ -496,14 +472,13 @@ impl ScanRegion {
|
||||
|
||||
let region_id = self.region_id();
|
||||
debug!(
|
||||
"Scan region {}, request: {:?}, time range: {:?}, memtables: {}, ssts_to_read: {}, append_mode: {}, flat_format: {}",
|
||||
"Scan region {}, request: {:?}, time range: {:?}, memtables: {}, ssts_to_read: {}, append_mode: {}",
|
||||
region_id,
|
||||
self.request,
|
||||
time_range,
|
||||
mem_range_builders.len(),
|
||||
files.len(),
|
||||
self.version.options.append_mode,
|
||||
flat_format,
|
||||
);
|
||||
|
||||
let (non_field_filters, field_filters) = self.partition_by_field_filters();
|
||||
@@ -530,11 +505,6 @@ impl ScanRegion {
|
||||
}
|
||||
});
|
||||
|
||||
if flat_format {
|
||||
// The batch is already large enough so we use a small channel size here.
|
||||
self.parallel_scan_channel_size = FLAT_SCAN_CHANNEL_SIZE;
|
||||
}
|
||||
|
||||
let input = ScanInput::new(self.access_layer, mapper)
|
||||
.with_time_range(Some(time_range))
|
||||
.with_predicate(predicate)
|
||||
@@ -544,7 +514,6 @@ impl ScanRegion {
|
||||
.with_inverted_index_appliers(inverted_index_appliers)
|
||||
.with_bloom_filter_index_appliers(bloom_filter_appliers)
|
||||
.with_fulltext_index_appliers(fulltext_index_appliers)
|
||||
.with_parallel_scan_channel_size(self.parallel_scan_channel_size)
|
||||
.with_max_concurrent_scan_files(self.max_concurrent_scan_files)
|
||||
.with_start_time(self.start_time)
|
||||
.with_append_mode(self.version.options.append_mode)
|
||||
@@ -552,7 +521,9 @@ impl ScanRegion {
|
||||
.with_merge_mode(self.version.options.merge_mode())
|
||||
.with_series_row_selector(self.request.series_row_selector)
|
||||
.with_distribution(self.request.distribution)
|
||||
.with_flat_format(flat_format);
|
||||
.with_explain_flat_format(
|
||||
self.version.options.sst_format == Some(crate::sst::FormatType::Flat),
|
||||
);
|
||||
#[cfg(feature = "vector_index")]
|
||||
let input = input
|
||||
.with_vector_index_applier(vector_index_applier)
|
||||
@@ -829,8 +800,6 @@ pub struct ScanInput {
|
||||
pub(crate) cache_strategy: CacheStrategy,
|
||||
/// Ignores file not found error.
|
||||
ignore_file_not_found: bool,
|
||||
/// Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
pub(crate) parallel_scan_channel_size: usize,
|
||||
/// Maximum number of SST files to scan concurrently.
|
||||
pub(crate) max_concurrent_scan_files: usize,
|
||||
/// Index appliers.
|
||||
@@ -855,8 +824,8 @@ pub struct ScanInput {
|
||||
pub(crate) series_row_selector: Option<TimeSeriesRowSelector>,
|
||||
/// Hint for the required distribution of the scanner.
|
||||
pub(crate) distribution: Option<TimeSeriesDistribution>,
|
||||
/// Whether to use flat format.
|
||||
pub(crate) flat_format: bool,
|
||||
/// Whether the region's configured SST format is flat.
|
||||
explain_flat_format: bool,
|
||||
/// Whether this scan is for compaction.
|
||||
pub(crate) compaction: bool,
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -878,7 +847,6 @@ impl ScanInput {
|
||||
files: Vec::new(),
|
||||
cache_strategy: CacheStrategy::Disabled,
|
||||
ignore_file_not_found: false,
|
||||
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
|
||||
max_concurrent_scan_files: DEFAULT_MAX_CONCURRENT_SCAN_FILES,
|
||||
inverted_index_appliers: [None, None],
|
||||
bloom_filter_index_appliers: [None, None],
|
||||
@@ -893,7 +861,7 @@ impl ScanInput {
|
||||
merge_mode: MergeMode::default(),
|
||||
series_row_selector: None,
|
||||
distribution: None,
|
||||
flat_format: false,
|
||||
explain_flat_format: false,
|
||||
compaction: false,
|
||||
#[cfg(feature = "enterprise")]
|
||||
extension_ranges: Vec::new(),
|
||||
@@ -943,16 +911,6 @@ impl ScanInput {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets scan task channel size.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallel_scan_channel_size(
|
||||
mut self,
|
||||
parallel_scan_channel_size: usize,
|
||||
) -> Self {
|
||||
self.parallel_scan_channel_size = parallel_scan_channel_size;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets maximum number of SST files to scan concurrently.
|
||||
#[must_use]
|
||||
pub(crate) fn with_max_concurrent_scan_files(
|
||||
@@ -1049,6 +1007,13 @@ impl ScanInput {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether the region's configured SST format is flat for explain output.
|
||||
#[must_use]
|
||||
pub(crate) fn with_explain_flat_format(mut self, explain_flat_format: bool) -> Self {
|
||||
self.explain_flat_format = explain_flat_format;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the time series row selector.
|
||||
#[must_use]
|
||||
pub(crate) fn with_series_row_selector(
|
||||
@@ -1059,13 +1024,6 @@ impl ScanInput {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether to use flat format.
|
||||
#[must_use]
|
||||
pub(crate) fn with_flat_format(mut self, flat_format: bool) -> Self {
|
||||
self.flat_format = flat_format;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets whether this scan is for compaction.
|
||||
#[must_use]
|
||||
pub(crate) fn with_compaction(mut self, compaction: bool) -> Self {
|
||||
@@ -1087,6 +1045,7 @@ impl ScanInput {
|
||||
&self,
|
||||
sources: Vec<Source>,
|
||||
semaphore: Arc<Semaphore>,
|
||||
channel_size: usize,
|
||||
) -> Result<Vec<Source>> {
|
||||
if sources.len() <= 1 {
|
||||
return Ok(sources);
|
||||
@@ -1096,7 +1055,7 @@ impl ScanInput {
|
||||
let sources = sources
|
||||
.into_iter()
|
||||
.map(|source| {
|
||||
let (sender, receiver) = mpsc::channel(self.parallel_scan_channel_size);
|
||||
let (sender, receiver) = mpsc::channel(channel_size);
|
||||
self.spawn_scan_task(source, semaphore.clone(), sender);
|
||||
let stream = Box::pin(ReceiverStream::new(receiver));
|
||||
Source::Stream(stream)
|
||||
@@ -1165,7 +1124,6 @@ impl ScanInput {
|
||||
};
|
||||
let res = reader
|
||||
.expected_metadata(Some(self.mapper.metadata().clone()))
|
||||
.flat_format(self.flat_format)
|
||||
.compaction(self.compaction)
|
||||
.pre_filter_mode(filter_mode)
|
||||
.decode_primary_key_values(decode_pk_values)
|
||||
@@ -1272,6 +1230,7 @@ impl ScanInput {
|
||||
&self,
|
||||
sources: Vec<BoxedRecordBatchStream>,
|
||||
semaphore: Arc<Semaphore>,
|
||||
channel_size: usize,
|
||||
) -> Result<Vec<BoxedRecordBatchStream>> {
|
||||
if sources.len() <= 1 {
|
||||
return Ok(sources);
|
||||
@@ -1281,7 +1240,7 @@ impl ScanInput {
|
||||
let sources = sources
|
||||
.into_iter()
|
||||
.map(|source| {
|
||||
let (sender, receiver) = mpsc::channel(self.parallel_scan_channel_size);
|
||||
let (sender, receiver) = mpsc::channel(channel_size);
|
||||
self.spawn_flat_scan_task(source, semaphore.clone(), sender);
|
||||
let stream = Box::pin(ReceiverStream::new(receiver));
|
||||
Box::pin(stream) as _
|
||||
@@ -1421,8 +1380,7 @@ fn pre_filter_mode(append_mode: bool, merge_mode: MergeMode) -> PreFilterMode {
|
||||
/// Builds a [ScanRequestFingerprint] from a [ScanInput] if the scan is eligible
|
||||
/// for partition range caching.
|
||||
pub(crate) fn build_scan_fingerprint(input: &ScanInput) -> Option<ScanRequestFingerprint> {
|
||||
let eligible = input.flat_format
|
||||
&& !input.compaction
|
||||
let eligible = !input.compaction
|
||||
&& !input.files.is_empty()
|
||||
&& matches!(input.cache_strategy, CacheStrategy::EnableAll(_));
|
||||
|
||||
@@ -1709,8 +1667,7 @@ impl StreamContext {
|
||||
.entries(self.input.files.iter().map(|file| FileWrapper { file }))
|
||||
.finish()?;
|
||||
}
|
||||
write!(f, ", \"flat_format\": {}", self.input.flat_format)?;
|
||||
|
||||
write!(f, ", \"flat_format\": {}", self.input.explain_flat_format)?;
|
||||
#[cfg(feature = "enterprise")]
|
||||
self.format_extension_ranges(f)?;
|
||||
|
||||
@@ -1881,9 +1838,7 @@ mod tests {
|
||||
use crate::cache::CacheManager;
|
||||
use crate::memtable::time_partition::TimePartitions;
|
||||
use crate::read::range_cache::ScanRequestFingerprintBuilder;
|
||||
use crate::region::options::RegionOptions;
|
||||
use crate::region::version::VersionBuilder;
|
||||
use crate::sst::FormatType;
|
||||
use crate::test_util::memtable_util::{EmptyMemtableBuilder, metadata_with_primary_key};
|
||||
use crate::test_util::scheduler_util::SchedulerEnv;
|
||||
|
||||
@@ -1897,30 +1852,9 @@ mod tests {
|
||||
Arc::new(VersionBuilder::new(metadata, mutable).build())
|
||||
}
|
||||
|
||||
fn new_version_with_sst_format(
|
||||
metadata: RegionMetadataRef,
|
||||
sst_format: Option<FormatType>,
|
||||
) -> VersionRef {
|
||||
let mutable = Arc::new(TimePartitions::new(
|
||||
metadata.clone(),
|
||||
Arc::new(EmptyMemtableBuilder::default()),
|
||||
0,
|
||||
None,
|
||||
));
|
||||
let options = RegionOptions {
|
||||
sst_format,
|
||||
..Default::default()
|
||||
};
|
||||
Arc::new(
|
||||
VersionBuilder::new(metadata, mutable)
|
||||
.options(options)
|
||||
.build(),
|
||||
)
|
||||
}
|
||||
|
||||
async fn new_scan_input(metadata: RegionMetadataRef, filters: Vec<Expr>) -> ScanInput {
|
||||
let env = SchedulerEnv::new().await;
|
||||
let mapper = ProjectionMapper::new(&metadata, [0, 2, 3].into_iter(), true).unwrap();
|
||||
let mapper = ProjectionMapper::new(&metadata, [0, 2, 3].into_iter()).unwrap();
|
||||
let predicate = PredicateGroup::new(metadata.as_ref(), &filters).unwrap();
|
||||
let file = FileHandle::new(
|
||||
crate::sst::file::FileMeta::default(),
|
||||
@@ -1934,7 +1868,6 @@ mod tests {
|
||||
.range_result_cache_size(1024)
|
||||
.build(),
|
||||
)))
|
||||
.with_flat_format(true)
|
||||
.with_files(vec![file])
|
||||
}
|
||||
|
||||
@@ -2018,45 +1951,6 @@ mod tests {
|
||||
assert_eq!(vec![4, 1, 3], read_ids);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_use_flat_format_honors_request_override() {
|
||||
let metadata = Arc::new(metadata_with_primary_key(vec![0, 1], false));
|
||||
let env = SchedulerEnv::new().await;
|
||||
|
||||
let primary_key_version =
|
||||
new_version_with_sst_format(metadata.clone(), Some(FormatType::PrimaryKey));
|
||||
let request = ScanRequest::default();
|
||||
let scan_region = ScanRegion::new(
|
||||
primary_key_version.clone(),
|
||||
env.access_layer.clone(),
|
||||
request,
|
||||
CacheStrategy::Disabled,
|
||||
);
|
||||
assert!(!scan_region.use_flat_format());
|
||||
|
||||
let request = ScanRequest {
|
||||
force_flat_format: true,
|
||||
..Default::default()
|
||||
};
|
||||
let scan_region = ScanRegion::new(
|
||||
primary_key_version,
|
||||
env.access_layer.clone(),
|
||||
request,
|
||||
CacheStrategy::Disabled,
|
||||
);
|
||||
assert!(scan_region.use_flat_format());
|
||||
|
||||
let flat_version = new_version_with_sst_format(metadata, Some(FormatType::Flat));
|
||||
let request = ScanRequest::default();
|
||||
let scan_region = ScanRegion::new(
|
||||
flat_version,
|
||||
env.access_layer.clone(),
|
||||
request,
|
||||
CacheStrategy::Disabled,
|
||||
);
|
||||
assert!(scan_region.use_flat_format());
|
||||
}
|
||||
|
||||
/// Helper to create a timestamp millisecond literal.
|
||||
fn ts_lit(val: i64) -> datafusion_expr::Expr {
|
||||
lit(ScalarValue::TimestampMillisecond(Some(val), None))
|
||||
@@ -2128,17 +2022,11 @@ mod tests {
|
||||
|
||||
let disabled = ScanInput::new(
|
||||
SchedulerEnv::new().await.access_layer.clone(),
|
||||
ProjectionMapper::new(&metadata, [0, 2, 3].into_iter(), true).unwrap(),
|
||||
ProjectionMapper::new(&metadata, [0, 2, 3].into_iter()).unwrap(),
|
||||
)
|
||||
.with_predicate(PredicateGroup::new(metadata.as_ref(), &filters).unwrap())
|
||||
.with_flat_format(true);
|
||||
.with_predicate(PredicateGroup::new(metadata.as_ref(), &filters).unwrap());
|
||||
assert!(build_scan_fingerprint(&disabled).is_none());
|
||||
|
||||
let non_flat = new_scan_input(metadata.clone(), filters.clone())
|
||||
.await
|
||||
.with_flat_format(false);
|
||||
assert!(build_scan_fingerprint(&non_flat).is_none());
|
||||
|
||||
let compaction = new_scan_input(metadata.clone(), filters.clone())
|
||||
.await
|
||||
.with_compaction(true);
|
||||
|
||||
@@ -43,16 +43,16 @@ use crate::read::merge::{MergeMetrics, MergeMetricsReport};
|
||||
use crate::read::pruner::PartitionPruner;
|
||||
use crate::read::range::{RangeMeta, RowGroupIndex};
|
||||
use crate::read::scan_region::StreamContext;
|
||||
use crate::read::{Batch, BoxedBatchStream, BoxedRecordBatchStream, ScannerMetrics, Source};
|
||||
use crate::read::{BoxedRecordBatchStream, ScannerMetrics};
|
||||
use crate::sst::file::{FileTimeRange, RegionFileId};
|
||||
use crate::sst::index::bloom_filter::applier::BloomFilterIndexApplyMetrics;
|
||||
use crate::sst::index::fulltext_index::applier::FulltextIndexApplyMetrics;
|
||||
use crate::sst::index::inverted_index::applier::InvertedIndexApplyMetrics;
|
||||
use crate::sst::parquet::DEFAULT_ROW_GROUP_SIZE;
|
||||
use crate::sst::parquet::file_range::FileRange;
|
||||
use crate::sst::parquet::flat_format::time_index_column_index;
|
||||
use crate::sst::parquet::reader::{MetadataCacheMetrics, ReaderFilterMetrics, ReaderMetrics};
|
||||
use crate::sst::parquet::row_group::ParquetFetchMetrics;
|
||||
use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, DEFAULT_ROW_GROUP_SIZE};
|
||||
|
||||
/// Per-file scan metrics.
|
||||
#[derive(Default, Clone)]
|
||||
@@ -1186,45 +1186,6 @@ pub(crate) struct SeriesDistributorMetrics {
|
||||
pub(crate) divider_cost: Duration,
|
||||
}
|
||||
|
||||
/// Scans memtable ranges at `index`.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
region_id = %stream_ctx.input.region_metadata().region_id,
|
||||
file_or_mem_index = %index.index,
|
||||
row_group_index = %index.row_group_index,
|
||||
source = "mem"
|
||||
)
|
||||
)]
|
||||
pub(crate) fn scan_mem_ranges(
|
||||
stream_ctx: Arc<StreamContext>,
|
||||
part_metrics: PartitionMetrics,
|
||||
index: RowGroupIndex,
|
||||
time_range: FileTimeRange,
|
||||
) -> impl Stream<Item = Result<Batch>> {
|
||||
try_stream! {
|
||||
let ranges = stream_ctx.input.build_mem_ranges(index);
|
||||
part_metrics.inc_num_mem_ranges(ranges.len());
|
||||
for range in ranges {
|
||||
let build_reader_start = Instant::now();
|
||||
let mem_scan_metrics = Some(MemScanMetrics::default());
|
||||
let iter = range.build_prune_iter(time_range, mem_scan_metrics.clone())?;
|
||||
part_metrics.inc_build_reader_cost(build_reader_start.elapsed());
|
||||
|
||||
let mut source = Source::Iter(iter);
|
||||
while let Some(batch) = source.next_batch().await? {
|
||||
yield batch;
|
||||
}
|
||||
|
||||
// Report the memtable scan metrics to partition metrics
|
||||
if let Some(ref metrics) = mem_scan_metrics {
|
||||
let data = metrics.data();
|
||||
part_metrics.report_mem_scan_metrics(&data);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scans memtable ranges at `index` using flat format that returns RecordBatch.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
@@ -1270,15 +1231,19 @@ const NUM_SERIES_THRESHOLD: u64 = 10240;
|
||||
/// 60 samples per hour.
|
||||
const BATCH_SIZE_THRESHOLD: u64 = 50;
|
||||
|
||||
/// Returns true if splitting flat record batches may improve merge performance.
|
||||
/// Returns the estimated rows per batch after splitting if splitting flat record batches
|
||||
/// may improve merge performance. Returns `None` if splitting is not beneficial.
|
||||
pub(crate) fn should_split_flat_batches_for_merge(
|
||||
stream_ctx: &Arc<StreamContext>,
|
||||
range_meta: &RangeMeta,
|
||||
) -> bool {
|
||||
) -> Option<usize> {
|
||||
// Number of files to split and scan.
|
||||
let mut num_files_to_split = 0;
|
||||
let mut num_mem_rows = 0;
|
||||
let mut num_mem_series = 0;
|
||||
// Total rows and series for estimating batch size after splitting.
|
||||
let mut total_rows: u64 = 0;
|
||||
let mut total_series: u64 = 0;
|
||||
// Checks each file range, returns early if any range is not splittable.
|
||||
// For mem ranges, we collect the total number of rows and series because the number of rows in a
|
||||
// mem range may be too small.
|
||||
@@ -1300,23 +1265,49 @@ pub(crate) fn should_split_flat_batches_for_merge(
|
||||
debug_assert!(file.meta_ref().num_rows > 0);
|
||||
if !can_split_series(file.meta_ref().num_rows, file.meta_ref().num_series) {
|
||||
// We can't split batches in a file.
|
||||
return false;
|
||||
return None;
|
||||
} else {
|
||||
num_files_to_split += 1;
|
||||
total_rows += file.meta_ref().num_rows;
|
||||
total_series += file.meta_ref().num_series;
|
||||
}
|
||||
}
|
||||
// Skips non-file and non-mem ranges.
|
||||
}
|
||||
|
||||
if num_files_to_split > 0 {
|
||||
let should_split = if num_files_to_split > 0 {
|
||||
// We mainly consider file ranges because they have enough data for sampling.
|
||||
true
|
||||
} else if num_mem_series > 0 && num_mem_rows > 0 {
|
||||
// If we don't have files to scan, we check whether to split by the memtable.
|
||||
can_split_series(num_mem_rows as u64, num_mem_series as u64)
|
||||
} else if num_mem_series > 0
|
||||
&& num_mem_rows > 0
|
||||
&& can_split_series(num_mem_rows as u64, num_mem_series as u64)
|
||||
{
|
||||
total_rows += num_mem_rows as u64;
|
||||
total_series += num_mem_series as u64;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if !should_split {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Estimate rows per batch after splitting.
|
||||
let estimated_batch_size = if total_series > 0 && total_rows > 0 {
|
||||
((total_rows / total_series) as usize).clamp(1, DEFAULT_READ_BATCH_SIZE)
|
||||
} else {
|
||||
// No valid estimate available, use a conservative fallback.
|
||||
DEFAULT_READ_BATCH_SIZE / 4
|
||||
};
|
||||
Some(estimated_batch_size)
|
||||
}
|
||||
|
||||
/// Computes the channel size for parallel scan based on the estimated rows per batch.
|
||||
/// The channel should buffer approximately `2 * DEFAULT_READ_BATCH_SIZE` rows.
|
||||
pub(crate) fn compute_parallel_channel_size(estimated_rows_per_batch: usize) -> usize {
|
||||
let size = 2 * DEFAULT_READ_BATCH_SIZE / estimated_rows_per_batch.max(1);
|
||||
size.clamp(2, 64)
|
||||
}
|
||||
|
||||
fn can_split_series(num_rows: u64, num_series: u64) -> bool {
|
||||
@@ -1342,59 +1333,6 @@ fn new_filter_metrics(explain_verbose: bool) -> ReaderFilterMetrics {
|
||||
}
|
||||
}
|
||||
|
||||
/// Scans file ranges at `index`.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
region_id = %stream_ctx.input.region_metadata().region_id,
|
||||
row_group_index = %index.index,
|
||||
source = read_type
|
||||
)
|
||||
)]
|
||||
pub(crate) async fn scan_file_ranges(
|
||||
stream_ctx: Arc<StreamContext>,
|
||||
part_metrics: PartitionMetrics,
|
||||
index: RowGroupIndex,
|
||||
read_type: &'static str,
|
||||
partition_pruner: Arc<PartitionPruner>,
|
||||
) -> Result<impl Stream<Item = Result<Batch>>> {
|
||||
let mut reader_metrics = ReaderMetrics {
|
||||
filter_metrics: new_filter_metrics(part_metrics.explain_verbose()),
|
||||
..Default::default()
|
||||
};
|
||||
let ranges = partition_pruner
|
||||
.build_file_ranges(index, &part_metrics, &mut reader_metrics)
|
||||
.await?;
|
||||
part_metrics.inc_num_file_ranges(ranges.len());
|
||||
part_metrics.merge_reader_metrics(&reader_metrics, None);
|
||||
|
||||
// Creates initial per-file metrics with build_part_cost.
|
||||
let init_per_file_metrics = if part_metrics.explain_verbose() {
|
||||
let file = stream_ctx.input.file_from_index(index);
|
||||
let file_id = file.file_id();
|
||||
|
||||
let mut map = HashMap::new();
|
||||
map.insert(
|
||||
file_id,
|
||||
FileScanMetrics {
|
||||
build_part_cost: reader_metrics.build_cost,
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
Some(map)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(build_file_range_scan_stream(
|
||||
stream_ctx,
|
||||
part_metrics,
|
||||
read_type,
|
||||
ranges,
|
||||
init_per_file_metrics,
|
||||
))
|
||||
}
|
||||
|
||||
/// Scans file ranges at `index` using flat reader that returns RecordBatch.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
@@ -1448,70 +1386,6 @@ pub(crate) async fn scan_flat_file_ranges(
|
||||
))
|
||||
}
|
||||
|
||||
/// Build the stream of scanning the input [`FileRange`]s.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(read_type = read_type, range_count = ranges.len())
|
||||
)]
|
||||
pub fn build_file_range_scan_stream(
|
||||
stream_ctx: Arc<StreamContext>,
|
||||
part_metrics: PartitionMetrics,
|
||||
read_type: &'static str,
|
||||
ranges: SmallVec<[FileRange; 2]>,
|
||||
mut per_file_metrics: Option<HashMap<RegionFileId, FileScanMetrics>>,
|
||||
) -> impl Stream<Item = Result<Batch>> {
|
||||
try_stream! {
|
||||
let fetch_metrics = if part_metrics.explain_verbose() {
|
||||
Some(Arc::new(ParquetFetchMetrics::default()))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let reader_metrics = &mut ReaderMetrics {
|
||||
fetch_metrics: fetch_metrics.clone(),
|
||||
..Default::default()
|
||||
};
|
||||
for range in ranges {
|
||||
let build_reader_start = Instant::now();
|
||||
let Some(reader) = range.reader(stream_ctx.input.series_row_selector, fetch_metrics.as_deref()).await? else {
|
||||
continue;
|
||||
};
|
||||
let build_cost = build_reader_start.elapsed();
|
||||
part_metrics.inc_build_reader_cost(build_cost);
|
||||
let compat_batch = range.compat_batch();
|
||||
let mut source = Source::PruneReader(reader);
|
||||
while let Some(mut batch) = source.next_batch().await? {
|
||||
if let Some(compact_batch) = compat_batch {
|
||||
batch = compact_batch.as_primary_key().unwrap().compat_batch(batch)?;
|
||||
}
|
||||
yield batch;
|
||||
}
|
||||
if let Source::PruneReader(reader) = source {
|
||||
let prune_metrics = reader.metrics();
|
||||
|
||||
// Update per-file metrics if tracking is enabled
|
||||
if let Some(file_metrics_map) = per_file_metrics.as_mut() {
|
||||
let file_id = range.file_handle().file_id();
|
||||
let file_metrics = file_metrics_map
|
||||
.entry(file_id)
|
||||
.or_insert_with(FileScanMetrics::default);
|
||||
|
||||
file_metrics.num_ranges += 1;
|
||||
file_metrics.num_rows += prune_metrics.num_rows;
|
||||
file_metrics.build_reader_cost += build_cost;
|
||||
file_metrics.scan_cost += prune_metrics.scan_cost;
|
||||
}
|
||||
|
||||
reader_metrics.merge_from(&prune_metrics);
|
||||
}
|
||||
}
|
||||
|
||||
// Reports metrics.
|
||||
reader_metrics.observe_rows(read_type);
|
||||
reader_metrics.filter_metrics.observe();
|
||||
part_metrics.merge_reader_metrics(reader_metrics, per_file_metrics.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the stream of scanning the input [`FileRange`]s using flat reader that returns RecordBatch.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
@@ -1591,47 +1465,6 @@ pub fn build_flat_file_range_scan_stream(
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the stream of scanning the extension range denoted by the [`RowGroupIndex`].
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub(crate) async fn scan_extension_range(
|
||||
context: Arc<StreamContext>,
|
||||
index: RowGroupIndex,
|
||||
partition_metrics: PartitionMetrics,
|
||||
) -> Result<BoxedBatchStream> {
|
||||
use snafu::ResultExt;
|
||||
|
||||
let range = context.input.extension_range(index.index);
|
||||
let reader = range.reader(context.as_ref());
|
||||
let stream = reader
|
||||
.read(context, partition_metrics, index)
|
||||
.await
|
||||
.context(crate::error::ScanExternalRangeSnafu)?;
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
pub(crate) async fn maybe_scan_other_ranges(
|
||||
context: &Arc<StreamContext>,
|
||||
index: RowGroupIndex,
|
||||
metrics: &PartitionMetrics,
|
||||
) -> Result<BoxedBatchStream> {
|
||||
#[cfg(feature = "enterprise")]
|
||||
{
|
||||
scan_extension_range(context.clone(), index, metrics.clone()).await
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
{
|
||||
let _ = context;
|
||||
let _ = index;
|
||||
let _ = metrics;
|
||||
|
||||
crate::error::UnexpectedSnafu {
|
||||
reason: "no other ranges scannable",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the stream of scanning the extension range in flat format denoted by the [`RowGroupIndex`].
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub(crate) async fn scan_flat_extension_range(
|
||||
@@ -1752,3 +1585,235 @@ pub(crate) fn split_record_batch(record_batch: RecordBatch, batches: &mut VecDeq
|
||||
batches.push_back(record_batch.slice(start, rows_in_batch));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use common_time::Timestamp;
|
||||
use smallvec::{SmallVec, smallvec};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::*;
|
||||
use crate::cache::CacheStrategy;
|
||||
use crate::memtable::{
|
||||
BoxedBatchIterator, BoxedRecordBatchIterator, IterBuilder, MemtableRange,
|
||||
MemtableRangeContext, MemtableStats,
|
||||
};
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::range::{MemRangeBuilder, SourceIndex};
|
||||
use crate::read::scan_region::ScanInput;
|
||||
use crate::sst::file::{FileHandle, FileMeta};
|
||||
use crate::sst::file_purger::NoopFilePurger;
|
||||
use crate::test_util::memtable_util::metadata_for_test;
|
||||
use crate::test_util::scheduler_util::SchedulerEnv;
|
||||
|
||||
struct EmptyIterBuilder;
|
||||
|
||||
impl IterBuilder for EmptyIterBuilder {
|
||||
fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
|
||||
Ok(Box::new(std::iter::empty()))
|
||||
}
|
||||
|
||||
fn is_record_batch(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn build_record_batch(
|
||||
&self,
|
||||
_time_range: Option<(Timestamp, Timestamp)>,
|
||||
_metrics: Option<MemScanMetrics>,
|
||||
) -> Result<BoxedRecordBatchIterator> {
|
||||
Ok(Box::new(std::iter::empty()))
|
||||
}
|
||||
}
|
||||
|
||||
async fn new_test_stream_ctx(
|
||||
files: Vec<FileHandle>,
|
||||
memtables: Vec<MemRangeBuilder>,
|
||||
) -> Arc<StreamContext> {
|
||||
let env = SchedulerEnv::new().await;
|
||||
let metadata = metadata_for_test();
|
||||
let mapper = ProjectionMapper::new(&metadata, [0, 2, 3].into_iter()).unwrap();
|
||||
let input = ScanInput::new(env.access_layer.clone(), mapper)
|
||||
.with_cache(CacheStrategy::Disabled)
|
||||
.with_memtables(memtables)
|
||||
.with_files(files);
|
||||
|
||||
Arc::new(StreamContext {
|
||||
input,
|
||||
ranges: Vec::new(),
|
||||
scan_fingerprint: None,
|
||||
query_start: Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
fn new_test_file(num_rows: u64, num_series: u64) -> FileHandle {
|
||||
let meta = FileMeta {
|
||||
region_id: RegionId::new(123, 456),
|
||||
file_id: Default::default(),
|
||||
time_range: (
|
||||
Timestamp::new_millisecond(0),
|
||||
Timestamp::new_millisecond(1000),
|
||||
),
|
||||
num_rows,
|
||||
num_series,
|
||||
..Default::default()
|
||||
};
|
||||
FileHandle::new(meta, Arc::new(NoopFilePurger))
|
||||
}
|
||||
|
||||
fn new_test_memtable(num_rows: usize, series_count: usize) -> MemRangeBuilder {
|
||||
let context = Arc::new(MemtableRangeContext::new(
|
||||
0,
|
||||
Box::new(EmptyIterBuilder),
|
||||
Default::default(),
|
||||
));
|
||||
let stats = MemtableStats {
|
||||
time_range: Some((
|
||||
Timestamp::new_millisecond(0),
|
||||
Timestamp::new_millisecond(1000),
|
||||
)),
|
||||
num_rows,
|
||||
num_ranges: 1,
|
||||
series_count,
|
||||
..Default::default()
|
||||
};
|
||||
let range = MemtableRange::new(context, stats.clone());
|
||||
MemRangeBuilder::new(range, stats)
|
||||
}
|
||||
|
||||
fn new_test_range_meta(row_group_indices: SmallVec<[RowGroupIndex; 2]>) -> RangeMeta {
|
||||
let indices = row_group_indices
|
||||
.iter()
|
||||
.map(|row_group_index| SourceIndex {
|
||||
index: row_group_index.index,
|
||||
num_row_groups: 1,
|
||||
})
|
||||
.collect();
|
||||
|
||||
RangeMeta {
|
||||
time_range: (
|
||||
Timestamp::new_millisecond(0),
|
||||
Timestamp::new_millisecond(1000),
|
||||
),
|
||||
indices,
|
||||
row_group_indices,
|
||||
num_rows: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_should_split_flat_batches_for_merge_uses_splittable_file_rows_per_series() {
|
||||
let num_rows = SPLIT_ROW_THRESHOLD * 2;
|
||||
let num_series = (num_rows / 100).max(1);
|
||||
let stream_ctx =
|
||||
new_test_stream_ctx(vec![new_test_file(num_rows, num_series)], vec![]).await;
|
||||
let range_meta = new_test_range_meta(smallvec![RowGroupIndex {
|
||||
index: 0,
|
||||
row_group_index: 0,
|
||||
}]);
|
||||
|
||||
assert_eq!(
|
||||
Some((num_rows / num_series) as usize),
|
||||
should_split_flat_batches_for_merge(&stream_ctx, &range_meta)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_should_split_flat_batches_for_merge_skips_small_or_unknown_series_files() {
|
||||
let stream_ctx = new_test_stream_ctx(
|
||||
vec![
|
||||
new_test_file(SPLIT_ROW_THRESHOLD.saturating_sub(1), 1),
|
||||
new_test_file(SPLIT_ROW_THRESHOLD * 2, 0),
|
||||
],
|
||||
vec![],
|
||||
)
|
||||
.await;
|
||||
let range_meta = new_test_range_meta(smallvec![
|
||||
RowGroupIndex {
|
||||
index: 0,
|
||||
row_group_index: 0,
|
||||
},
|
||||
RowGroupIndex {
|
||||
index: 1,
|
||||
row_group_index: 0,
|
||||
}
|
||||
]);
|
||||
|
||||
assert_eq!(
|
||||
None,
|
||||
should_split_flat_batches_for_merge(&stream_ctx, &range_meta)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_should_split_flat_batches_for_merge_returns_none_for_unsplittable_file() {
|
||||
let num_series =
|
||||
(SPLIT_ROW_THRESHOLD / (BATCH_SIZE_THRESHOLD - 1)).max(NUM_SERIES_THRESHOLD) + 1;
|
||||
let stream_ctx =
|
||||
new_test_stream_ctx(vec![new_test_file(SPLIT_ROW_THRESHOLD, num_series)], vec![]).await;
|
||||
let range_meta = new_test_range_meta(smallvec![RowGroupIndex {
|
||||
index: 0,
|
||||
row_group_index: 0,
|
||||
}]);
|
||||
|
||||
assert_eq!(
|
||||
None,
|
||||
should_split_flat_batches_for_merge(&stream_ctx, &range_meta)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_should_split_flat_batches_for_merge_falls_back_to_memtables() {
|
||||
let stream_ctx = new_test_stream_ctx(vec![], vec![new_test_memtable(5_000, 100)]).await;
|
||||
let range_meta = new_test_range_meta(smallvec![RowGroupIndex {
|
||||
index: 0,
|
||||
row_group_index: 0,
|
||||
}]);
|
||||
|
||||
assert_eq!(
|
||||
Some(50),
|
||||
should_split_flat_batches_for_merge(&stream_ctx, &range_meta)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_should_split_flat_batches_for_merge_clamps_estimate() {
|
||||
let stream_ctx =
|
||||
new_test_stream_ctx(vec![new_test_file(SPLIT_ROW_THRESHOLD * 2, 1)], vec![]).await;
|
||||
let range_meta = new_test_range_meta(smallvec![RowGroupIndex {
|
||||
index: 0,
|
||||
row_group_index: 0,
|
||||
}]);
|
||||
|
||||
assert_eq!(
|
||||
Some(DEFAULT_READ_BATCH_SIZE),
|
||||
should_split_flat_batches_for_merge(&stream_ctx, &range_meta)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_parallel_channel_size_clamps_to_max_for_small_batches() {
|
||||
assert_eq!(64, compute_parallel_channel_size(0));
|
||||
assert_eq!(64, compute_parallel_channel_size(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_parallel_channel_size_returns_expected_mid_range_size() {
|
||||
assert_eq!(
|
||||
4,
|
||||
compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE / 2)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compute_parallel_channel_size_clamps_to_min_for_large_batches() {
|
||||
assert_eq!(2, compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE));
|
||||
assert_eq!(
|
||||
2,
|
||||
compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE * 2)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use snafu::{OptionExt, ensure};
|
||||
use snafu::ensure;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{
|
||||
PartitionRange, PrepareRequest, QueryScanContext, RegionScanner, ScannerProperties,
|
||||
@@ -35,24 +35,19 @@ use store_api::region_engine::{
|
||||
use store_api::storage::TimeSeriesRowSelector;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use crate::error::{PartitionOutOfRangeSnafu, Result, TooManyFilesToReadSnafu, UnexpectedSnafu};
|
||||
use crate::read::dedup::{DedupReader, LastNonNull, LastRow};
|
||||
use crate::error::{PartitionOutOfRangeSnafu, Result, TooManyFilesToReadSnafu};
|
||||
use crate::read::flat_dedup::{FlatDedupReader, FlatLastNonNull, FlatLastRow};
|
||||
use crate::read::flat_merge::FlatMergeReader;
|
||||
use crate::read::last_row::{FlatLastRowReader, LastRowReader};
|
||||
use crate::read::merge::MergeReaderBuilder;
|
||||
use crate::read::last_row::FlatLastRowReader;
|
||||
use crate::read::pruner::{PartitionPruner, Pruner};
|
||||
use crate::read::range::RangeMeta;
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{
|
||||
PartitionMetrics, PartitionMetricsList, SplitRecordBatchStream, scan_file_ranges,
|
||||
scan_flat_file_ranges, scan_flat_mem_ranges, scan_mem_ranges,
|
||||
should_split_flat_batches_for_merge,
|
||||
PartitionMetrics, PartitionMetricsList, SplitRecordBatchStream, compute_parallel_channel_size,
|
||||
scan_flat_file_ranges, scan_flat_mem_ranges, should_split_flat_batches_for_merge,
|
||||
};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{
|
||||
Batch, BatchReader, BoxedBatchReader, BoxedRecordBatchStream, ScannerMetrics, Source, scan_util,
|
||||
};
|
||||
use crate::read::{BoxedRecordBatchStream, ScannerMetrics, scan_util};
|
||||
use crate::region::options::MergeMode;
|
||||
use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
|
||||
|
||||
@@ -121,7 +116,7 @@ impl SeqScan {
|
||||
let streams = (0..self.properties.partitions.len())
|
||||
.map(|partition| {
|
||||
let metrics = self.new_partition_metrics(false, &metrics_set, partition);
|
||||
self.scan_batch_in_partition(partition, metrics)
|
||||
self.scan_flat_batch_in_partition(partition, metrics)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
@@ -181,58 +176,14 @@ impl SeqScan {
|
||||
partition_ranges.len(),
|
||||
sources.len()
|
||||
);
|
||||
Self::build_flat_reader_from_sources(stream_ctx, sources, None, None).await
|
||||
}
|
||||
|
||||
/// Builds a reader to read sources. If `semaphore` is provided, reads sources in parallel
|
||||
/// if possible.
|
||||
#[tracing::instrument(level = tracing::Level::DEBUG, skip_all)]
|
||||
pub(crate) async fn build_reader_from_sources(
|
||||
stream_ctx: &StreamContext,
|
||||
mut sources: Vec<Source>,
|
||||
semaphore: Option<Arc<Semaphore>>,
|
||||
part_metrics: Option<&PartitionMetrics>,
|
||||
) -> Result<BoxedBatchReader> {
|
||||
if let Some(semaphore) = semaphore.as_ref() {
|
||||
// Read sources in parallel.
|
||||
if sources.len() > 1 {
|
||||
sources = stream_ctx
|
||||
.input
|
||||
.create_parallel_sources(sources, semaphore.clone())?;
|
||||
}
|
||||
}
|
||||
|
||||
let mut builder = MergeReaderBuilder::from_sources(sources);
|
||||
if let Some(metrics) = part_metrics {
|
||||
builder.with_metrics_reporter(Some(metrics.merge_metrics_reporter()));
|
||||
}
|
||||
let reader = builder.build().await?;
|
||||
|
||||
let dedup = !stream_ctx.input.append_mode;
|
||||
let dedup_metrics_reporter = part_metrics.map(|m| m.dedup_metrics_reporter());
|
||||
let reader = if dedup {
|
||||
match stream_ctx.input.merge_mode {
|
||||
MergeMode::LastRow => Box::new(DedupReader::new(
|
||||
reader,
|
||||
LastRow::new(stream_ctx.input.filter_deleted),
|
||||
dedup_metrics_reporter,
|
||||
)) as _,
|
||||
MergeMode::LastNonNull => Box::new(DedupReader::new(
|
||||
reader,
|
||||
LastNonNull::new(stream_ctx.input.filter_deleted),
|
||||
dedup_metrics_reporter,
|
||||
)) as _,
|
||||
}
|
||||
} else {
|
||||
Box::new(reader) as _
|
||||
};
|
||||
|
||||
let reader = match &stream_ctx.input.series_row_selector {
|
||||
Some(TimeSeriesRowSelector::LastRow) => Box::new(LastRowReader::new(reader)) as _,
|
||||
None => reader,
|
||||
};
|
||||
|
||||
Ok(reader)
|
||||
Self::build_flat_reader_from_sources(
|
||||
stream_ctx,
|
||||
sources,
|
||||
None,
|
||||
None,
|
||||
compute_parallel_channel_size(DEFAULT_READ_BATCH_SIZE),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Builds a flat reader to read sources that returns RecordBatch. If `semaphore` is provided, reads sources in parallel
|
||||
@@ -243,13 +194,16 @@ impl SeqScan {
|
||||
mut sources: Vec<BoxedRecordBatchStream>,
|
||||
semaphore: Option<Arc<Semaphore>>,
|
||||
part_metrics: Option<&PartitionMetrics>,
|
||||
channel_size: usize,
|
||||
) -> Result<BoxedRecordBatchStream> {
|
||||
if let Some(semaphore) = semaphore.as_ref() {
|
||||
// Read sources in parallel.
|
||||
if sources.len() > 1 {
|
||||
sources = stream_ctx
|
||||
.input
|
||||
.create_parallel_flat_sources(sources, semaphore.clone())?;
|
||||
sources = stream_ctx.input.create_parallel_flat_sources(
|
||||
sources,
|
||||
semaphore.clone(),
|
||||
channel_size,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -318,13 +272,7 @@ impl SeqScan {
|
||||
let metrics = self.new_partition_metrics(ctx.explain_verbose, metrics_set, partition);
|
||||
let input = &self.stream_ctx.input;
|
||||
|
||||
let batch_stream = if input.flat_format {
|
||||
// Use flat scan for bulk memtables
|
||||
self.scan_flat_batch_in_partition(partition, metrics.clone())?
|
||||
} else {
|
||||
// Use regular batch scan for normal memtables
|
||||
self.scan_batch_in_partition(partition, metrics.clone())?
|
||||
};
|
||||
let batch_stream = self.scan_flat_batch_in_partition(partition, metrics.clone())?;
|
||||
let record_batch_stream = ConvertBatchStream::new(
|
||||
batch_stream,
|
||||
input.mapper.clone(),
|
||||
@@ -338,125 +286,6 @@ impl SeqScan {
|
||||
)))
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
region_id = %self.stream_ctx.input.mapper.metadata().region_id,
|
||||
partition = partition
|
||||
)
|
||||
)]
|
||||
fn scan_batch_in_partition(
|
||||
&self,
|
||||
partition: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
) -> Result<ScanBatchStream> {
|
||||
ensure!(
|
||||
partition < self.properties.partitions.len(),
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
);
|
||||
|
||||
if self.properties.partitions[partition].is_empty() {
|
||||
return Ok(Box::pin(futures::stream::empty()));
|
||||
}
|
||||
|
||||
let stream_ctx = self.stream_ctx.clone();
|
||||
let semaphore = self.new_semaphore();
|
||||
let partition_ranges = self.properties.partitions[partition].clone();
|
||||
let compaction = self.stream_ctx.input.compaction;
|
||||
let distinguish_range = self.properties.distinguish_partition_range;
|
||||
let file_scan_semaphore = if compaction { None } else { semaphore.clone() };
|
||||
let pruner = self.pruner.clone();
|
||||
// Initializes ref counts for the pruner.
|
||||
// If we call scan_batch_in_partition() multiple times but don't read all batches from the stream,
|
||||
// then the ref count won't be decremented.
|
||||
// This is a rare case and keeping all remaining entries still uses less memory than a per partition cache.
|
||||
pruner.add_partition_ranges(&partition_ranges);
|
||||
let partition_pruner = Arc::new(PartitionPruner::new(pruner, &partition_ranges));
|
||||
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
// Start fetch time before building sources so scan cost contains
|
||||
// build part cost.
|
||||
let mut fetch_start = Instant::now();
|
||||
|
||||
let _mapper = stream_ctx.input.mapper.as_primary_key().context(UnexpectedSnafu {
|
||||
reason: "Unexpected format",
|
||||
})?;
|
||||
// Scans each part.
|
||||
for part_range in partition_ranges {
|
||||
let mut sources = Vec::new();
|
||||
build_sources(
|
||||
&stream_ctx,
|
||||
&part_range,
|
||||
compaction,
|
||||
&part_metrics,
|
||||
partition_pruner.clone(),
|
||||
&mut sources,
|
||||
file_scan_semaphore.clone(),
|
||||
).await?;
|
||||
|
||||
let mut reader =
|
||||
Self::build_reader_from_sources(&stream_ctx, sources, semaphore.clone(), Some(&part_metrics))
|
||||
.await?;
|
||||
#[cfg(debug_assertions)]
|
||||
let mut checker = crate::read::BatchChecker::default()
|
||||
.with_start(Some(part_range.start))
|
||||
.with_end(Some(part_range.end));
|
||||
|
||||
let mut metrics = ScannerMetrics {
|
||||
scan_cost: fetch_start.elapsed(),
|
||||
..Default::default()
|
||||
};
|
||||
fetch_start = Instant::now();
|
||||
|
||||
while let Some(batch) = reader.next_batch().await? {
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
|
||||
debug_assert!(!batch.is_empty());
|
||||
if batch.is_empty() {
|
||||
fetch_start = Instant::now();
|
||||
continue;
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
checker.ensure_part_range_batch(
|
||||
"SeqScan",
|
||||
_mapper.metadata().region_id,
|
||||
partition,
|
||||
part_range,
|
||||
&batch,
|
||||
);
|
||||
|
||||
let yield_start = Instant::now();
|
||||
yield ScanBatch::Normal(batch);
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
|
||||
fetch_start = Instant::now();
|
||||
}
|
||||
|
||||
// Yields an empty part to indicate this range is terminated.
|
||||
// The query engine can use this to optimize some queries.
|
||||
if distinguish_range {
|
||||
let yield_start = Instant::now();
|
||||
yield ScanBatch::Normal(Batch::empty());
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
fetch_start = Instant::now();
|
||||
part_metrics.merge_metrics(&metrics);
|
||||
}
|
||||
|
||||
part_metrics.on_finish();
|
||||
};
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
@@ -503,7 +332,7 @@ impl SeqScan {
|
||||
// Scans each part.
|
||||
for part_range in partition_ranges {
|
||||
let mut sources = Vec::new();
|
||||
build_flat_sources(
|
||||
let split_batch_size = build_flat_sources(
|
||||
&stream_ctx,
|
||||
&part_range,
|
||||
compaction,
|
||||
@@ -513,8 +342,11 @@ impl SeqScan {
|
||||
file_scan_semaphore.clone(),
|
||||
).await?;
|
||||
|
||||
let channel_size = compute_parallel_channel_size(
|
||||
split_batch_size.unwrap_or(DEFAULT_READ_BATCH_SIZE),
|
||||
);
|
||||
let mut reader =
|
||||
Self::build_flat_reader_from_sources(&stream_ctx, sources, semaphore.clone(), Some(&part_metrics))
|
||||
Self::build_flat_reader_from_sources(&stream_ctx, sources, semaphore.clone(), Some(&part_metrics), channel_size)
|
||||
.await?;
|
||||
|
||||
let mut metrics = ScannerMetrics {
|
||||
@@ -709,109 +541,8 @@ impl fmt::Debug for SeqScan {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds sources for the partition range and push them to the `sources` vector.
|
||||
pub(crate) async fn build_sources(
|
||||
stream_ctx: &Arc<StreamContext>,
|
||||
part_range: &PartitionRange,
|
||||
compaction: bool,
|
||||
part_metrics: &PartitionMetrics,
|
||||
partition_pruner: Arc<PartitionPruner>,
|
||||
sources: &mut Vec<Source>,
|
||||
semaphore: Option<Arc<Semaphore>>,
|
||||
) -> Result<()> {
|
||||
// Gets range meta.
|
||||
let range_meta = &stream_ctx.ranges[part_range.identifier];
|
||||
#[cfg(debug_assertions)]
|
||||
if compaction {
|
||||
// Compaction expects input sources are not been split.
|
||||
debug_assert_eq!(range_meta.indices.len(), range_meta.row_group_indices.len());
|
||||
for (i, row_group_idx) in range_meta.row_group_indices.iter().enumerate() {
|
||||
// It should scan all row groups.
|
||||
debug_assert_eq!(
|
||||
-1, row_group_idx.row_group_index,
|
||||
"Expect {} range scan all row groups, given: {}",
|
||||
i, row_group_idx.row_group_index,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let read_type = if compaction {
|
||||
"compaction"
|
||||
} else {
|
||||
"seq_scan_files"
|
||||
};
|
||||
let num_indices = range_meta.row_group_indices.len();
|
||||
if num_indices == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
sources.reserve(num_indices);
|
||||
let mut ordered_sources = Vec::with_capacity(num_indices);
|
||||
ordered_sources.resize_with(num_indices, || None);
|
||||
let mut file_scan_tasks = Vec::new();
|
||||
|
||||
for (position, index) in range_meta.row_group_indices.iter().enumerate() {
|
||||
if stream_ctx.is_mem_range_index(*index) {
|
||||
let stream = scan_mem_ranges(
|
||||
stream_ctx.clone(),
|
||||
part_metrics.clone(),
|
||||
*index,
|
||||
range_meta.time_range,
|
||||
);
|
||||
ordered_sources[position] = Some(Source::Stream(Box::pin(stream) as _));
|
||||
} else if stream_ctx.is_file_range_index(*index) {
|
||||
if let Some(semaphore_ref) = semaphore.as_ref() {
|
||||
// run in parallel, controlled by semaphore
|
||||
let stream_ctx = stream_ctx.clone();
|
||||
let part_metrics = part_metrics.clone();
|
||||
let partition_pruner = partition_pruner.clone();
|
||||
let semaphore = Arc::clone(semaphore_ref);
|
||||
let row_group_index = *index;
|
||||
file_scan_tasks.push(async move {
|
||||
let _permit = semaphore.acquire().await.unwrap();
|
||||
let stream = scan_file_ranges(
|
||||
stream_ctx,
|
||||
part_metrics,
|
||||
row_group_index,
|
||||
read_type,
|
||||
partition_pruner,
|
||||
)
|
||||
.await?;
|
||||
Ok((position, Source::Stream(Box::pin(stream) as _)))
|
||||
});
|
||||
} else {
|
||||
// no semaphore, run sequentially
|
||||
let stream = scan_file_ranges(
|
||||
stream_ctx.clone(),
|
||||
part_metrics.clone(),
|
||||
*index,
|
||||
read_type,
|
||||
partition_pruner.clone(),
|
||||
)
|
||||
.await?;
|
||||
ordered_sources[position] = Some(Source::Stream(Box::pin(stream) as _));
|
||||
}
|
||||
} else {
|
||||
let stream =
|
||||
scan_util::maybe_scan_other_ranges(stream_ctx, *index, part_metrics).await?;
|
||||
ordered_sources[position] = Some(Source::Stream(stream));
|
||||
}
|
||||
}
|
||||
|
||||
if !file_scan_tasks.is_empty() {
|
||||
let results = futures::future::try_join_all(file_scan_tasks).await?;
|
||||
for (position, source) in results {
|
||||
ordered_sources[position] = Some(source);
|
||||
}
|
||||
}
|
||||
|
||||
for source in ordered_sources.into_iter().flatten() {
|
||||
sources.push(source);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Builds flat sources for the partition range and push them to the `sources` vector.
|
||||
/// Returns the estimated rows per batch after splitting if splitting is applied, or `None`.
|
||||
pub(crate) async fn build_flat_sources(
|
||||
stream_ctx: &Arc<StreamContext>,
|
||||
part_range: &PartitionRange,
|
||||
@@ -820,7 +551,7 @@ pub(crate) async fn build_flat_sources(
|
||||
partition_pruner: Arc<PartitionPruner>,
|
||||
sources: &mut Vec<BoxedRecordBatchStream>,
|
||||
semaphore: Option<Arc<Semaphore>>,
|
||||
) -> Result<()> {
|
||||
) -> Result<Option<usize>> {
|
||||
// Gets range meta.
|
||||
let range_meta = &stream_ctx.ranges[part_range.identifier];
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -844,10 +575,11 @@ pub(crate) async fn build_flat_sources(
|
||||
};
|
||||
let num_indices = range_meta.row_group_indices.len();
|
||||
if num_indices == 0 {
|
||||
return Ok(());
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let should_split = should_split_flat_batches_for_merge(stream_ctx, range_meta);
|
||||
let split_batch_size = should_split_flat_batches_for_merge(stream_ctx, range_meta);
|
||||
let should_split = split_batch_size.is_some();
|
||||
sources.reserve(num_indices);
|
||||
let mut ordered_sources = Vec::with_capacity(num_indices);
|
||||
ordered_sources.resize_with(num_indices, || None);
|
||||
@@ -925,7 +657,7 @@ pub(crate) async fn build_flat_sources(
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(split_batch_size)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -30,7 +30,7 @@ use datatypes::arrow::array::BinaryArray;
|
||||
use datatypes::arrow::record_batch::RecordBatch;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use smallvec::{SmallVec, smallvec};
|
||||
use smallvec::SmallVec;
|
||||
use snafu::{OptionExt, ResultExt, ensure};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{
|
||||
@@ -44,12 +44,15 @@ use crate::error::{
|
||||
Error, InvalidSenderSnafu, PartitionOutOfRangeSnafu, Result, ScanMultiTimesSnafu,
|
||||
ScanSeriesSnafu, TooManyFilesToReadSnafu,
|
||||
};
|
||||
use crate::read::ScannerMetrics;
|
||||
use crate::read::pruner::{PartitionPruner, Pruner};
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{PartitionMetrics, PartitionMetricsList, SeriesDistributorMetrics};
|
||||
use crate::read::seq_scan::{SeqScan, build_flat_sources, build_sources};
|
||||
use crate::read::scan_util::{
|
||||
PartitionMetrics, PartitionMetricsList, SeriesDistributorMetrics, compute_parallel_channel_size,
|
||||
};
|
||||
use crate::read::seq_scan::{SeqScan, build_flat_sources};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, ScannerMetrics};
|
||||
use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
|
||||
use crate::sst::parquet::flat_format::primary_key_column_index;
|
||||
use crate::sst::parquet::format::PrimaryKeyArray;
|
||||
|
||||
@@ -443,11 +446,7 @@ impl SeriesDistributor {
|
||||
fields(region_id = %self.stream_ctx.input.mapper.metadata().region_id)
|
||||
)]
|
||||
async fn execute(&mut self) {
|
||||
let result = if self.stream_ctx.input.flat_format {
|
||||
self.scan_partitions_flat().await
|
||||
} else {
|
||||
self.scan_partitions().await
|
||||
};
|
||||
let result = self.scan_partitions_flat().await;
|
||||
|
||||
if let Err(e) = result {
|
||||
self.senders.send_error(e).await;
|
||||
@@ -486,10 +485,11 @@ impl SeriesDistributor {
|
||||
|
||||
// Scans all parts.
|
||||
let mut sources = Vec::with_capacity(self.partitions.len());
|
||||
let mut min_batch_size: Option<usize> = None;
|
||||
for partition in &self.partitions {
|
||||
sources.reserve(partition.len());
|
||||
for part_range in partition {
|
||||
build_flat_sources(
|
||||
let split_batch_size = build_flat_sources(
|
||||
&self.stream_ctx,
|
||||
part_range,
|
||||
false,
|
||||
@@ -499,15 +499,21 @@ impl SeriesDistributor {
|
||||
self.semaphore.clone(),
|
||||
)
|
||||
.await?;
|
||||
if let Some(size) = split_batch_size {
|
||||
min_batch_size = Some(min_batch_size.map_or(size, |cur| cur.min(size)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a flat reader that merge sources from all parts.
|
||||
let channel_size =
|
||||
compute_parallel_channel_size(min_batch_size.unwrap_or(DEFAULT_READ_BATCH_SIZE));
|
||||
let mut reader = SeqScan::build_flat_reader_from_sources(
|
||||
&self.stream_ctx,
|
||||
sources,
|
||||
self.semaphore.clone(),
|
||||
Some(&part_metrics),
|
||||
channel_size,
|
||||
)
|
||||
.await?;
|
||||
let mut metrics = SeriesDistributorMetrics::default();
|
||||
@@ -559,151 +565,11 @@ impl SeriesDistributor {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Scans all parts.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(region_id = %self.stream_ctx.input.mapper.metadata().region_id)
|
||||
)]
|
||||
async fn scan_partitions(&mut self) -> Result<()> {
|
||||
// Initialize reference counts for all partition ranges.
|
||||
for partition_ranges in &self.partitions {
|
||||
self.pruner.add_partition_ranges(partition_ranges);
|
||||
}
|
||||
|
||||
// Create PartitionPruner covering all partitions
|
||||
let all_partition_ranges: Vec<_> = self.partitions.iter().flatten().cloned().collect();
|
||||
let partition_pruner = Arc::new(PartitionPruner::new(
|
||||
self.pruner.clone(),
|
||||
&all_partition_ranges,
|
||||
));
|
||||
|
||||
let part_metrics = new_partition_metrics(
|
||||
&self.stream_ctx,
|
||||
self.explain_verbose,
|
||||
&self.metrics_set,
|
||||
self.partitions.len(),
|
||||
&self.metrics_list,
|
||||
);
|
||||
part_metrics.on_first_poll();
|
||||
// Start fetch time before building sources so scan cost contains
|
||||
// build part cost.
|
||||
let mut fetch_start = Instant::now();
|
||||
|
||||
// Scans all parts.
|
||||
let mut sources = Vec::with_capacity(self.partitions.len());
|
||||
for partition in &self.partitions {
|
||||
sources.reserve(partition.len());
|
||||
for part_range in partition {
|
||||
build_sources(
|
||||
&self.stream_ctx,
|
||||
part_range,
|
||||
false,
|
||||
&part_metrics,
|
||||
partition_pruner.clone(),
|
||||
&mut sources,
|
||||
self.semaphore.clone(),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a reader that merge sources from all parts.
|
||||
let mut reader = SeqScan::build_reader_from_sources(
|
||||
&self.stream_ctx,
|
||||
sources,
|
||||
self.semaphore.clone(),
|
||||
Some(&part_metrics),
|
||||
)
|
||||
.await?;
|
||||
let mut metrics = SeriesDistributorMetrics::default();
|
||||
|
||||
let mut current_series = PrimaryKeySeriesBatch::default();
|
||||
while let Some(batch) = reader.next_batch().await? {
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
|
||||
debug_assert!(!batch.is_empty());
|
||||
if batch.is_empty() {
|
||||
fetch_start = Instant::now();
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(last_key) = current_series.current_key() else {
|
||||
current_series.push(batch);
|
||||
fetch_start = Instant::now();
|
||||
continue;
|
||||
};
|
||||
|
||||
if last_key == batch.primary_key() {
|
||||
current_series.push(batch);
|
||||
fetch_start = Instant::now();
|
||||
continue;
|
||||
}
|
||||
|
||||
// We find a new series, send the current one.
|
||||
let to_send =
|
||||
std::mem::replace(&mut current_series, PrimaryKeySeriesBatch::single(batch));
|
||||
let yield_start = Instant::now();
|
||||
self.senders
|
||||
.send_batch(SeriesBatch::PrimaryKey(to_send))
|
||||
.await?;
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
fetch_start = Instant::now();
|
||||
}
|
||||
|
||||
if !current_series.is_empty() {
|
||||
let yield_start = Instant::now();
|
||||
self.senders
|
||||
.send_batch(SeriesBatch::PrimaryKey(current_series))
|
||||
.await?;
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_series_send_timeout = self.senders.num_timeout;
|
||||
metrics.num_series_send_full = self.senders.num_full;
|
||||
part_metrics.set_distributor_metrics(&metrics);
|
||||
|
||||
part_metrics.on_finish();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Batches of the same series in primary key format.
|
||||
#[derive(Default, Debug)]
|
||||
pub struct PrimaryKeySeriesBatch {
|
||||
pub batches: SmallVec<[Batch; 4]>,
|
||||
}
|
||||
|
||||
impl PrimaryKeySeriesBatch {
|
||||
/// Creates a new [PrimaryKeySeriesBatch] from a single [Batch].
|
||||
fn single(batch: Batch) -> Self {
|
||||
Self {
|
||||
batches: smallvec![batch],
|
||||
}
|
||||
}
|
||||
|
||||
fn current_key(&self) -> Option<&[u8]> {
|
||||
self.batches.first().map(|batch| batch.primary_key())
|
||||
}
|
||||
|
||||
fn push(&mut self, batch: Batch) {
|
||||
self.batches.push(batch);
|
||||
}
|
||||
|
||||
/// Returns true if there is no batch.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.batches.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Batches of the same series.
|
||||
#[derive(Debug)]
|
||||
pub enum SeriesBatch {
|
||||
PrimaryKey(PrimaryKeySeriesBatch),
|
||||
Flat(FlatSeriesBatch),
|
||||
}
|
||||
|
||||
@@ -711,7 +577,6 @@ impl SeriesBatch {
|
||||
/// Returns the number of batches.
|
||||
pub fn num_batches(&self) -> usize {
|
||||
match self {
|
||||
SeriesBatch::PrimaryKey(primary_key_batch) => primary_key_batch.batches.len(),
|
||||
SeriesBatch::Flat(flat_batch) => flat_batch.batches.len(),
|
||||
}
|
||||
}
|
||||
@@ -719,9 +584,6 @@ impl SeriesBatch {
|
||||
/// Returns the total number of rows across all batches.
|
||||
pub fn num_rows(&self) -> usize {
|
||||
match self {
|
||||
SeriesBatch::PrimaryKey(primary_key_batch) => {
|
||||
primary_key_batch.batches.iter().map(|x| x.num_rows()).sum()
|
||||
}
|
||||
SeriesBatch::Flat(flat_batch) => flat_batch.batches.iter().map(|x| x.num_rows()).sum(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,14 +27,12 @@ use snafu::ResultExt;
|
||||
|
||||
use crate::cache::CacheStrategy;
|
||||
use crate::error::Result;
|
||||
use crate::read::Batch;
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::scan_util::PartitionMetrics;
|
||||
use crate::read::series_scan::SeriesBatch;
|
||||
|
||||
/// All kinds of [`Batch`]es to produce in scanner.
|
||||
pub enum ScanBatch {
|
||||
Normal(Batch),
|
||||
Series(SeriesBatch),
|
||||
RecordBatch(DfRecordBatch),
|
||||
}
|
||||
@@ -45,6 +43,7 @@ pub type ScanBatchStream = BoxStream<'static, Result<ScanBatch>>;
|
||||
pub(crate) struct ConvertBatchStream {
|
||||
inner: ScanBatchStream,
|
||||
projection_mapper: Arc<ProjectionMapper>,
|
||||
#[allow(dead_code)]
|
||||
cache_strategy: CacheStrategy,
|
||||
partition_metrics: PartitionMetrics,
|
||||
pending: VecDeque<RecordBatch>,
|
||||
@@ -68,41 +67,19 @@ impl ConvertBatchStream {
|
||||
|
||||
fn convert(&mut self, batch: ScanBatch) -> common_recordbatch::error::Result<RecordBatch> {
|
||||
match batch {
|
||||
ScanBatch::Normal(batch) => {
|
||||
// Safety: Only primary key format returns this batch.
|
||||
let mapper = self.projection_mapper.as_primary_key().unwrap();
|
||||
|
||||
if batch.is_empty() {
|
||||
Ok(mapper.empty_record_batch())
|
||||
} else {
|
||||
mapper.convert(&batch, &self.cache_strategy)
|
||||
}
|
||||
}
|
||||
ScanBatch::Series(series) => {
|
||||
debug_assert!(
|
||||
self.pending.is_empty(),
|
||||
"ConvertBatchStream should not convert a new SeriesBatch when pending batches exist"
|
||||
);
|
||||
|
||||
match series {
|
||||
SeriesBatch::PrimaryKey(primary_key_batch) => {
|
||||
// Safety: Only primary key format returns this batch.
|
||||
let mapper = self.projection_mapper.as_primary_key().unwrap();
|
||||
let SeriesBatch::Flat(flat_batch) = series;
|
||||
// Safety: Only flat format returns this batch.
|
||||
let mapper = self.projection_mapper.as_flat().unwrap();
|
||||
|
||||
for batch in primary_key_batch.batches {
|
||||
self.pending
|
||||
.push_back(mapper.convert(&batch, &self.cache_strategy)?);
|
||||
}
|
||||
}
|
||||
SeriesBatch::Flat(flat_batch) => {
|
||||
// Safety: Only flat format returns this batch.
|
||||
let mapper = self.projection_mapper.as_flat().unwrap();
|
||||
|
||||
for batch in flat_batch.batches {
|
||||
self.pending
|
||||
.push_back(mapper.convert(&batch, &self.cache_strategy)?);
|
||||
}
|
||||
}
|
||||
for batch in flat_batch.batches {
|
||||
self.pending
|
||||
.push_back(mapper.convert(&batch, &self.cache_strategy)?);
|
||||
}
|
||||
|
||||
let output_schema = self.projection_mapper.output_schema();
|
||||
|
||||
@@ -37,11 +37,10 @@ use crate::error::{PartitionOutOfRangeSnafu, Result};
|
||||
use crate::read::pruner::{PartitionPruner, Pruner};
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{
|
||||
PartitionMetrics, PartitionMetricsList, scan_file_ranges, scan_flat_file_ranges,
|
||||
scan_flat_mem_ranges, scan_mem_ranges,
|
||||
PartitionMetrics, PartitionMetricsList, scan_flat_file_ranges, scan_flat_mem_ranges,
|
||||
};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, ScannerMetrics, scan_util};
|
||||
use crate::read::{ScannerMetrics, scan_util};
|
||||
|
||||
/// Scans a region without providing any output ordering guarantee.
|
||||
///
|
||||
@@ -103,59 +102,6 @@ impl UnorderedScan {
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
/// Scans a [PartitionRange] by its `identifier` and returns a stream.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
region_id = %stream_ctx.input.region_metadata().region_id,
|
||||
part_range_id = part_range_id
|
||||
)
|
||||
)]
|
||||
fn scan_partition_range(
|
||||
stream_ctx: Arc<StreamContext>,
|
||||
part_range_id: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
partition_pruner: Arc<PartitionPruner>,
|
||||
) -> impl Stream<Item = Result<Batch>> {
|
||||
try_stream! {
|
||||
// Gets range meta.
|
||||
let range_meta = &stream_ctx.ranges[part_range_id];
|
||||
for index in &range_meta.row_group_indices {
|
||||
if stream_ctx.is_mem_range_index(*index) {
|
||||
let stream = scan_mem_ranges(
|
||||
stream_ctx.clone(),
|
||||
part_metrics.clone(),
|
||||
*index,
|
||||
range_meta.time_range,
|
||||
);
|
||||
for await batch in stream {
|
||||
yield batch?;
|
||||
}
|
||||
} else if stream_ctx.is_file_range_index(*index) {
|
||||
let stream = scan_file_ranges(
|
||||
stream_ctx.clone(),
|
||||
part_metrics.clone(),
|
||||
*index,
|
||||
"unordered_scan_files",
|
||||
partition_pruner.clone(),
|
||||
).await?;
|
||||
for await batch in stream {
|
||||
yield batch?;
|
||||
}
|
||||
} else {
|
||||
let stream = scan_util::maybe_scan_other_ranges(
|
||||
&stream_ctx,
|
||||
*index,
|
||||
&part_metrics,
|
||||
).await?;
|
||||
for await batch in stream {
|
||||
yield batch?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scans a [PartitionRange] by its `identifier` and returns a flat stream of RecordBatch.
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
@@ -216,7 +162,7 @@ impl UnorderedScan {
|
||||
let streams = (0..self.properties.partitions.len())
|
||||
.map(|partition| {
|
||||
let metrics = self.partition_metrics(false, partition, &metrics_set);
|
||||
self.scan_batch_in_partition(partition, metrics)
|
||||
self.scan_flat_batch_in_partition(partition, metrics)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
@@ -265,13 +211,7 @@ impl UnorderedScan {
|
||||
let metrics = self.partition_metrics(ctx.explain_verbose, partition, metrics_set);
|
||||
let input = &self.stream_ctx.input;
|
||||
|
||||
let batch_stream = if input.flat_format {
|
||||
// Use flat scan for bulk memtables
|
||||
self.scan_flat_batch_in_partition(partition, metrics.clone())?
|
||||
} else {
|
||||
// Use regular batch scan for normal memtables
|
||||
self.scan_batch_in_partition(partition, metrics.clone())?
|
||||
};
|
||||
let batch_stream = self.scan_flat_batch_in_partition(partition, metrics.clone())?;
|
||||
|
||||
let record_batch_stream = ConvertBatchStream::new(
|
||||
batch_stream,
|
||||
@@ -286,100 +226,6 @@ impl UnorderedScan {
|
||||
)))
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
region_id = %self.stream_ctx.input.mapper.metadata().region_id,
|
||||
partition = partition
|
||||
)
|
||||
)]
|
||||
fn scan_batch_in_partition(
|
||||
&self,
|
||||
partition: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
) -> Result<ScanBatchStream> {
|
||||
ensure!(
|
||||
partition < self.properties.partitions.len(),
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
);
|
||||
|
||||
let stream_ctx = self.stream_ctx.clone();
|
||||
let part_ranges = self.properties.partitions[partition].clone();
|
||||
let distinguish_range = self.properties.distinguish_partition_range;
|
||||
let pruner = self.pruner.clone();
|
||||
// Initializes ref counts for the pruner.
|
||||
// If we call scan_batch_in_partition() multiple times but don't read all batches from the stream,
|
||||
// then the ref count won't be decremented.
|
||||
// This is a rare case and keeping all remaining entries still uses less memory than a per partition cache.
|
||||
pruner.add_partition_ranges(&part_ranges);
|
||||
let partition_pruner = Arc::new(PartitionPruner::new(pruner, &part_ranges));
|
||||
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
|
||||
// Scans each part.
|
||||
for part_range in part_ranges {
|
||||
let mut metrics = ScannerMetrics::default();
|
||||
let mut fetch_start = Instant::now();
|
||||
let _mapper = &stream_ctx.input.mapper;
|
||||
#[cfg(debug_assertions)]
|
||||
let mut checker = crate::read::BatchChecker::default()
|
||||
.with_start(Some(part_range.start))
|
||||
.with_end(Some(part_range.end));
|
||||
|
||||
let stream = Self::scan_partition_range(
|
||||
stream_ctx.clone(),
|
||||
part_range.identifier,
|
||||
part_metrics.clone(),
|
||||
partition_pruner.clone(),
|
||||
);
|
||||
for await batch in stream {
|
||||
let batch = batch?;
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
|
||||
debug_assert!(!batch.is_empty());
|
||||
if batch.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
checker.ensure_part_range_batch(
|
||||
"UnorderedScan",
|
||||
_mapper.metadata().region_id,
|
||||
partition,
|
||||
part_range,
|
||||
&batch,
|
||||
);
|
||||
|
||||
let yield_start = Instant::now();
|
||||
yield ScanBatch::Normal(batch);
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
|
||||
fetch_start = Instant::now();
|
||||
}
|
||||
|
||||
// Yields an empty part to indicate this range is terminated.
|
||||
// The query engine can use this to optimize some queries.
|
||||
if distinguish_range {
|
||||
let yield_start = Instant::now();
|
||||
yield ScanBatch::Normal(Batch::empty());
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
part_metrics.merge_metrics(&metrics);
|
||||
}
|
||||
|
||||
part_metrics.on_finish();
|
||||
};
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
skip_all,
|
||||
fields(
|
||||
|
||||
@@ -269,7 +269,7 @@ impl RegionOpener {
|
||||
// Sets the sst_format based on options or flat_format flag
|
||||
let sst_format = if let Some(format) = options.sst_format {
|
||||
format
|
||||
} else if config.default_experimental_flat_format {
|
||||
} else if config.default_flat_format {
|
||||
options.sst_format = Some(FormatType::Flat);
|
||||
FormatType::Flat
|
||||
} else {
|
||||
@@ -309,7 +309,7 @@ impl RegionOpener {
|
||||
|
||||
debug!(
|
||||
"Create region {} with options: {:?}, default_flat_format: {}",
|
||||
region_id, options, config.default_experimental_flat_format
|
||||
region_id, options, config.default_flat_format
|
||||
);
|
||||
|
||||
let version = VersionBuilder::new(metadata, mutable)
|
||||
@@ -626,8 +626,10 @@ pub(crate) fn sanitize_region_options(manifest: &RegionManifest, options: &mut R
|
||||
manifest.sst_format,
|
||||
manifest.metadata.region_id,
|
||||
);
|
||||
options.sst_format = Some(manifest.sst_format);
|
||||
}
|
||||
// Always set sst_format from manifest to ensure it's explicitly stored,
|
||||
// even when the default matches the manifest value.
|
||||
options.sst_format = Some(manifest.sst_format);
|
||||
if let Some(manifest_append_mode) = manifest.append_mode
|
||||
&& options.append_mode != manifest_append_mode
|
||||
{
|
||||
|
||||
@@ -895,7 +895,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.bloom_filter_index_appliers([bloom_filter_applier.clone(), None])
|
||||
@@ -960,7 +959,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.bloom_filter_index_appliers([bloom_filter_applier.clone(), None])
|
||||
@@ -1015,7 +1013,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.bloom_filter_index_appliers([bloom_filter_applier.clone(), None])
|
||||
@@ -1549,7 +1546,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
@@ -1652,7 +1648,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
@@ -1712,7 +1707,6 @@ mod tests {
|
||||
|
||||
let builder =
|
||||
ParquetReaderBuilder::new(FILE_DIR.to_string(), PathType::Bare, handle, object_store)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(vec![col("tag_0").eq(lit("a"))])));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
@@ -1774,7 +1768,6 @@ mod tests {
|
||||
|
||||
let builder =
|
||||
ParquetReaderBuilder::new(FILE_DIR.to_string(), PathType::Bare, handle, object_store)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(vec![col("tag_0").eq(lit("a"))])));
|
||||
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
@@ -1884,7 +1877,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.inverted_index_appliers([inverted_index_applier.clone(), None])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
@@ -1991,7 +1983,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.bloom_filter_index_appliers([None, bloom_filter_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
@@ -2255,7 +2246,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.fulltext_index_appliers([None, fulltext_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
@@ -2304,7 +2294,6 @@ mod tests {
|
||||
handle.clone(),
|
||||
object_store.clone(),
|
||||
)
|
||||
.flat_format(true)
|
||||
.predicate(Some(Predicate::new(preds)))
|
||||
.fulltext_index_appliers([None, fulltext_applier.clone()])
|
||||
.cache(CacheStrategy::EnableAll(cache.clone()));
|
||||
|
||||
@@ -175,6 +175,7 @@ impl FileRange {
|
||||
}
|
||||
|
||||
/// Returns a reader to read the [FileRange].
|
||||
#[allow(dead_code)]
|
||||
pub(crate) async fn reader(
|
||||
&self,
|
||||
selector: Option<TimeSeriesRowSelector>,
|
||||
|
||||
@@ -141,8 +141,6 @@ pub struct ParquetReaderBuilder {
|
||||
/// This is usually the latest metadata of the region. The reader uses
|
||||
/// it to get the correct column id of a column by name.
|
||||
expected_metadata: Option<RegionMetadataRef>,
|
||||
/// Whether to use flat format for reading.
|
||||
flat_format: bool,
|
||||
/// Whether this reader is for compaction.
|
||||
compaction: bool,
|
||||
/// Mode to pre-filter columns.
|
||||
@@ -176,7 +174,6 @@ impl ParquetReaderBuilder {
|
||||
#[cfg(feature = "vector_index")]
|
||||
vector_index_k: None,
|
||||
expected_metadata: None,
|
||||
flat_format: false,
|
||||
compaction: false,
|
||||
pre_filter_mode: PreFilterMode::All,
|
||||
decode_primary_key_values: false,
|
||||
@@ -257,13 +254,6 @@ impl ParquetReaderBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the flat format flag.
|
||||
#[must_use]
|
||||
pub fn flat_format(mut self, flat_format: bool) -> Self {
|
||||
self.flat_format = flat_format;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the compaction flag.
|
||||
#[must_use]
|
||||
pub fn compaction(mut self, compaction: bool) -> Self {
|
||||
@@ -304,8 +294,7 @@ impl ParquetReaderBuilder {
|
||||
pub async fn build(&self) -> Result<Option<ParquetReader>> {
|
||||
let mut metrics = ReaderMetrics::default();
|
||||
|
||||
let Some((context, selection)) = self.build_reader_input_inner(&mut metrics, true).await?
|
||||
else {
|
||||
let Some((context, selection)) = self.build_reader_input_inner(&mut metrics).await? else {
|
||||
return Ok(None);
|
||||
};
|
||||
ParquetReader::new(Arc::new(context), selection)
|
||||
@@ -327,14 +316,12 @@ impl ParquetReaderBuilder {
|
||||
&self,
|
||||
metrics: &mut ReaderMetrics,
|
||||
) -> Result<Option<(FileRangeContext, RowGroupSelection)>> {
|
||||
self.build_reader_input_inner(metrics, self.flat_format)
|
||||
.await
|
||||
self.build_reader_input_inner(metrics).await
|
||||
}
|
||||
|
||||
async fn build_reader_input_inner(
|
||||
&self,
|
||||
metrics: &mut ReaderMetrics,
|
||||
flat_format: bool,
|
||||
) -> Result<Option<(FileRangeContext, RowGroupSelection)>> {
|
||||
let start = Instant::now();
|
||||
|
||||
@@ -376,7 +363,6 @@ impl ParquetReaderBuilder {
|
||||
// before compat handling.
|
||||
let compaction_projection_mapper = if self.compaction
|
||||
&& !is_same_region_partition
|
||||
&& flat_format
|
||||
&& region_meta.primary_key_encoding == PrimaryKeyEncoding::Sparse
|
||||
{
|
||||
Some(CompactionProjectionMapper::try_new(®ion_meta)?)
|
||||
@@ -388,7 +374,7 @@ impl ParquetReaderBuilder {
|
||||
ReadFormat::new(
|
||||
region_meta.clone(),
|
||||
Some(column_ids),
|
||||
flat_format,
|
||||
true, // Always reads as flat format.
|
||||
Some(parquet_meta.file_metadata().schema_descr().num_columns()),
|
||||
&file_path,
|
||||
skip_auto_convert,
|
||||
@@ -404,7 +390,7 @@ impl ParquetReaderBuilder {
|
||||
ReadFormat::new(
|
||||
region_meta.clone(),
|
||||
Some(&column_ids),
|
||||
flat_format,
|
||||
true, // Always reads as flat format.
|
||||
Some(parquet_meta.file_metadata().schema_descr().num_columns()),
|
||||
&file_path,
|
||||
skip_auto_convert,
|
||||
@@ -2060,6 +2046,7 @@ impl RowGroupReaderContext for FileRangeContextRef {
|
||||
/// [RowGroupReader] that reads from [FileRange].
|
||||
pub(crate) type RowGroupReader = RowGroupReaderBase<FileRangeContextRef>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl RowGroupReader {
|
||||
/// Creates a new reader from file range.
|
||||
pub(crate) fn new(
|
||||
@@ -2084,6 +2071,7 @@ pub(crate) struct RowGroupReaderBase<T> {
|
||||
override_sequence: Option<ArrayRef>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl<T> RowGroupReaderBase<T>
|
||||
where
|
||||
T: RowGroupReaderContext,
|
||||
|
||||
@@ -216,15 +216,6 @@ impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
// If the format is unchanged, we also consider the option is altered.
|
||||
if new_format != current_options.sst_format.unwrap_or_default() {
|
||||
all_options_altered = false;
|
||||
|
||||
// Validates the format type.
|
||||
ensure!(
|
||||
new_format == FormatType::Flat,
|
||||
store_api::metadata::InvalidRegionRequestSnafu {
|
||||
region_id: region.region_id,
|
||||
err: "Only allow changing format type to flat",
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
SetRegionOption::AppendMode(new_append_mode) => {
|
||||
@@ -274,8 +265,6 @@ fn new_region_options_on_empty_memtable(
|
||||
SetRegionOption::Format(format_str) => {
|
||||
// Safety: handle_alter_region_options_fast() has validated this.
|
||||
let new_format = format_str.parse::<FormatType>().unwrap();
|
||||
assert_eq!(FormatType::Flat, new_format);
|
||||
|
||||
current_options.sst_format = Some(new_format);
|
||||
}
|
||||
SetRegionOption::AppendMode(new_append_mode) => {
|
||||
|
||||
@@ -21,7 +21,6 @@ derive_builder = { workspace = true, optional = true }
|
||||
futures.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
opendal = { version = "0.54", features = [
|
||||
"layers-tracing",
|
||||
"layers-prometheus",
|
||||
|
||||
@@ -13,7 +13,6 @@ async-trait.workspace = true
|
||||
bytemuck.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
datafusion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
@@ -27,6 +26,7 @@ prost.workspace = true
|
||||
snafu.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-recordbatch.workspace = true
|
||||
criterion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
|
||||
@@ -94,6 +94,8 @@ impl WindowedSortPhysicalRule {
|
||||
&& scanner_info
|
||||
.time_index
|
||||
.contains(input_schema.field(column_expr.index()).name())
|
||||
&& sort_exec.fetch().is_none()
|
||||
// skip if there is a limit, as the dynamic filter alone is good enough in this case
|
||||
{
|
||||
} else {
|
||||
return Ok(Transformed::no(plan));
|
||||
|
||||
@@ -237,14 +237,10 @@ impl ExecutionPlan for PartSortExec {
|
||||
} else {
|
||||
internal_err!("No children found")?
|
||||
};
|
||||
// create a new dynamic filter when with_new_children, as the old filter is bound to the old input and cannot be reused
|
||||
let new = Self::try_new(
|
||||
self.expression.clone(),
|
||||
self.limit,
|
||||
self.partition_ranges.clone(),
|
||||
new_input.clone(),
|
||||
)?;
|
||||
Ok(Arc::new(new))
|
||||
let mut new_exec = self.as_ref().clone();
|
||||
new_exec.input = new_input.clone();
|
||||
new_exec.properties = new_input.properties().clone();
|
||||
Ok(Arc::new(new_exec))
|
||||
}
|
||||
|
||||
fn execute(
|
||||
|
||||
@@ -91,7 +91,7 @@ otel-arrow-rust.workspace = true
|
||||
parking_lot.workspace = true
|
||||
partition.workspace = true
|
||||
pg_interval = { version = "0.5.2", package = "pg_interval_2" }
|
||||
pgwire = { version = "0.38.2", default-features = false, features = [
|
||||
pgwire = { version = "0.38.3", default-features = false, features = [
|
||||
"server-api-ring",
|
||||
"pg-ext-types",
|
||||
] }
|
||||
@@ -178,3 +178,7 @@ harness = false
|
||||
[[bench]]
|
||||
name = "loki_labels"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "flush_batch_physical"
|
||||
harness = false
|
||||
|
||||
289
src/servers/benches/flush_batch_physical.rs
Normal file
289
src/servers/benches/flush_batch_physical.rs
Normal file
@@ -0,0 +1,289 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::meta::Peer;
|
||||
use api::v1::region::RegionRequest;
|
||||
use arrow::array::{Float64Array, StringArray, TimestampMillisecondArray};
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema, TimeUnit};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use async_trait::async_trait;
|
||||
use catalog::error::Result as CatalogResult;
|
||||
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema as DtColumnSchema, Schema as DtSchema};
|
||||
use partition::error::Result as PartitionResult;
|
||||
use partition::partition::{PartitionRule, PartitionRuleRef, RegionMask};
|
||||
use servers::error::{self, Result};
|
||||
use servers::pending_rows_batcher::{
|
||||
PhysicalFlushCatalogProvider, PhysicalFlushNodeRequester, PhysicalFlushPartitionProvider,
|
||||
PhysicalTableMetadata, TableBatch, flush_batch_physical,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use table::test_util::table_info::test_table_info;
|
||||
use tokio::runtime::Runtime;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock implementations (memory-backed, no I/O)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct BenchCatalogProvider {
|
||||
table: PhysicalTableMetadata,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalFlushCatalogProvider for BenchCatalogProvider {
|
||||
async fn physical_table(
|
||||
&self,
|
||||
_catalog: &str,
|
||||
_schema: &str,
|
||||
_table_name: &str,
|
||||
_query_ctx: &session::context::QueryContext,
|
||||
) -> CatalogResult<Option<PhysicalTableMetadata>> {
|
||||
Ok(Some(self.table.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Stateless partition-provider mock used by the flush benchmark.
struct BenchPartitionProvider;
|
||||
|
||||
struct SingleRegionPartitionRule;
|
||||
|
||||
impl PartitionRule for SingleRegionPartitionRule {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn partition_columns(&self) -> &[String] {
|
||||
&[]
|
||||
}
|
||||
|
||||
fn find_region(
|
||||
&self,
|
||||
_values: &[datatypes::prelude::Value],
|
||||
) -> PartitionResult<store_api::storage::RegionNumber> {
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
fn split_record_batch(
|
||||
&self,
|
||||
record_batch: &RecordBatch,
|
||||
) -> PartitionResult<HashMap<store_api::storage::RegionNumber, RegionMask>> {
|
||||
let n = record_batch.num_rows();
|
||||
Ok(HashMap::from([(
|
||||
1,
|
||||
RegionMask::new(arrow::array::BooleanArray::from(vec![true; n]), n),
|
||||
)]))
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalFlushPartitionProvider for BenchPartitionProvider {
|
||||
async fn find_table_partition_rule(
|
||||
&self,
|
||||
_table_info: &table::metadata::TableInfo,
|
||||
) -> PartitionResult<PartitionRuleRef> {
|
||||
Ok(Arc::new(SingleRegionPartitionRule))
|
||||
}
|
||||
|
||||
async fn find_region_leader(&self, _region_id: RegionId) -> Result<Peer> {
|
||||
Ok(Peer {
|
||||
id: 1,
|
||||
addr: "bench-node".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct BenchNodeRequester;
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalFlushNodeRequester for BenchNodeRequester {
|
||||
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> error::Result<RegionResponse> {
|
||||
Ok(RegionResponse::new(0))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn make_physical_table_metadata(num_tags: usize) -> PhysicalTableMetadata {
|
||||
let mut columns = vec![
|
||||
DtColumnSchema::new("__primary_key", ConcreteDataType::binary_datatype(), false),
|
||||
DtColumnSchema::new(
|
||||
"greptime_timestamp",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
DtColumnSchema::new("greptime_value", ConcreteDataType::float64_datatype(), true),
|
||||
];
|
||||
|
||||
let mut name_to_ids = HashMap::new();
|
||||
let mut column_ids = vec![0u32, 1, 2];
|
||||
|
||||
for i in 0..num_tags {
|
||||
let tag_name = format!("tag{}", i);
|
||||
let col_id = (i + 3) as u32;
|
||||
columns.push(DtColumnSchema::new(
|
||||
&tag_name,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
));
|
||||
name_to_ids.insert(tag_name, col_id);
|
||||
column_ids.push(col_id);
|
||||
}
|
||||
|
||||
let schema = Arc::new(DtSchema::try_new(columns).unwrap());
|
||||
let mut table_info = test_table_info(1, "phy", "public", "greptime", schema);
|
||||
table_info.meta.column_ids = column_ids;
|
||||
|
||||
PhysicalTableMetadata {
|
||||
table_info: Arc::new(table_info),
|
||||
col_name_to_ids: Some(name_to_ids),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_tag_batch(tag_names: &[&str], num_rows: usize) -> RecordBatch {
|
||||
let mut fields = vec![
|
||||
Field::new(
|
||||
"greptime_timestamp",
|
||||
ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
false,
|
||||
),
|
||||
Field::new("greptime_value", ArrowDataType::Float64, true),
|
||||
];
|
||||
for tag in tag_names {
|
||||
fields.push(Field::new(*tag, ArrowDataType::Utf8, true));
|
||||
}
|
||||
|
||||
let schema = Arc::new(ArrowSchema::new(fields));
|
||||
|
||||
let ts: Vec<i64> = (0..num_rows as i64).collect();
|
||||
let vals: Vec<f64> = (0..num_rows).map(|i| i as f64).collect();
|
||||
|
||||
let mut arrays: Vec<Arc<dyn arrow::array::Array>> = vec![
|
||||
Arc::new(TimestampMillisecondArray::from(ts)),
|
||||
Arc::new(Float64Array::from(vals)),
|
||||
];
|
||||
|
||||
for (tag_idx, _tag) in tag_names.iter().enumerate() {
|
||||
let values: Vec<String> = (0..num_rows)
|
||||
.map(|i| format!("val-{}-{}", tag_idx, i))
|
||||
.collect();
|
||||
arrays.push(Arc::new(StringArray::from(values)));
|
||||
}
|
||||
|
||||
RecordBatch::try_new(schema, arrays).unwrap()
|
||||
}
|
||||
|
||||
fn make_table_batches(
|
||||
num_logical_tables: usize,
|
||||
rows_per_table: usize,
|
||||
tag_names: &[&str],
|
||||
) -> Vec<TableBatch> {
|
||||
(0..num_logical_tables)
|
||||
.map(|i| {
|
||||
let batch = make_tag_batch(tag_names, rows_per_table);
|
||||
let row_count = batch.num_rows();
|
||||
TableBatch {
|
||||
table_name: format!("logical_{}", i),
|
||||
table_id: (100 + i) as u32,
|
||||
batches: vec![batch],
|
||||
row_count,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Benchmarks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn bench_flush_batch_physical(c: &mut Criterion) {
|
||||
let rt = Runtime::new().unwrap();
|
||||
let ctx = session::context::QueryContext::arc();
|
||||
|
||||
let num_tags = 5;
|
||||
let tag_names: Vec<String> = (0..num_tags).map(|i| format!("tag{}", i)).collect();
|
||||
let tag_refs: Vec<&str> = tag_names.iter().map(|s| s.as_str()).collect();
|
||||
|
||||
let catalog_provider = BenchCatalogProvider {
|
||||
table: make_physical_table_metadata(num_tags),
|
||||
};
|
||||
let partition_provider = BenchPartitionProvider;
|
||||
let node_requester = BenchNodeRequester;
|
||||
|
||||
let mut group = c.benchmark_group("flush_batch_physical");
|
||||
|
||||
// Vary the number of logical tables
|
||||
for num_tables in [1, 10, 50, 100] {
|
||||
let rows_per_table = 100;
|
||||
let table_batches = make_table_batches(num_tables, rows_per_table, &tag_refs);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("tables", num_tables),
|
||||
&table_batches,
|
||||
|b, batches| {
|
||||
b.iter(|| {
|
||||
rt.block_on(async {
|
||||
flush_batch_physical(
|
||||
batches,
|
||||
"phy",
|
||||
&ctx,
|
||||
&partition_provider,
|
||||
&node_requester,
|
||||
&catalog_provider,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Vary the number of rows per table
|
||||
for rows_per_table in [10, 100, 1000, 5000] {
|
||||
let num_tables = 10;
|
||||
let table_batches = make_table_batches(num_tables, rows_per_table, &tag_refs);
|
||||
|
||||
group.bench_with_input(
|
||||
BenchmarkId::new("rows_per_table", rows_per_table),
|
||||
&table_batches,
|
||||
|b, batches| {
|
||||
b.iter(|| {
|
||||
rt.block_on(async {
|
||||
flush_batch_physical(
|
||||
batches,
|
||||
"phy",
|
||||
&ctx,
|
||||
&partition_provider,
|
||||
&node_requester,
|
||||
&catalog_provider,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
});
|
||||
});
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_flush_batch_physical);
|
||||
criterion_main!(benches);
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::any::Any;
|
||||
use std::net::SocketAddr;
|
||||
use std::string::FromUtf8Error;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::http::StatusCode as HttpStatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
@@ -51,6 +52,8 @@ pub enum Error {
|
||||
Arrow {
|
||||
#[snafu(source)]
|
||||
error: arrow_schema::ArrowError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Internal error: {}", err_msg))]
|
||||
@@ -685,6 +688,23 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(transparent)]
|
||||
Partition {
|
||||
source: partition::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(transparent)]
|
||||
MetricEngine {
|
||||
source: metric_engine::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to submit batch: {}", source))]
|
||||
SubmitBatch { source: Arc<Error> },
|
||||
}
|
||||
|
||||
pub type Result<T, E = Error> = std::result::Result<T, E>;
|
||||
@@ -818,6 +838,9 @@ impl ErrorExt for Error {
|
||||
MemoryLimitExceeded { .. } => StatusCode::RateLimited,
|
||||
|
||||
GreptimeProto { source, .. } => source.status_code(),
|
||||
Partition { source, .. } => source.status_code(),
|
||||
MetricEngine { source, .. } => source.status_code(),
|
||||
SubmitBatch { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1312,7 +1312,7 @@ mod test {
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_ipc::reader::FileReader;
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
use arrow_schema::DataType;
|
||||
use axum::handler::Handler;
|
||||
use axum::http::StatusCode;
|
||||
@@ -1684,8 +1684,8 @@ mod test {
|
||||
|
||||
HttpResponse::Arrow(resp) => {
|
||||
let output = resp.data;
|
||||
let mut reader =
|
||||
FileReader::try_new(Cursor::new(output), None).expect("Arrow reader error");
|
||||
let mut reader = StreamReader::try_new(Cursor::new(output), None)
|
||||
.expect("Arrow reader error");
|
||||
let schema = reader.schema();
|
||||
assert_eq!(schema.fields[0].name(), "numbers");
|
||||
assert_eq!(schema.fields[0].data_type(), &DataType::UInt32);
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::Schema;
|
||||
use arrow_ipc::CompressionType;
|
||||
use arrow_ipc::writer::{FileWriter, IpcWriteOptions};
|
||||
use arrow_ipc::writer::{IpcWriteOptions, StreamWriter};
|
||||
use axum::http::{HeaderValue, header};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use common_error::status_code::StatusCode;
|
||||
@@ -48,7 +48,7 @@ async fn write_arrow_bytes(
|
||||
let options = IpcWriteOptions::default()
|
||||
.try_with_compression(compression)
|
||||
.context(error::ArrowSnafu)?;
|
||||
let mut writer = FileWriter::try_new_with_options(&mut bytes, schema, options)
|
||||
let mut writer = StreamWriter::try_new_with_options(&mut bytes, schema, options)
|
||||
.context(error::ArrowSnafu)?;
|
||||
|
||||
while let Some(rb) = recordbatches.next().await {
|
||||
@@ -164,7 +164,7 @@ impl IntoResponse for ArrowResponse {
|
||||
mod test {
|
||||
use std::io::Cursor;
|
||||
|
||||
use arrow_ipc::reader::FileReader;
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
use arrow_schema::DataType;
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datatypes::prelude::*;
|
||||
@@ -200,8 +200,8 @@ mod test {
|
||||
match http_resp {
|
||||
HttpResponse::Arrow(resp) => {
|
||||
let output = resp.data;
|
||||
let mut reader =
|
||||
FileReader::try_new(Cursor::new(output), None).expect("Arrow reader error");
|
||||
let mut reader = StreamReader::try_new(Cursor::new(output), None)
|
||||
.expect("Arrow reader error");
|
||||
let schema = reader.schema();
|
||||
assert_eq!(schema.fields[0].name(), "numbers");
|
||||
assert_eq!(schema.fields[0].data_type(), &DataType::UInt32);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -529,7 +529,7 @@ impl ExtendedQueryHandler for PostgresServerHandlerInner {
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Some(schema) = &sql_plan.schema {
|
||||
schema_to_pg(schema, &Format::UnifiedBinary, None)
|
||||
schema_to_pg(schema, &Format::UnifiedText, None)
|
||||
.map(|fields| DescribeStatementResponse::new(param_types, fields))
|
||||
.map_err(convert_err)
|
||||
} else {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user