Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-04 20:32:56 +00:00)

Compare commits: 3 commits, `v0.16.0-ni` ... `flow/faste`

| SHA1 |
| --- |
| 67a60646b4 |
| 1c3bde7e4e |
| e045a0dbdf |
.gitignore (vendored, 5 changed lines)
@@ -60,7 +60,4 @@ tests-fuzz/corpus/
greptimedb_data

# github
!/.github

# Claude code
CLAUDE.md
!/.github
@@ -10,10 +10,12 @@
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)


## Team Members (in alphabetical order)

* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [Breeze-P](https://github.com/Breeze-P)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)

Cargo.lock (generated, 110 changed lines)
@@ -2302,27 +2302,6 @@ dependencies = [
|
||||
"tonic 0.12.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-event-recorder"
|
||||
version = "0.16.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
"backon",
|
||||
"client",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-meta",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-frontend"
|
||||
version = "0.16.0"
|
||||
@@ -2347,8 +2326,6 @@ dependencies = [
|
||||
"api",
|
||||
"approx 0.5.1",
|
||||
"arc-swap",
|
||||
"arrow 54.2.1",
|
||||
"arrow-schema 54.3.1",
|
||||
"async-trait",
|
||||
"bincode",
|
||||
"catalog",
|
||||
@@ -2367,10 +2344,8 @@ dependencies = [
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-functions-aggregate-common",
|
||||
"datafusion-physical-expr",
|
||||
"datatypes",
|
||||
"derive_more",
|
||||
"futures",
|
||||
"geo",
|
||||
"geo-types",
|
||||
"geohash",
|
||||
@@ -2383,7 +2358,6 @@ dependencies = [
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pretty_assertions",
|
||||
"s2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -2445,7 +2419,6 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tonic 0.12.3",
|
||||
"tower 0.5.2",
|
||||
"vec1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2558,7 +2531,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
"typetag",
|
||||
"uuid",
|
||||
]
|
||||
@@ -3023,9 +2995,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crc"
|
||||
version = "3.3.0"
|
||||
version = "3.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
|
||||
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
|
||||
dependencies = [
|
||||
"crc-catalog",
|
||||
]
|
||||
@@ -3066,7 +3038,6 @@ dependencies = [
|
||||
"ciborium",
|
||||
"clap 3.2.25",
|
||||
"criterion-plot",
|
||||
"futures",
|
||||
"itertools 0.10.5",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
@@ -3078,7 +3049,6 @@ dependencies = [
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"tinytemplate",
|
||||
"tokio",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
@@ -3836,7 +3806,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"toml 0.8.19",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3859,7 +3828,7 @@ dependencies = [
|
||||
"jsonb",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 4.3.0",
|
||||
"ordered-float 3.9.2",
|
||||
"paste",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -4180,16 +4149,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "domain"
|
||||
version = "0.11.0"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a11dd7f04a6a6d2aea0153c6e31f5ea7af8b2efdf52cdaeea7a9a592c7fefef9"
|
||||
checksum = "4c84070523f8ba0f9127ff156920f27eb27b302b425efe60bf5f41ec244d1c60"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"bytes",
|
||||
"domain-macros",
|
||||
"futures-util",
|
||||
"hashbrown 0.14.5",
|
||||
"log",
|
||||
"moka",
|
||||
"octseq",
|
||||
"rand 0.8.5",
|
||||
@@ -4200,17 +4165,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "domain-macros"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e197fdfd2cdb5fdeb7f8ddcf3aed5d5d04ecde2890d448b14ffb716f7376b70"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dotenv"
|
||||
version = "0.15.0"
|
||||
@@ -4696,7 +4650,6 @@ dependencies = [
|
||||
"get-size2",
|
||||
"greptime-proto",
|
||||
"http 1.1.0",
|
||||
"humantime-serde",
|
||||
"itertools 0.14.0",
|
||||
"lazy_static",
|
||||
"meta-client",
|
||||
@@ -4841,7 +4794,6 @@ dependencies = [
|
||||
"toml 0.8.19",
|
||||
"tonic 0.12.3",
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@@ -5225,7 +5177,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=7fcaa3e413947a7a28d9af95812af26c1939ce78#7fcaa3e413947a7a28d9af95812af26c1939ce78"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=ceb1af4fa9309ce65bda0367db7b384df2bb4d4f#ceb1af4fa9309ce65bda0367db7b384df2bb4d4f"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"serde",
|
||||
@@ -6776,7 +6728,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7223,9 +7175,6 @@ version = "0.16.0"
|
||||
dependencies = [
|
||||
"api",
|
||||
"async-trait",
|
||||
"axum 0.8.1",
|
||||
"axum-extra",
|
||||
"axum-macros",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"clap 4.5.19",
|
||||
@@ -7259,7 +7208,6 @@ dependencies = [
|
||||
"http-body-util",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.30",
|
||||
"hyper-util",
|
||||
"itertools 0.14.0",
|
||||
"lazy_static",
|
||||
@@ -7287,7 +7235,6 @@ dependencies = [
|
||||
"toml 0.8.19",
|
||||
"tonic 0.12.3",
|
||||
"tower 0.5.2",
|
||||
"tower-http 0.6.2",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"typetag",
|
||||
@@ -7350,7 +7297,6 @@ dependencies = [
|
||||
"snafu 0.8.5",
|
||||
"store-api",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7464,7 +7410,6 @@ dependencies = [
|
||||
"datafusion-expr",
|
||||
"datatypes",
|
||||
"dotenv",
|
||||
"either",
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"index",
|
||||
@@ -7482,7 +7427,6 @@ dependencies = [
|
||||
"prost 0.13.5",
|
||||
"puffin",
|
||||
"rand 0.9.0",
|
||||
"rayon",
|
||||
"regex",
|
||||
"rskafka",
|
||||
"rstest",
|
||||
@@ -7501,7 +7445,6 @@ dependencies = [
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"toml 0.8.19",
|
||||
"tracing",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
@@ -8581,7 +8524,6 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8618,6 +8560,17 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "3.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ordered-float"
|
||||
version = "4.3.0"
|
||||
@@ -8625,8 +8578,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"rand 0.8.5",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -9163,7 +9114,6 @@ dependencies = [
|
||||
"moka",
|
||||
"once_cell",
|
||||
"operator",
|
||||
"ordered-float 4.3.0",
|
||||
"paste",
|
||||
"prometheus",
|
||||
"query",
|
||||
@@ -9677,7 +9627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"itertools 0.11.0",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
@@ -9723,7 +9673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.11.0",
|
||||
"itertools 0.14.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
@@ -9975,7 +9925,6 @@ dependencies = [
|
||||
"table",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tracing",
|
||||
"unescaper",
|
||||
"uuid",
|
||||
]
|
||||
@@ -11409,10 +11358,8 @@ dependencies = [
|
||||
"tonic-reflection",
|
||||
"tower 0.5.2",
|
||||
"tower-http 0.6.2",
|
||||
"tracing",
|
||||
"urlencoding",
|
||||
"uuid",
|
||||
"vrl",
|
||||
"zstd 0.13.2",
|
||||
]
|
||||
|
||||
@@ -12527,7 +12474,6 @@ dependencies = [
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"once_cell",
|
||||
"parquet",
|
||||
"paste",
|
||||
"serde",
|
||||
@@ -13076,9 +13022,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.45.1"
|
||||
version = "1.44.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
|
||||
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
@@ -14004,12 +13950,6 @@ version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "vec1"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eab68b56840f69efb0fefbe3ab6661499217ffdc58e2eef7c3f6f69835386322"
|
||||
|
||||
[[package]]
|
||||
name = "vergen"
|
||||
version = "8.3.2"
|
||||
@@ -14040,9 +13980,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "vrl"
|
||||
version = "0.25.0"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f49394b948406ea1564aa00152e011d87a38ad35d277ebddda257a9ee39c419"
|
||||
checksum = "f9ceadaa40aef567a26079ff014ca7a567ba85344f1b81090b5ec7d7bb16a219"
|
||||
dependencies = [
|
||||
"aes",
|
||||
"aes-siv",
|
||||
@@ -14362,7 +14302,7 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -13,7 +13,6 @@ members = [
    "src/common/datasource",
    "src/common/decimal",
    "src/common/error",
    "src/common/event-recorder",
    "src/common/frontend",
    "src/common/function",
    "src/common/greptimedb-telemetry",
@@ -140,7 +139,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7fcaa3e413947a7a28d9af95812af26c1939ce78" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ceb1af4fa9309ce65bda0367db7b384df2bb4d4f" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -168,7 +167,6 @@ opentelemetry-proto = { version = "0.27", features = [
    "with-serde",
    "logs",
] }
ordered-float = { version = "4.3", features = ["serde"] }
parking_lot = "0.12"
parquet = { version = "54.2", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
@@ -225,13 +223,10 @@ tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
tower = "0.5"
tower-http = "0.6"
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
vrl = "0.25"
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES

@@ -249,7 +244,6 @@ common-config = { path = "src/common/config" }
common-datasource = { path = "src/common/datasource" }
common-decimal = { path = "src/common/decimal" }
common-error = { path = "src/common/error" }
common-event-recorder = { path = "src/common/event-recorder" }
common-frontend = { path = "src/common/frontend" }
common-function = { path = "src/common/function" }
common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }

@@ -562,16 +562,6 @@
| `node_id` | Integer | Unset | The flownode identifier and should be unique in the cluster. |
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `flow.batching_mode` | -- | -- | -- |
| `flow.batching_mode.query_timeout` | String | `600s` | The default batching engine query timeout is 10 minutes. |
| `flow.batching_mode.slow_query_threshold` | String | `60s` | will output a warn log for any query that runs for more that this threshold |
| `flow.batching_mode.experimental_min_refresh_duration` | String | `5s` | The minimum duration between two queries execution by batching mode task |
| `flow.batching_mode.grpc_conn_timeout` | String | `5s` | The gRPC connection timeout |
| `flow.batching_mode.experimental_grpc_max_retries` | Integer | `3` | The gRPC max retry number |
| `flow.batching_mode.experimental_frontend_scan_timeout` | String | `30s` | Flow wait for available frontend timeout,<br/>if failed to find available frontend after frontend_scan_timeout elapsed, return error<br/>which prevent flownode from starting |
| `flow.batching_mode.experimental_frontend_activity_timeout` | String | `60s` | Frontend activity timeout<br/>if frontend is down(not sending heartbeat) for more than frontend_activity_timeout,<br/>it will be removed from the list that flownode use to connect |
| `flow.batching_mode.experimental_max_filter_num_per_query` | Integer | `20` | Maximum number of filters allowed in a single query |
| `flow.batching_mode.experimental_time_window_merge_threshold` | Integer | `3` | Time window merge distance |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |

@@ -7,29 +7,6 @@ node_id = 14
## The number of flow worker in flownode.
## Not setting(or set to 0) this value will use the number of CPU cores divided by 2.
#+num_workers=0
[flow.batching_mode]
## The default batching engine query timeout is 10 minutes.
#+query_timeout="600s"
## will output a warn log for any query that runs for more that this threshold
#+slow_query_threshold="60s"
## The minimum duration between two queries execution by batching mode task
#+experimental_min_refresh_duration="5s"
## The gRPC connection timeout
#+grpc_conn_timeout="5s"
## The gRPC max retry number
#+experimental_grpc_max_retries=3
## Flow wait for available frontend timeout,
## if failed to find available frontend after frontend_scan_timeout elapsed, return error
## which prevent flownode from starting
#+experimental_frontend_scan_timeout="30s"
## Frontend activity timeout
## if frontend is down(not sending heartbeat) for more than frontend_activity_timeout,
## it will be removed from the list that flownode use to connect
#+experimental_frontend_activity_timeout="60s"
## Maximum number of filters allowed in a single query
#+experimental_max_filter_num_per_query=20
## Time window merge distance
#+experimental_time_window_merge_threshold=3

## The gRPC server options.
[grpc]

@@ -1,4 +1,4 @@
Currently, our query engine is based on DataFusion, so all aggregate function is executed by DataFusion, through its UDAF interface. You can find DataFusion's UDAF example [here](https://github.com/apache/datafusion/tree/main/datafusion-examples/examples/simple_udaf.rs). Basically, we provide the same way as DataFusion to write aggregate functions: both are centered in a struct called "Accumulator" to accumulates states along the way in aggregation.
Currently, our query engine is based on DataFusion, so all aggregate function is executed by DataFusion, through its UDAF interface. You can find DataFusion's UDAF example [here](https://github.com/apache/arrow-datafusion/blob/arrow2/datafusion-examples/examples/simple_udaf.rs). Basically, we provide the same way as DataFusion to write aggregate functions: both are centered in a struct called "Accumulator" to accumulates states along the way in aggregation.

However, DataFusion's UDAF implementation has a huge restriction, that it requires user to provide a concrete "Accumulator". Take `Median` aggregate function for example, to aggregate a `u32` datatype column, you have to write a `MedianU32`, and use `SELECT MEDIANU32(x)` in SQL. `MedianU32` cannot be used to aggregate a `i32` datatype column. Or, there's another way: you can use a special type that can hold all kinds of data (like our `Value` enum or Arrow's `ScalarValue`), and `match` all the way up to do aggregate calculations. It might work, though rather tedious. (But I think it's DataFusion's preferred way to write UDAF.)

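To make the Accumulator-centric flow concrete, here is a minimal self-contained sketch of a type-specific median accumulator in the spirit described above; the trait shape and the names (`Accumulator`, `MedianU32`, `update_batch`, `evaluate`) are simplified illustrations, not the exact DataFusion or GreptimeDB API.

```rust
// Illustration only: the real DataFusion `Accumulator` trait is
// ScalarValue-based and returns Results; this keeps the same shape.
trait Accumulator {
    /// Fold a batch of input values into the accumulator's state.
    fn update_batch(&mut self, values: &[u32]);
    /// Produce the final aggregate value from the accumulated state.
    fn evaluate(&self) -> Option<u32>;
}

/// A concrete accumulator bound to one datatype, mirroring the `MedianU32`
/// example from the text: it cannot be reused for an `i32` column.
#[derive(Default)]
struct MedianU32 {
    values: Vec<u32>,
}

impl Accumulator for MedianU32 {
    fn update_batch(&mut self, values: &[u32]) {
        self.values.extend_from_slice(values);
    }

    fn evaluate(&self) -> Option<u32> {
        if self.values.is_empty() {
            return None;
        }
        let mut sorted = self.values.clone();
        sorted.sort_unstable();
        Some(sorted[sorted.len() / 2])
    }
}

fn main() {
    let mut acc = MedianU32::default();
    acc.update_batch(&[5, 1, 9, 3, 7]);
    assert_eq!(acc.evaluate(), Some(5));
}
```
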
@@ -76,7 +76,7 @@ pub trait CompactionStrategy {
```

The most suitable compaction strategy for time-series scenario would be
a hybrid strategy that combines time window compaction with size-tired compaction, just like [Cassandra](https://cassandra.apache.org/doc/latest/cassandra/managing/operating/compaction/twcs.html) and [ScyllaDB](https://docs.scylladb.com/stable/architecture/compaction/compaction-strategies.html#time-window-compaction-strategy-twcs) does.
a hybrid strategy that combines time window compaction with size-tired compaction, just like [Cassandra](https://cassandra.apache.org/doc/latest/cassandra/operating/compaction/twcs.html) and [ScyllaDB](https://docs.scylladb.com/stable/architecture/compaction/compaction-strategies.html#time-window-compaction-strategy-twcs) does.

We can first group SSTs in level n into buckets according to some predefined time window. Within that window,
SSTs are compacted in a size-tired manner (find SSTs with similar size and compact them to level n+1).

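As a rough illustration of the bucketing step described above, the sketch below groups SST descriptors of one level by a fixed time window; the `Sst` struct, its fields, and the window size are hypothetical stand-ins, not GreptimeDB's actual types.

```rust
use std::collections::BTreeMap;

/// Hypothetical SST descriptor used only for this illustration.
struct Sst {
    min_ts_secs: i64, // minimum timestamp covered by the file
    size_bytes: u64,
}

/// Group SSTs into buckets keyed by the start of a fixed time window,
/// the first step of the hybrid (TWCS-like) strategy described above.
fn bucket_by_time_window(ssts: Vec<Sst>, window_secs: i64) -> BTreeMap<i64, Vec<Sst>> {
    let mut buckets: BTreeMap<i64, Vec<Sst>> = BTreeMap::new();
    for sst in ssts {
        // Align the file's minimum timestamp down to its window start.
        let window_start = sst.min_ts_secs.div_euclid(window_secs) * window_secs;
        buckets.entry(window_start).or_default().push(sst);
    }
    buckets
}

fn main() {
    let ssts = vec![
        Sst { min_ts_secs: 10, size_bytes: 4 << 20 },
        Sst { min_ts_secs: 3_700, size_bytes: 8 << 20 },
        Sst { min_ts_secs: 50, size_bytes: 4 << 20 },
    ];
    // Within each bucket, files of similar size would then be picked for
    // size-tiered compaction into level n+1.
    let buckets = bucket_by_time_window(ssts, 3_600);
    assert_eq!(buckets.len(), 2);
    let first_bucket_bytes: u64 = buckets[&0].iter().map(|s| s.size_bytes).sum();
    assert_eq!(first_bucket_bytes, 8 << 20);
}
```
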
@@ -28,7 +28,7 @@ In order to do those things while maintaining a low memory footprint, you need t
- Greptime Flow's is built on top of [Hydroflow](https://github.com/hydro-project/hydroflow).
- We have three choices for the Dataflow/Streaming process framework for our simple continuous aggregation feature:
  1. Based on the timely/differential dataflow crate that [materialize](https://github.com/MaterializeInc/materialize) based on. Later, it's proved too obscure for a simple usage, and is hard to customize memory usage control.
  2. Based on a simple dataflow framework that we write from ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did, for example the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/crates/arroyo-datastream/src/lib.rs) only takes up to 2000 line of codes. However, it means maintaining another layer of dataflow framework, which might seem easy in the beginning, but I fear it might be too burdensome to maintain once we need more features.
  2. Based on a simple dataflow framework that we write from ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did, for example the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/arroyo-datastream/src/lib.rs) only takes up to 2000 line of codes. However, it means maintaining another layer of dataflow framework, which might seem easy in the beginning, but I fear it might be too burdensome to maintain once we need more features.
  3. Based on a simple and lower level dataflow framework that someone else write, like [hydroflow](https://github.com/hydro-project/hydroflow), this approach combines the best of both worlds. Firstly, it boasts ease of comprehension and customization. Secondly, the dataflow framework offers precisely the necessary features for crafting uncomplicated single-node dataflow programs while delivering decent performance.

Hence, we choose the third option, and use a simple logical plan that's anagonistic to the underlying dataflow framework, as it only describe how the dataflow graph should be doing, not how it do that. And we built operator in hydroflow to execute the plan. And the result hydroflow graph is wrapped in a engine that only support data in/out and tick event to flush and compute the result. This provide a thin middle layer that's easy to maintain and allow switching to other dataflow framework if necessary.

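To picture the "thin middle layer" mentioned above, here is a hedged sketch of what an engine boundary with only data in/out plus a tick could look like; the trait name and methods are invented for illustration and are not Greptime Flow's actual API.

```rust
/// Illustration only: a minimal boundary for a dataflow engine that accepts
/// rows, is ticked periodically to flush and recompute, and emits results.
trait FlowEngine {
    type Row;

    /// Feed a batch of input rows into the wrapped dataflow graph.
    fn push(&mut self, rows: Vec<Self::Row>);

    /// Advance logical time, flushing pending aggregates, and return
    /// whatever output became final at this tick.
    fn tick(&mut self, now_ms: i64) -> Vec<Self::Row>;
}

/// A trivial engine that counts rows per tick, standing in for a logical
/// plan compiled onto an underlying framework such as Hydroflow.
struct CountingEngine {
    pending: usize,
}

impl FlowEngine for CountingEngine {
    type Row = u64;

    fn push(&mut self, rows: Vec<u64>) {
        self.pending += rows.len();
    }

    fn tick(&mut self, _now_ms: i64) -> Vec<u64> {
        let out = vec![self.pending as u64];
        self.pending = 0;
        out
    }
}

fn main() {
    let mut engine = CountingEngine { pending: 0 };
    engine.push(vec![1, 2, 3]);
    assert_eq!(engine.tick(0), vec![3]);
}
```

Keeping the engine behind this kind of narrow interface is what allows swapping the underlying dataflow framework later, as the text argues.
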
@@ -83,7 +83,7 @@ If you use the [Helm Chart](https://github.com/GreptimeTeam/helm-charts) to depl
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;

The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration/deploy-on-kubernetes/overview).
The standalone GreptimeDB instance will collect metrics from your cluster, and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/user-guide/deployments-administration-administration/deploy-on-kubernetes/getting-started).

### Self-host Prometheus and import dashboards manually

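For example, the two values can be passed when installing or upgrading the chart; only the two `--set` flags come from the text above, while the release name, namespace, and chart reference below are placeholders to adapt to your deployment.

```bash
# Sketch only: "my-cluster", "my-namespace", and the chart path are placeholders.
helm upgrade --install my-cluster greptime/greptimedb-cluster \
  --namespace my-namespace \
  --set monitoring.enabled=true \
  --set grafana.enabled=true
```
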
@@ -5954,9 +5954,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~\"$datanode\", operation=~\"read|Reader::read\"}[$__rate_interval]))",
|
||||
"expr": "sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~\"$datanode\", operation=\"read\"}[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6055,9 +6055,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\",operation=~\"read|Reader::read\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\",operation=\"read\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-{{scheme}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6156,9 +6156,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~\"$datanode\", operation=~\"write|Writer::write|Writer::close\"}[$__rate_interval]))",
|
||||
"expr": "sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~\"$datanode\", operation=\"write\"}[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-{{scheme}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6257,9 +6257,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\", operation =~ \"Writer::write|Writer::close|write\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\", operation=\"write\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6663,7 +6663,7 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\", operation!~\"read|write|list|Writer::write|Writer::close|Reader::read\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~\"$datanode\", operation!~\"read|write|list\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"range": true,
|
||||
|
||||
@@ -76,14 +76,14 @@
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"read\|Reader::read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation=~"read\|Reader::read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"write\|Writer::write\|Writer::close"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation =~ "Writer::write\|Writer::close\|write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode",operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{instance=~"$datanode"}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{instance=~"$datanode", error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Metasrv
|
||||
|
||||
@@ -659,41 +659,41 @@ groups:
|
||||
description: Read QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"read|Reader::read"}[$__rate_interval]))
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="read"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Read P99 per Instance
|
||||
type: timeseries
|
||||
description: Read P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation=~"read|Reader::read"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode",operation="read"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write QPS per Instance
|
||||
type: timeseries
|
||||
description: Write QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation=~"write|Writer::write|Writer::close"}[$__rate_interval]))
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{instance=~"$datanode", operation="write"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write P99 per Instance
|
||||
type: timeseries
|
||||
description: Write P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation =~ "Writer::write|Writer::close|write"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation="write"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List QPS per Instance
|
||||
type: timeseries
|
||||
description: List QPS per Instance.
|
||||
@@ -729,7 +729,7 @@ groups:
|
||||
description: Other Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list|Writer::write|Writer::close|Reader::read"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{instance=~"$datanode", operation!~"read|write|list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
|
||||
@@ -5954,9 +5954,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~\"read|Reader::read\"}[$__rate_interval]))",
|
||||
"expr": "sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation=\"read\"}[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6055,9 +6055,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{operation=~\"read|Reader::read\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation=\"read\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-{{scheme}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6156,9 +6156,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~\"write|Writer::write|Writer::close\"}[$__rate_interval]))",
|
||||
"expr": "sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation=\"write\"}[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-{{scheme}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6257,9 +6257,9 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation =~ \"Writer::write|Writer::close|write\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation=\"write\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
@@ -6663,7 +6663,7 @@
|
||||
"uid": "${metrics}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~\"read|write|list|Writer::write|Writer::close|Reader::read\"}[$__rate_interval])))",
|
||||
"expr": "histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~\"read|write|list\"}[$__rate_interval])))",
|
||||
"instant": false,
|
||||
"legendFormat": "[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]",
|
||||
"range": true,
|
||||
|
||||
@@ -76,14 +76,14 @@
|
||||
| Title | Query | Type | Description | Datasource | Unit | Legend Format |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{}[$__rate_interval]))` | `timeseries` | QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"read\|Reader::read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{operation=~"read\|Reader::read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"write\|Writer::write\|Writer::close"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation =~ "Writer::write\|Writer::close\|write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Read QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))` | `timeseries` | Read QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Read P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))` | `timeseries` | Read P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))` | `timeseries` | Write QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-{{scheme}}` |
|
||||
| Write P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))` | `timeseries` | Write P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List QPS per Instance | `sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="list"}[$__rate_interval]))` | `timeseries` | List QPS per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| List P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="list"}[$__rate_interval])))` | `timeseries` | List P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]` |
|
||||
| Other Requests per Instance | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{operation!~"read\|write\|list\|stat"}[$__rate_interval]))` | `timeseries` | Other Requests per Instance. | `prometheus` | `ops` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list\|Writer::write\|Writer::close\|Reader::read"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Other Request P99 per Instance | `histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read\|write\|list"}[$__rate_interval])))` | `timeseries` | Other Request P99 per Instance. | `prometheus` | `s` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| Opendal traffic | `sum by(instance, pod, scheme, operation) (rate(opendal_operation_bytes_sum{}[$__rate_interval]))` | `timeseries` | Total traffic as in bytes by instance and operation | `prometheus` | `decbytes` | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]` |
|
||||
| OpenDAL errors per Instance | `sum by(instance, pod, scheme, operation, error) (rate(opendal_operation_errors_total{ error!="NotFound"}[$__rate_interval]))` | `timeseries` | OpenDAL error counts per Instance. | `prometheus` | -- | `[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]-[{{error}}]` |
|
||||
# Metasrv
|
||||
|
||||
@@ -659,41 +659,41 @@ groups:
|
||||
description: Read QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"read|Reader::read"}[$__rate_interval]))
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="read"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: Read P99 per Instance
|
||||
type: timeseries
|
||||
description: Read P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{operation=~"read|Reader::read"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{operation="read"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write QPS per Instance
|
||||
type: timeseries
|
||||
description: Write QPS per Instance.
|
||||
unit: ops
|
||||
queries:
|
||||
- expr: sum by(instance, pod, scheme, operation) (rate(opendal_operation_duration_seconds_count{ operation=~"write|Writer::write|Writer::close"}[$__rate_interval]))
|
||||
- expr: sum by(instance, pod, scheme) (rate(opendal_operation_duration_seconds_count{ operation="write"}[$__rate_interval]))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-{{scheme}}'
|
||||
- title: Write P99 per Instance
|
||||
type: timeseries
|
||||
description: Write P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation =~ "Writer::write|Writer::close|write"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme) (rate(opendal_operation_duration_seconds_bucket{ operation="write"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]-[{{operation}}]'
|
||||
legendFormat: '[{{instance}}]-[{{pod}}]-[{{scheme}}]'
|
||||
- title: List QPS per Instance
|
||||
type: timeseries
|
||||
description: List QPS per Instance.
|
||||
@@ -729,7 +729,7 @@ groups:
|
||||
description: Other Request P99 per Instance.
|
||||
unit: s
|
||||
queries:
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list|Writer::write|Writer::close|Reader::read"}[$__rate_interval])))
|
||||
- expr: histogram_quantile(0.99, sum by(instance, pod, le, scheme, operation) (rate(opendal_operation_duration_seconds_bucket{ operation!~"read|write|list"}[$__rate_interval])))
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: ${metrics}
|
||||
|
||||
@@ -13,8 +13,8 @@
# limitations under the License.

import os
import re
from multiprocessing import Pool
from pathlib import Path


def find_rust_files(directory):
@@ -24,10 +24,6 @@ def find_rust_files(directory):
        if "test" in root.lower():
            continue

        # Skip the target directory
        if "target" in Path(root).parts:
            continue

        for file in files:
            # Skip files with "test" in the filename
            if "test" in file.lower():
scripts/install.sh (149 changed lines, Normal file → Executable file)
@@ -53,54 +53,6 @@ get_arch_type() {
|
||||
esac
|
||||
}
|
||||
|
||||
# Verify SHA256 checksum
|
||||
verify_sha256() {
|
||||
file="$1"
|
||||
expected_sha256="$2"
|
||||
|
||||
if command -v sha256sum >/dev/null 2>&1; then
|
||||
actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
|
||||
elif command -v shasum >/dev/null 2>&1; then
|
||||
actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
|
||||
else
|
||||
echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [ "$actual_sha256" = "$expected_sha256" ]; then
|
||||
echo "SHA256 checksum verified successfully."
|
||||
return 0
|
||||
else
|
||||
echo "Error: SHA256 checksum verification failed!"
|
||||
echo "Expected: $expected_sha256"
|
||||
echo "Actual: $actual_sha256"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Prompt for user confirmation (compatible with different shells)
|
||||
prompt_confirmation() {
|
||||
message="$1"
|
||||
printf "%s (y/N): " "$message"
|
||||
|
||||
# Try to read user input, fallback if read fails
|
||||
answer=""
|
||||
if read answer </dev/tty 2>/dev/null; then
|
||||
case "$answer" in
|
||||
[Yy]|[Yy][Ee][Ss])
|
||||
return 0
|
||||
;;
|
||||
*)
|
||||
return 1
|
||||
;;
|
||||
esac
|
||||
else
|
||||
echo ""
|
||||
echo "Cannot read user input. Defaulting to No."
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
download_artifact() {
|
||||
if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
|
||||
# Use the latest stable released version.
|
||||
@@ -119,104 +71,17 @@ download_artifact() {
|
||||
fi
|
||||
|
||||
echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
|
||||
PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
|
||||
PACKAGE_NAME="${PKG_NAME}.tar.gz"
|
||||
SHA256_FILE="${PKG_NAME}.sha256sum"
|
||||
PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
|
||||
|
||||
if [ -n "${PACKAGE_NAME}" ]; then
|
||||
# Check if files already exist and prompt for override
|
||||
if [ -f "${PACKAGE_NAME}" ]; then
|
||||
echo "File ${PACKAGE_NAME} already exists."
|
||||
if prompt_confirmation "Do you want to override it?"; then
|
||||
echo "Overriding existing file..."
|
||||
rm -f "${PACKAGE_NAME}"
|
||||
else
|
||||
echo "Skipping download. Using existing file."
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -f "${BIN}" ]; then
|
||||
echo "Binary ${BIN} already exists."
|
||||
if prompt_confirmation "Do you want to override it?"; then
|
||||
echo "Will override existing binary..."
|
||||
rm -f "${BIN}"
|
||||
else
|
||||
echo "Installation cancelled."
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Download package if not exists
|
||||
if [ ! -f "${PACKAGE_NAME}" ]; then
|
||||
echo "Downloading ${PACKAGE_NAME}..."
|
||||
# Use curl instead of wget for better compatibility
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
if ! curl -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
|
||||
echo "Error: Failed to download ${PACKAGE_NAME}"
|
||||
exit 1
|
||||
fi
|
||||
elif command -v wget >/dev/null 2>&1; then
|
||||
if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
|
||||
echo "Error: Failed to download ${PACKAGE_NAME}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Error: Neither curl nor wget is available for downloading."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Download and verify SHA256 checksum
|
||||
echo "Downloading SHA256 checksum..."
|
||||
sha256_download_success=0
|
||||
if command -v curl >/dev/null 2>&1; then
|
||||
if curl -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
|
||||
sha256_download_success=1
|
||||
fi
|
||||
elif command -v wget >/dev/null 2>&1; then
|
||||
if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
|
||||
sha256_download_success=1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
|
||||
expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
|
||||
if [ -n "$expected_sha256" ]; then
|
||||
if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
|
||||
echo "SHA256 verification failed. Removing downloaded file."
|
||||
rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "Warning: Could not parse SHA256 checksum from file."
|
||||
fi
|
||||
rm -f "${SHA256_FILE}"
|
||||
else
|
||||
echo "Warning: Could not download SHA256 checksum file. Skipping verification."
|
||||
fi
|
||||
wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"
|
||||
|
||||
# Extract the binary and clean the rest.
|
||||
echo "Extracting ${PACKAGE_NAME}..."
|
||||
if ! tar xf "${PACKAGE_NAME}"; then
|
||||
echo "Error: Failed to extract ${PACKAGE_NAME}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Find the binary in the extracted directory
|
||||
extracted_dir="${PACKAGE_NAME%.tar.gz}"
|
||||
if [ -f "${extracted_dir}/${BIN}" ]; then
|
||||
mv "${extracted_dir}/${BIN}" "${PWD}/"
|
||||
rm -f "${PACKAGE_NAME}"
|
||||
rm -rf "${extracted_dir}"
|
||||
chmod +x "${BIN}"
|
||||
echo "Installation completed successfully!"
|
||||
echo "Run './${BIN} --help' to get started"
|
||||
else
|
||||
echo "Error: Binary ${BIN} not found in extracted archive"
|
||||
rm -f "${PACKAGE_NAME}"
|
||||
rm -rf "${extracted_dir}"
|
||||
exit 1
|
||||
fi
|
||||
tar xvf "${PACKAGE_NAME}" && \
|
||||
mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
|
||||
rm -r "${PACKAGE_NAME}" && \
|
||||
rm -r "${PACKAGE_NAME%.tar.gz}" && \
|
||||
echo "Run './${BIN} --help' to get started"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -162,16 +162,6 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
    }

    fn system_table(&self, name: &str) -> Option<SystemTableRef> {
        #[cfg(feature = "enterprise")]
        if let Some(factory) = self.extra_table_factories.get(name) {
            let req = MakeInformationTableRequest {
                catalog_name: self.catalog_name.clone(),
                catalog_manager: self.catalog_manager.clone(),
                kv_backend: self.kv_backend.clone(),
            };
            return Some(factory.make_information_table(req));
        }

        match name.to_ascii_lowercase().as_str() {
            TABLES => Some(Arc::new(InformationSchemaTables::new(
                self.catalog_name.clone(),
@@ -250,7 +240,22 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
                .process_manager
                .as_ref()
                .map(|p| Arc::new(InformationSchemaProcessList::new(p.clone())) as _),
            _ => None,
            table_name => {
                #[cfg(feature = "enterprise")]
                return self.extra_table_factories.get(table_name).map(|factory| {
                    let req = MakeInformationTableRequest {
                        catalog_name: self.catalog_name.clone(),
                        catalog_manager: self.catalog_manager.clone(),
                        kv_backend: self.kv_backend.clone(),
                    };
                    factory.make_information_table(req)
                });
                #[cfg(not(feature = "enterprise"))]
                {
                    let _ = table_name;
                    None
                }
            }
        }
    }
}

@@ -15,8 +15,7 @@
use std::sync::Arc;

use common_catalog::consts::{METRIC_ENGINE, MITO_ENGINE};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::schema::{Schema, SchemaRef};
use datatypes::vectors::{Int64Vector, StringVector, VectorRef};

use crate::system_schema::information_schema::table_names::*;

@@ -368,18 +367,28 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>

        TRIGGERS => (
            vec![
                string_column("TRIGGER_CATALOG"),
                string_column("TRIGGER_SCHEMA"),
                string_column("TRIGGER_NAME"),
                ColumnSchema::new(
                    "trigger_id",
                    ConcreteDataType::uint64_datatype(),
                    false,
                ),
                string_column("TRIGGER_DEFINITION"),
                ColumnSchema::new(
                    "flownode_id",
                    ConcreteDataType::uint64_datatype(),
                    true,
                ),
                string_column("EVENT_MANIPULATION"),
                string_column("EVENT_OBJECT_CATALOG"),
                string_column("EVENT_OBJECT_SCHEMA"),
                string_column("EVENT_OBJECT_TABLE"),
                bigint_column("ACTION_ORDER"),
                string_column("ACTION_CONDITION"),
                string_column("ACTION_STATEMENT"),
                string_column("ACTION_ORIENTATION"),
                string_column("ACTION_TIMING"),
                string_column("ACTION_REFERENCE_OLD_TABLE"),
                string_column("ACTION_REFERENCE_NEW_TABLE"),
                string_column("ACTION_REFERENCE_OLD_ROW"),
                string_column("ACTION_REFERENCE_NEW_ROW"),
                timestamp_micro_column("CREATED"),
                string_column("SQL_MODE"),
                string_column("DEFINER"),
                string_column("CHARACTER_SET_CLIENT"),
                string_column("COLLATION_CONNECTION"),
                string_column("DATABASE_COLLATION"),
            ],
            vec![],
        ),

@@ -329,8 +329,13 @@ impl InformationSchemaPartitionsBuilder {
self.partition_names.push(Some(&partition_name));
self.partition_ordinal_positions
.push(Some((index + 1) as i64));
let expression = partition.partition_expr.as_ref().map(|e| e.to_string());
self.partition_expressions.push(expression.as_deref());
let expressions = if partition.partition.partition_columns().is_empty() {
None
} else {
Some(partition.partition.to_string())
};

self.partition_expressions.push(expressions.as_deref());
self.create_times.push(Some(TimestampMicrosecond::from(
table_info.meta.created_on.timestamp_millis(),
)));
|
||||
|
||||
@@ -44,7 +44,6 @@ const DISK_SIZE: &str = "disk_size";
|
||||
const MEMTABLE_SIZE: &str = "memtable_size";
|
||||
const MANIFEST_SIZE: &str = "manifest_size";
|
||||
const SST_SIZE: &str = "sst_size";
|
||||
const SST_NUM: &str = "sst_num";
|
||||
const INDEX_SIZE: &str = "index_size";
|
||||
const ENGINE: &str = "engine";
|
||||
const REGION_ROLE: &str = "region_role";
|
||||
@@ -88,7 +87,6 @@ impl InformationSchemaRegionStatistics {
|
||||
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(SST_NUM, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
|
||||
@@ -151,7 +149,6 @@ struct InformationSchemaRegionStatisticsBuilder {
|
||||
memtable_sizes: UInt64VectorBuilder,
|
||||
manifest_sizes: UInt64VectorBuilder,
|
||||
sst_sizes: UInt64VectorBuilder,
|
||||
sst_nums: UInt64VectorBuilder,
|
||||
index_sizes: UInt64VectorBuilder,
|
||||
engines: StringVectorBuilder,
|
||||
region_roles: StringVectorBuilder,
|
||||
@@ -170,7 +167,6 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
sst_nums: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
@@ -201,7 +197,6 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
|
||||
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
|
||||
(SST_SIZE, &Value::from(region_stat.sst_size)),
|
||||
(SST_NUM, &Value::from(region_stat.sst_num)),
|
||||
(INDEX_SIZE, &Value::from(region_stat.index_size)),
|
||||
(ENGINE, &Value::from(region_stat.engine.as_str())),
|
||||
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
|
||||
@@ -220,7 +215,6 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
self.memtable_sizes.push(Some(region_stat.memtable_size));
|
||||
self.manifest_sizes.push(Some(region_stat.manifest_size));
|
||||
self.sst_sizes.push(Some(region_stat.sst_size));
|
||||
self.sst_nums.push(Some(region_stat.sst_num));
|
||||
self.index_sizes.push(Some(region_stat.index_size));
|
||||
self.engines.push(Some(®ion_stat.engine));
|
||||
self.region_roles.push(Some(®ion_stat.role.to_string()));
|
||||
@@ -236,7 +230,6 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
Arc::new(self.memtable_sizes.finish()),
|
||||
Arc::new(self.manifest_sizes.finish()),
|
||||
Arc::new(self.sst_sizes.finish()),
|
||||
Arc::new(self.sst_nums.finish()),
|
||||
Arc::new(self.index_sizes.finish()),
|
||||
Arc::new(self.engines.finish()),
|
||||
Arc::new(self.region_roles.finish()),
|
||||
|
||||
@@ -48,3 +48,4 @@ pub const FLOWS: &str = "flows";
pub const PROCEDURE_INFO: &str = "procedure_info";
pub const REGION_STATISTICS: &str = "region_statistics";
pub const PROCESS_LIST: &str = "process_list";
pub const TRIGGER_LIST: &str = "trigger_list";
|
||||
|
||||
@@ -169,7 +169,7 @@ impl DfPartitionStream for PGClass {
}

/// Builds the `pg_catalog.pg_class` table row by row
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] because we don't have users.
/// TODO(J0HN50N133): `relowner` is always the [`DUMMY_OWNER_ID`] cuz we don't have user.
/// Once we have user system, make it the actual owner of the table.
struct PGClassBuilder {
schema: SchemaRef,
|
||||
|
||||
@@ -188,7 +188,6 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
|
||||
name: String::new(),
|
||||
partition: None,
|
||||
attrs: BTreeMap::new(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer {
|
||||
id: rng.random_range(0..10),
|
||||
|
||||
@@ -23,7 +23,7 @@ use api::v1::greptime_request::Request;
|
||||
use api::v1::query_request::Query;
|
||||
use api::v1::{
|
||||
AlterTableExpr, AuthHeader, Basic, CreateTableExpr, DdlRequest, GreptimeRequest,
|
||||
InsertRequests, QueryRequest, RequestHeader, RowInsertRequests,
|
||||
InsertRequests, QueryRequest, RequestHeader,
|
||||
};
|
||||
use arrow_flight::{FlightData, Ticket};
|
||||
use async_stream::stream;
|
||||
@@ -42,7 +42,7 @@ use common_telemetry::{error, warn};
|
||||
use futures::future;
|
||||
use futures_util::{Stream, StreamExt, TryStreamExt};
|
||||
use prost::Message;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tonic::metadata::{AsciiMetadataKey, AsciiMetadataValue, MetadataMap, MetadataValue};
|
||||
use tonic::transport::Channel;
|
||||
|
||||
@@ -118,7 +118,6 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the catalog for the database client.
|
||||
pub fn set_catalog(&mut self, catalog: impl Into<String>) {
|
||||
self.catalog = catalog.into();
|
||||
}
|
||||
@@ -131,7 +130,6 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the schema for the database client.
|
||||
pub fn set_schema(&mut self, schema: impl Into<String>) {
|
||||
self.schema = schema.into();
|
||||
}
|
||||
@@ -144,24 +142,20 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the timezone for the database client.
|
||||
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
|
||||
self.timezone = timezone.into();
|
||||
}
|
||||
|
||||
/// Set the auth scheme for the database client.
|
||||
pub fn set_auth(&mut self, auth: AuthScheme) {
|
||||
self.ctx.auth_header = Some(AuthHeader {
|
||||
auth_scheme: Some(auth),
|
||||
});
|
||||
}
|
||||
|
||||
/// Make an InsertRequests request to the database.
|
||||
pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
|
||||
self.handle(Request::Inserts(requests)).await
|
||||
}
|
||||
|
||||
/// Make an InsertRequests request to the database with hints.
|
||||
pub async fn insert_with_hints(
|
||||
&self,
|
||||
requests: InsertRequests,
|
||||
@@ -178,28 +172,6 @@ impl Database {
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
/// Make a RowInsertRequests request to the database.
|
||||
pub async fn row_inserts(&self, requests: RowInsertRequests) -> Result<u32> {
|
||||
self.handle(Request::RowInserts(requests)).await
|
||||
}
|
||||
|
||||
/// Make a RowInsertRequests request to the database with hints.
|
||||
pub async fn row_inserts_with_hints(
|
||||
&self,
|
||||
requests: RowInsertRequests,
|
||||
hints: &[(&str, &str)],
|
||||
) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let request = self.to_rpc_request(Request::RowInserts(requests));
|
||||
|
||||
let mut request = tonic::Request::new(request);
|
||||
let metadata = request.metadata_mut();
|
||||
Self::put_hints(metadata, hints)?;
|
||||
|
||||
let response = client.handle(request).await?.into_inner();
|
||||
from_grpc_response(response)
|
||||
}
|
||||
|
||||
fn put_hints(metadata: &mut MetadataMap, hints: &[(&str, &str)]) -> Result<()> {
|
||||
let Some(value) = hints
|
||||
.iter()
|
||||
@@ -215,7 +187,6 @@ impl Database {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Make a request to the database.
|
||||
pub async fn handle(&self, request: Request) -> Result<u32> {
|
||||
let mut client = make_database_client(&self.client)?.inner;
|
||||
let request = self.to_rpc_request(request);
|
||||
@@ -250,18 +221,12 @@ impl Database {
|
||||
retries += 1;
|
||||
warn!("Retrying {} times with error = {:?}", retries, err);
|
||||
continue;
|
||||
} else {
|
||||
error!(
|
||||
err; "Failed to send request to grpc handle, retries = {}, not retryable error, aborting",
|
||||
retries
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
(Err(err), false) => {
|
||||
error!(
|
||||
err; "Failed to send request to grpc handle after {} retries",
|
||||
retries,
|
||||
"Failed to send request to grpc handle after {} retries, error = {:?}",
|
||||
retries, err
|
||||
);
|
||||
return Err(err.into());
|
||||
}
|
||||
@@ -285,7 +250,6 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
/// Executes a SQL query without any hints.
|
||||
pub async fn sql<S>(&self, sql: S) -> Result<Output>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
@@ -293,7 +257,6 @@ impl Database {
|
||||
self.sql_with_hint(sql, &[]).await
|
||||
}
|
||||
|
||||
/// Executes a SQL query with optional hints for query optimization.
|
||||
pub async fn sql_with_hint<S>(&self, sql: S, hints: &[(&str, &str)]) -> Result<Output>
|
||||
where
|
||||
S: AsRef<str>,
|
||||
@@ -304,7 +267,6 @@ impl Database {
|
||||
self.do_get(request, hints).await
|
||||
}
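// --- Illustrative sketch (not part of this diff) ---
// Exercising the setters and SQL helpers shown above, given a `Database`
// handle obtained elsewhere. The hint key below is a made-up placeholder,
// not a documented GreptimeDB hint.
async fn example_queries(db: &mut Database) -> Result<()> {
    db.set_schema("public");
    let _plain: Output = db.sql("SELECT 1").await?;
    let _hinted: Output = db
        .sql_with_hint("SELECT 1", &[("example_hint", "value")])
        .await?;
    Ok(())
}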
|
||||
|
||||
/// Executes a logical plan directly without SQL parsing.
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
|
||||
let request = Request::Query(QueryRequest {
|
||||
query: Some(Query::LogicalPlan(logical_plan)),
|
||||
@@ -312,7 +274,6 @@ impl Database {
|
||||
self.do_get(request, &[]).await
|
||||
}
|
||||
|
||||
/// Creates a new table using the provided table expression.
|
||||
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
|
||||
let request = Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::CreateTable(expr)),
|
||||
@@ -320,7 +281,6 @@ impl Database {
|
||||
self.do_get(request, &[]).await
|
||||
}
|
||||
|
||||
/// Alters an existing table using the provided alter expression.
|
||||
pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
|
||||
let request = Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::AlterTable(expr)),
|
||||
@@ -361,10 +321,7 @@ impl Database {
|
||||
let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
|
||||
flight_data
|
||||
.map_err(Error::from)
|
||||
.and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))?
|
||||
.context(IllegalFlightMessagesSnafu {
|
||||
reason: "none message",
|
||||
})
|
||||
.and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))
|
||||
});
|
||||
|
||||
let Some(first_flight_message) = flight_message_stream.next().await else {
|
||||
|
||||
@@ -128,10 +128,7 @@ impl RegionRequester {
|
||||
let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
|
||||
flight_data
|
||||
.map_err(Error::from)
|
||||
.and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))?
|
||||
.context(IllegalFlightMessagesSnafu {
|
||||
reason: "none message",
|
||||
})
|
||||
.and_then(|data| decoder.try_decode(&data).context(ConvertFlightDataSnafu))
|
||||
});
|
||||
|
||||
let Some(first_flight_message) = flight_message_stream.next().await else {
|
||||
@@ -160,70 +157,19 @@ impl RegionRequester {
|
||||
let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
|
||||
"poll_flight_data_stream"
|
||||
));
|
||||
|
||||
let mut buffered_message: Option<FlightMessage> = None;
|
||||
let mut stream_ended = false;
|
||||
|
||||
while !stream_ended {
|
||||
// get the next message from the buffered message or read from the flight message stream
|
||||
let flight_message_item = if let Some(msg) = buffered_message.take() {
|
||||
Some(Ok(msg))
|
||||
} else {
|
||||
flight_message_stream.next().await
|
||||
};
|
||||
|
||||
let flight_message = match flight_message_item {
|
||||
Some(Ok(message)) => message,
|
||||
Some(Err(e)) => {
|
||||
yield Err(BoxedError::new(e)).context(ExternalSnafu);
|
||||
break;
|
||||
}
|
||||
None => break,
|
||||
};
|
||||
while let Some(flight_message) = flight_message_stream.next().await {
|
||||
let flight_message = flight_message
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
match flight_message {
|
||||
FlightMessage::RecordBatch(record_batch) => {
|
||||
let result_to_yield = RecordBatch::try_from_df_record_batch(
|
||||
yield RecordBatch::try_from_df_record_batch(
|
||||
schema_cloned.clone(),
|
||||
record_batch,
|
||||
);
|
||||
|
||||
// get the next message from the stream. normally it should be a metrics message.
|
||||
if let Some(next_flight_message_result) = flight_message_stream.next().await
|
||||
{
|
||||
match next_flight_message_result {
|
||||
Ok(FlightMessage::Metrics(s)) => {
|
||||
let m = serde_json::from_str(&s).ok().map(Arc::new);
|
||||
metrics_ref.swap(m);
|
||||
}
|
||||
Ok(FlightMessage::RecordBatch(rb)) => {
|
||||
// for some reason it's not a metrics message, so we need to buffer this record batch
|
||||
// and yield it in the next iteration.
|
||||
buffered_message = Some(FlightMessage::RecordBatch(rb));
|
||||
}
|
||||
Ok(_) => {
|
||||
yield IllegalFlightMessagesSnafu {
|
||||
reason: "A RecordBatch message can only be succeeded by a Metrics message or another RecordBatch message"
|
||||
}
|
||||
.fail()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu);
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
yield Err(BoxedError::new(e)).context(ExternalSnafu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// the stream has ended
|
||||
stream_ended = true;
|
||||
}
|
||||
|
||||
yield result_to_yield;
|
||||
)
|
||||
}
|
||||
FlightMessage::Metrics(s) => {
|
||||
// just a branch in case of some metrics message comes after other things.
|
||||
let m = serde_json::from_str(&s).ok().map(Arc::new);
|
||||
metrics_ref.swap(m);
|
||||
break;
|
||||
|
||||
@@ -374,7 +374,6 @@ impl StartCommand {
meta_client.clone(),
flow_auth_header,
opts.query.clone(),
opts.flow.batching_mode.clone(),
);
let frontend_client = Arc::new(frontend_client);
let flownode_builder = FlownodeBuilder::new(
|
||||
|
||||
@@ -821,7 +821,6 @@ impl InformationExtension for StandaloneInformationExtension {
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
sst_num: region_stat.sst_num,
index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(),
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
|
||||
|
||||
@@ -104,6 +104,8 @@ pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
/// id for information_schema.process_list
pub const INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID: u32 = 36;
/// id for information_schema.trigger_list (for greptimedb trigger)
pub const INFORMATION_SCHEMA_TRIGGER_TABLE_ID: u32 = 37;

// ----- End of information_schema tables -----
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ pub mod error;
pub mod file_format;
pub mod lister;
pub mod object_store;
pub mod parquet_writer;
pub mod share_buffer;
#[cfg(test)]
pub mod test_util;
|
||||
|
||||
@@ -77,11 +77,6 @@ pub fn build_oss_backend(
|
||||
|
||||
let op = ObjectStore::new(builder)
|
||||
.context(error::BuildBackendSnafu)?
|
||||
.layer(
|
||||
object_store::layers::RetryLayer::new()
|
||||
.with_jitter()
|
||||
.with_notify(object_store::util::PrintDetailedError),
|
||||
)
|
||||
.layer(object_store::layers::LoggingLayer::default())
|
||||
.layer(object_store::layers::TracingLayer)
|
||||
.layer(object_store::layers::build_prometheus_metrics_layer(true))
|
||||
|
||||
@@ -85,11 +85,6 @@ pub fn build_s3_backend(
|
||||
// TODO(weny): Consider finding a better way to eliminate duplicate code.
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::BuildBackendSnafu)?
|
||||
.layer(
|
||||
object_store::layers::RetryLayer::new()
|
||||
.with_jitter()
|
||||
.with_notify(object_store::util::PrintDetailedError),
|
||||
)
|
||||
.layer(object_store::layers::LoggingLayer::new(
|
||||
DefaultLoggingInterceptor,
|
||||
))
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use bytes::Bytes;
|
||||
use futures::future::BoxFuture;
|
||||
use object_store::Writer;
|
||||
use parquet::arrow::async_writer::AsyncFileWriter;
|
||||
use parquet::errors::ParquetError;
|
||||
|
||||
/// Bridges opendal [Writer] with parquet [AsyncFileWriter].
|
||||
pub struct AsyncWriter {
|
||||
inner: Writer,
|
||||
}
|
||||
|
||||
impl AsyncWriter {
|
||||
/// Create a [`AsyncWriter`] by given [`Writer`].
|
||||
pub fn new(writer: Writer) -> Self {
|
||||
Self { inner: writer }
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncFileWriter for AsyncWriter {
|
||||
fn write(&mut self, bs: Bytes) -> BoxFuture<'_, parquet::errors::Result<()>> {
|
||||
Box::pin(async move {
|
||||
self.inner
|
||||
.write(bs)
|
||||
.await
|
||||
.map_err(|err| ParquetError::External(Box::new(err)))
|
||||
})
|
||||
}
|
||||
|
||||
fn complete(&mut self) -> BoxFuture<'_, parquet::errors::Result<()>> {
|
||||
Box::pin(async move {
|
||||
self.inner
|
||||
.close()
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|err| ParquetError::External(Box::new(err)))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -84,33 +84,24 @@ fn test_to_string() {
|
||||
assert_eq!(result.unwrap_err().to_string(), "<root cause>");
|
||||
}
|
||||
|
||||
fn normalize_path(s: &str) -> String {
|
||||
s.replace('\\', "/")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_debug_format() {
|
||||
let result = normal_error();
|
||||
let debug_output = format!("{:?}", result.unwrap_err());
|
||||
|
||||
let normalized_output = debug_output.replace('\\', "/");
|
||||
assert_eq!(
|
||||
normalize_path(&debug_output),
|
||||
format!(
|
||||
r#"0: A normal error with "display" attribute, message "blabla", at {}:55:22
|
||||
1: PlainError {{ msg: "<root cause>", status_code: Unexpected }}"#,
|
||||
normalize_path(file!())
|
||||
)
|
||||
normalized_output,
|
||||
r#"0: A normal error with "display" attribute, message "blabla", at src/common/error/tests/ext.rs:55:22
|
||||
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
|
||||
);
|
||||
|
||||
let result = transparent_error();
|
||||
let debug_output = format!("{:?}", result.unwrap_err());
|
||||
let normalized_output = debug_output.replace('\\', "/");
|
||||
assert_eq!(
|
||||
normalize_path(&debug_output),
|
||||
format!(
|
||||
r#"0: <transparent>, at {}:60:5
|
||||
1: PlainError {{ msg: "<root cause>", status_code: Unexpected }}"#,
|
||||
normalize_path(file!())
|
||||
)
|
||||
normalized_output,
|
||||
r#"0: <transparent>, at src/common/error/tests/ext.rs:60:5
|
||||
1: PlainError { msg: "<root cause>", status_code: Unexpected }"#
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "common-event-recorder"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
async-trait.workspace = true
|
||||
backon.workspace = true
|
||||
client.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
common-time.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
snafu.workspace = true
|
||||
store-api.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-util.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
@@ -1,53 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::ColumnSchema;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("No available frontend"))]
|
||||
NoAvailableFrontend {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Mismatched schema, expected: {:?}, actual: {:?}", expected, actual))]
|
||||
MismatchedSchema {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
expected: Vec<ColumnSchema>,
|
||||
actual: Vec<ColumnSchema>,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::MismatchedSchema { .. } => StatusCode::InvalidArguments,
|
||||
Error::NoAvailableFrontend { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod error;
|
||||
pub mod recorder;
|
||||
|
||||
pub use recorder::*;
|
||||
@@ -1,527 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{
|
||||
ColumnDataType, ColumnDataTypeExtension, ColumnSchema, JsonTypeExtension, Row,
|
||||
RowInsertRequest, RowInsertRequests, Rows, SemanticType,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use backon::{BackoffBuilder, ExponentialBuilder};
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, TTL_KEY};
|
||||
use tokio::sync::mpsc::{channel, Receiver, Sender};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::sleep;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::error::{MismatchedSchemaSnafu, Result};
|
||||
|
||||
/// The default table name for storing the events.
|
||||
pub const DEFAULT_EVENTS_TABLE_NAME: &str = "events";
|
||||
|
||||
/// The column name for the event type.
|
||||
pub const EVENTS_TABLE_TYPE_COLUMN_NAME: &str = "type";
|
||||
/// The column name for the event payload.
|
||||
pub const EVENTS_TABLE_PAYLOAD_COLUMN_NAME: &str = "payload";
|
||||
/// The column name for the event timestamp.
|
||||
pub const EVENTS_TABLE_TIMESTAMP_COLUMN_NAME: &str = "timestamp";
|
||||
|
||||
/// EventRecorderRef is the reference to the event recorder.
|
||||
pub type EventRecorderRef = Arc<dyn EventRecorder>;
|
||||
|
||||
static EVENTS_TABLE_TTL: OnceLock<String> = OnceLock::new();
|
||||
|
||||
/// The time interval for flushing batched events to the event handler.
|
||||
pub const DEFAULT_FLUSH_INTERVAL_SECONDS: Duration = Duration::from_secs(5);
|
||||
// The default TTL for the events table.
|
||||
const DEFAULT_EVENTS_TABLE_TTL: &str = "30d";
|
||||
// The capacity of the tokio channel for transmitting events to background processor.
|
||||
const DEFAULT_CHANNEL_SIZE: usize = 2048;
|
||||
// The size of the buffer for batching events before flushing to event handler.
|
||||
const DEFAULT_BUFFER_SIZE: usize = 100;
|
||||
// The maximum number of retry attempts when event handler processing fails.
|
||||
const DEFAULT_MAX_RETRY_TIMES: u64 = 3;
|
||||
|
||||
/// Event trait defines the interface for events that can be recorded and persisted as the system table.
|
||||
/// By default, the event will be persisted as the system table with the following schema:
|
||||
///
|
||||
/// - `type`: the type of the event.
|
||||
/// - `payload`: the JSON bytes of the event.
|
||||
/// - `timestamp`: the timestamp of the event.
|
||||
///
|
||||
/// The event can also add the extra schema and row to the event by overriding the `extra_schema` and `extra_row` methods.
|
||||
pub trait Event: Send + Sync + Debug {
|
||||
/// Returns the type of the event.
|
||||
fn event_type(&self) -> &str;
|
||||
|
||||
/// Returns the timestamp of the event. Default to the current time.
|
||||
fn timestamp(&self) -> Timestamp {
|
||||
Timestamp::current_time(TimeUnit::Nanosecond)
|
||||
}
|
||||
|
||||
/// Returns the JSON bytes of the event as the payload. It will use JSON type to store the payload.
|
||||
fn json_payload(&self) -> Result<String>;
|
||||
|
||||
/// Add the extra schema to the event with the default schema.
|
||||
fn extra_schema(&self) -> Vec<ColumnSchema> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
/// Add the extra row to the event with the default row.
|
||||
fn extra_row(&self) -> Result<Row> {
|
||||
Ok(Row { values: vec![] })
|
||||
}
|
||||
|
||||
/// Returns the event as any type.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
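// --- Illustrative sketch (not part of the original file) ---
// A minimal `Event` implementation relying on the default `timestamp`,
// `extra_schema` and `extra_row` methods above; the payload is hand-written JSON.
// It mirrors the `TestEvent` used in this file's tests.
#[allow(dead_code)]
#[derive(Debug)]
struct ExampleEvent {
    message: String,
}

impl Event for ExampleEvent {
    fn event_type(&self) -> &str {
        "example"
    }

    fn json_payload(&self) -> Result<String> {
        Ok(format!("{{\"message\": \"{}\"}}", self.message))
    }

    fn as_any(&self) -> &dyn Any {
        self
    }
}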
|
||||
|
||||
/// Returns the hints for the insert operation.
|
||||
pub fn insert_hints() -> Vec<(&'static str, &'static str)> {
|
||||
vec![
|
||||
(
|
||||
TTL_KEY,
|
||||
EVENTS_TABLE_TTL
|
||||
.get()
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(DEFAULT_EVENTS_TABLE_TTL),
|
||||
),
|
||||
(APPEND_MODE_KEY, "true"),
|
||||
]
|
||||
}
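// --- Illustrative check (not part of the original file) ---
// With `EVENTS_TABLE_TTL` left unset, the hints resolve to the defaults above:
// a 30d TTL and append mode enabled.
#[cfg(test)]
mod insert_hints_example {
    use super::*;

    #[test]
    fn default_insert_hints() {
        let hints = insert_hints();
        assert_eq!(hints, vec![(TTL_KEY, "30d"), (APPEND_MODE_KEY, "true")]);
    }
}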
|
||||
|
||||
/// Builds the row inserts request for the events that will be persisted to the events table.
|
||||
pub fn build_row_inserts_request(events: &[Box<dyn Event>]) -> Result<RowInsertRequests> {
|
||||
// Aggregate the events by the event type.
|
||||
let mut event_groups: HashMap<&str, Vec<&Box<dyn Event>>> = HashMap::new();
|
||||
|
||||
for event in events {
|
||||
event_groups
|
||||
.entry(event.event_type())
|
||||
.or_default()
|
||||
.push(event);
|
||||
}
|
||||
|
||||
let mut row_insert_requests = RowInsertRequests {
|
||||
inserts: Vec::with_capacity(event_groups.len()),
|
||||
};
|
||||
|
||||
for (_, events) in event_groups {
|
||||
validate_events(&events)?;
|
||||
|
||||
// We already validated the events, so it's safe to get the first event to build the schema for the RowInsertRequest.
|
||||
let event = &events[0];
|
||||
let mut schema = vec![
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TYPE_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::String.into(),
|
||||
semantic_type: SemanticType::Tag.into(),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_PAYLOAD_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::Binary as i32,
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
datatype_extension: Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: EVENTS_TABLE_TIMESTAMP_COLUMN_NAME.to_string(),
|
||||
datatype: ColumnDataType::TimestampNanosecond.into(),
|
||||
semantic_type: SemanticType::Timestamp.into(),
|
||||
..Default::default()
|
||||
},
|
||||
];
|
||||
schema.extend(event.extra_schema());
|
||||
|
||||
let rows = events
|
||||
.iter()
|
||||
.map(|event| {
|
||||
let mut row = Row {
|
||||
values: vec![
|
||||
ValueData::StringValue(event.event_type().to_string()).into(),
|
||||
ValueData::BinaryValue(event.json_payload()?.as_bytes().to_vec()).into(),
|
||||
ValueData::TimestampNanosecondValue(event.timestamp().value()).into(),
|
||||
],
|
||||
};
|
||||
row.values.extend(event.extra_row()?.values);
|
||||
Ok(row)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
row_insert_requests.inserts.push(RowInsertRequest {
|
||||
table_name: DEFAULT_EVENTS_TABLE_NAME.to_string(),
|
||||
rows: Some(Rows { schema, rows }),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(row_insert_requests)
|
||||
}
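// --- Illustrative sketch (not part of the original file) ---
// Building an insert request from boxed events, using the `ExampleEvent`
// sketch introduced after the `Event` trait above.
#[allow(dead_code)]
fn example_build_inserts() -> Result<RowInsertRequests> {
    let events: Vec<Box<dyn Event>> = vec![
        Box::new(ExampleEvent { message: "first".to_string() }),
        Box::new(ExampleEvent { message: "second".to_string() }),
    ];
    // Both events share the same (empty) extra schema, so validation passes
    // and they are grouped into a single `RowInsertRequest`.
    build_row_inserts_request(&events)
}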
|
||||
|
||||
// Ensure the events with the same event type have the same extra schema.
|
||||
#[allow(clippy::borrowed_box)]
|
||||
fn validate_events(events: &[&Box<dyn Event>]) -> Result<()> {
|
||||
// It's safe to get the first event because the events are already grouped by the event type.
|
||||
let extra_schema = events[0].extra_schema();
|
||||
for event in events {
|
||||
if event.extra_schema() != extra_schema {
|
||||
MismatchedSchemaSnafu {
|
||||
expected: extra_schema.clone(),
|
||||
actual: event.extra_schema(),
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// EventRecorder trait defines the interface for recording events.
|
||||
pub trait EventRecorder: Send + Sync + 'static {
|
||||
/// Records an event for persistence and processing by [EventHandler].
|
||||
fn record(&self, event: Box<dyn Event>);
|
||||
|
||||
/// Cancels the event recorder.
|
||||
fn close(&self);
|
||||
}
|
||||
|
||||
/// EventHandler trait defines the interface for how to handle the event.
|
||||
#[async_trait]
|
||||
pub trait EventHandler: Send + Sync + 'static {
|
||||
/// Processes and handles incoming events. The [DefaultEventHandlerImpl] implementation forwards events to frontend instances for persistence.
|
||||
/// We use `&[Box<dyn Event>]` to avoid consuming the events, so the caller can buffer the events and retry if the handler fails.
|
||||
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Configuration options for the event recorder.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct EventRecorderOptions {
|
||||
/// TTL for the events table that will be used to store the events.
|
||||
pub ttl: String,
|
||||
}
|
||||
|
||||
impl Default for EventRecorderOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ttl: DEFAULT_EVENTS_TABLE_TTL.to_string(),
|
||||
}
|
||||
}
|
||||
}
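// --- Illustrative sketch (not part of the original file) ---
// Overriding the default 30d retention for the events table.
#[allow(dead_code)]
fn example_event_recorder_options() -> EventRecorderOptions {
    EventRecorderOptions { ttl: "7d".to_string() }
}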
|
||||
|
||||
/// Implementation of [EventRecorder] that records the events and processes them in the background by the [EventHandler].
|
||||
pub struct EventRecorderImpl {
|
||||
// The channel to send the events to the background processor.
|
||||
tx: Sender<Box<dyn Event>>,
|
||||
// The cancel token to cancel the background processor.
|
||||
cancel_token: CancellationToken,
|
||||
// The background processor to process the events.
|
||||
handle: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
impl EventRecorderImpl {
|
||||
pub fn new(event_handler: Box<dyn EventHandler>, opts: EventRecorderOptions) -> Self {
|
||||
info!("Creating event recorder with options: {:?}", opts);
|
||||
|
||||
let (tx, rx) = channel(DEFAULT_CHANNEL_SIZE);
|
||||
let cancel_token = CancellationToken::new();
|
||||
|
||||
let mut recorder = Self {
|
||||
tx,
|
||||
handle: None,
|
||||
cancel_token: cancel_token.clone(),
|
||||
};
|
||||
|
||||
let processor = EventProcessor::new(
|
||||
rx,
|
||||
event_handler,
|
||||
DEFAULT_FLUSH_INTERVAL_SECONDS,
|
||||
DEFAULT_MAX_RETRY_TIMES,
|
||||
)
|
||||
.with_cancel_token(cancel_token);
|
||||
|
||||
// Spawn a background task to process the events.
|
||||
let handle = tokio::spawn(async move {
|
||||
processor.process(DEFAULT_BUFFER_SIZE).await;
|
||||
});
|
||||
|
||||
recorder.handle = Some(handle);
|
||||
|
||||
// It only sets the ttl once, so it's safe to skip the error.
|
||||
if EVENTS_TABLE_TTL.set(opts.ttl.clone()).is_err() {
|
||||
info!(
|
||||
"Events table ttl already set to {}, skip setting it",
|
||||
opts.ttl
|
||||
);
|
||||
}
|
||||
|
||||
recorder
|
||||
}
|
||||
}
|
||||
|
||||
impl EventRecorder for EventRecorderImpl {
|
||||
// Accepts an event and sends it to the background handler.
|
||||
fn record(&self, event: Box<dyn Event>) {
|
||||
if let Err(e) = self.tx.try_send(event) {
|
||||
error!("Failed to send event to the background processor: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Closes the event recorder. It will stop the background processor and flush the buffer.
|
||||
fn close(&self) {
|
||||
self.cancel_token.cancel();
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for EventRecorderImpl {
|
||||
fn drop(&mut self) {
|
||||
if let Some(handle) = self.handle.take() {
|
||||
handle.abort();
|
||||
info!("Aborted the background processor in event recorder");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct EventProcessor {
|
||||
rx: Receiver<Box<dyn Event>>,
|
||||
event_handler: Box<dyn EventHandler>,
|
||||
max_retry_times: u64,
|
||||
process_interval: Duration,
|
||||
cancel_token: CancellationToken,
|
||||
}
|
||||
|
||||
impl EventProcessor {
|
||||
fn new(
|
||||
rx: Receiver<Box<dyn Event>>,
|
||||
event_handler: Box<dyn EventHandler>,
|
||||
process_interval: Duration,
|
||||
max_retry_times: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
rx,
|
||||
event_handler,
|
||||
max_retry_times,
|
||||
process_interval,
|
||||
cancel_token: CancellationToken::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn with_cancel_token(mut self, cancel_token: CancellationToken) -> Self {
|
||||
self.cancel_token = cancel_token;
|
||||
self
|
||||
}
|
||||
|
||||
async fn process(mut self, buffer_size: usize) {
|
||||
info!("Start the background processor in event recorder to handle the received events.");
|
||||
|
||||
let mut buffer = Vec::with_capacity(buffer_size);
|
||||
let mut interval = tokio::time::interval(self.process_interval);
|
||||
|
||||
loop {
|
||||
tokio::select! {
|
||||
maybe_event = self.rx.recv() => {
|
||||
if let Some(maybe_event) = maybe_event {
|
||||
debug!("Received event: {:?}", maybe_event);
|
||||
|
||||
if buffer.len() >= buffer_size {
|
||||
debug!(
|
||||
"Flushing events to the event handler because the buffer is full with {} events",
|
||||
buffer.len()
|
||||
);
|
||||
self.flush_events_to_handler(&mut buffer).await;
|
||||
}
|
||||
|
||||
// Push the event to the buffer; the buffer will be flushed when the interval is triggered or a close signal is received.
|
||||
buffer.push(maybe_event);
|
||||
} else {
|
||||
// When received a closed signal, flush the buffer and exit the loop.
|
||||
self.flush_events_to_handler(&mut buffer).await;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Cancel the processor through the cancel token.
|
||||
_ = self.cancel_token.cancelled() => {
|
||||
warn!("Received a cancel signal, flushing the buffer and exiting the loop");
|
||||
self.flush_events_to_handler(&mut buffer).await;
|
||||
break;
|
||||
}
|
||||
// When the interval is triggered, flush the buffer and send the events to the event handler.
|
||||
_ = interval.tick() => {
|
||||
self.flush_events_to_handler(&mut buffer).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: While we implement a retry mechanism for failed event handling, there is no guarantee that all events will be processed successfully.
|
||||
async fn flush_events_to_handler(&self, buffer: &mut Vec<Box<dyn Event>>) {
|
||||
if !buffer.is_empty() {
|
||||
debug!("Flushing {} events to the event handler", buffer.len());
|
||||
|
||||
let mut backoff = ExponentialBuilder::default()
|
||||
.with_min_delay(Duration::from_millis(
|
||||
DEFAULT_FLUSH_INTERVAL_SECONDS.as_millis() as u64 / self.max_retry_times.max(1),
|
||||
))
|
||||
.with_max_delay(Duration::from_millis(
|
||||
DEFAULT_FLUSH_INTERVAL_SECONDS.as_millis() as u64,
|
||||
))
|
||||
.with_max_times(self.max_retry_times as usize)
|
||||
.build();
|
||||
|
||||
loop {
|
||||
match self.event_handler.handle(buffer).await {
|
||||
Ok(()) => {
|
||||
debug!("Successfully handled {} events", buffer.len());
|
||||
break;
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(d) = backoff.next() {
|
||||
warn!(e; "Failed to handle events, retrying...");
|
||||
sleep(d).await;
|
||||
continue;
|
||||
} else {
|
||||
warn!(
|
||||
e; "Failed to handle events after {} retries",
|
||||
self.max_retry_times
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the buffer to prevent unbounded memory growth, regardless of whether event processing succeeded or failed.
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TestEvent {}
|
||||
|
||||
impl Event for TestEvent {
|
||||
fn event_type(&self) -> &str {
|
||||
"test_event"
|
||||
}
|
||||
|
||||
fn json_payload(&self) -> Result<String> {
|
||||
Ok("{\"procedure_id\": \"1234567890\"}".to_string())
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
struct TestEventHandlerImpl {}
|
||||
|
||||
#[async_trait]
|
||||
impl EventHandler for TestEventHandlerImpl {
|
||||
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
|
||||
let event = events
|
||||
.first()
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<TestEvent>()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
event.json_payload().unwrap(),
|
||||
"{\"procedure_id\": \"1234567890\"}"
|
||||
);
|
||||
assert_eq!(event.event_type(), "test_event");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_event_recorder() {
|
||||
let mut event_recorder = EventRecorderImpl::new(
|
||||
Box::new(TestEventHandlerImpl {}),
|
||||
EventRecorderOptions::default(),
|
||||
);
|
||||
event_recorder.record(Box::new(TestEvent {}));
|
||||
|
||||
// Sleep for a while to let the event be sent to the event handler.
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Close the event recorder to flush the buffer.
|
||||
event_recorder.close();
|
||||
|
||||
// Sleep for a while to let the background task process the event.
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
|
||||
if let Some(handle) = event_recorder.handle.take() {
|
||||
assert!(handle.await.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
struct TestEventHandlerImplShouldPanic {}
|
||||
|
||||
#[async_trait]
|
||||
impl EventHandler for TestEventHandlerImplShouldPanic {
|
||||
async fn handle(&self, events: &[Box<dyn Event>]) -> Result<()> {
|
||||
let event = events
|
||||
.first()
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<TestEvent>()
|
||||
.unwrap();
|
||||
|
||||
// Set the incorrect payload and event type to trigger the panic.
|
||||
assert_eq!(
|
||||
event.json_payload().unwrap(),
|
||||
"{\"procedure_id\": \"should_panic\"}"
|
||||
);
|
||||
assert_eq!(event.event_type(), "should_panic");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_event_recorder_should_panic() {
|
||||
let mut event_recorder = EventRecorderImpl::new(
|
||||
Box::new(TestEventHandlerImplShouldPanic {}),
|
||||
EventRecorderOptions::default(),
|
||||
);
|
||||
|
||||
event_recorder.record(Box::new(TestEvent {}));
|
||||
|
||||
// Sleep for a while to let the event be sent to the event handler.
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Close the event recorder to flush the buffer.
|
||||
event_recorder.close();
|
||||
|
||||
// Sleep for a while to let the background task process the event.
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
|
||||
if let Some(handle) = event_recorder.handle.take() {
|
||||
assert!(handle.await.unwrap_err().is_panic());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -16,8 +16,6 @@ geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
ahash.workspace = true
api.workspace = true
arc-swap = "1.0"
arrow.workspace = true
arrow-schema.workspace = true
async-trait.workspace = true
bincode = "1.3"
catalog.workspace = true
@@ -36,7 +34,6 @@ datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datafusion-functions-aggregate-common.workspace = true
datafusion-physical-expr.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geo = { version = "0.29", optional = true }
@@ -65,7 +62,5 @@ wkt = { version = "0.11", optional = true }

[dev-dependencies]
approx = "0.5"
futures.workspace = true
pretty_assertions = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
tokio.workspace = true
|
||||
|
||||
@@ -17,5 +17,3 @@ pub mod count_hash;
#[cfg(feature = "geo")]
pub mod geo;
pub mod vector;

pub mod aggr_wrapper;
|
||||
|
||||
@@ -1,538 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Wrapper for making aggregate functions out of the state/merge functions of the original aggregate functions.
//!
//! i.e. for an aggregate function `foo`, we will have a state function `foo_state` and a merge function `foo_merge`.
//!
//! `foo_state`'s input args are the same as `foo`'s, and its output is a state object.
//! Note that `foo_state` might have multiple output columns, so its output is a struct array
//! in which each output column is a struct field.
//! `foo_merge`'s input arg is the same as `foo_state`'s output, and its output is the same as `foo`'s output.
//!
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::StructArray;
|
||||
use arrow_schema::Fields;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter;
|
||||
use datafusion_common::{Column, ScalarValue};
|
||||
use datafusion_expr::expr::AggregateFunction;
|
||||
use datafusion_expr::function::StateFieldsArgs;
|
||||
use datafusion_expr::{
|
||||
Accumulator, Aggregate, AggregateUDF, AggregateUDFImpl, Expr, ExprSchemable, LogicalPlan,
|
||||
Signature,
|
||||
};
|
||||
use datafusion_physical_expr::aggregate::AggregateFunctionExpr;
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
|
||||
/// Returns the name of the state function for the given aggregate function name.
/// The state function is used to compute the state of the aggregate function.
/// The state function's name is in the format `__<aggr_name>_state`.
pub fn aggr_state_func_name(aggr_name: &str) -> String {
format!("__{}_state", aggr_name)
}

/// Returns the name of the merge function for the given aggregate function name.
/// The merge function is used to merge the states of the state functions.
/// The merge function's name is in the format `__<aggr_name>_merge`.
pub fn aggr_merge_func_name(aggr_name: &str) -> String {
format!("__{}_merge", aggr_name)
}
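// --- Illustrative check (not part of the original file) ---
// The naming scheme above maps e.g. `sum` to `__sum_state` / `__sum_merge`.
#[cfg(test)]
mod name_convention_example {
    use super::*;

    #[test]
    fn state_and_merge_names_follow_the_documented_format() {
        assert_eq!(aggr_state_func_name("sum"), "__sum_state");
        assert_eq!(aggr_merge_func_name("avg"), "__avg_merge");
    }
}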
|
||||
|
||||
/// A wrapper to make an aggregate function out of the state and merge functions of the original aggregate function.
/// It contains the original aggregate function, the state functions, and the merge function.
///
/// Notice that state functions may have multiple output columns, so their return type is always a struct array, and the merge function is used to merge the states of the state functions.
#[derive(Debug, Clone)]
pub struct StateMergeHelper;
|
||||
|
||||
/// A struct to hold the two aggregate plans, one for the state function(lower) and one for the merge function(upper).
|
||||
#[allow(unused)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StepAggrPlan {
|
||||
/// Upper merge plan, which is the aggregate plan that merges the states of the state function.
|
||||
pub upper_merge: Arc<LogicalPlan>,
|
||||
/// Lower state plan, which is the aggregate plan that computes the state of the aggregate function.
|
||||
pub lower_state: Arc<LogicalPlan>,
|
||||
}
|
||||
|
||||
pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
|
||||
let mut expr_ref = expr;
|
||||
while let Expr::Alias(alias) = expr_ref {
|
||||
expr_ref = &alias.expr;
|
||||
}
|
||||
if let Expr::AggregateFunction(aggr_func) = expr_ref {
|
||||
Some(aggr_func)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl StateMergeHelper {
|
||||
/// Split an aggregate plan into two aggregate plans, one for the state function and one for the merge function.
|
||||
pub fn split_aggr_node(aggr_plan: Aggregate) -> datafusion_common::Result<StepAggrPlan> {
|
||||
let aggr = {
|
||||
// certain aggr func need type coercion to work correctly, so we need to analyze the plan first.
|
||||
let aggr_plan = TypeCoercion::new().analyze(
|
||||
LogicalPlan::Aggregate(aggr_plan).clone(),
|
||||
&Default::default(),
|
||||
)?;
|
||||
if let LogicalPlan::Aggregate(aggr) = aggr_plan {
|
||||
aggr
|
||||
} else {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Failed to coerce expressions in aggregate plan, expected Aggregate, got: {:?}",
|
||||
aggr_plan
|
||||
)));
|
||||
}
|
||||
};
|
||||
let mut lower_aggr_exprs = vec![];
|
||||
let mut upper_aggr_exprs = vec![];
|
||||
|
||||
for aggr_expr in aggr.aggr_expr.iter() {
|
||||
let Some(aggr_func) = get_aggr_func(aggr_expr) else {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
"Unsupported aggregate expression for step aggr optimize: {:?}",
|
||||
aggr_expr
|
||||
)));
|
||||
};
|
||||
|
||||
let original_input_types = aggr_func
|
||||
.args
|
||||
.iter()
|
||||
.map(|e| e.get_type(&aggr.input.schema()))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
// first create the state function from the original aggregate function.
|
||||
let state_func = StateWrapper::new((*aggr_func.func).clone())?;
|
||||
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(state_func.into()),
|
||||
args: aggr_func.args.clone(),
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
};
|
||||
let expr = Expr::AggregateFunction(expr);
|
||||
let lower_state_output_col_name = expr.schema_name().to_string();
|
||||
|
||||
lower_aggr_exprs.push(expr);
|
||||
|
||||
let (original_phy_expr, _filter, _ordering) = create_aggregate_expr_and_maybe_filter(
|
||||
aggr_expr,
|
||||
aggr.input.schema(),
|
||||
aggr.input.schema().as_arrow(),
|
||||
&Default::default(),
|
||||
)?;
|
||||
|
||||
let merge_func = MergeWrapper::new(
|
||||
(*aggr_func.func).clone(),
|
||||
original_phy_expr,
|
||||
original_input_types,
|
||||
)?;
|
||||
let arg = Expr::Column(Column::new_unqualified(lower_state_output_col_name));
|
||||
let expr = AggregateFunction {
|
||||
func: Arc::new(merge_func.into()),
|
||||
args: vec![arg],
|
||||
distinct: aggr_func.distinct,
|
||||
filter: aggr_func.filter.clone(),
|
||||
order_by: aggr_func.order_by.clone(),
|
||||
null_treatment: aggr_func.null_treatment,
|
||||
};
|
||||
|
||||
// alias to the original aggregate expr's schema name, so parent plan can refer to it
|
||||
// correctly.
|
||||
let expr = Expr::AggregateFunction(expr).alias(aggr_expr.schema_name().to_string());
|
||||
upper_aggr_exprs.push(expr);
|
||||
}
|
||||
|
||||
let mut lower = aggr.clone();
|
||||
lower.aggr_expr = lower_aggr_exprs;
|
||||
let lower_plan = LogicalPlan::Aggregate(lower);
|
||||
|
||||
// update aggregate's output schema
|
||||
let lower_plan = Arc::new(lower_plan.recompute_schema()?);
|
||||
|
||||
let mut upper = aggr.clone();
|
||||
let aggr_plan = LogicalPlan::Aggregate(aggr);
|
||||
upper.aggr_expr = upper_aggr_exprs;
|
||||
upper.input = lower_plan.clone();
|
||||
// upper schema's output schema should be the same as the original aggregate plan's output schema
|
||||
let upper_check = upper.clone();
|
||||
let upper_plan = Arc::new(LogicalPlan::Aggregate(upper_check).recompute_schema()?);
|
||||
if *upper_plan.schema() != *aggr_plan.schema() {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Upper aggregate plan's schema is not the same as the original aggregate plan's schema: \n[transformed]:{}\n[ original]{}",
|
||||
upper_plan.schema(), aggr_plan.schema()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(StepAggrPlan {
|
||||
lower_state: lower_plan,
|
||||
upper_merge: upper_plan,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper to make an aggregate function out of a state function.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct StateWrapper {
|
||||
inner: AggregateUDF,
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl StateWrapper {
|
||||
/// `state_index`: The index of the state in the output of the state function.
|
||||
pub fn new(inner: AggregateUDF) -> datafusion_common::Result<Self> {
|
||||
let name = aggr_state_func_name(inner.name());
|
||||
Ok(Self { inner, name })
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &AggregateUDF {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
/// Deduce the return type of the original aggregate function
|
||||
/// based on the accumulator arguments.
|
||||
///
|
||||
pub fn deduce_aggr_return_type(
|
||||
&self,
|
||||
acc_args: &datafusion_expr::function::AccumulatorArgs,
|
||||
) -> datafusion_common::Result<DataType> {
|
||||
let input_exprs = acc_args.exprs;
|
||||
let input_schema = acc_args.schema;
|
||||
let input_types = input_exprs
|
||||
.iter()
|
||||
.map(|e| e.data_type(input_schema))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let return_type = self.inner.return_type(&input_types)?;
|
||||
Ok(return_type)
|
||||
}
|
||||
}
|
||||
|
||||
impl AggregateUDFImpl for StateWrapper {
|
||||
fn accumulator<'a, 'b>(
|
||||
&'a self,
|
||||
acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
|
||||
) -> datafusion_common::Result<Box<dyn Accumulator>> {
|
||||
// fix and recover proper acc args for the original aggregate function.
|
||||
let state_type = acc_args.return_type.clone();
|
||||
let inner = {
|
||||
let old_return_type = self.deduce_aggr_return_type(&acc_args)?;
|
||||
let acc_args = datafusion_expr::function::AccumulatorArgs {
|
||||
return_type: &old_return_type,
|
||||
schema: acc_args.schema,
|
||||
ignore_nulls: acc_args.ignore_nulls,
|
||||
ordering_req: acc_args.ordering_req,
|
||||
is_reversed: acc_args.is_reversed,
|
||||
name: acc_args.name,
|
||||
is_distinct: acc_args.is_distinct,
|
||||
exprs: acc_args.exprs,
|
||||
};
|
||||
self.inner.accumulator(acc_args)?
|
||||
};
|
||||
Ok(Box::new(StateAccum::new(inner, state_type)?))
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
fn name(&self) -> &str {
|
||||
self.name.as_str()
|
||||
}
|
||||
|
||||
fn is_nullable(&self) -> bool {
|
||||
self.inner.is_nullable()
|
||||
}
|
||||
|
||||
/// Return state_fields as the output struct type.
|
||||
///
|
||||
fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
let old_return_type = self.inner.return_type(arg_types)?;
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: self.inner().name(),
|
||||
input_types: arg_types,
|
||||
return_type: &old_return_type,
|
||||
// TODO(discord9): how to get this?, probably ok?
|
||||
ordering_fields: &[],
|
||||
is_distinct: false,
|
||||
};
|
||||
let state_fields = self.inner.state_fields(state_fields_args)?;
|
||||
let struct_field = DataType::Struct(state_fields.into());
|
||||
Ok(struct_field)
|
||||
}
|
||||
|
||||
/// The state function's output fields are the same as the original aggregate function's state fields.
|
||||
fn state_fields(
|
||||
&self,
|
||||
args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
let old_return_type = self.inner.return_type(args.input_types)?;
|
||||
let state_fields_args = StateFieldsArgs {
|
||||
name: args.name,
|
||||
input_types: args.input_types,
|
||||
return_type: &old_return_type,
|
||||
ordering_fields: args.ordering_fields,
|
||||
is_distinct: args.is_distinct,
|
||||
};
|
||||
self.inner.state_fields(state_fields_args)
|
||||
}
|
||||
|
||||
/// The state function's signature is the same as the original aggregate function's signature,
|
||||
fn signature(&self) -> &Signature {
|
||||
self.inner.signature()
|
||||
}
|
||||
|
||||
/// Coerce types also does nothing, as the optimizer should already be able to make struct types
|
||||
fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
|
||||
self.inner.coerce_types(arg_types)
|
||||
}
|
||||
}
|
||||
|
||||
/// The wrapper's input is the same as the original aggregate function's input,
|
||||
/// and the output is the state function's output.
|
||||
#[derive(Debug)]
|
||||
pub struct StateAccum {
|
||||
inner: Box<dyn Accumulator>,
|
||||
state_fields: Fields,
|
||||
}
|
||||
|
||||
impl StateAccum {
|
||||
pub fn new(
|
||||
inner: Box<dyn Accumulator>,
|
||||
state_type: DataType,
|
||||
) -> datafusion_common::Result<Self> {
|
||||
let DataType::Struct(fields) = state_type else {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected a struct type for state, got: {:?}",
|
||||
state_type
|
||||
)));
|
||||
};
|
||||
Ok(Self {
|
||||
inner,
|
||||
state_fields: fields,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for StateAccum {
|
||||
fn evaluate(&mut self) -> datafusion_common::Result<ScalarValue> {
|
||||
let state = self.inner.state()?;
|
||||
|
||||
let array = state
|
||||
.iter()
|
||||
.map(|s| s.to_array())
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
let struct_array = StructArray::try_new(self.state_fields.clone(), array, None)?;
|
||||
Ok(ScalarValue::Struct(Arc::new(struct_array)))
|
||||
}
|
||||
|
||||
fn merge_batch(
|
||||
&mut self,
|
||||
states: &[datatypes::arrow::array::ArrayRef],
|
||||
) -> datafusion_common::Result<()> {
|
||||
self.inner.merge_batch(states)
|
||||
}
|
||||
|
||||
fn update_batch(
|
||||
&mut self,
|
||||
values: &[datatypes::arrow::array::ArrayRef],
|
||||
) -> datafusion_common::Result<()> {
|
||||
self.inner.update_batch(values)
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
self.inner.size()
|
||||
}
|
||||
|
||||
fn state(&mut self) -> datafusion_common::Result<Vec<ScalarValue>> {
|
||||
self.inner.state()
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO(discord9): mark this function as non-ser/de able
|
||||
///
|
||||
/// This wrapper shouldn't be register as a udaf, as it contain extra data that is not serializable.
|
||||
/// and changes for different logical plans.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MergeWrapper {
|
||||
inner: AggregateUDF,
|
||||
name: String,
|
||||
merge_signature: Signature,
|
||||
/// The original physical expression of the aggregate function, can't store the original aggregate function directly, as PhysicalExpr didn't implement Any
|
||||
original_phy_expr: Arc<AggregateFunctionExpr>,
|
||||
original_input_types: Vec<DataType>,
|
||||
}
|
||||
impl MergeWrapper {
|
||||
pub fn new(
|
||||
inner: AggregateUDF,
|
||||
original_phy_expr: Arc<AggregateFunctionExpr>,
|
||||
original_input_types: Vec<DataType>,
|
||||
) -> datafusion_common::Result<Self> {
|
||||
let name = aggr_merge_func_name(inner.name());
|
||||
// the input type is actually struct type, which is the state fields of the original aggregate function.
|
||||
let merge_signature = Signature::user_defined(datafusion_expr::Volatility::Immutable);
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
name,
|
||||
merge_signature,
|
||||
original_phy_expr,
|
||||
original_input_types,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &AggregateUDF {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl AggregateUDFImpl for MergeWrapper {
|
||||
fn accumulator<'a, 'b>(
|
||||
&'a self,
|
||||
acc_args: datafusion_expr::function::AccumulatorArgs<'b>,
|
||||
) -> datafusion_common::Result<Box<dyn Accumulator>> {
|
||||
if acc_args.schema.fields().len() != 1
|
||||
|| !matches!(acc_args.schema.field(0).data_type(), DataType::Struct(_))
|
||||
{
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected one struct type as input, got: {:?}",
|
||||
acc_args.schema
|
||||
)));
|
||||
}
|
||||
let input_type = acc_args.schema.field(0).data_type();
|
||||
let DataType::Struct(fields) = input_type else {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected a struct type for input, got: {:?}",
|
||||
input_type
|
||||
)));
|
||||
};
|
||||
|
||||
let inner_accum = self.original_phy_expr.create_accumulator()?;
|
||||
Ok(Box::new(MergeAccum::new(inner_accum, fields)))
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
fn name(&self) -> &str {
|
||||
self.name.as_str()
|
||||
}
|
||||
|
||||
fn is_nullable(&self) -> bool {
|
||||
self.inner.is_nullable()
|
||||
}
|
||||
|
||||
/// Notice here the `arg_types` is actually the `state_fields`'s data types,
|
||||
/// so return fixed return type instead of using `arg_types` to determine the return type.
|
||||
fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
// The return type is the same as the original aggregate function's return type.
|
||||
let ret_type = self.inner.return_type(&self.original_input_types)?;
|
||||
Ok(ret_type)
|
||||
}
|
||||
fn signature(&self) -> &Signature {
|
||||
&self.merge_signature
|
||||
}
|
||||
|
||||
/// Coerce types also do nothing, as optimzer should be able to already make struct types
|
||||
fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> {
|
||||
// just check if the arg_types are only one and is struct array
|
||||
if arg_types.len() != 1 || !matches!(arg_types.first(), Some(DataType::Struct(_))) {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected one struct type as input, got: {:?}",
|
||||
arg_types
|
||||
)));
|
||||
}
|
||||
Ok(arg_types.to_vec())
|
||||
}
|
||||
|
||||
/// Just return the original aggregate function's state fields.
|
||||
fn state_fields(
|
||||
&self,
|
||||
_args: datafusion_expr::function::StateFieldsArgs,
|
||||
) -> datafusion_common::Result<Vec<Field>> {
|
||||
self.original_phy_expr.state_fields()
|
||||
}
|
||||
}
|
||||
|
||||
/// The merge accumulator, which modify `update_batch`'s behavior to accept one struct array which
|
||||
/// include the state fields of original aggregate function, and merge said states into original accumulator
|
||||
/// the output is the same as original aggregate function
|
||||
#[derive(Debug)]
|
||||
pub struct MergeAccum {
|
||||
inner: Box<dyn Accumulator>,
|
||||
state_fields: Fields,
|
||||
}
|
||||
|
||||
impl MergeAccum {
|
||||
pub fn new(inner: Box<dyn Accumulator>, state_fields: &Fields) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
state_fields: state_fields.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for MergeAccum {
|
||||
fn evaluate(&mut self) -> datafusion_common::Result<ScalarValue> {
|
||||
self.inner.evaluate()
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[arrow::array::ArrayRef]) -> datafusion_common::Result<()> {
|
||||
self.inner.merge_batch(states)
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[arrow::array::ArrayRef]) -> datafusion_common::Result<()> {
|
||||
let value = values.first().ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal("No values provided for merge".to_string())
|
||||
})?;
|
||||
// The input values are states from other accumulators, so we merge them.
|
||||
let struct_arr = value
|
||||
.as_any()
|
||||
.downcast_ref::<StructArray>()
|
||||
.ok_or_else(|| {
|
||||
datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected StructArray, got: {:?}",
|
||||
value.data_type()
|
||||
))
|
||||
})?;
|
||||
let fields = struct_arr.fields();
|
||||
if fields != &self.state_fields {
|
||||
return Err(datafusion_common::DataFusionError::Internal(format!(
|
||||
"Expected state fields: {:?}, got: {:?}",
|
||||
self.state_fields, fields
|
||||
)));
|
||||
}
|
||||
|
||||
// now fields should be the same, so we can merge the batch
|
||||
// by pass the columns as order should be the same
|
||||
let state_columns = struct_arr.columns();
|
||||
self.inner.merge_batch(state_columns)
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
self.inner.size()
|
||||
}
|
||||
|
||||
fn state(&mut self) -> datafusion_common::Result<Vec<ScalarValue>> {
|
||||
self.inner.state()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@@ -1,804 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::pin::Pin;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use arrow::array::{ArrayRef, Float64Array, Int64Array, UInt64Array};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use arrow_schema::SchemaRef;
|
||||
use datafusion::catalog::{Session, TableProvider};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
use datafusion::functions_aggregate::sum::sum_udaf;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::physical_plan::aggregates::AggregateExec;
|
||||
use datafusion::physical_plan::execution_plan::{Boundedness, EmissionType};
|
||||
use datafusion::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
|
||||
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_common::{Column, TableReference};
|
||||
use datafusion_expr::expr::AggregateFunction;
|
||||
use datafusion_expr::sqlparser::ast::NullTreatment;
|
||||
use datafusion_expr::{Aggregate, Expr, LogicalPlan, SortExpr, TableScan};
|
||||
use datafusion_physical_expr::aggregate::AggregateExprBuilder;
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use futures::{Stream, StreamExt as _};
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
use crate::aggrs::approximate::hll::HllState;
|
||||
use crate::aggrs::approximate::uddsketch::UddSketchState;
|
||||
use crate::aggrs::count_hash::CountHash;
|
||||
use crate::function::Function as _;
|
||||
use crate::scalars::hll_count::HllCalcFunction;
|
||||
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MockInputExec {
|
||||
input: Vec<RecordBatch>,
|
||||
schema: SchemaRef,
|
||||
properties: PlanProperties,
|
||||
}
|
||||
|
||||
impl MockInputExec {
|
||||
pub fn new(input: Vec<RecordBatch>, schema: SchemaRef) -> Self {
|
||||
Self {
|
||||
properties: PlanProperties::new(
|
||||
EquivalenceProperties::new(schema.clone()),
|
||||
Partitioning::UnknownPartitioning(1),
|
||||
EmissionType::Incremental,
|
||||
Boundedness::Bounded,
|
||||
),
|
||||
input,
|
||||
schema,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for MockInputExec {
|
||||
fn fmt_as(&self, _t: DisplayFormatType, _f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
impl ExecutionPlan for MockInputExec {
|
||||
fn name(&self) -> &str {
|
||||
"MockInputExec"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
self: Arc<Self>,
|
||||
_children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
_context: Arc<TaskContext>,
|
||||
) -> datafusion_common::Result<SendableRecordBatchStream> {
|
||||
let stream = MockStream {
|
||||
stream: self.input.clone(),
|
||||
schema: self.schema.clone(),
|
||||
idx: 0,
|
||||
};
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
struct MockStream {
|
||||
stream: Vec<RecordBatch>,
|
||||
schema: SchemaRef,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl Stream for MockStream {
|
||||
type Item = datafusion_common::Result<RecordBatch>;
|
||||
fn poll_next(
|
||||
mut self: Pin<&mut Self>,
|
||||
_cx: &mut Context<'_>,
|
||||
) -> Poll<Option<datafusion_common::Result<RecordBatch>>> {
|
||||
if self.idx < self.stream.len() {
|
||||
let ret = self.stream[self.idx].clone();
|
||||
self.idx += 1;
|
||||
Poll::Ready(Some(Ok(ret)))
|
||||
} else {
|
||||
Poll::Ready(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RecordBatchStream for MockStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DummyTableProvider {
|
||||
schema: Arc<arrow_schema::Schema>,
|
||||
record_batch: Mutex<Option<RecordBatch>>,
|
||||
}
|
||||
|
||||
impl DummyTableProvider {
|
||||
#[allow(unused)]
|
||||
pub fn new(schema: Arc<arrow_schema::Schema>, record_batch: Option<RecordBatch>) -> Self {
|
||||
Self {
|
||||
schema,
|
||||
record_batch: Mutex::new(record_batch),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DummyTableProvider {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"number",
|
||||
DataType::Int64,
|
||||
true,
|
||||
)])),
|
||||
record_batch: Mutex::new(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl TableProvider for DummyTableProvider {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> Arc<arrow_schema::Schema> {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn table_type(&self) -> datafusion_expr::TableType {
|
||||
datafusion_expr::TableType::Base
|
||||
}
|
||||
|
||||
async fn scan(
|
||||
&self,
|
||||
_state: &dyn Session,
|
||||
_projection: Option<&Vec<usize>>,
|
||||
_filters: &[Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
|
||||
let input: Vec<RecordBatch> = self
|
||||
.record_batch
|
||||
.lock()
|
||||
.unwrap()
|
||||
.clone()
|
||||
.map(|r| vec![r])
|
||||
.unwrap_or_default();
|
||||
Ok(Arc::new(MockInputExec::new(input, self.schema.clone())))
|
||||
}
|
||||
}
|
||||
|
||||
fn dummy_table_scan() -> LogicalPlan {
|
||||
let table_provider = Arc::new(DummyTableProvider::default());
|
||||
let table_source = DefaultTableSource::new(table_provider);
|
||||
LogicalPlan::TableScan(
|
||||
TableScan::try_new(
|
||||
TableReference::bare("Number"),
|
||||
Arc::new(table_source),
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_sum_udaf() {
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
let sum = datafusion::functions_aggregate::sum::sum_udaf();
|
||||
let sum = (*sum).clone();
|
||||
let original_aggr = Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(sum.clone()),
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap();
|
||||
let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap();
|
||||
|
||||
let expected_lower_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(StateWrapper::new(sum.clone()).unwrap().into()),
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
.recompute_schema()
|
||||
.unwrap();
|
||||
assert_eq!(res.lower_state.as_ref(), &expected_lower_plan);
|
||||
|
||||
let expected_merge_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(expected_lower_plan),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(
|
||||
MergeWrapper::new(
|
||||
sum.clone(),
|
||||
Arc::new(
|
||||
AggregateExprBuilder::new(
|
||||
Arc::new(sum.clone()),
|
||||
vec![Arc::new(
|
||||
datafusion::physical_expr::expressions::Column::new(
|
||||
"number", 0,
|
||||
),
|
||||
)],
|
||||
)
|
||||
.schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
|
||||
.alias("sum(number)")
|
||||
.build()
|
||||
.unwrap(),
|
||||
),
|
||||
vec![DataType::Int64],
|
||||
)
|
||||
.unwrap()
|
||||
.into(),
|
||||
),
|
||||
vec![Expr::Column(Column::new_unqualified("__sum_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))
|
||||
.alias("sum(number)")],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
|
||||
|
||||
let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&res.lower_state, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_state_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
// evaluate the state function
|
||||
let input = Int64Array::from(vec![Some(1), Some(2), None, Some(3)]);
|
||||
let values = vec![Arc::new(input) as arrow::array::ArrayRef];
|
||||
|
||||
state_accum.update_batch(&values).unwrap();
|
||||
let state = state_accum.state().unwrap();
|
||||
assert_eq!(state.len(), 1);
|
||||
assert_eq!(state[0], ScalarValue::Int64(Some(6)));
|
||||
|
||||
let eval_res = state_accum.evaluate().unwrap();
|
||||
assert_eq!(
|
||||
eval_res,
|
||||
ScalarValue::Struct(Arc::new(
|
||||
StructArray::try_new(
|
||||
vec![Field::new("sum[sum]", DataType::Int64, true)].into(),
|
||||
vec![Arc::new(Int64Array::from(vec![Some(6)]))],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
))
|
||||
);
|
||||
|
||||
let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&res.upper_merge, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_merge_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
let merge_input =
|
||||
vec![Arc::new(Int64Array::from(vec![Some(6), Some(42), None])) as arrow::array::ArrayRef];
|
||||
let merge_input_struct_arr = StructArray::try_new(
|
||||
vec![Field::new("sum[sum]", DataType::Int64, true)].into(),
|
||||
merge_input,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
merge_accum
|
||||
.update_batch(&[Arc::new(merge_input_struct_arr)])
|
||||
.unwrap();
|
||||
let merge_state = merge_accum.state().unwrap();
|
||||
assert_eq!(merge_state.len(), 1);
|
||||
assert_eq!(merge_state[0], ScalarValue::Int64(Some(48)));
|
||||
|
||||
let merge_eval_res = merge_accum.evaluate().unwrap();
|
||||
assert_eq!(merge_eval_res, ScalarValue::Int64(Some(48)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_avg_udaf() {
|
||||
let ctx = SessionContext::new();
|
||||
|
||||
let avg = datafusion::functions_aggregate::average::avg_udaf();
|
||||
let avg = (*avg).clone();
|
||||
|
||||
let original_aggr = Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(avg.clone()),
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap();
|
||||
let res = StateMergeHelper::split_aggr_node(original_aggr).unwrap();
|
||||
|
||||
let state_func: Arc<AggregateUDF> = Arc::new(StateWrapper::new(avg.clone()).unwrap().into());
|
||||
let expected_aggr_state_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(dummy_table_scan()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
state_func,
|
||||
vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
// type coerced so avg aggr function can function correctly
|
||||
let coerced_aggr_state_plan = TypeCoercion::new()
|
||||
.analyze(expected_aggr_state_plan.clone(), &Default::default())
|
||||
.unwrap();
|
||||
assert_eq!(res.lower_state.as_ref(), &coerced_aggr_state_plan);
|
||||
assert_eq!(
|
||||
res.lower_state.schema().as_arrow(),
|
||||
&arrow_schema::Schema::new(vec![Field::new(
|
||||
"__avg_state(number)",
|
||||
DataType::Struct(
|
||||
vec![
|
||||
Field::new("avg[count]", DataType::UInt64, true),
|
||||
Field::new("avg[sum]", DataType::Float64, true)
|
||||
]
|
||||
.into()
|
||||
),
|
||||
true,
|
||||
)])
|
||||
);
|
||||
|
||||
let expected_merge_fn = MergeWrapper::new(
|
||||
avg.clone(),
|
||||
Arc::new(
|
||||
AggregateExprBuilder::new(
|
||||
Arc::new(avg.clone()),
|
||||
vec![Arc::new(
|
||||
datafusion::physical_expr::expressions::Column::new("number", 0),
|
||||
)],
|
||||
)
|
||||
.schema(Arc::new(dummy_table_scan().schema().as_arrow().clone()))
|
||||
.alias("avg(number)")
|
||||
.build()
|
||||
.unwrap(),
|
||||
),
|
||||
// coerced to float64
|
||||
vec![DataType::Float64],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let expected_merge_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(
|
||||
Arc::new(coerced_aggr_state_plan.clone()),
|
||||
vec![],
|
||||
vec![Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
Arc::new(expected_merge_fn.into()),
|
||||
vec![Expr::Column(Column::new_unqualified("__avg_state(number)"))],
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
))
|
||||
.alias("avg(number)")],
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(res.upper_merge.as_ref(), &expected_merge_plan);
|
||||
|
||||
let phy_aggr_state_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&coerced_aggr_state_plan, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_state_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
let mut state_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
// evaluate the state function
|
||||
let input = Float64Array::from(vec![Some(1.), Some(2.), None, Some(3.)]);
|
||||
let values = vec![Arc::new(input) as arrow::array::ArrayRef];
|
||||
|
||||
state_accum.update_batch(&values).unwrap();
|
||||
let state = state_accum.state().unwrap();
|
||||
assert_eq!(state.len(), 2);
|
||||
assert_eq!(state[0], ScalarValue::UInt64(Some(3)));
|
||||
assert_eq!(state[1], ScalarValue::Float64(Some(6.)));
|
||||
|
||||
let eval_res = state_accum.evaluate().unwrap();
|
||||
let expected = Arc::new(
|
||||
StructArray::try_new(
|
||||
vec![
|
||||
Field::new("avg[count]", DataType::UInt64, true),
|
||||
Field::new("avg[sum]", DataType::Float64, true),
|
||||
]
|
||||
.into(),
|
||||
vec![
|
||||
Arc::new(UInt64Array::from(vec![Some(3)])),
|
||||
Arc::new(Float64Array::from(vec![Some(6.)])),
|
||||
],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
assert_eq!(eval_res, ScalarValue::Struct(expected));
|
||||
|
||||
let phy_aggr_merge_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&res.upper_merge, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
let aggr_exec = phy_aggr_merge_plan
|
||||
.as_any()
|
||||
.downcast_ref::<AggregateExec>()
|
||||
.unwrap();
|
||||
let aggr_func_expr = &aggr_exec.aggr_expr()[0];
|
||||
|
||||
let mut merge_accum = aggr_func_expr.create_accumulator().unwrap();
|
||||
|
||||
let merge_input = vec![
|
||||
Arc::new(UInt64Array::from(vec![Some(3), Some(42), None])) as arrow::array::ArrayRef,
|
||||
Arc::new(Float64Array::from(vec![Some(48.), Some(84.), None])),
|
||||
];
|
||||
let merge_input_struct_arr = StructArray::try_new(
|
||||
vec![
|
||||
Field::new("avg[count]", DataType::UInt64, true),
|
||||
Field::new("avg[sum]", DataType::Float64, true),
|
||||
]
|
||||
.into(),
|
||||
merge_input,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
merge_accum
|
||||
.update_batch(&[Arc::new(merge_input_struct_arr)])
|
||||
.unwrap();
|
||||
let merge_state = merge_accum.state().unwrap();
|
||||
assert_eq!(merge_state.len(), 2);
|
||||
assert_eq!(merge_state[0], ScalarValue::UInt64(Some(45)));
|
||||
assert_eq!(merge_state[1], ScalarValue::Float64(Some(132.)));
|
||||
|
||||
let merge_eval_res = merge_accum.evaluate().unwrap();
|
||||
// the merge function returns the average, which is 132 / 45
|
||||
assert_eq!(merge_eval_res, ScalarValue::Float64(Some(132. / 45_f64)));
|
||||
}
|
||||
|
||||
/// For testing whether the UDAF state fields are correctly implemented.
|
||||
/// esp. for our own custom UDAF's state fields.
|
||||
/// By compare eval results before and after split to state/merge functions.
|
||||
#[tokio::test]
|
||||
async fn test_udaf_correct_eval_result() {
|
||||
struct TestCase {
|
||||
func: Arc<AggregateUDF>,
|
||||
args: Vec<Expr>,
|
||||
input_schema: SchemaRef,
|
||||
input: Vec<ArrayRef>,
|
||||
expected_output: Option<ScalarValue>,
|
||||
expected_fn: Option<ExpectedFn>,
|
||||
distinct: bool,
|
||||
filter: Option<Box<Expr>>,
|
||||
order_by: Option<Vec<SortExpr>>,
|
||||
null_treatment: Option<NullTreatment>,
|
||||
}
|
||||
type ExpectedFn = fn(ArrayRef) -> bool;
|
||||
|
||||
let test_cases = vec![
|
||||
TestCase {
|
||||
func: sum_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"number",
|
||||
DataType::Int64,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
input: vec![Arc::new(Int64Array::from(vec![
|
||||
Some(1),
|
||||
Some(2),
|
||||
None,
|
||||
Some(3),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Int64(Some(6))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: avg_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"number",
|
||||
DataType::Int64,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
input: vec![Arc::new(Int64Array::from(vec![
|
||||
Some(1),
|
||||
Some(2),
|
||||
None,
|
||||
Some(3),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Float64(Some(2.0))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: Arc::new(CountHash::udf_impl()),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"number",
|
||||
DataType::Int64,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("number"))],
|
||||
input: vec![Arc::new(Int64Array::from(vec![
|
||||
Some(1),
|
||||
Some(2),
|
||||
None,
|
||||
Some(3),
|
||||
Some(3),
|
||||
Some(3),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Int64(Some(4))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: Arc::new(UddSketchState::state_udf_impl()),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"number",
|
||||
DataType::Float64,
|
||||
true,
|
||||
)])),
|
||||
args: vec![
|
||||
Expr::Literal(ScalarValue::Int64(Some(128))),
|
||||
Expr::Literal(ScalarValue::Float64(Some(0.05))),
|
||||
Expr::Column(Column::new_unqualified("number")),
|
||||
],
|
||||
input: vec![Arc::new(Float64Array::from(vec![
|
||||
Some(1.),
|
||||
Some(2.),
|
||||
None,
|
||||
Some(3.),
|
||||
Some(3.),
|
||||
Some(3.),
|
||||
]))],
|
||||
expected_output: None,
|
||||
expected_fn: Some(|arr| {
|
||||
let percent = ScalarValue::Float64(Some(0.5)).to_array().unwrap();
|
||||
let percent = datatypes::vectors::Helper::try_into_vector(percent).unwrap();
|
||||
let state = datatypes::vectors::Helper::try_into_vector(arr).unwrap();
|
||||
let udd_calc = UddSketchCalcFunction;
|
||||
let res = udd_calc
|
||||
.eval(&Default::default(), &[percent, state])
|
||||
.unwrap();
|
||||
let binding = res.to_arrow_array();
|
||||
let res_arr = binding.as_any().downcast_ref::<Float64Array>().unwrap();
|
||||
assert!(res_arr.len() == 1);
|
||||
assert!((res_arr.value(0) - 2.856578984907706f64).abs() <= f64::EPSILON);
|
||||
true
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: Arc::new(HllState::state_udf_impl()),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"word",
|
||||
DataType::Utf8,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("word"))],
|
||||
input: vec![Arc::new(StringArray::from(vec![
|
||||
Some("foo"),
|
||||
Some("bar"),
|
||||
None,
|
||||
Some("baz"),
|
||||
Some("baz"),
|
||||
]))],
|
||||
expected_output: None,
|
||||
expected_fn: Some(|arr| {
|
||||
let state = datatypes::vectors::Helper::try_into_vector(arr).unwrap();
|
||||
let hll_calc = HllCalcFunction;
|
||||
let res = hll_calc.eval(&Default::default(), &[state]).unwrap();
|
||||
let binding = res.to_arrow_array();
|
||||
let res_arr = binding.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
assert!(res_arr.len() == 1);
|
||||
assert_eq!(res_arr.value(0), 3);
|
||||
true
|
||||
}),
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: None,
|
||||
null_treatment: None,
|
||||
},
|
||||
// TODO(discord9): udd_merge/hll_merge/geo_path/quantile_aggr tests
|
||||
];
|
||||
let test_table_ref = TableReference::bare("TestTable");
|
||||
|
||||
for case in test_cases {
|
||||
let ctx = SessionContext::new();
|
||||
let table_provider = DummyTableProvider::new(
|
||||
case.input_schema.clone(),
|
||||
Some(RecordBatch::try_new(case.input_schema.clone(), case.input.clone()).unwrap()),
|
||||
);
|
||||
let table_source = DefaultTableSource::new(Arc::new(table_provider));
|
||||
let logical_plan = LogicalPlan::TableScan(
|
||||
TableScan::try_new(
|
||||
test_table_ref.clone(),
|
||||
Arc::new(table_source),
|
||||
None,
|
||||
vec![],
|
||||
None,
|
||||
)
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let args = case.args;
|
||||
|
||||
let aggr_expr = Expr::AggregateFunction(AggregateFunction::new_udf(
|
||||
case.func.clone(),
|
||||
args,
|
||||
case.distinct,
|
||||
case.filter,
|
||||
case.order_by,
|
||||
case.null_treatment,
|
||||
));
|
||||
|
||||
let aggr_plan = LogicalPlan::Aggregate(
|
||||
Aggregate::try_new(Arc::new(logical_plan), vec![], vec![aggr_expr]).unwrap(),
|
||||
);
|
||||
|
||||
// make sure the aggr_plan is type coerced
|
||||
let aggr_plan = TypeCoercion::new()
|
||||
.analyze(aggr_plan, &Default::default())
|
||||
.unwrap();
|
||||
|
||||
// first eval the original aggregate function
|
||||
let phy_full_aggr_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&aggr_plan, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
{
|
||||
let unsplit_result = execute_phy_plan(&phy_full_aggr_plan).await.unwrap();
|
||||
assert_eq!(unsplit_result.len(), 1);
|
||||
let unsplit_batch = &unsplit_result[0];
|
||||
assert_eq!(unsplit_batch.num_columns(), 1);
|
||||
assert_eq!(unsplit_batch.num_rows(), 1);
|
||||
let unsplit_col = unsplit_batch.column(0);
|
||||
if let Some(expected_output) = &case.expected_output {
|
||||
assert_eq!(unsplit_col.data_type(), &expected_output.data_type());
|
||||
assert_eq!(unsplit_col.len(), 1);
|
||||
assert_eq!(unsplit_col, &expected_output.to_array().unwrap());
|
||||
}
|
||||
|
||||
if let Some(expected_fn) = &case.expected_fn {
|
||||
assert!(expected_fn(unsplit_col.clone()));
|
||||
}
|
||||
}
|
||||
let LogicalPlan::Aggregate(aggr_plan) = aggr_plan else {
|
||||
panic!("Expected Aggregate plan");
|
||||
};
|
||||
let split_plan = StateMergeHelper::split_aggr_node(aggr_plan).unwrap();
|
||||
|
||||
let phy_upper_plan = DefaultPhysicalPlanner::default()
|
||||
.create_physical_plan(&split_plan.upper_merge, &ctx.state())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// since upper plan use lower plan as input, execute upper plan should also execute lower plan
|
||||
// which should give the same result as the original aggregate function
|
||||
{
|
||||
let split_res = execute_phy_plan(&phy_upper_plan).await.unwrap();
|
||||
|
||||
assert_eq!(split_res.len(), 1);
|
||||
let split_batch = &split_res[0];
|
||||
assert_eq!(split_batch.num_columns(), 1);
|
||||
assert_eq!(split_batch.num_rows(), 1);
|
||||
let split_col = split_batch.column(0);
|
||||
if let Some(expected_output) = &case.expected_output {
|
||||
assert_eq!(split_col.data_type(), &expected_output.data_type());
|
||||
assert_eq!(split_col.len(), 1);
|
||||
assert_eq!(split_col, &expected_output.to_array().unwrap());
|
||||
}
|
||||
|
||||
if let Some(expected_fn) = &case.expected_fn {
|
||||
assert!(expected_fn(split_col.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_phy_plan(
|
||||
phy_plan: &Arc<dyn ExecutionPlan>,
|
||||
) -> datafusion_common::Result<Vec<RecordBatch>> {
|
||||
let task_ctx = Arc::new(TaskContext::default());
|
||||
let mut stream = phy_plan.execute(0, task_ctx)?;
|
||||
let mut batches = Vec::new();
|
||||
while let Some(batch) = stream.next().await {
|
||||
batches.push(batch?);
|
||||
}
|
||||
Ok(batches)
|
||||
}
|
||||
@@ -16,8 +16,8 @@ use std::env;
|
||||
use std::io::ErrorKind;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, LazyLock};
|
||||
use std::time::{Duration, SystemTime};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_runtime::error::{Error, Result};
|
||||
use common_runtime::{BoxedTaskFunction, RepeatedTask, TaskFunction};
|
||||
@@ -31,9 +31,6 @@ pub const TELEMETRY_URL: &str = "https://telemetry.greptimestats.com/db/otel/sta
|
||||
/// The local installation uuid cache file
|
||||
const UUID_FILE_NAME: &str = ".greptimedb-telemetry-uuid";
|
||||
|
||||
/// System start time for uptime calculation
|
||||
static START_TIME: LazyLock<SystemTime> = LazyLock::new(SystemTime::now);
|
||||
|
||||
/// The default interval of reporting telemetry data to greptime cloud
|
||||
pub static TELEMETRY_INTERVAL: Duration = Duration::from_secs(60 * 30);
|
||||
/// The default connect timeout to greptime cloud.
|
||||
@@ -106,8 +103,6 @@ struct StatisticData {
|
||||
pub nodes: Option<i32>,
|
||||
/// The local installation uuid
|
||||
pub uuid: String,
|
||||
/// System uptime range (e.g., "hours", "days", "weeks")
|
||||
pub uptime: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||
@@ -176,25 +171,6 @@ fn print_anonymous_usage_data_disclaimer() {
|
||||
info!("https://docs.greptime.com/reference/telemetry");
|
||||
}
|
||||
|
||||
/// Format uptime duration into a general time range string
|
||||
/// Returns privacy-friendly descriptions like "hours", "days", etc.
|
||||
fn format_uptime() -> String {
|
||||
let uptime_duration = START_TIME.elapsed().unwrap_or(Duration::ZERO);
|
||||
let total_seconds = uptime_duration.as_secs();
|
||||
|
||||
if total_seconds < 86400 {
|
||||
"hours".to_string()
|
||||
} else if total_seconds < 604800 {
|
||||
"days".to_string()
|
||||
} else if total_seconds < 2629746 {
|
||||
"weeks".to_string()
|
||||
} else if total_seconds < 31556952 {
|
||||
"months".to_string()
|
||||
} else {
|
||||
"years".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_get_uuid(working_home: &Option<String>) -> Option<String> {
|
||||
let temp_dir = env::temp_dir();
|
||||
|
||||
@@ -284,7 +260,6 @@ impl GreptimeDBTelemetry {
|
||||
mode: self.statistics.get_mode(),
|
||||
nodes: self.statistics.get_nodes().await,
|
||||
uuid,
|
||||
uptime: format_uptime(),
|
||||
};
|
||||
|
||||
if let Some(client) = self.client.as_ref() {
|
||||
@@ -319,9 +294,7 @@ mod tests {
|
||||
use reqwest::{Client, Response};
|
||||
use tokio::spawn;
|
||||
|
||||
use crate::{
|
||||
default_get_uuid, format_uptime, Collector, GreptimeDBTelemetry, Mode, StatisticData,
|
||||
};
|
||||
use crate::{default_get_uuid, Collector, GreptimeDBTelemetry, Mode, StatisticData};
|
||||
|
||||
static COUNT: AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
|
||||
|
||||
@@ -465,7 +438,6 @@ mod tests {
|
||||
assert_eq!(build_info().commit, body.git_commit);
|
||||
assert_eq!(Mode::Standalone, body.mode);
|
||||
assert_eq!(1, body.nodes.unwrap());
|
||||
assert!(!body.uptime.is_empty());
|
||||
|
||||
let failed_statistic = Box::new(FailedStatistic);
|
||||
let failed_report = GreptimeDBTelemetry::new(
|
||||
@@ -505,18 +477,4 @@ mod tests {
|
||||
assert_eq!(uuid, default_get_uuid(&Some(working_home.clone())));
|
||||
assert_eq!(uuid, default_get_uuid(&Some(working_home)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_format_uptime() {
|
||||
let uptime = format_uptime();
|
||||
assert!(!uptime.is_empty());
|
||||
// Should be a valid general time range (no specific numbers)
|
||||
assert!(
|
||||
uptime == "hours"
|
||||
|| uptime == "days"
|
||||
|| uptime == "weeks"
|
||||
|| uptime == "months"
|
||||
|| uptime == "years"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,8 +29,8 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::region_request::{SetRegionOption, UnsetRegionOption};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{
|
||||
AddColumnRequest, AlterKind, AlterTableRequest, ModifyColumnTypeRequest, SetIndexOption,
|
||||
UnsetIndexOption,
|
||||
AddColumnRequest, AlterKind, AlterTableRequest, ModifyColumnTypeRequest, SetIndexOptions,
|
||||
UnsetIndexOptions,
|
||||
};
|
||||
|
||||
use crate::error::{
|
||||
@@ -43,59 +43,6 @@ use crate::error::{
|
||||
const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32;
|
||||
const LOCATION_TYPE_AFTER: i32 = LocationType::After as i32;
|
||||
|
||||
fn set_index_option_from_proto(set_index: api::v1::SetIndex) -> Result<SetIndexOption> {
|
||||
let options = set_index.options.context(MissingAlterIndexOptionSnafu)?;
|
||||
Ok(match options {
|
||||
api::v1::set_index::Options::Fulltext(f) => SetIndexOption::Fulltext {
|
||||
column_name: f.column_name.clone(),
|
||||
options: FulltextOptions::new(
|
||||
f.enable,
|
||||
as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(f.analyzer).context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
f.case_sensitive,
|
||||
as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(f.backend)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
f.granularity as u32,
|
||||
f.false_positive_rate,
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
api::v1::set_index::Options::Inverted(i) => SetIndexOption::Inverted {
|
||||
column_name: i.column_name,
|
||||
},
|
||||
api::v1::set_index::Options::Skipping(s) => SetIndexOption::Skipping {
|
||||
column_name: s.column_name,
|
||||
options: SkippingIndexOptions::new(
|
||||
s.granularity as u32,
|
||||
s.false_positive_rate,
|
||||
as_skipping_index_type(
|
||||
PbSkippingIndexType::try_from(s.skipping_index_type)
|
||||
.context(InvalidSetSkippingIndexOptionRequestSnafu)?,
|
||||
),
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn unset_index_option_from_proto(unset_index: api::v1::UnsetIndex) -> Result<UnsetIndexOption> {
|
||||
let options = unset_index.options.context(MissingAlterIndexOptionSnafu)?;
|
||||
Ok(match options {
|
||||
api::v1::unset_index::Options::Fulltext(f) => UnsetIndexOption::Fulltext {
|
||||
column_name: f.column_name,
|
||||
},
|
||||
api::v1::unset_index::Options::Inverted(i) => UnsetIndexOption::Inverted {
|
||||
column_name: i.column_name,
|
||||
},
|
||||
api::v1::unset_index::Options::Skipping(s) => UnsetIndexOption::Skipping {
|
||||
column_name: s.column_name,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert an [`AlterTableExpr`] to an [`AlterTableRequest`]
|
||||
pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<AlterTableRequest> {
|
||||
let catalog_name = expr.catalog_name;
|
||||
@@ -174,34 +121,70 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
|
||||
.context(InvalidUnsetTableOptionRequestSnafu)?,
|
||||
}
|
||||
}
|
||||
Kind::SetIndex(o) => {
|
||||
let option = set_index_option_from_proto(o)?;
|
||||
AlterKind::SetIndexes {
|
||||
options: vec![option],
|
||||
}
|
||||
}
|
||||
Kind::UnsetIndex(o) => {
|
||||
let option = unset_index_option_from_proto(o)?;
|
||||
AlterKind::UnsetIndexes {
|
||||
options: vec![option],
|
||||
}
|
||||
}
|
||||
Kind::SetIndexes(o) => {
|
||||
let options = o
|
||||
.set_indexes
|
||||
.into_iter()
|
||||
.map(set_index_option_from_proto)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
AlterKind::SetIndexes { options }
|
||||
}
|
||||
Kind::UnsetIndexes(o) => {
|
||||
let options = o
|
||||
.unset_indexes
|
||||
.into_iter()
|
||||
.map(unset_index_option_from_proto)
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
AlterKind::UnsetIndexes { options }
|
||||
}
|
||||
Kind::SetIndex(o) => match o.options {
|
||||
Some(opt) => match opt {
|
||||
api::v1::set_index::Options::Fulltext(f) => AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Fulltext {
|
||||
column_name: f.column_name.clone(),
|
||||
options: FulltextOptions::new(
|
||||
f.enable,
|
||||
as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(f.analyzer)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
f.case_sensitive,
|
||||
as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(f.backend)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
f.granularity as u32,
|
||||
f.false_positive_rate,
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
api::v1::set_index::Options::Inverted(i) => AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Inverted {
|
||||
column_name: i.column_name,
|
||||
},
|
||||
},
|
||||
api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Skipping {
|
||||
column_name: s.column_name,
|
||||
options: SkippingIndexOptions::new(
|
||||
s.granularity as u32,
|
||||
s.false_positive_rate,
|
||||
as_skipping_index_type(
|
||||
PbSkippingIndexType::try_from(s.skipping_index_type)
|
||||
.context(InvalidSetSkippingIndexOptionRequestSnafu)?,
|
||||
),
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
},
|
||||
None => return MissingAlterIndexOptionSnafu.fail(),
|
||||
},
|
||||
Kind::UnsetIndex(o) => match o.options {
|
||||
Some(opt) => match opt {
|
||||
api::v1::unset_index::Options::Fulltext(f) => AlterKind::UnsetIndex {
|
||||
options: UnsetIndexOptions::Fulltext {
|
||||
column_name: f.column_name,
|
||||
},
|
||||
},
|
||||
api::v1::unset_index::Options::Inverted(i) => AlterKind::UnsetIndex {
|
||||
options: UnsetIndexOptions::Inverted {
|
||||
column_name: i.column_name,
|
||||
},
|
||||
},
|
||||
api::v1::unset_index::Options::Skipping(s) => AlterKind::UnsetIndex {
|
||||
options: UnsetIndexOptions::Skipping {
|
||||
column_name: s.column_name,
|
||||
},
|
||||
},
|
||||
},
|
||||
None => return MissingAlterIndexOptionSnafu.fail(),
|
||||
},
|
||||
Kind::DropDefaults(o) => {
|
||||
let names = o
|
||||
.drop_defaults
|
||||
|
||||
@@ -31,7 +31,6 @@ tokio.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tonic.workspace = true
|
||||
tower.workspace = true
|
||||
vec1 = "1.12"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
|
||||
@@ -84,12 +84,9 @@ fn prepare_random_record_batch(
|
||||
fn prepare_flight_data(num_rows: usize) -> (FlightData, FlightData) {
|
||||
let schema = schema();
|
||||
let mut encoder = FlightEncoder::default();
|
||||
let schema_data = encoder.encode_schema(schema.as_ref());
|
||||
let schema_data = encoder.encode(FlightMessage::Schema(schema.clone()));
|
||||
let rb = prepare_random_record_batch(schema, num_rows);
|
||||
let [rb_data] = encoder
|
||||
.encode(FlightMessage::RecordBatch(rb))
|
||||
.try_into()
|
||||
.unwrap();
|
||||
let rb_data = encoder.encode(FlightMessage::RecordBatch(rb));
|
||||
(schema_data, rb_data)
|
||||
}
|
||||
|
||||
@@ -99,7 +96,7 @@ fn decode_flight_data_from_protobuf(schema: &Bytes, payload: &Bytes) -> DfRecord
|
||||
let mut decoder = FlightDecoder::default();
|
||||
let _schema = decoder.try_decode(&schema).unwrap();
|
||||
let message = decoder.try_decode(&payload).unwrap();
|
||||
let Some(FlightMessage::RecordBatch(batch)) = message else {
|
||||
let FlightMessage::RecordBatch(batch) = message else {
|
||||
unreachable!("unexpected message");
|
||||
};
|
||||
batch
|
||||
|
||||
@@ -23,7 +23,6 @@ use arrow_flight::{FlightData, SchemaAsIpc};
|
||||
use common_base::bytes::Bytes;
|
||||
use common_recordbatch::DfRecordBatch;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::ArrayRef;
|
||||
use datatypes::arrow::buffer::Buffer;
|
||||
use datatypes::arrow::datatypes::{Schema as ArrowSchema, SchemaRef};
|
||||
use datatypes::arrow::error::ArrowError;
|
||||
@@ -32,7 +31,6 @@ use flatbuffers::FlatBufferBuilder;
|
||||
use prost::bytes::Bytes as ProstBytes;
|
||||
use prost::Message;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use vec1::{vec1, Vec1};
|
||||
|
||||
use crate::error;
|
||||
use crate::error::{DecodeFlightDataSnafu, InvalidFlightDataSnafu, Result};
|
||||
@@ -79,19 +77,9 @@ impl FlightEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode the Arrow schema to [FlightData].
|
||||
pub fn encode_schema(&self, schema: &ArrowSchema) -> FlightData {
|
||||
SchemaAsIpc::new(schema, &self.write_options).into()
|
||||
}
|
||||
|
||||
/// Encode the [FlightMessage] to a list (at least one element) of [FlightData]s.
|
||||
///
|
||||
/// Normally only when the [FlightMessage] is an Arrow [RecordBatch] with dictionary arrays
|
||||
/// will the encoder produce more than one [FlightData]s. Other types of [FlightMessage] should
|
||||
/// be encoded to exactly one [FlightData].
|
||||
pub fn encode(&mut self, flight_message: FlightMessage) -> Vec1<FlightData> {
|
||||
pub fn encode(&mut self, flight_message: FlightMessage) -> FlightData {
|
||||
match flight_message {
|
||||
FlightMessage::Schema(schema) => vec1![self.encode_schema(schema.as_ref())],
|
||||
FlightMessage::Schema(schema) => SchemaAsIpc::new(&schema, &self.write_options).into(),
|
||||
FlightMessage::RecordBatch(record_batch) => {
|
||||
let (encoded_dictionaries, encoded_batch) = self
|
||||
.data_gen
|
||||
@@ -102,10 +90,14 @@ impl FlightEncoder {
|
||||
)
|
||||
.expect("DictionaryTracker configured above to not fail on replacement");
|
||||
|
||||
Vec1::from_vec_push(
|
||||
encoded_dictionaries.into_iter().map(Into::into).collect(),
|
||||
encoded_batch.into(),
|
||||
)
|
||||
// TODO(LFC): Handle dictionary as FlightData here, when we supported Arrow's Dictionary DataType.
|
||||
// Currently we don't have a datatype corresponding to Arrow's Dictionary DataType,
|
||||
// so there won't be any "dictionaries" here. Assert to be sure about it, and
|
||||
// perform a "testing guard" in case we forgot to handle the possible "dictionaries"
|
||||
// here in the future.
|
||||
debug_assert_eq!(encoded_dictionaries.len(), 0);
|
||||
|
||||
encoded_batch.into()
|
||||
}
|
||||
FlightMessage::AffectedRows(rows) => {
|
||||
let metadata = FlightMetadata {
|
||||
@@ -113,12 +105,12 @@ impl FlightEncoder {
|
||||
metrics: None,
|
||||
}
|
||||
.encode_to_vec();
|
||||
vec1![FlightData {
|
||||
FlightData {
|
||||
flight_descriptor: None,
|
||||
data_header: build_none_flight_msg().into(),
|
||||
app_metadata: metadata.into(),
|
||||
data_body: ProstBytes::default(),
|
||||
}]
|
||||
}
|
||||
}
|
||||
FlightMessage::Metrics(s) => {
|
||||
let metadata = FlightMetadata {
|
||||
@@ -128,12 +120,12 @@ impl FlightEncoder {
|
||||
}),
|
||||
}
|
||||
.encode_to_vec();
|
||||
vec1![FlightData {
|
||||
FlightData {
|
||||
flight_descriptor: None,
|
||||
data_header: build_none_flight_msg().into(),
|
||||
app_metadata: metadata.into(),
|
||||
data_body: ProstBytes::default(),
|
||||
}]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -143,7 +135,6 @@ impl FlightEncoder {
|
||||
pub struct FlightDecoder {
|
||||
schema: Option<SchemaRef>,
|
||||
schema_bytes: Option<bytes::Bytes>,
|
||||
dictionaries_by_id: HashMap<i64, ArrayRef>,
|
||||
}
|
||||
|
||||
impl FlightDecoder {
|
||||
@@ -154,7 +145,6 @@ impl FlightDecoder {
|
||||
Ok(Self {
|
||||
schema: Some(Arc::new(arrow_schema)),
|
||||
schema_bytes: Some(schema_bytes.clone()),
|
||||
dictionaries_by_id: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -196,13 +186,7 @@ impl FlightDecoder {
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Try to decode the [FlightData] to a [FlightMessage].
|
||||
///
|
||||
/// If the [FlightData] is of type `DictionaryBatch` (produced while encoding an Arrow
|
||||
/// [RecordBatch] with dictionary arrays), the decoder will not return any [FlightMessage]s.
|
||||
/// Instead, it will update its internal dictionary cache. Other types of [FlightData] will
|
||||
/// be decoded to exactly one [FlightMessage].
|
||||
pub fn try_decode(&mut self, flight_data: &FlightData) -> Result<Option<FlightMessage>> {
|
||||
pub fn try_decode(&mut self, flight_data: &FlightData) -> Result<FlightMessage> {
|
||||
let message = root_as_message(&flight_data.data_header).map_err(|e| {
|
||||
InvalidFlightDataSnafu {
|
||||
reason: e.to_string(),
|
||||
@@ -214,12 +198,12 @@ impl FlightDecoder {
|
||||
let metadata = FlightMetadata::decode(flight_data.app_metadata.clone())
|
||||
.context(DecodeFlightDataSnafu)?;
|
||||
if let Some(AffectedRows { value }) = metadata.affected_rows {
|
||||
return Ok(Some(FlightMessage::AffectedRows(value as _)));
|
||||
return Ok(FlightMessage::AffectedRows(value as _));
|
||||
}
|
||||
if let Some(Metrics { metrics }) = metadata.metrics {
|
||||
return Ok(Some(FlightMessage::Metrics(
|
||||
return Ok(FlightMessage::Metrics(
|
||||
String::from_utf8_lossy(&metrics).to_string(),
|
||||
)));
|
||||
));
|
||||
}
|
||||
InvalidFlightDataSnafu {
|
||||
reason: "Expecting FlightMetadata have some meaningful content.",
|
||||
@@ -235,46 +219,21 @@ impl FlightDecoder {
|
||||
})?);
|
||||
self.schema = Some(arrow_schema.clone());
|
||||
self.schema_bytes = Some(flight_data.data_header.clone());
|
||||
Ok(Some(FlightMessage::Schema(arrow_schema)))
|
||||
Ok(FlightMessage::Schema(arrow_schema))
|
||||
}
|
||||
MessageHeader::RecordBatch => {
|
||||
let schema = self.schema.clone().context(InvalidFlightDataSnafu {
|
||||
reason: "Should have decoded schema first!",
|
||||
})?;
|
||||
let arrow_batch = flight_data_to_arrow_batch(
|
||||
flight_data,
|
||||
schema.clone(),
|
||||
&self.dictionaries_by_id,
|
||||
)
|
||||
.map_err(|e| {
|
||||
InvalidFlightDataSnafu {
|
||||
reason: e.to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
Ok(Some(FlightMessage::RecordBatch(arrow_batch)))
|
||||
}
|
||||
MessageHeader::DictionaryBatch => {
|
||||
let dictionary_batch =
|
||||
message
|
||||
.header_as_dictionary_batch()
|
||||
.context(InvalidFlightDataSnafu {
|
||||
reason: "could not get dictionary batch from DictionaryBatch message",
|
||||
let arrow_batch =
|
||||
flight_data_to_arrow_batch(flight_data, schema.clone(), &HashMap::new())
|
||||
.map_err(|e| {
|
||||
InvalidFlightDataSnafu {
|
||||
reason: e.to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
let schema = self.schema.as_ref().context(InvalidFlightDataSnafu {
|
||||
reason: "schema message is not present previously",
|
||||
})?;
|
||||
|
||||
reader::read_dictionary(
|
||||
&flight_data.data_body.clone().into(),
|
||||
dictionary_batch,
|
||||
schema,
|
||||
&mut self.dictionaries_by_id,
|
||||
&message.version(),
|
||||
)
|
||||
.context(error::ArrowSnafu)?;
|
||||
Ok(None)
|
||||
Ok(FlightMessage::RecordBatch(arrow_batch))
|
||||
}
|
||||
other => {
|
||||
let name = other.variant_name().unwrap_or("UNKNOWN");
|
||||
@@ -346,16 +305,14 @@ fn build_none_flight_msg() -> Bytes {
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use arrow_flight::utils::batches_to_flight_data;
|
||||
use datatypes::arrow::array::{
|
||||
DictionaryArray, Int32Array, StringArray, UInt32Array, UInt8Array,
|
||||
};
|
||||
use datatypes::arrow::array::Int32Array;
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema};
|
||||
|
||||
use super::*;
|
||||
use crate::Error;
|
||||
|
||||
#[test]
|
||||
fn test_try_decode() -> Result<()> {
|
||||
fn test_try_decode() {
|
||||
let schema = Arc::new(ArrowSchema::new(vec![Field::new(
|
||||
"n",
|
||||
DataType::Int32,
|
||||
@@ -390,7 +347,7 @@ mod test {
|
||||
.to_string()
|
||||
.contains("Should have decoded schema first!"));
|
||||
|
||||
let message = decoder.try_decode(d1)?.unwrap();
|
||||
let message = decoder.try_decode(d1).unwrap();
|
||||
assert!(matches!(message, FlightMessage::Schema(_)));
|
||||
let FlightMessage::Schema(decoded_schema) = message else {
|
||||
unreachable!()
|
||||
@@ -399,20 +356,19 @@ mod test {
|
||||
|
||||
let _ = decoder.schema.as_ref().unwrap();
|
||||
|
||||
let message = decoder.try_decode(d2)?.unwrap();
|
||||
let message = decoder.try_decode(d2).unwrap();
|
||||
assert!(matches!(message, FlightMessage::RecordBatch(_)));
|
||||
let FlightMessage::RecordBatch(actual_batch) = message else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(actual_batch, batch1);
|
||||
|
||||
let message = decoder.try_decode(d3)?.unwrap();
|
||||
let message = decoder.try_decode(d3).unwrap();
|
||||
assert!(matches!(message, FlightMessage::RecordBatch(_)));
|
||||
let FlightMessage::RecordBatch(actual_batch) = message else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(actual_batch, batch2);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -451,86 +407,4 @@ mod test {
|
||||
let actual = flight_messages_to_recordbatches(vec![m1, m2, m3]).unwrap();
|
||||
assert_eq!(actual, recordbatches);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_flight_encode_decode_with_dictionary_array() -> Result<()> {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("i", DataType::UInt8, true),
|
||||
Field::new_dictionary("s", DataType::UInt32, DataType::Utf8, true),
|
||||
]));
|
||||
let batch1 = DfRecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(UInt8Array::from_iter_values(vec![1, 2, 3])) as _,
|
||||
Arc::new(DictionaryArray::new(
|
||||
UInt32Array::from_value(0, 3),
|
||||
Arc::new(StringArray::from_iter_values(["x"])),
|
||||
)) as _,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let batch2 = DfRecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(UInt8Array::from_iter_values(vec![4, 5, 6, 7, 8])) as _,
|
||||
Arc::new(DictionaryArray::new(
|
||||
UInt32Array::from_iter_values([0, 1, 2, 2, 3]),
|
||||
Arc::new(StringArray::from_iter_values(["h", "e", "l", "o"])),
|
||||
)) as _,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let message_1 = FlightMessage::Schema(schema.clone());
|
||||
let message_2 = FlightMessage::RecordBatch(batch1);
|
||||
let message_3 = FlightMessage::RecordBatch(batch2);
|
||||
|
||||
let mut encoder = FlightEncoder::default();
|
||||
let encoded_1 = encoder.encode(message_1);
|
||||
let encoded_2 = encoder.encode(message_2);
|
||||
let encoded_3 = encoder.encode(message_3);
|
||||
// message 1 is Arrow Schema, should be encoded to one FlightData:
|
||||
assert_eq!(encoded_1.len(), 1);
|
||||
// message 2 and 3 are Arrow RecordBatch with dictionary arrays, should be encoded to
|
||||
// multiple FlightData:
|
||||
assert_eq!(encoded_2.len(), 2);
|
||||
assert_eq!(encoded_3.len(), 2);
|
||||
|
||||
let mut decoder = FlightDecoder::default();
|
||||
let decoded_1 = decoder.try_decode(encoded_1.first())?;
|
||||
let Some(FlightMessage::Schema(actual_schema)) = decoded_1 else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(actual_schema, schema);
|
||||
let decoded_2 = decoder.try_decode(&encoded_2[0])?;
|
||||
// expected to be a dictionary batch message, decoder should return none:
|
||||
assert!(decoded_2.is_none());
|
||||
let Some(FlightMessage::RecordBatch(decoded_2)) = decoder.try_decode(&encoded_2[1])? else {
|
||||
unreachable!()
|
||||
};
|
||||
let decoded_3 = decoder.try_decode(&encoded_3[0])?;
|
||||
// expected to be a dictionary batch message, decoder should return none:
|
||||
assert!(decoded_3.is_none());
|
||||
let Some(FlightMessage::RecordBatch(decoded_3)) = decoder.try_decode(&encoded_3[1])? else {
|
||||
unreachable!()
|
||||
};
|
||||
let actual = arrow::util::pretty::pretty_format_batches(&[decoded_2, decoded_3])
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let expected = r"
|
||||
+---+---+
|
||||
| i | s |
|
||||
+---+---+
|
||||
| 1 | x |
|
||||
| 2 | x |
|
||||
| 3 | x |
|
||||
| 4 | h |
|
||||
| 5 | e |
|
||||
| 6 | l |
|
||||
| 7 | l |
|
||||
| 8 | o |
|
||||
+---+---+";
|
||||
assert_eq!(actual, expected.trim());
|
||||
Ok(())
|
||||
}
|
||||
}
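
A minimal standalone sketch (not part of the diff), assuming only the `arrow` crate that the test above already uses: it shows how a `DictionaryArray` pairs a keys array with a shared values array, which is why `UInt32Array::from_value(0, 3)` over the single value `["x"]` renders as three `x` rows in the expected table, and why each dictionary-encoded batch is shipped as a dictionary-batch `FlightData` followed by a record-batch `FlightData`.

use std::sync::Arc;

use arrow::array::{Array, DictionaryArray, StringArray, UInt32Array};

fn main() {
    // Three rows, all pointing at dictionary entry 0 ("x").
    let dict = DictionaryArray::new(
        UInt32Array::from_value(0, 3),
        Arc::new(StringArray::from_iter_values(["x"])),
    );
    let values = dict.values().as_any().downcast_ref::<StringArray>().unwrap();
    for i in 0..dict.len() {
        // Each key indexes into the shared values array.
        let key = dict.keys().value(i) as usize;
        println!("row {i}: {}", values.value(key));
    }
}
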
|
||||
|
||||
@@ -69,7 +69,6 @@ table = { workspace = true, features = ["testing"] }
|
||||
tokio.workspace = true
|
||||
tokio-postgres = { workspace = true, optional = true }
|
||||
tonic.workspace = true
|
||||
tracing.workspace = true
|
||||
typetag.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
19
src/common/meta/src/cache/flow/table_flownode.rs
vendored
@@ -15,7 +15,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::info;
|
||||
use futures::future::BoxFuture;
|
||||
use moka::future::Cache;
|
||||
use moka::ops::compute::Op;
|
||||
@@ -90,12 +89,6 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
|
||||
// we have a corresponding cache invalidation mechanism to invalidate `(Key, EmptyHashSet)`.
|
||||
.map(Arc::new)
|
||||
.map(Some)
|
||||
.inspect(|set| {
|
||||
info!(
|
||||
"Initialized table_flownode cache for table_id: {}, set: {:?}",
|
||||
table_id, set
|
||||
);
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
@@ -174,13 +167,6 @@ fn invalidator<'a>(
|
||||
match ident {
|
||||
CacheIdent::CreateFlow(create_flow) => handle_create_flow(cache, create_flow).await,
|
||||
CacheIdent::DropFlow(drop_flow) => handle_drop_flow(cache, drop_flow).await,
|
||||
CacheIdent::FlowNodeAddressChange(node_id) => {
|
||||
info!(
|
||||
"Invalidate flow node cache for node_id in table_flownode: {}",
|
||||
node_id
|
||||
);
|
||||
cache.invalidate_all();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(())
|
||||
@@ -188,10 +174,7 @@ fn invalidator<'a>(
|
||||
}
|
||||
|
||||
fn filter(ident: &CacheIdent) -> bool {
|
||||
matches!(
|
||||
ident,
|
||||
CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_) | CacheIdent::FlowNodeAddressChange(_)
|
||||
)
|
||||
matches!(ident, CacheIdent::CreateFlow(_) | CacheIdent::DropFlow(_))
|
||||
}
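
The `filter`/`invalidator` pair above decides which `CacheIdent`s the `table_flownode` cache reacts to: only idents that pass `filter` are handed to `invalidator`, which either updates individual entries or drops the whole cache. A minimal sketch of that pattern, where `Ident` is a hypothetical stand-in for the real `CacheIdent`:

#[derive(Debug)]
enum Ident {
    CreateFlow(u32),
    DropFlow(u32),
    FlowNodeAddressChange(u64),
    Other,
}

// Only idents that pass the filter reach the invalidator.
fn filter(ident: &Ident) -> bool {
    matches!(
        ident,
        Ident::CreateFlow(_) | Ident::DropFlow(_) | Ident::FlowNodeAddressChange(_)
    )
}

fn invalidate(ident: &Ident) {
    match ident {
        Ident::CreateFlow(id) | Ident::DropFlow(id) => println!("update entry for flow {id}"),
        Ident::FlowNodeAddressChange(node) => println!("drop whole cache for node {node}"),
        Ident::Other => {}
    }
}

fn main() {
    for ident in [Ident::CreateFlow(1), Ident::Other, Ident::FlowNodeAddressChange(7)] {
        if filter(&ident) {
            invalidate(&ident);
        }
    }
}
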
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -22,7 +22,6 @@ use crate::key::flow::flow_name::FlowNameKey;
|
||||
use crate::key::flow::flow_route::FlowRouteKey;
|
||||
use crate::key::flow::flownode_flow::FlownodeFlowKey;
|
||||
use crate::key::flow::table_flow::TableFlowKey;
|
||||
use crate::key::node_address::NodeAddressKey;
|
||||
use crate::key::schema_name::SchemaNameKey;
|
||||
use crate::key::table_info::TableInfoKey;
|
||||
use crate::key::table_name::TableNameKey;
|
||||
@@ -54,10 +53,6 @@ pub struct Context {
|
||||
#[async_trait::async_trait]
|
||||
pub trait CacheInvalidator: Send + Sync {
|
||||
async fn invalidate(&self, ctx: &Context, caches: &[CacheIdent]) -> Result<()>;
|
||||
|
||||
fn name(&self) -> &'static str {
|
||||
std::any::type_name::<Self>()
|
||||
}
|
||||
}
|
||||
|
||||
pub type CacheInvalidatorRef = Arc<dyn CacheInvalidator>;
|
||||
@@ -142,13 +137,6 @@ where
|
||||
let key = FlowInfoKey::new(*flow_id);
|
||||
self.invalidate_key(&key.to_bytes()).await;
|
||||
}
|
||||
CacheIdent::FlowNodeAddressChange(node_id) => {
|
||||
// other caches don't need to be invalidated
// since this is only a flownode address change, not an id change
|
||||
common_telemetry::info!("Invalidate flow node cache for node_id: {}", node_id);
|
||||
let key = NodeAddressKey::with_flownode(*node_id);
|
||||
self.invalidate_key(&key.to_bytes()).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
||||
@@ -93,8 +93,6 @@ pub struct RegionStat {
|
||||
pub manifest_size: u64,
|
||||
/// The size of the SST data files in bytes.
|
||||
pub sst_size: u64,
|
||||
/// The num of the SST data files.
|
||||
pub sst_num: u64,
|
||||
/// The size of the SST index files in bytes.
|
||||
pub index_size: u64,
|
||||
/// The manifest info of the region.
|
||||
@@ -175,8 +173,8 @@ impl RegionStat {
|
||||
std::mem::size_of::<RegionId>() +
|
||||
// rcus, wcus, approximate_bytes, num_rows
|
||||
std::mem::size_of::<i64>() * 4 +
|
||||
// memtable_size, manifest_size, sst_size, sst_num, index_size
|
||||
std::mem::size_of::<u64>() * 5 +
|
||||
// memtable_size, manifest_size, sst_size, index_size
|
||||
std::mem::size_of::<u64>() * 4 +
|
||||
// engine
|
||||
std::mem::size_of::<String>() + self.engine.capacity() +
|
||||
// region_manifest
|
||||
@@ -277,7 +275,6 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
|
||||
memtable_size: region_stat.memtable_size,
|
||||
manifest_size: region_stat.manifest_size,
|
||||
sst_size: region_stat.sst_size,
|
||||
sst_num: region_stat.sst_num,
|
||||
index_size: region_stat.index_size,
|
||||
region_manifest: region_stat.manifest.into(),
|
||||
data_topic_latest_entry_id: region_stat.data_topic_latest_entry_id,
|
||||
|
||||
@@ -227,6 +227,7 @@ impl AlterTableProcedure {
|
||||
}
|
||||
|
||||
fn handle_alter_region_response(&mut self, mut results: Vec<RegionResponse>) -> Result<()> {
|
||||
self.data.state = AlterTableState::UpdateMetadata;
|
||||
if let Some(column_metadatas) =
|
||||
extract_column_metadatas(&mut results, TABLE_COLUMN_METADATA_EXTENSION_KEY)?
|
||||
{
|
||||
@@ -234,7 +235,7 @@ impl AlterTableProcedure {
|
||||
} else {
|
||||
warn!("altering table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged");
|
||||
}
|
||||
self.data.state = AlterTableState::UpdateMetadata;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -20,8 +20,8 @@ use api::v1::region::{alter_request, AlterRequest, RegionRequest, RegionRequestH
|
||||
use api::v1::AlterTableExpr;
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_grpc_expr::alter_expr_to_request;
|
||||
use common_telemetry::debug;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::future;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
@@ -299,15 +299,10 @@ fn build_new_table_info(
|
||||
| AlterKind::ModifyColumnTypes { .. }
|
||||
| AlterKind::SetTableOptions { .. }
|
||||
| AlterKind::UnsetTableOptions { .. }
|
||||
| AlterKind::SetIndexes { .. }
|
||||
| AlterKind::UnsetIndexes { .. }
|
||||
| AlterKind::SetIndex { .. }
|
||||
| AlterKind::UnsetIndex { .. }
|
||||
| AlterKind::DropDefaults { .. } => {}
|
||||
}
|
||||
|
||||
info!(
|
||||
"Built new table info: {:?} for table {}, table_id: {}",
|
||||
new_info.meta, table_name, table_id
|
||||
);
|
||||
|
||||
Ok(new_info)
|
||||
}
|
||||
|
||||
@@ -108,8 +108,6 @@ fn create_proto_alter_kind(
|
||||
Kind::UnsetTableOptions(v) => Ok(Some(alter_request::Kind::UnsetTableOptions(v.clone()))),
|
||||
Kind::SetIndex(v) => Ok(Some(alter_request::Kind::SetIndex(v.clone()))),
|
||||
Kind::UnsetIndex(v) => Ok(Some(alter_request::Kind::UnsetIndex(v.clone()))),
|
||||
Kind::SetIndexes(v) => Ok(Some(alter_request::Kind::SetIndexes(v.clone()))),
|
||||
Kind::UnsetIndexes(v) => Ok(Some(alter_request::Kind::UnsetIndexes(v.clone()))),
|
||||
Kind::DropDefaults(v) => Ok(Some(alter_request::Kind::DropDefaults(v.clone()))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::key::table_name::TableNameKey;
|
||||
impl CreateFlowProcedure {
|
||||
/// Allocates the [FlowId].
|
||||
pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
|
||||
// TODO(weny, ruihang): We don't support partitions yet. It's always 1 for now.
|
||||
//TODO(weny, ruihang): We doesn't support the partitions. It's always be 1, now.
|
||||
let partitions = 1;
|
||||
let (flow_id, peers) = self
|
||||
.context
|
||||
|
||||
@@ -21,7 +21,7 @@ use common_error::ext::BoxedError;
|
||||
use common_procedure::error::{
|
||||
ExternalSnafu, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu,
|
||||
};
|
||||
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, ProcedureId, Status};
|
||||
use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{info, warn};
|
||||
use futures::future::join_all;
|
||||
@@ -246,6 +246,8 @@ impl CreateTableProcedure {
|
||||
}
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::CreateMetadata;
|
||||
|
||||
let mut results = join_all(create_region_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
@@ -259,7 +261,6 @@ impl CreateTableProcedure {
|
||||
warn!("creating table result doesn't contains extension key `{TABLE_COLUMN_METADATA_EXTENSION_KEY}`,leaving the table's column metadata unchanged");
|
||||
}
|
||||
|
||||
self.creator.data.state = CreateTableState::CreateMetadata;
|
||||
Ok(Status::executing(true))
|
||||
}
|
||||
|
||||
@@ -267,9 +268,8 @@ impl CreateTableProcedure {
|
||||
///
|
||||
/// Abort(not-retry):
|
||||
/// - Failed to create table metadata.
|
||||
async fn on_create_metadata(&mut self, pid: ProcedureId) -> Result<Status> {
|
||||
async fn on_create_metadata(&mut self) -> Result<Status> {
|
||||
let table_id = self.table_id();
|
||||
let table_ref = self.creator.data.table_ref();
|
||||
let manager = &self.context.table_metadata_manager;
|
||||
|
||||
let mut raw_table_info = self.table_info().clone();
|
||||
@@ -289,10 +289,7 @@ impl CreateTableProcedure {
|
||||
self.context
|
||||
.register_failure_detectors(detecting_regions)
|
||||
.await;
|
||||
info!(
|
||||
"Successfully created table: {}, table_id: {}, procedure_id: {}",
|
||||
table_ref, table_id, pid
|
||||
);
|
||||
info!("Created table metadata for table {table_id}");
|
||||
|
||||
self.creator.opening_regions.clear();
|
||||
Ok(Status::done_with_output(table_id))
|
||||
@@ -320,7 +317,7 @@ impl Procedure for CreateTableProcedure {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
|
||||
let state = &self.creator.data.state;
|
||||
|
||||
let _timer = metrics::METRIC_META_PROCEDURE_CREATE_TABLE
|
||||
@@ -330,7 +327,7 @@ impl Procedure for CreateTableProcedure {
|
||||
match state {
|
||||
CreateTableState::Prepare => self.on_prepare().await,
|
||||
CreateTableState::DatanodeCreateRegions => self.on_datanode_create_regions().await,
|
||||
CreateTableState::CreateMetadata => self.on_create_metadata(ctx.procedure_id).await,
|
||||
CreateTableState::CreateMetadata => self.on_create_metadata().await,
|
||||
}
|
||||
.map_err(map_to_procedure_error)
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ use snafu::ensure;
|
||||
use store_api::storage::{RegionId, RegionNumber, TableId};
|
||||
|
||||
use crate::ddl::TableMetadata;
|
||||
use crate::error::{Result, UnsupportedSnafu};
|
||||
use crate::error::{self, Result, UnsupportedSnafu};
|
||||
use crate::key::table_route::PhysicalTableRouteValue;
|
||||
use crate::peer::Peer;
|
||||
use crate::rpc::ddl::CreateTableTask;
|
||||
@@ -113,18 +113,24 @@ impl TableMetadataAllocator {
|
||||
table_id: TableId,
|
||||
task: &CreateTableTask,
|
||||
) -> Result<PhysicalTableRouteValue> {
|
||||
let regions = task.partitions.len().max(1);
|
||||
let regions = task.partitions.len();
|
||||
ensure!(
|
||||
regions > 0,
|
||||
error::UnexpectedSnafu {
|
||||
err_msg: "The number of partitions must be greater than 0"
|
||||
}
|
||||
);
|
||||
|
||||
let peers = self.peer_allocator.alloc(regions).await?;
|
||||
debug!("Allocated peers {:?} for table {}", peers, table_id);
|
||||
|
||||
let mut region_routes = task
|
||||
let region_routes = task
|
||||
.partitions
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, partition)| {
|
||||
let region = Region {
|
||||
id: RegionId::new(table_id, i as u32),
|
||||
partition_expr: partition.expression.clone(),
|
||||
partition: Some(partition.clone().into()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -138,18 +144,6 @@ impl TableMetadataAllocator {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// If the table has no partitions, we need to create a default region.
|
||||
if region_routes.is_empty() {
|
||||
region_routes.push(RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 0),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(peers[0].clone()),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
|
||||
Ok(PhysicalTableRouteValue::new(region_routes))
|
||||
}
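
The hunks above trade between two behaviours for an empty partition list: silently falling back to a single default region (`len().max(1)` plus the default `RegionRoute`), or rejecting it up front with an `UnexpectedSnafu` error, which `test_on_prepare_with_no_partition_err` further below exercises. A rough sketch of that behavioural difference, with plain `String` errors standing in for the real snafu error types:

fn allocate_regions_old(partitions: usize) -> Vec<u32> {
    // One behaviour: silently fall back to a single default region.
    (0..partitions.max(1) as u32).collect()
}

fn allocate_regions_new(partitions: usize) -> Result<Vec<u32>, String> {
    // Other behaviour: an empty partition list is an unexpected input.
    if partitions == 0 {
        return Err("The number of partitions must be greater than 0".to_string());
    }
    Ok((0..partitions as u32).collect())
}

fn main() {
    assert_eq!(allocate_regions_old(0), vec![0]);
    assert!(allocate_regions_new(0).is_err());
    assert_eq!(allocate_regions_new(3).unwrap(), vec![0, 1, 2]);
}
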
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ pub mod columns;
|
||||
pub mod create_table;
|
||||
pub mod datanode_handler;
|
||||
pub mod flownode_handler;
|
||||
pub mod region_metadata;
|
||||
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
@@ -146,7 +145,10 @@ pub fn test_create_logical_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition::default()],
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
@@ -181,7 +183,10 @@ pub fn test_create_physical_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition::default()],
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -175,7 +175,10 @@ pub fn test_create_table_task(name: &str, table_id: TableId) -> CreateTableTask
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition::default()],
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,11 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::RegionRequest;
|
||||
use common_error::ext::{BoxedError, ErrorExt, StackError};
|
||||
use common_error::status_code::StatusCode;
|
||||
@@ -24,8 +22,6 @@ use common_query::request::QueryRequest;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use snafu::{ResultExt, Snafu};
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
@@ -282,47 +278,3 @@ impl MockDatanodeHandler for AllFailureDatanodeHandler {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ListMetadataDatanodeHandler {
|
||||
pub region_metadatas: HashMap<RegionId, Option<RegionMetadata>>,
|
||||
}
|
||||
|
||||
impl ListMetadataDatanodeHandler {
|
||||
pub fn new(region_metadatas: HashMap<RegionId, Option<RegionMetadata>>) -> Self {
|
||||
Self { region_metadatas }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MockDatanodeHandler for ListMetadataDatanodeHandler {
|
||||
async fn handle(&self, _peer: &Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
let Some(Body::ListMetadata(req)) = request.body else {
|
||||
unreachable!()
|
||||
};
|
||||
let mut response = RegionResponse::new(0);
|
||||
|
||||
let mut output = Vec::with_capacity(req.region_ids.len());
|
||||
for region_id in req.region_ids {
|
||||
match self.region_metadatas.get(&RegionId::from_u64(region_id)) {
|
||||
Some(metadata) => {
|
||||
output.push(metadata.clone());
|
||||
}
|
||||
None => {
|
||||
output.push(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
response.metadata = serde_json::to_vec(&output).unwrap();
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn handle_query(
|
||||
&self,
|
||||
_peer: &Peer,
|
||||
_request: QueryRequest,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
/// Builds a region metadata with the given column metadatas.
|
||||
pub fn build_region_metadata(
|
||||
region_id: RegionId,
|
||||
column_metadatas: &[ColumnMetadata],
|
||||
) -> RegionMetadata {
|
||||
let mut builder = RegionMetadataBuilder::new(region_id);
|
||||
let mut primary_key = vec![];
|
||||
for column_metadata in column_metadatas {
|
||||
builder.push_column_metadata(column_metadata.clone());
|
||||
if column_metadata.semantic_type == SemanticType::Tag {
|
||||
primary_key.push(column_metadata.column_id);
|
||||
}
|
||||
}
|
||||
builder.primary_key(primary_key);
|
||||
builder.build().unwrap()
|
||||
}
|
||||
@@ -141,7 +141,10 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
|
||||
CreateTableTask {
|
||||
create_table,
|
||||
// Single region
|
||||
partitions: vec![Partition::default()],
|
||||
partitions: vec![Partition {
|
||||
column_list: vec![],
|
||||
value_list: vec![],
|
||||
}],
|
||||
table_info,
|
||||
}
|
||||
}
|
||||
@@ -210,6 +213,21 @@ async fn test_on_prepare_without_create_if_table_exists() {
|
||||
assert_eq!(procedure.table_id(), 1024);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_prepare_with_no_partition_err() {
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(()));
|
||||
let ddl_context = new_ddl_context(node_manager);
|
||||
let mut task = test_create_table_task("foo");
|
||||
task.partitions = vec![];
|
||||
task.create_table.create_if_not_exists = true;
|
||||
let mut procedure = CreateTableProcedure::new(task, ddl_context);
|
||||
let err = procedure.on_prepare().await.unwrap_err();
|
||||
assert_matches!(err, Error::Unexpected { .. });
|
||||
assert!(err
|
||||
.to_string()
|
||||
.contains("The number of partitions must be greater than 0"),);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_on_datanode_create_regions_should_retry() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
@@ -13,8 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod raw_table_info;
|
||||
#[allow(dead_code)]
|
||||
pub(crate) mod region_metadata_lister;
|
||||
pub(crate) mod table_id;
|
||||
pub(crate) mod table_info;
|
||||
|
||||
@@ -448,7 +446,6 @@ pub fn extract_column_metadatas(
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if schemas.is_empty() {
|
||||
warn!("extract_column_metadatas: no extension key `{key}` found in results");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
||||
@@ -54,10 +54,7 @@ pub(crate) fn build_new_physical_table_info(
|
||||
}
|
||||
}
|
||||
SemanticType::Field => value_indices.push(idx),
|
||||
SemanticType::Timestamp => {
|
||||
value_indices.push(idx);
|
||||
*time_index = Some(idx);
|
||||
}
|
||||
SemanticType::Timestamp => *time_index = Some(idx),
|
||||
}
|
||||
|
||||
columns.push(col.column_schema.clone());
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::region::region_request::Body as PbRegionRequest;
|
||||
use api::v1::region::{ListMetadataRequest, RegionRequest, RegionRequestHeader};
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use futures::future::join_all;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
|
||||
use crate::ddl::utils::add_peer_context_if_needed;
|
||||
use crate::error::{DecodeJsonSnafu, Result};
|
||||
use crate::node_manager::NodeManagerRef;
|
||||
use crate::rpc::router::{find_leaders, region_distribution, RegionRoute};
|
||||
|
||||
/// Collects the region metadata from the datanodes.
|
||||
pub struct RegionMetadataLister {
|
||||
node_manager: NodeManagerRef,
|
||||
}
|
||||
|
||||
impl RegionMetadataLister {
|
||||
/// Creates a new [`RegionMetadataLister`] with the given [`NodeManagerRef`].
|
||||
pub fn new(node_manager: NodeManagerRef) -> Self {
|
||||
Self { node_manager }
|
||||
}
|
||||
|
||||
/// Collects the region metadata from the datanodes.
|
||||
pub async fn list(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
region_routes: &[RegionRoute],
|
||||
) -> Result<Vec<Option<RegionMetadata>>> {
|
||||
let region_distribution = region_distribution(region_routes);
|
||||
let leaders = find_leaders(region_routes)
|
||||
.into_iter()
|
||||
.map(|p| (p.id, p))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let total_num_region = region_distribution
|
||||
.values()
|
||||
.map(|r| r.leader_regions.len())
|
||||
.sum::<usize>();
|
||||
|
||||
let mut list_metadata_tasks = Vec::with_capacity(leaders.len());
|
||||
|
||||
// Build requests.
|
||||
for (datanode_id, region_role_set) in region_distribution {
|
||||
if region_role_set.leader_regions.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Safety: must exist.
|
||||
let peer = leaders.get(&datanode_id).unwrap();
|
||||
let requester = self.node_manager.datanode(peer).await;
|
||||
let region_ids = region_role_set
|
||||
.leader_regions
|
||||
.iter()
|
||||
.map(|r| RegionId::new(table_id, *r).as_u64())
|
||||
.collect();
|
||||
let request = Self::build_list_metadata_request(region_ids);
|
||||
|
||||
let peer = peer.clone();
|
||||
list_metadata_tasks.push(async move {
|
||||
requester
|
||||
.handle(request)
|
||||
.await
|
||||
.map_err(add_peer_context_if_needed(peer))
|
||||
});
|
||||
}
|
||||
|
||||
let results = join_all(list_metadata_tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.collect::<Result<Vec<_>>>()?
|
||||
.into_iter()
|
||||
.map(|r| r.metadata);
|
||||
|
||||
let mut output = Vec::with_capacity(total_num_region);
|
||||
for result in results {
|
||||
let region_metadatas: Vec<Option<RegionMetadata>> =
|
||||
serde_json::from_slice(&result).context(DecodeJsonSnafu)?;
|
||||
output.extend(region_metadatas);
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn build_list_metadata_request(region_ids: Vec<u64>) -> RegionRequest {
|
||||
RegionRequest {
|
||||
header: Some(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
..Default::default()
|
||||
}),
|
||||
body: Some(PbRegionRequest::ListMetadata(ListMetadataRequest {
|
||||
region_ids,
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
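
`RegionMetadataLister::list` above fans one `ListMetadata` request out per datanode leader, drives them concurrently, and flattens the per-node responses back into a single `Vec<Option<RegionMetadata>>`. A minimal async sketch of that fan-out/flatten shape, using `futures::future::join_all` and tokio as the surrounding code does; `fetch` here is a hypothetical stand-in for the datanode call:

use futures::future::join_all;

// Hypothetical stand-in for `requester.handle(request)`.
async fn fetch(node: u64, regions: Vec<u64>) -> Result<Vec<Option<String>>, String> {
    Ok(regions
        .into_iter()
        .map(|r| Some(format!("metadata of region {r} on node {node}")))
        .collect())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    // (datanode id, regions it leads), mirroring the region distribution step.
    let plan = vec![(1u64, vec![1u64]), (3, vec![2, 3])];

    // One future per datanode, driven concurrently.
    let tasks = plan.into_iter().map(|(node, regions)| fetch(node, regions));
    let results = join_all(tasks).await;

    // Flatten per-node responses into one output vector.
    let mut output = Vec::new();
    for result in results {
        output.extend(result?);
    }
    assert_eq!(output.len(), 3);
    Ok(())
}
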
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::meta::Peer;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::RegionRequest;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::ddl::test_util::datanode_handler::{DatanodeWatcher, ListMetadataDatanodeHandler};
|
||||
use crate::ddl::test_util::region_metadata::build_region_metadata;
|
||||
use crate::ddl::test_util::test_column_metadatas;
|
||||
use crate::ddl::utils::region_metadata_lister::RegionMetadataLister;
|
||||
use crate::error::Result;
|
||||
use crate::rpc::router::{Region, RegionRoute};
|
||||
use crate::test_util::MockDatanodeManager;
|
||||
|
||||
fn assert_list_metadata_request(req: RegionRequest, expected_region_ids: &[RegionId]) {
|
||||
let Some(Body::ListMetadata(req)) = req.body else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
assert_eq!(req.region_ids.len(), expected_region_ids.len());
|
||||
for region_id in expected_region_ids {
|
||||
assert!(req.region_ids.contains(®ion_id.as_u64()));
|
||||
}
|
||||
}
|
||||
|
||||
fn empty_list_metadata_handler(_peer: Peer, request: RegionRequest) -> Result<RegionResponse> {
|
||||
let Some(Body::ListMetadata(req)) = request.body else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
let mut output: Vec<Option<RegionMetadata>> = Vec::with_capacity(req.region_ids.len());
|
||||
for _region_id in req.region_ids {
|
||||
output.push(None);
|
||||
}
|
||||
|
||||
Ok(RegionResponse::from_metadata(
|
||||
serde_json::to_vec(&output).unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_list_request() {
|
||||
let (tx, mut rx) = mpsc::channel(8);
|
||||
let handler = DatanodeWatcher::new(tx).with_handler(empty_list_metadata_handler);
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(handler));
|
||||
let lister = RegionMetadataLister::new(node_manager);
|
||||
let region_routes = vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(1024, 1)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![Peer::empty(5)],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(1024, 2)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(1024, 3)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![Peer::empty(4)],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
];
|
||||
let region_metadatas = lister.list(1024, ®ion_routes).await.unwrap();
|
||||
assert_eq!(region_metadatas.len(), 3);
|
||||
|
||||
let mut requests = vec![];
|
||||
for _ in 0..2 {
|
||||
let (peer, request) = rx.try_recv().unwrap();
|
||||
requests.push((peer, request));
|
||||
}
|
||||
rx.try_recv().unwrap_err();
|
||||
|
||||
let (peer, request) = requests.remove(0);
|
||||
assert_eq!(peer.id, 1);
|
||||
assert_list_metadata_request(request, &[RegionId::new(1024, 1)]);
|
||||
let (peer, request) = requests.remove(0);
|
||||
assert_eq!(peer.id, 3);
|
||||
assert_list_metadata_request(request, &[RegionId::new(1024, 2), RegionId::new(1024, 3)]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_list_region_metadata() {
|
||||
let region_metadata =
|
||||
build_region_metadata(RegionId::new(1024, 1), &test_column_metadatas(&["tag_0"]));
|
||||
let region_metadatas = HashMap::from([
|
||||
(RegionId::new(1024, 0), None),
|
||||
(RegionId::new(1024, 1), Some(region_metadata.clone())),
|
||||
]);
|
||||
let handler = ListMetadataDatanodeHandler::new(region_metadatas);
|
||||
let node_manager = Arc::new(MockDatanodeManager::new(handler));
|
||||
let lister = RegionMetadataLister::new(node_manager);
|
||||
let region_routes = vec![
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(1024, 0)),
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
RegionRoute {
|
||||
region: Region::new_test(RegionId::new(1024, 1)),
|
||||
leader_peer: Some(Peer::empty(3)),
|
||||
follower_peers: vec![],
|
||||
leader_state: None,
|
||||
leader_down_since: None,
|
||||
},
|
||||
];
|
||||
let region_metadatas = lister.list(1024, ®ion_routes).await.unwrap();
|
||||
assert_eq!(region_metadatas.len(), 2);
|
||||
assert_eq!(region_metadatas[0], None);
|
||||
assert_eq!(region_metadatas[1], Some(region_metadata));
|
||||
}
|
||||
}
|
||||
@@ -877,36 +877,6 @@ pub enum Error {
|
||||
#[snafu(source)]
|
||||
error: object_store::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Missing column in column metadata: {}, table: {}, table_id: {}",
|
||||
column_name,
|
||||
table_name,
|
||||
table_id,
|
||||
))]
|
||||
MissingColumnInColumnMetadata {
|
||||
column_name: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_name: String,
|
||||
table_id: TableId,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Mismatch column id: column_name: {}, column_id: {}, table: {}, table_id: {}",
|
||||
column_name,
|
||||
column_id,
|
||||
table_name,
|
||||
table_id,
|
||||
))]
|
||||
MismatchColumnId {
|
||||
column_name: String,
|
||||
column_id: u32,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
table_name: String,
|
||||
table_id: TableId,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -926,10 +896,7 @@ impl ErrorExt for Error {
|
||||
| DeserializeFromJson { .. } => StatusCode::Internal,
|
||||
|
||||
NoLeader { .. } => StatusCode::TableUnavailable,
|
||||
ValueNotExist { .. }
|
||||
| ProcedurePoisonConflict { .. }
|
||||
| MissingColumnInColumnMetadata { .. }
|
||||
| MismatchColumnId { .. } => StatusCode::Unexpected,
|
||||
ValueNotExist { .. } | ProcedurePoisonConflict { .. } => StatusCode::Unexpected,
|
||||
|
||||
Unsupported { .. } => StatusCode::Unsupported,
|
||||
WriteObject { .. } | ReadObject { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
@@ -174,8 +174,6 @@ pub struct UpgradeRegion {
|
||||
/// The identifier of cache.
|
||||
pub enum CacheIdent {
|
||||
FlowId(FlowId),
|
||||
/// Indicate change of address of flownode.
|
||||
FlowNodeAddressChange(u64),
|
||||
FlowName(FlowName),
|
||||
TableId(TableId),
|
||||
TableName(TableName),
|
||||
|
||||
@@ -1509,7 +1509,6 @@ mod tests {
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: BTreeMap::new(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a2")),
|
||||
follower_peers: vec![],
|
||||
@@ -2002,7 +2001,6 @@ mod tests {
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: BTreeMap::new(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a2")),
|
||||
leader_state: Some(LeaderState::Downgrading),
|
||||
@@ -2015,7 +2013,6 @@ mod tests {
|
||||
name: "r2".to_string(),
|
||||
partition: None,
|
||||
attrs: BTreeMap::new(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(datanode, "a1")),
|
||||
leader_state: None,
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
@@ -58,17 +58,12 @@ impl Default for SchemaNameKey<'_> {
|
||||
pub struct SchemaNameValue {
|
||||
#[serde(default)]
|
||||
pub ttl: Option<DatabaseTimeToLive>,
|
||||
#[serde(default)]
|
||||
pub extra_options: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
impl Display for SchemaNameValue {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(ttl) = self.ttl.map(|i| i.to_string()) {
|
||||
writeln!(f, "'ttl'='{}'", ttl)?;
|
||||
}
|
||||
for (k, v) in self.extra_options.iter() {
|
||||
writeln!(f, "'{k}'='{v}'")?;
|
||||
write!(f, "ttl='{}'", ttl)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -92,18 +87,7 @@ impl TryFrom<&HashMap<String, String>> for SchemaNameValue {
|
||||
})
|
||||
.transpose()?
|
||||
.map(|ttl| ttl.into());
|
||||
let extra_options = value
|
||||
.iter()
|
||||
.filter_map(|(k, v)| {
|
||||
if k == OPT_KEY_TTL {
|
||||
None
|
||||
} else {
|
||||
Some((k.clone(), v.clone()))
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Self { ttl, extra_options })
|
||||
Ok(Self { ttl })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,12 +97,6 @@ impl From<SchemaNameValue> for HashMap<String, String> {
|
||||
if let Some(ttl) = value.ttl.map(|ttl| ttl.to_string()) {
|
||||
opts.insert(OPT_KEY_TTL.to_string(), ttl);
|
||||
}
|
||||
opts.extend(
|
||||
value
|
||||
.extra_options
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.clone())),
|
||||
);
|
||||
opts
|
||||
}
|
||||
}
|
||||
@@ -338,23 +316,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_display_schema_value() {
|
||||
let schema_value = SchemaNameValue {
|
||||
ttl: None,
|
||||
..Default::default()
|
||||
};
|
||||
let schema_value = SchemaNameValue { ttl: None };
|
||||
assert_eq!("", schema_value.to_string());
|
||||
|
||||
let schema_value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(9).into()),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!("'ttl'='9s'\n", schema_value.to_string());
|
||||
assert_eq!("ttl='9s'", schema_value.to_string());
|
||||
|
||||
let schema_value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(0).into()),
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!("'ttl'='forever'\n", schema_value.to_string());
|
||||
assert_eq!("ttl='forever'", schema_value.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -368,7 +341,6 @@ mod tests {
|
||||
|
||||
let value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(10).into()),
|
||||
..Default::default()
|
||||
};
|
||||
let mut opts: HashMap<String, String> = HashMap::new();
|
||||
opts.insert("ttl".to_string(), "10s".to_string());
|
||||
@@ -383,7 +355,6 @@ mod tests {
|
||||
|
||||
let forever = SchemaNameValue {
|
||||
ttl: Some(Default::default()),
|
||||
..Default::default()
|
||||
};
|
||||
let parsed = SchemaNameValue::try_from_raw_value(
|
||||
serde_json::json!({"ttl": "forever"}).to_string().as_bytes(),
|
||||
@@ -403,81 +374,6 @@ mod tests {
|
||||
assert!(err_empty.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extra_options_compatibility() {
|
||||
// Test with extra_options only
|
||||
let mut opts: HashMap<String, String> = HashMap::new();
|
||||
opts.insert("foo".to_string(), "bar".to_string());
|
||||
opts.insert("baz".to_string(), "qux".to_string());
|
||||
let value = SchemaNameValue::try_from(&opts).unwrap();
|
||||
assert_eq!(value.ttl, None);
|
||||
assert_eq!(value.extra_options.get("foo"), Some(&"bar".to_string()));
|
||||
assert_eq!(value.extra_options.get("baz"), Some(&"qux".to_string()));
|
||||
|
||||
// Test round-trip conversion
|
||||
let opts_back: HashMap<String, String> = value.clone().into();
|
||||
assert_eq!(opts_back.get("foo"), Some(&"bar".to_string()));
|
||||
assert_eq!(opts_back.get("baz"), Some(&"qux".to_string()));
|
||||
assert!(!opts_back.contains_key("ttl"));
|
||||
|
||||
// Test with both ttl and extra_options
|
||||
let mut opts: HashMap<String, String> = HashMap::new();
|
||||
opts.insert("ttl".to_string(), "5m".to_string());
|
||||
opts.insert("opt1".to_string(), "val1".to_string());
|
||||
let value = SchemaNameValue::try_from(&opts).unwrap();
|
||||
assert_eq!(value.ttl, Some(Duration::from_secs(300).into()));
|
||||
assert_eq!(value.extra_options.get("opt1"), Some(&"val1".to_string()));
|
||||
|
||||
// Test serialization/deserialization compatibility
|
||||
let json = serde_json::to_string(&value).unwrap();
|
||||
let deserialized: SchemaNameValue = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(value, deserialized);
|
||||
|
||||
// Test display includes extra_options
|
||||
let mut value = SchemaNameValue::default();
|
||||
value
|
||||
.extra_options
|
||||
.insert("foo".to_string(), "bar".to_string());
|
||||
let display = value.to_string();
|
||||
assert!(display.contains("'foo'='bar'"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_backward_compatibility_with_old_format() {
|
||||
// Simulate old format: only ttl, no extra_options
|
||||
let json = r#"{"ttl":"10s"}"#;
|
||||
let parsed = SchemaNameValue::try_from_raw_value(json.as_bytes()).unwrap();
|
||||
assert_eq!(
|
||||
parsed,
|
||||
Some(SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(10).into()),
|
||||
extra_options: BTreeMap::new(),
|
||||
})
|
||||
);
|
||||
|
||||
// Simulate old format: null value
|
||||
let json = r#"null"#;
|
||||
let parsed = SchemaNameValue::try_from_raw_value(json.as_bytes()).unwrap();
|
||||
assert!(parsed.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_forward_compatibility_with_new_options() {
|
||||
// Simulate new format: ttl + extra_options
|
||||
let json = r#"{"ttl":"15s","extra_options":{"foo":"bar","baz":"qux"}}"#;
|
||||
let parsed = SchemaNameValue::try_from_raw_value(json.as_bytes()).unwrap();
|
||||
let mut expected_options = BTreeMap::new();
|
||||
expected_options.insert("foo".to_string(), "bar".to_string());
|
||||
expected_options.insert("baz".to_string(), "qux".to_string());
|
||||
assert_eq!(
|
||||
parsed,
|
||||
Some(SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(15).into()),
|
||||
extra_options: expected_options,
|
||||
})
|
||||
);
|
||||
}
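
The compatibility tests above hinge on `#[serde(default)]`: an old payload that lacks `extra_options` still deserializes, with the missing field falling back to its default. A small self-contained sketch of that mechanism, assuming `serde` with the derive feature and `serde_json`; the `Value` struct is a simplified stand-in, not the real `SchemaNameValue`:

use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Value {
    #[serde(default)]
    ttl: Option<String>,
    #[serde(default)]
    extra_options: BTreeMap<String, String>,
}

fn main() {
    // Old format: no `extra_options` field at all -- still parses.
    let old: Value = serde_json::from_str(r#"{"ttl":"10s"}"#).unwrap();
    assert_eq!(old.ttl.as_deref(), Some("10s"));
    assert!(old.extra_options.is_empty());

    // New format: extra options round-trip unchanged.
    let new: Value =
        serde_json::from_str(r#"{"ttl":"15s","extra_options":{"foo":"bar"}}"#).unwrap();
    assert_eq!(new.extra_options.get("foo").map(String::as_str), Some("bar"));
    let json = serde_json::to_string(&new).unwrap();
    assert_eq!(serde_json::from_str::<Value>(&json).unwrap(), new);
}
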
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_key_exist() {
|
||||
let manager = SchemaManager::new(Arc::new(MemoryKvBackend::default()));
|
||||
@@ -500,7 +396,6 @@ mod tests {
|
||||
let current_schema_value = manager.get(schema_key).await.unwrap().unwrap();
|
||||
let new_schema_value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(10).into()),
|
||||
..Default::default()
|
||||
};
|
||||
manager
|
||||
.update(schema_key, ¤t_schema_value, &new_schema_value)
|
||||
@@ -515,11 +410,9 @@ mod tests {
|
||||
|
||||
let new_schema_value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(40).into()),
|
||||
..Default::default()
|
||||
};
|
||||
let incorrect_schema_value = SchemaNameValue {
|
||||
ttl: Some(Duration::from_secs(20).into()),
|
||||
..Default::default()
|
||||
}
|
||||
.try_as_raw_value()
|
||||
.unwrap();
|
||||
@@ -532,10 +425,7 @@ mod tests {
|
||||
.unwrap_err();
|
||||
|
||||
let current_schema_value = manager.get(schema_key).await.unwrap().unwrap();
|
||||
let new_schema_value = SchemaNameValue {
|
||||
ttl: None,
|
||||
..Default::default()
|
||||
};
|
||||
let new_schema_value = SchemaNameValue { ttl: None };
|
||||
manager
|
||||
.update(schema_key, ¤t_schema_value, &new_schema_value)
|
||||
.await
|
||||
|
||||
@@ -711,7 +711,6 @@ mod tests {
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: Default::default(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer {
|
||||
id: 2,
|
||||
@@ -727,7 +726,6 @@ mod tests {
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: Default::default(),
|
||||
partition_expr: Default::default(),
|
||||
},
|
||||
leader_peer: Some(Peer {
|
||||
id: 2,
|
||||
|
||||
@@ -32,7 +32,6 @@ pub mod key;
|
||||
pub mod kv_backend;
|
||||
pub mod leadership_notifier;
|
||||
pub mod lock_key;
|
||||
pub mod maintenance;
|
||||
pub mod metrics;
|
||||
pub mod node_expiry_listener;
|
||||
pub mod node_manager;
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) mod reconcile_table;
|
||||
@@ -1,17 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// TODO(weny): Remove it
|
||||
#[allow(dead_code)]
|
||||
pub(crate) mod utils;
|
||||
@@ -1,694 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt;
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata};
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::metadata::RawTableMeta;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::error::{
|
||||
MismatchColumnIdSnafu, MissingColumnInColumnMetadataSnafu, Result, UnexpectedSnafu,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct PartialRegionMetadata<'a> {
|
||||
column_metadatas: &'a [ColumnMetadata],
|
||||
primary_key: &'a [u32],
|
||||
table_id: TableId,
|
||||
}
|
||||
|
||||
impl<'a> From<&'a RegionMetadata> for PartialRegionMetadata<'a> {
|
||||
fn from(region_metadata: &'a RegionMetadata) -> Self {
|
||||
Self {
|
||||
column_metadatas: ®ion_metadata.column_metadatas,
|
||||
primary_key: ®ion_metadata.primary_key,
|
||||
table_id: region_metadata.region_id.table_id(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A display wrapper for [`ColumnMetadata`] that formats the column metadata in a more readable way.
|
||||
struct ColumnMetadataDisplay<'a>(pub &'a ColumnMetadata);
|
||||
|
||||
impl<'a> fmt::Debug for ColumnMetadataDisplay<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let col = self.0;
|
||||
write!(
|
||||
f,
|
||||
"Column {{ name: {}, id: {}, semantic_type: {:?}, data_type: {:?} }}",
|
||||
col.column_schema.name, col.column_id, col.semantic_type, col.column_schema.data_type,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the column metadatas are consistent.
|
||||
///
|
||||
/// The column metadatas are consistent if:
|
||||
/// - The column metadatas are the same.
|
||||
/// - The primary key are the same.
|
||||
/// - The table id of the region metadatas are the same.
|
||||
///
|
||||
/// # Panics
/// Panics if `region_metadatas` is empty.
|
||||
pub(crate) fn check_column_metadatas_consistent(
|
||||
region_metadatas: &[RegionMetadata],
|
||||
) -> Option<Vec<ColumnMetadata>> {
|
||||
let is_column_metadata_consistent = region_metadatas
|
||||
.windows(2)
|
||||
.all(|w| PartialRegionMetadata::from(&w[0]) == PartialRegionMetadata::from(&w[1]));
|
||||
|
||||
if !is_column_metadata_consistent {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(region_metadatas[0].column_metadatas.clone())
|
||||
}
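
The consistency check above relies on the `windows(2)` idiom: a slice is uniform exactly when every adjacent pair compares equal, so one linear pass suffices. A tiny standalone illustration:

fn all_equal<T: PartialEq>(items: &[T]) -> bool {
    items.windows(2).all(|w| w[0] == w[1])
}

fn main() {
    assert!(all_equal(&[3, 3, 3]));
    assert!(!all_equal(&[3, 3, 4]));
    // Vacuously true for empty and single-element slices,
    // which is why the caller documents a non-empty precondition.
    assert!(all_equal::<i32>(&[]));
}
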
|
||||
|
||||
/// Resolves column metadata inconsistencies among the given region metadatas
|
||||
/// by using the column metadata from the metasrv as the source of truth.
|
||||
///
|
||||
/// All region metadatas whose column metadata differs from the given `column_metadatas`
|
||||
/// will be marked for reconciliation.
|
||||
///
|
||||
/// Returns the region ids that need to be reconciled.
|
||||
pub(crate) fn resolve_column_metadatas_with_metasrv(
|
||||
column_metadatas: &[ColumnMetadata],
|
||||
region_metadatas: &[RegionMetadata],
|
||||
) -> Result<Vec<RegionId>> {
|
||||
let is_same_table = region_metadatas
|
||||
.windows(2)
|
||||
.all(|w| w[0].region_id.table_id() == w[1].region_id.table_id());
|
||||
|
||||
ensure!(
|
||||
is_same_table,
|
||||
UnexpectedSnafu {
|
||||
err_msg: "Region metadatas are not from the same table"
|
||||
}
|
||||
);
|
||||
|
||||
let mut regions_ids = vec![];
|
||||
for region_metadata in region_metadatas {
|
||||
if region_metadata.column_metadatas != column_metadatas {
|
||||
let is_invariant_preserved = check_column_metadata_invariants(
|
||||
column_metadatas,
|
||||
®ion_metadata.column_metadatas,
|
||||
);
|
||||
ensure!(
|
||||
is_invariant_preserved,
|
||||
UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
|
||||
region_metadata.region_id,
|
||||
column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
|
||||
region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
|
||||
)
|
||||
}
|
||||
);
|
||||
regions_ids.push(region_metadata.region_id);
|
||||
}
|
||||
}
|
||||
Ok(regions_ids)
|
||||
}
|
||||
|
||||
/// Resolves column metadata inconsistencies among the given region metadatas
|
||||
/// by selecting the column metadata with the highest schema version.
|
||||
///
|
||||
/// This strategy assumes that at most two versions of column metadata may exist,
|
||||
/// due to the poison mechanism, making the highest schema version a safe choice.
|
||||
///
|
||||
/// Returns the resolved column metadata and the region ids that need to be reconciled.
|
||||
pub(crate) fn resolve_column_metadatas_with_latest(
|
||||
region_metadatas: &[RegionMetadata],
|
||||
) -> Result<(Vec<ColumnMetadata>, Vec<RegionId>)> {
|
||||
let is_same_table = region_metadatas
|
||||
.windows(2)
|
||||
.all(|w| w[0].region_id.table_id() == w[1].region_id.table_id());
|
||||
|
||||
ensure!(
|
||||
is_same_table,
|
||||
UnexpectedSnafu {
|
||||
err_msg: "Region metadatas are not from the same table"
|
||||
}
|
||||
);
|
||||
|
||||
let latest_region_metadata = region_metadatas
|
||||
.iter()
|
||||
.max_by_key(|c| c.schema_version)
|
||||
.context(UnexpectedSnafu {
|
||||
err_msg: "All Region metadatas have the same schema version",
|
||||
})?;
|
||||
let latest_column_metadatas = PartialRegionMetadata::from(latest_region_metadata);
|
||||
|
||||
let mut region_ids = vec![];
|
||||
for region_metadata in region_metadatas {
|
||||
if PartialRegionMetadata::from(region_metadata) != latest_column_metadatas {
|
||||
let is_invariant_preserved = check_column_metadata_invariants(
|
||||
&latest_region_metadata.column_metadatas,
|
||||
®ion_metadata.column_metadatas,
|
||||
);
|
||||
ensure!(
|
||||
is_invariant_preserved,
|
||||
UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Column metadata invariants violated for region {}. Resolved column metadata: {:?}, region column metadata: {:?}",
|
||||
region_metadata.region_id,
|
||||
latest_column_metadatas.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>(),
|
||||
region_metadata.column_metadatas.iter().map(ColumnMetadataDisplay).collect::<Vec<_>>()
|
||||
)
|
||||
}
|
||||
);
|
||||
region_ids.push(region_metadata.region_id);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(weny): verify the new column metadatas are acceptable for regions.
|
||||
Ok((latest_region_metadata.column_metadatas.clone(), region_ids))
|
||||
}
|
||||
|
||||
/// Constructs a vector of [`ColumnMetadata`] from the provided table information.
|
||||
///
|
||||
/// This function maps each [`ColumnSchema`] to its corresponding [`ColumnMetadata`] by
|
||||
/// determining the semantic type (Tag, Timestamp, or Field) and retrieving the column ID
|
||||
/// from the `name_to_ids` mapping.
|
||||
///
|
||||
/// Returns an error if any column name is missing in the mapping.
|
||||
pub(crate) fn build_column_metadata_from_table_info(
|
||||
column_schemas: &[ColumnSchema],
|
||||
primary_key_indexes: &[usize],
|
||||
name_to_ids: &HashMap<String, u32>,
|
||||
) -> Result<Vec<ColumnMetadata>> {
|
||||
let primary_names = primary_key_indexes
|
||||
.iter()
|
||||
.map(|i| column_schemas[*i].name.as_str())
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
column_schemas
|
||||
.iter()
|
||||
.map(|column_schema| {
|
||||
let column_id = *name_to_ids
|
||||
.get(column_schema.name.as_str())
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Column name {} not found in name_to_ids",
|
||||
column_schema.name
|
||||
),
|
||||
})?;
|
||||
|
||||
let semantic_type = if primary_names.contains(&column_schema.name.as_str()) {
|
||||
SemanticType::Tag
|
||||
} else if column_schema.is_time_index() {
|
||||
SemanticType::Timestamp
|
||||
} else {
|
||||
SemanticType::Field
|
||||
};
|
||||
Ok(ColumnMetadata {
|
||||
column_schema: column_schema.clone(),
|
||||
semantic_type,
|
||||
column_id,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()
|
||||
}
|
||||
|
||||
/// Checks whether the schema invariants hold between the existing and new column metadata.
|
||||
///
|
||||
/// Invariants:
|
||||
/// - Primary key (Tag) columns must exist in the new metadata, with identical name and ID.
|
||||
/// - Timestamp column must remain exactly the same in name and ID.
|
||||
pub(crate) fn check_column_metadata_invariants(
|
||||
new_column_metadatas: &[ColumnMetadata],
|
||||
column_metadatas: &[ColumnMetadata],
|
||||
) -> bool {
|
||||
let new_primary_keys = new_column_metadatas
|
||||
.iter()
|
||||
.filter(|c| c.semantic_type == SemanticType::Tag)
|
||||
.map(|c| (c.column_schema.name.as_str(), c.column_id))
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
let old_primary_keys = column_metadatas
|
||||
.iter()
|
||||
.filter(|c| c.semantic_type == SemanticType::Tag)
|
||||
.map(|c| (c.column_schema.name.as_str(), c.column_id));
|
||||
|
||||
for (name, id) in old_primary_keys {
|
||||
if new_primary_keys.get(name) != Some(&id) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
let new_ts_column = new_column_metadatas
|
||||
.iter()
|
||||
.find(|c| c.semantic_type == SemanticType::Timestamp)
|
||||
.map(|c| (c.column_schema.name.as_str(), c.column_id));
|
||||
|
||||
let old_ts_column = column_metadatas
|
||||
.iter()
|
||||
.find(|c| c.semantic_type == SemanticType::Timestamp)
|
||||
.map(|c| (c.column_schema.name.as_str(), c.column_id));
|
||||
|
||||
new_ts_column == old_ts_column
|
||||
}
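
The invariant check above reduces to two comparisons over `(name, column_id)` pairs: every old Tag column must still exist unchanged, and the Timestamp column must be identical. A compact sketch of the same comparison, where `Col` is a hypothetical stand-in for `ColumnMetadata`:

use std::collections::HashMap;

#[derive(Clone)]
struct Col {
    name: &'static str,
    id: u32,
    is_tag: bool,
    is_ts: bool,
}

fn timestamp_of(cols: &[Col]) -> Option<(&'static str, u32)> {
    cols.iter().find(|c| c.is_ts).map(|c| (c.name, c.id))
}

fn invariants_hold(new: &[Col], old: &[Col]) -> bool {
    let new_tags: HashMap<_, _> = new
        .iter()
        .filter(|c| c.is_tag)
        .map(|c| (c.name, c.id))
        .collect();
    // Every old tag must still exist with the same name and id.
    let tags_ok = old
        .iter()
        .filter(|c| c.is_tag)
        .all(|c| new_tags.get(c.name) == Some(&c.id));
    // The timestamp column must be exactly the same.
    tags_ok && timestamp_of(new) == timestamp_of(old)
}

fn main() {
    let old = vec![
        Col { name: "host", id: 0, is_tag: true, is_ts: false },
        Col { name: "ts", id: 1, is_tag: false, is_ts: true },
    ];
    // Adding a plain field preserves the invariants.
    let mut new = old.clone();
    new.push(Col { name: "value", id: 2, is_tag: false, is_ts: false });
    assert!(invariants_hold(&new, &old));
}
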
|
||||
|
||||
/// Builds a [`RawTableMeta`] from the provided [`ColumnMetadata`]s.
|
||||
///
|
||||
/// Returns an error if:
|
||||
/// - Any column is missing in the `name_to_ids`.
|
||||
/// - The column id in table metadata is not the same as the column id in the column metadata.
|
||||
/// - The table index is missing in the column metadata.
|
||||
/// - The primary key or partition key columns are missing in the column metadata.
|
||||
pub(crate) fn build_table_meta_from_column_metadatas(
|
||||
table_id: TableId,
|
||||
table_ref: TableReference,
|
||||
table_meta: &RawTableMeta,
|
||||
name_to_ids: &HashMap<String, u32>,
|
||||
column_metadata: &[ColumnMetadata],
|
||||
) -> Result<RawTableMeta> {
|
||||
let column_in_column_metadata = column_metadata
|
||||
.iter()
|
||||
.map(|c| (c.column_schema.name.as_str(), c))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let primary_key_names = table_meta
|
||||
.primary_key_indices
|
||||
.iter()
|
||||
.map(|i| table_meta.schema.column_schemas[*i].name.as_str())
|
||||
.collect::<HashSet<_>>();
|
||||
let partition_key_names = table_meta
|
||||
.partition_key_indices
|
||||
.iter()
|
||||
.map(|i| table_meta.schema.column_schemas[*i].name.as_str())
|
||||
.collect::<HashSet<_>>();
|
||||
ensure!(
|
||||
column_metadata
|
||||
.iter()
|
||||
.any(|c| c.semantic_type == SemanticType::Timestamp),
|
||||
UnexpectedSnafu {
|
||||
err_msg: format!(
|
||||
"Missing table index in column metadata, table: {}, table_id: {}",
|
||||
table_ref, table_id
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
// Ensures all primary key and partition key columns exist in the column metadata.
|
||||
for column_name in primary_key_names.iter().chain(partition_key_names.iter()) {
|
||||
let column_in_column_metadata =
|
||||
column_in_column_metadata
|
||||
.get(column_name)
|
||||
.with_context(|| MissingColumnInColumnMetadataSnafu {
|
||||
column_name: column_name.to_string(),
|
||||
table_name: table_ref.to_string(),
|
||||
table_id,
|
||||
})?;
|
||||
|
||||
let column_id = *name_to_ids
|
||||
.get(*column_name)
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
err_msg: format!("column id not found in name_to_ids: {}", column_name),
|
||||
})?;
|
||||
ensure!(
|
||||
column_id == column_in_column_metadata.column_id,
|
||||
MismatchColumnIdSnafu {
|
||||
column_name: column_name.to_string(),
|
||||
column_id,
|
||||
table_name: table_ref.to_string(),
|
||||
table_id,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let mut new_raw_table_meta = table_meta.clone();
|
||||
let primary_key_indices = &mut new_raw_table_meta.primary_key_indices;
|
||||
let partition_key_indices = &mut new_raw_table_meta.partition_key_indices;
|
||||
let value_indices = &mut new_raw_table_meta.value_indices;
|
||||
let time_index = &mut new_raw_table_meta.schema.timestamp_index;
|
||||
let columns = &mut new_raw_table_meta.schema.column_schemas;
|
||||
let column_ids = &mut new_raw_table_meta.column_ids;
|
||||
|
||||
column_ids.clear();
|
||||
value_indices.clear();
|
||||
columns.clear();
|
||||
primary_key_indices.clear();
|
||||
partition_key_indices.clear();
|
||||
|
||||
for (idx, col) in column_metadata.iter().enumerate() {
|
||||
if partition_key_names.contains(&col.column_schema.name.as_str()) {
|
||||
partition_key_indices.push(idx);
|
||||
}
|
||||
match col.semantic_type {
|
||||
SemanticType::Tag => {
|
||||
primary_key_indices.push(idx);
|
||||
}
|
||||
SemanticType::Field => {
|
||||
value_indices.push(idx);
|
||||
}
|
||||
SemanticType::Timestamp => {
|
||||
value_indices.push(idx);
|
||||
*time_index = Some(idx);
|
||||
}
|
||||
}
|
||||
|
||||
columns.push(col.column_schema.clone());
|
||||
column_ids.push(col.column_id);
|
||||
}
|
||||
|
||||
if let Some(time_index) = *time_index {
|
||||
new_raw_table_meta.schema.column_schemas[time_index].set_time_index();
|
||||
}
|
||||
|
||||
Ok(new_raw_table_meta)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use table::metadata::{RawTableMeta, TableMetaBuilder};
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use super::*;
|
||||
use crate::ddl::test_util::region_metadata::build_region_metadata;
|
||||
use crate::error::Error;
|
||||
|
||||
fn new_test_schema() -> Schema {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new("col2", ConcreteDataType::int32_datatype(), true),
|
||||
];
|
||||
SchemaBuilder::try_from(column_schemas)
|
||||
.unwrap()
|
||||
.version(123)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn new_test_column_metadatas() -> Vec<ColumnMetadata> {
|
||||
vec![
|
||||
ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 0,
|
||||
},
|
||||
ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 1,
|
||||
},
|
||||
ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col2", ConcreteDataType::int32_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
fn new_test_raw_table_info() -> RawTableMeta {
|
||||
let mut table_meta_builder = TableMetaBuilder::empty();
|
||||
let table_meta = table_meta_builder
|
||||
.schema(Arc::new(new_test_schema()))
|
||||
.primary_key_indices(vec![0])
|
||||
.partition_key_indices(vec![2])
|
||||
.next_column_id(4)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
table_meta.into()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_table_info_from_column_metadatas() {
|
||||
let mut column_metadatas = new_test_column_metadatas();
|
||||
column_metadatas.push(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col3", ConcreteDataType::string_datatype(), true),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 3,
|
||||
});
|
||||
|
||||
let table_id = 1;
|
||||
let table_ref = TableReference::full("test_catalog", "test_schema", "test_table");
|
||||
let table_meta = new_test_raw_table_info();
|
||||
let name_to_ids = HashMap::from([
|
||||
("col1".to_string(), 0),
|
||||
("ts".to_string(), 1),
|
||||
("col2".to_string(), 2),
|
||||
]);
|
||||
|
||||
let new_table_meta = build_table_meta_from_column_metadatas(
|
||||
table_id,
|
||||
table_ref,
|
||||
&table_meta,
|
||||
&name_to_ids,
|
||||
&column_metadatas,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(new_table_meta.primary_key_indices, vec![0, 3]);
|
||||
assert_eq!(new_table_meta.partition_key_indices, vec![2]);
|
||||
assert_eq!(new_table_meta.value_indices, vec![1, 2]);
|
||||
assert_eq!(new_table_meta.schema.timestamp_index, Some(1));
|
||||
assert_eq!(new_table_meta.column_ids, vec![0, 1, 2, 3]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_table_info_from_column_metadatas_with_incorrect_name_to_ids() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let table_id = 1;
|
||||
let table_ref = TableReference::full("test_catalog", "test_schema", "test_table");
|
||||
let table_meta = new_test_raw_table_info();
|
||||
let name_to_ids = HashMap::from([
|
||||
("col1".to_string(), 0),
|
||||
("ts".to_string(), 1),
|
||||
// Change column id of col2 to 3.
|
||||
("col2".to_string(), 3),
|
||||
]);
|
||||
|
||||
let err = build_table_meta_from_column_metadatas(
|
||||
table_id,
|
||||
table_ref,
|
||||
&table_meta,
|
||||
&name_to_ids,
|
||||
&column_metadatas,
|
||||
)
|
||||
.unwrap_err();
|
||||
|
||||
assert_matches!(err, Error::MismatchColumnId { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_table_info_from_column_metadatas_with_missing_time_index() {
|
||||
let mut column_metadatas = new_test_column_metadatas();
|
||||
column_metadatas.retain(|c| c.semantic_type != SemanticType::Timestamp);
|
||||
let table_id = 1;
|
||||
let table_ref = TableReference::full("test_catalog", "test_schema", "test_table");
|
||||
let table_meta = new_test_raw_table_info();
|
||||
let name_to_ids = HashMap::from([
|
||||
("col1".to_string(), 0),
|
||||
("ts".to_string(), 1),
|
||||
("col2".to_string(), 2),
|
||||
]);
|
||||
|
||||
let err = build_table_meta_from_column_metadatas(
|
||||
table_id,
|
||||
table_ref,
|
||||
&table_meta,
|
||||
&name_to_ids,
|
||||
&column_metadatas,
|
||||
)
|
||||
.unwrap_err();
|
||||
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("Missing table index in column metadata"),
|
||||
"err: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_table_info_from_column_metadatas_with_missing_column() {
|
||||
let mut column_metadatas = new_test_column_metadatas();
|
||||
// Remove primary key column.
|
||||
column_metadatas.retain(|c| c.column_id != 0);
|
||||
let table_id = 1;
|
||||
let table_ref = TableReference::full("test_catalog", "test_schema", "test_table");
|
||||
let table_meta = new_test_raw_table_info();
|
||||
let name_to_ids = HashMap::from([
|
||||
("col1".to_string(), 0),
|
||||
("ts".to_string(), 1),
|
||||
("col2".to_string(), 2),
|
||||
]);
|
||||
|
||||
let err = build_table_meta_from_column_metadatas(
|
||||
table_id,
|
||||
table_ref,
|
||||
&table_meta,
|
||||
&name_to_ids,
|
||||
&column_metadatas,
|
||||
)
|
||||
.unwrap_err();
|
||||
assert_matches!(err, Error::MissingColumnInColumnMetadata { .. });
|
||||
|
||||
let mut column_metadatas = new_test_column_metadatas();
|
||||
// Remove partition key column.
|
||||
column_metadatas.retain(|c| c.column_id != 2);
|
||||
|
||||
let err = build_table_meta_from_column_metadatas(
|
||||
table_id,
|
||||
table_ref,
|
||||
&table_meta,
|
||||
&name_to_ids,
|
||||
&column_metadatas,
|
||||
)
|
||||
.unwrap_err();
|
||||
assert_matches!(err, Error::MissingColumnInColumnMetadata { .. });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_column_metadatas_consistent() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let region_metadata1 = build_region_metadata(RegionId::new(1024, 0), &column_metadatas);
|
||||
let region_metadata2 = build_region_metadata(RegionId::new(1024, 1), &column_metadatas);
|
||||
let result =
|
||||
check_column_metadatas_consistent(&[region_metadata1, region_metadata2]).unwrap();
|
||||
assert_eq!(result, column_metadatas);
|
||||
|
||||
let region_metadata1 = build_region_metadata(RegionId::new(1025, 0), &column_metadatas);
|
||||
let region_metadata2 = build_region_metadata(RegionId::new(1024, 1), &column_metadatas);
|
||||
let result = check_column_metadatas_consistent(&[region_metadata1, region_metadata2]);
|
||||
assert!(result.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_column_metadata_invariants() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
new_column_metadatas.push(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col3", ConcreteDataType::int32_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
});
|
||||
assert!(check_column_metadata_invariants(
|
||||
&new_column_metadatas,
|
||||
&column_metadatas
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_column_metadata_invariants_missing_primary_key_column_or_ts_column() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Timestamp);
|
||||
assert!(!check_column_metadata_invariants(
|
||||
&new_column_metadatas,
|
||||
&column_metadatas
|
||||
));
|
||||
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
new_column_metadatas.retain(|c| c.semantic_type != SemanticType::Tag);
|
||||
assert!(!check_column_metadata_invariants(
|
||||
&new_column_metadatas,
|
||||
&column_metadatas
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_check_column_metadata_invariants_mismatch_column_id() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
if let Some(col) = new_column_metadatas
|
||||
.iter_mut()
|
||||
.find(|c| c.semantic_type == SemanticType::Timestamp)
|
||||
{
|
||||
col.column_id = 100;
|
||||
}
|
||||
assert!(!check_column_metadata_invariants(
|
||||
&new_column_metadatas,
|
||||
&column_metadatas
|
||||
));
|
||||
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
if let Some(col) = new_column_metadatas
|
||||
.iter_mut()
|
||||
.find(|c| c.semantic_type == SemanticType::Tag)
|
||||
{
|
||||
col.column_id = 100;
|
||||
}
|
||||
assert!(!check_column_metadata_invariants(
|
||||
&new_column_metadatas,
|
||||
&column_metadatas
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_column_metadatas_with_use_metasrv_strategy() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let region_metadata1 = build_region_metadata(RegionId::new(1024, 0), &column_metadatas);
|
||||
let mut metasrv_column_metadatas = region_metadata1.column_metadatas.clone();
|
||||
metasrv_column_metadatas.push(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col3", ConcreteDataType::int32_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
});
|
||||
let result =
|
||||
resolve_column_metadatas_with_metasrv(&metasrv_column_metadatas, &[region_metadata1])
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result, vec![RegionId::new(1024, 0)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_column_metadatas_with_use_latest_strategy() {
|
||||
let column_metadatas = new_test_column_metadatas();
|
||||
let region_metadata1 = build_region_metadata(RegionId::new(1024, 0), &column_metadatas);
|
||||
let mut new_column_metadatas = column_metadatas.clone();
|
||||
new_column_metadatas.push(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("col3", ConcreteDataType::int32_datatype(), true),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
});
|
||||
|
||||
let mut region_metadata2 =
|
||||
build_region_metadata(RegionId::new(1024, 1), &new_column_metadatas);
|
||||
region_metadata2.schema_version = 2;
|
||||
|
||||
let (resolved_column_metadatas, region_ids) =
|
||||
resolve_column_metadatas_with_latest(&[region_metadata1, region_metadata2]).unwrap();
|
||||
assert_eq!(region_ids, vec![RegionId::new(1024, 0)]);
|
||||
assert_eq!(resolved_column_metadatas, new_column_metadatas);
|
||||
}
|
||||
}
|
||||
@@ -19,17 +19,11 @@ pub use api::v1::meta::Peer;
use crate::error::Error;
use crate::{DatanodeId, FlownodeId};

/// PeerLookupService is a service that can lookup peers.
/// can query peer given a node id
#[async_trait::async_trait]
pub trait PeerLookupService {
    /// Returns the datanode with the given id. It may return inactive peers.
    async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;

    /// Returns the flownode with the given id. It may return inactive peers.
    async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;

    /// Returns all currently active frontend nodes that have reported a heartbeat within the most recent heartbeat interval from the in-memory backend.
    async fn active_frontends(&self) -> Result<Vec<Peer>, Error>;
}

pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;

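// --- Illustrative sketch (not part of the diff): a minimal caller of the trait above.
// It assumes the surrounding module's imports (`Peer`, `Error`, `DatanodeId`), and the
// helper name is hypothetical.
async fn resolve_datanode_addr(
    lookup: &PeerLookupServiceRef,
    id: DatanodeId,
) -> Result<Option<String>, Error> {
    // `datanode` may return an inactive peer; callers decide whether that is acceptable.
    Ok(lookup.datanode(id).await?.map(|peer| peer.addr))
}
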
@@ -1440,6 +1440,6 @@ mod tests {
            create_table_task.table_info.meta.primary_key_indices,
            vec![2]
        );
        assert_eq!(create_table_task.table_info.meta.value_indices, vec![0, 1]);
        assert_eq!(create_table_task.table_info.meta.value_indices, vec![1]);
    }
}

@@ -355,13 +355,8 @@ impl RegionRoutes {
pub struct Region {
    pub id: RegionId,
    pub name: String,
    pub partition: Option<Partition>,
    pub attrs: BTreeMap<String, String>,

    /// **Deprecated:** Use `partition_expr` instead.
    pub partition: Option<LegacyPartition>,
    /// The partition expression of the region.
    #[serde(default)]
    pub partition_expr: String,
}

impl Region {
@@ -372,47 +367,14 @@ impl Region {
            ..Default::default()
        }
    }

    /// Gets the partition expression of the region in compatible mode.
    pub fn partition_expr(&self) -> String {
        if !self.partition_expr.is_empty() {
            self.partition_expr.clone()
        } else if let Some(LegacyPartition { value_list, .. }) = &self.partition {
            if !value_list.is_empty() {
                String::from_utf8_lossy(&value_list[0]).to_string()
            } else {
                "".to_string()
            }
        } else {
            "".to_string()
        }
    }
}

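// --- Illustrative sketch (not part of the diff): the compatible-mode fallback in
// `partition_expr()` prefers the new string field and only then falls back to the first
// legacy `value_list` entry. Assumes `Region: Default`.
fn partition_expr_fallback_example() {
    let with_expr = Region {
        partition_expr: "a > 10".to_string(),
        partition: Some(LegacyPartition {
            column_list: vec![b"a".to_vec()],
            value_list: vec![b"legacy".to_vec()],
        }),
        ..Default::default()
    };
    assert_eq!(with_expr.partition_expr(), "a > 10");

    let legacy_only = Region {
        partition: Some(LegacyPartition {
            column_list: vec![b"a".to_vec()],
            value_list: vec![b"legacy".to_vec()],
        }),
        ..Default::default()
    };
    assert_eq!(legacy_only.partition_expr(), "legacy");
}
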
/// Gets the partition expression of the `PbRegion` in compatible mode.
#[allow(deprecated)]
pub fn pb_region_partition_expr(r: &PbRegion) -> String {
    if let Some(partition) = &r.partition {
        if !partition.expression.is_empty() {
            partition.expression.clone()
        } else if !partition.value_list.is_empty() {
            String::from_utf8_lossy(&partition.value_list[0]).to_string()
        } else {
            "".to_string()
        }
    } else {
        "".to_string()
    }
}

impl From<PbRegion> for Region {
    fn from(r: PbRegion) -> Self {
        let partition_expr = pb_region_partition_expr(&r);
        Self {
            id: r.id.into(),
            name: r.name,
            partition: None,
            partition_expr,
            partition: r.partition.map(Into::into),
            attrs: r.attrs.into_iter().collect::<BTreeMap<_, _>>(),
        }
    }
@@ -420,21 +382,17 @@ impl From<PbRegion> for Region {

impl From<Region> for PbRegion {
    fn from(region: Region) -> Self {
        let partition_expr = region.partition_expr();
        Self {
            id: region.id.into(),
            name: region.name,
            partition: Some(PbPartition {
                expression: partition_expr,
                ..Default::default()
            }),
            partition: region.partition.map(Into::into),
            attrs: region.attrs.into_iter().collect::<HashMap<_, _>>(),
        }
    }
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct LegacyPartition {
pub struct Partition {
    #[serde(serialize_with = "as_utf8_vec", deserialize_with = "from_utf8_vec")]
    pub column_list: Vec<Vec<u8>>,
    #[serde(serialize_with = "as_utf8_vec", deserialize_with = "from_utf8_vec")]
@@ -482,6 +440,24 @@ where
    Ok(values)
}

impl From<Partition> for PbPartition {
    fn from(p: Partition) -> Self {
        Self {
            column_list: p.column_list,
            value_list: p.value_list,
        }
    }
}

impl From<PbPartition> for Partition {
    fn from(p: PbPartition) -> Self {
        Self {
            column_list: p.column_list,
            value_list: p.value_list,
        }
    }
}

#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -493,9 +469,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -516,9 +491,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -539,9 +513,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -556,9 +529,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -573,9 +545,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -590,9 +561,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: 2.into(),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -604,6 +574,19 @@ mod tests {
|
||||
assert_eq!(decoded, region_route);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_de_serialize_partition() {
|
||||
let p = Partition {
|
||||
column_list: vec![b"a".to_vec(), b"b".to_vec()],
|
||||
value_list: vec![b"hi".to_vec(), b",".to_vec()],
|
||||
};
|
||||
|
||||
let output = serde_json::to_string(&p).unwrap();
|
||||
let got: Partition = serde_json::from_str(&output).unwrap();
|
||||
|
||||
assert_eq!(got, p);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_region_distribution() {
|
||||
let region_routes = vec![
|
||||
@@ -611,9 +594,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: RegionId::new(1, 1),
|
||||
name: "r1".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(1, "a1")),
|
||||
follower_peers: vec![Peer::new(2, "a2"), Peer::new(3, "a3")],
|
||||
@@ -624,9 +606,8 @@ mod tests {
|
||||
region: Region {
|
||||
id: RegionId::new(1, 2),
|
||||
name: "r2".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
partition: None,
|
||||
partition_expr: "".to_string(),
|
||||
attrs: BTreeMap::new(),
|
||||
},
|
||||
leader_peer: Some(Peer::new(2, "a2")),
|
||||
follower_peers: vec![Peer::new(1, "a1"), Peer::new(3, "a3")],
|
||||
@@ -641,74 +622,4 @@ mod tests {
|
||||
assert_eq!(distribution[&2], RegionRoleSet::new(vec![2], vec![1]));
|
||||
assert_eq!(distribution[&3], RegionRoleSet::new(vec![], vec![1, 2]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_de_serialize_partition() {
|
||||
let p = LegacyPartition {
|
||||
column_list: vec![b"a".to_vec(), b"b".to_vec()],
|
||||
value_list: vec![b"hi".to_vec(), b",".to_vec()],
|
||||
};
|
||||
|
||||
let output = serde_json::to_string(&p).unwrap();
|
||||
let got: LegacyPartition = serde_json::from_str(&output).unwrap();
|
||||
|
||||
assert_eq!(got, p);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(deprecated)]
|
||||
fn test_region_partition_expr() {
|
||||
let r = PbRegion {
|
||||
id: 1,
|
||||
name: "r1".to_string(),
|
||||
partition: None,
|
||||
attrs: Default::default(),
|
||||
};
|
||||
assert_eq!(pb_region_partition_expr(&r), "");
|
||||
|
||||
let r2: Region = r.into();
|
||||
assert_eq!(r2.partition_expr(), "");
|
||||
assert!(r2.partition.is_none());
|
||||
|
||||
let r3: PbRegion = r2.into();
|
||||
assert_eq!(r3.partition.as_ref().unwrap().expression, "");
|
||||
|
||||
let r = PbRegion {
|
||||
id: 1,
|
||||
name: "r1".to_string(),
|
||||
partition: Some(PbPartition {
|
||||
column_list: vec![b"a".to_vec()],
|
||||
value_list: vec![b"{}".to_vec()],
|
||||
expression: Default::default(),
|
||||
}),
|
||||
attrs: Default::default(),
|
||||
};
|
||||
assert_eq!(pb_region_partition_expr(&r), "{}");
|
||||
|
||||
let r2: Region = r.into();
|
||||
assert_eq!(r2.partition_expr(), "{}");
|
||||
assert!(r2.partition.is_none());
|
||||
|
||||
let r3: PbRegion = r2.into();
|
||||
assert_eq!(r3.partition.as_ref().unwrap().expression, "{}");
|
||||
|
||||
let r = PbRegion {
|
||||
id: 1,
|
||||
name: "r1".to_string(),
|
||||
partition: Some(PbPartition {
|
||||
column_list: vec![b"a".to_vec()],
|
||||
value_list: vec![b"{}".to_vec()],
|
||||
expression: "a>b".to_string(),
|
||||
}),
|
||||
attrs: Default::default(),
|
||||
};
|
||||
assert_eq!(pb_region_partition_expr(&r), "a>b");
|
||||
|
||||
let r2: Region = r.into();
|
||||
assert_eq!(r2.partition_expr(), "a>b");
|
||||
assert!(r2.partition.is_none());
|
||||
|
||||
let r3: PbRegion = r2.into();
|
||||
assert_eq!(r3.partition.as_ref().unwrap().expression, "a>b");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -213,10 +213,6 @@ impl PeerLookupService for NoopPeerLookupService {
    async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> {
        Ok(Some(Peer::empty(id)))
    }

    async fn active_frontends(&self) -> Result<Vec<Peer>> {
        Ok(vec![])
    }
}

/// Create a kafka topic pool for testing.

@@ -222,7 +222,6 @@ pub struct RecordBatchStreamAdapter {
enum Metrics {
    Unavailable,
    Unresolved(Arc<dyn ExecutionPlan>),
    PartialResolved(Arc<dyn ExecutionPlan>, RecordBatchMetrics),
    Resolved(RecordBatchMetrics),
}

@@ -276,9 +275,7 @@ impl RecordBatchStream for RecordBatchStreamAdapter {

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        match &self.metrics_2 {
            Metrics::Resolved(metrics) | Metrics::PartialResolved(_, metrics) => {
                Some(metrics.clone())
            }
            Metrics::Resolved(metrics) => Some(metrics.clone()),
            Metrics::Unavailable | Metrics::Unresolved(_) => None,
        }
    }
@@ -302,25 +299,13 @@ impl Stream for RecordBatchStreamAdapter {
            Poll::Pending => Poll::Pending,
            Poll::Ready(Some(df_record_batch)) => {
                let df_record_batch = df_record_batch?;
                if let Metrics::Unresolved(df_plan) | Metrics::PartialResolved(df_plan, _) =
                    &self.metrics_2
                {
                    let mut metric_collector = MetricCollector::new(self.explain_verbose);
                    accept(df_plan.as_ref(), &mut metric_collector).unwrap();
                    self.metrics_2 = Metrics::PartialResolved(
                        df_plan.clone(),
                        metric_collector.record_batch_metrics,
                    );
                }
                Poll::Ready(Some(RecordBatch::try_from_df_record_batch(
                    self.schema(),
                    df_record_batch,
                )))
            }
            Poll::Ready(None) => {
                if let Metrics::Unresolved(df_plan) | Metrics::PartialResolved(df_plan, _) =
                    &self.metrics_2
                {
                if let Metrics::Unresolved(df_plan) = &self.metrics_2 {
                    let mut metric_collector = MetricCollector::new(self.explain_verbose);
                    accept(df_plan.as_ref(), &mut metric_collector).unwrap();
                    self.metrics_2 = Metrics::Resolved(metric_collector.record_batch_metrics);

@@ -56,18 +56,8 @@ macro_rules! parse_number_to_value {
                },
            )+
            ConcreteDataType::Timestamp(t) => {
                let n = parse_sql_number::<i64>($n)?;
                let timestamp = Timestamp::new(n, t.unit());

                // Check if the value is within the valid range for the target unit
                if Timestamp::is_overflow(n, t.unit()) {
                    return TimestampOverflowSnafu {
                        timestamp,
                        target_unit: t.unit(),
                    }.fail();
                }

                Ok(Value::Timestamp(timestamp))
                let n = parse_sql_number::<i64>($n)?;
                Ok(Value::Timestamp(Timestamp::new(n, t.unit())))
            },
            // TODO(QuenKar): This could need to be optimized
            // if this from_str function is slow,
@@ -372,7 +362,6 @@ pub(crate) fn parse_hex_string(s: &str) -> Result<Value> {
|
||||
mod test {
|
||||
use common_base::bytes::Bytes;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::OrderedFloat;
|
||||
|
||||
use super::*;
|
||||
@@ -1092,89 +1081,4 @@ mod test {
|
||||
);
|
||||
assert!(v.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_number_to_value_timestamp_strict_typing() {
|
||||
// Test that values are interpreted according to the target column type
|
||||
let timestamp_type = TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
|
||||
let data_type = ConcreteDataType::Timestamp(timestamp_type);
|
||||
|
||||
// Valid millisecond timestamp
|
||||
let millisecond_str = "1747814093865";
|
||||
let result = sql_number_to_value(&data_type, millisecond_str).unwrap();
|
||||
if let Value::Timestamp(ts) = result {
|
||||
assert_eq!(ts.unit(), TimeUnit::Millisecond);
|
||||
assert_eq!(ts.value(), 1747814093865);
|
||||
} else {
|
||||
panic!("Expected timestamp value");
|
||||
}
|
||||
|
||||
// Large value that would overflow when treated as milliseconds should be rejected
|
||||
let nanosecond_str = "1747814093865000000"; // This is too large for millisecond precision
|
||||
let result = sql_number_to_value(&data_type, nanosecond_str);
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Should reject overly large timestamp values"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sql_number_to_value_timestamp_different_units() {
|
||||
// Test second precision
|
||||
let second_type = TimestampType::Second(datatypes::types::TimestampSecondType);
|
||||
let second_data_type = ConcreteDataType::Timestamp(second_type);
|
||||
|
||||
let second_str = "1747814093";
|
||||
let result = sql_number_to_value(&second_data_type, second_str).unwrap();
|
||||
if let Value::Timestamp(ts) = result {
|
||||
assert_eq!(ts.unit(), TimeUnit::Second);
|
||||
assert_eq!(ts.value(), 1747814093);
|
||||
} else {
|
||||
panic!("Expected timestamp value");
|
||||
}
|
||||
|
||||
// Test nanosecond precision
|
||||
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
|
||||
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);
|
||||
|
||||
let nanosecond_str = "1747814093865000000";
|
||||
let result = sql_number_to_value(&nanosecond_data_type, nanosecond_str).unwrap();
|
||||
if let Value::Timestamp(ts) = result {
|
||||
assert_eq!(ts.unit(), TimeUnit::Nanosecond);
|
||||
assert_eq!(ts.value(), 1747814093865000000);
|
||||
} else {
|
||||
panic!("Expected timestamp value");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_range_validation() {
|
||||
// Test that our range checking works correctly
|
||||
let nanosecond_value = 1747814093865000000i64; // This should be too large for millisecond
|
||||
|
||||
// This should work for nanosecond precision
|
||||
let nanosecond_type = TimestampType::Nanosecond(datatypes::types::TimestampNanosecondType);
|
||||
let nanosecond_data_type = ConcreteDataType::Timestamp(nanosecond_type);
|
||||
let result = sql_number_to_value(&nanosecond_data_type, "1747814093865000000");
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Nanosecond value should be valid for nanosecond column"
|
||||
);
|
||||
|
||||
// This should fail for millisecond precision (value too large)
|
||||
let millisecond_type =
|
||||
TimestampType::Millisecond(datatypes::types::TimestampMillisecondType);
|
||||
let millisecond_data_type = ConcreteDataType::Timestamp(millisecond_type);
|
||||
let result = sql_number_to_value(&millisecond_data_type, "1747814093865000000");
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Nanosecond value should be rejected for millisecond column"
|
||||
);
|
||||
|
||||
// Verify the ranges work as expected
|
||||
assert!(
|
||||
nanosecond_value > Timestamp::MAX_MILLISECOND.value(),
|
||||
"Test value should exceed millisecond range"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,10 +19,8 @@ use common_recordbatch::DfRecordBatch;
/// Encodes record batch to a Schema message and a RecordBatch message.
pub fn encode_to_flight_data(rb: DfRecordBatch) -> (FlightData, FlightData) {
    let mut encoder = FlightEncoder::default();
    let schema = encoder.encode_schema(rb.schema_ref().as_ref());
    let [data] = encoder
        .encode(FlightMessage::RecordBatch(rb))
        .try_into()
        .unwrap();
    (schema, data)
    (
        encoder.encode(FlightMessage::Schema(rb.schema())),
        encoder.encode(FlightMessage::RecordBatch(rb)),
    )
}

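// --- Illustrative usage sketch (not part of the diff): the helper now derives the
// Schema message from the batch itself, so callers only pass the record batch. The
// batch construction below uses plain arrow-rs APIs and assumes `DfRecordBatch` is
// the same arrow `RecordBatch` re-export.
fn encode_example() {
    use std::sync::Arc;

    use arrow::array::Int64Array;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;

    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, false)]));
    let batch =
        RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![1, 2, 3]))]).unwrap();
    // Both frames are `FlightData` messages ready to be written to an Arrow Flight stream.
    let (schema_msg, data_msg) = encode_to_flight_data(batch);
    let _ = (schema_msg, data_msg);
}
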
@@ -498,17 +498,6 @@ impl Timestamp {

    pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
    pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);

    /// Checks if a value would overflow for the given time unit.
    pub fn is_overflow(value: i64, unit: TimeUnit) -> bool {
        let (min_val, max_val) = match unit {
            TimeUnit::Second => (Self::MIN_SECOND.value(), Self::MAX_SECOND.value()),
            TimeUnit::Millisecond => (Self::MIN_MILLISECOND.value(), Self::MAX_MILLISECOND.value()),
            TimeUnit::Microsecond => (Self::MIN_MICROSECOND.value(), Self::MAX_MICROSECOND.value()),
            TimeUnit::Nanosecond => (Self::MIN_NANOSECOND.value(), Self::MAX_NANOSECOND.value()),
        };
        value < min_val || value > max_val
    }
}

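// --- Illustrative check (not part of the diff), mirroring how the SQL layer uses
// `is_overflow`: a plausible nanosecond epoch is rejected when the target unit is
// milliseconds, while in-range values pass.
fn is_overflow_example() {
    let nanos = 1_747_814_093_865_000_000_i64;
    assert!(Timestamp::is_overflow(nanos, TimeUnit::Millisecond));
    assert!(!Timestamp::is_overflow(nanos, TimeUnit::Nanosecond));
    assert!(!Timestamp::is_overflow(1_747_814_093_865_i64, TimeUnit::Millisecond));
}
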
/// Converts the naive datetime (which has no specific timezone) to a

@@ -66,7 +66,6 @@ table.workspace = true
|
||||
tokio.workspace = true
|
||||
toml.workspace = true
|
||||
tonic.workspace = true
|
||||
tracing.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
cache.workspace = true
|
||||
|
||||
@@ -424,15 +424,7 @@ impl CountdownTask {
                },
                Some(CountdownCommand::Reset((role, deadline, extension_info))) => {
                    if let Err(err) = self.region_server.set_region_role(self.region_id, role) {
                        if err.status_code() == StatusCode::RegionNotFound {
                            // Table metadata in metasrv is deleted after its regions are dropped.
                            // The datanode may still receive lease renewal responses that depend on the metadata
                            // during the short period before it is removed.
                            warn!(err; "Failed to set region role to {role} for region {region_id}");
                        }else{
                            error!(err; "Failed to set region role to {role} for region {region_id}");
                        }

                        error!(err; "Failed to set region role to {role} for region {region_id}");
                    }
                    if let Some(ext_handler) = self.handler_ext.as_ref() {
                        ext_handler.reset_deadline(

@@ -48,9 +48,9 @@ use query::QueryEngineFactory;
|
||||
use servers::export_metrics::ExportMetricsTask;
|
||||
use servers::server::ServerHandlers;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::path_utils::{table_dir, WAL_DIR};
|
||||
use store_api::path_utils::{region_dir, WAL_DIR};
|
||||
use store_api::region_engine::{RegionEngineRef, RegionRole};
|
||||
use store_api::region_request::{PathType, RegionOpenRequest};
|
||||
use store_api::region_request::RegionOpenRequest;
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::fs;
|
||||
use tokio::sync::Notify;
|
||||
@@ -632,13 +632,12 @@ async fn open_all_regions(
|
||||
|
||||
let mut region_requests = Vec::with_capacity(regions.len());
|
||||
for (region_id, engine, store_path, options) in regions {
|
||||
let table_dir = table_dir(&store_path, region_id.table_id());
|
||||
let region_dir = region_dir(&store_path, region_id);
|
||||
region_requests.push((
|
||||
region_id,
|
||||
RegionOpenRequest {
|
||||
engine,
|
||||
table_dir,
|
||||
path_type: PathType::Bare,
|
||||
region_dir,
|
||||
options,
|
||||
skip_wal_replay: false,
|
||||
},
|
||||
@@ -677,13 +676,12 @@ async fn open_all_regions(
|
||||
);
|
||||
let mut region_requests = Vec::with_capacity(follower_regions.len());
|
||||
for (region_id, engine, store_path, options) in follower_regions {
|
||||
let table_dir = table_dir(&store_path, region_id.table_id());
|
||||
let region_dir = region_dir(&store_path, region_id);
|
||||
region_requests.push((
|
||||
region_id,
|
||||
RegionOpenRequest {
|
||||
engine,
|
||||
table_dir,
|
||||
path_type: PathType::Bare,
|
||||
region_dir,
|
||||
options,
|
||||
skip_wal_replay: true,
|
||||
},
|
||||
|
||||
@@ -168,7 +168,7 @@ mod tests {
|
||||
use mito2::config::MitoConfig;
|
||||
use mito2::engine::MITO_ENGINE_NAME;
|
||||
use mito2::test_util::{CreateRequestBuilder, TestEnv};
|
||||
use store_api::path_utils::table_dir;
|
||||
use store_api::path_utils::region_dir;
|
||||
use store_api::region_engine::RegionRole;
|
||||
use store_api::region_request::{RegionCloseRequest, RegionRequest};
|
||||
use store_api::storage::RegionId;
|
||||
@@ -334,7 +334,7 @@ mod tests {
|
||||
let builder = CreateRequestBuilder::new();
|
||||
let mut create_req = builder.build();
|
||||
let storage_path = "test";
|
||||
create_req.table_dir = table_dir(storage_path, region_id.table_id());
|
||||
create_req.region_dir = region_dir(storage_path, region_id);
|
||||
|
||||
region_server
|
||||
.handle_request(region_id, RegionRequest::Create(create_req))
|
||||
@@ -414,7 +414,7 @@ mod tests {
|
||||
let builder = CreateRequestBuilder::new();
|
||||
let mut create_req = builder.build();
|
||||
let storage_path = "test";
|
||||
create_req.table_dir = table_dir(storage_path, region_id.table_id());
|
||||
create_req.region_dir = region_dir(storage_path, region_id);
|
||||
|
||||
region_server
|
||||
.handle_request(region_id, RegionRequest::Create(create_req))
|
||||
|
||||
@@ -15,8 +15,8 @@
|
||||
use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
|
||||
use common_meta::wal_options_allocator::prepare_wal_options;
|
||||
use futures_util::future::BoxFuture;
|
||||
use store_api::path_utils::table_dir;
|
||||
use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest};
|
||||
use store_api::path_utils::region_dir;
|
||||
use store_api::region_request::{RegionOpenRequest, RegionRequest};
|
||||
|
||||
use crate::heartbeat::handler::HandlerContext;
|
||||
|
||||
@@ -36,8 +36,7 @@ impl HandlerContext {
|
||||
prepare_wal_options(&mut region_options, region_id, ®ion_wal_options);
|
||||
let request = RegionRequest::Open(RegionOpenRequest {
|
||||
engine: region_ident.engine,
|
||||
table_dir: table_dir(®ion_storage_path, region_id.table_id()),
|
||||
path_type: PathType::Bare,
|
||||
region_dir: region_dir(®ion_storage_path, region_id),
|
||||
options: region_options,
|
||||
skip_wal_replay,
|
||||
});
|
||||
|
||||
@@ -27,14 +27,14 @@ lazy_static! {
    pub static ref HANDLE_REGION_REQUEST_ELAPSED: HistogramVec = register_histogram_vec!(
        "greptime_datanode_handle_region_request_elapsed",
        "datanode handle region request elapsed",
        &[REGION_REQUEST_TYPE]
        &[REGION_ID, REGION_REQUEST_TYPE]
    )
    .unwrap();
    /// The number of rows in region request received by region server, labeled with request type.
    pub static ref REGION_CHANGED_ROW_COUNT: IntCounterVec = register_int_counter_vec!(
        "greptime_datanode_region_changed_row_count",
        "datanode region changed row count",
        &[REGION_REQUEST_TYPE]
        &[REGION_ID, REGION_REQUEST_TYPE]
    )
    .unwrap();
    /// The elapsed time since the last received heartbeat.

@@ -54,7 +54,7 @@ use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as S
|
||||
use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
|
||||
use servers::grpc::region_server::RegionServerHandler;
|
||||
use servers::grpc::FlightCompression;
|
||||
use session::context::{QueryContext, QueryContextBuilder, QueryContextRef};
|
||||
use session::context::{QueryContextBuilder, QueryContextRef};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metric_engine_consts::{
|
||||
FILE_ENGINE_NAME, LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME,
|
||||
@@ -197,7 +197,6 @@ impl RegionServer {
|
||||
pub async fn handle_remote_read(
|
||||
&self,
|
||||
request: api::v1::region::QueryRequest,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let _permit = if let Some(p) = &self.inner.parallelism {
|
||||
Some(p.acquire().await?)
|
||||
@@ -205,6 +204,12 @@ impl RegionServer {
|
||||
None
|
||||
};
|
||||
|
||||
let query_ctx: QueryContextRef = request
|
||||
.header
|
||||
.as_ref()
|
||||
.map(|h| Arc::new(h.into()))
|
||||
.unwrap_or_else(|| Arc::new(QueryContextBuilder::default().build()));
|
||||
|
||||
let region_id = RegionId::from_u64(request.region_id);
|
||||
let provider = self.table_provider(region_id, Some(&query_ctx)).await?;
|
||||
let catalog_list = Arc::new(DummyCatalogList::with_table_provider(provider));
|
||||
@@ -212,7 +217,7 @@ impl RegionServer {
|
||||
let decoder = self
|
||||
.inner
|
||||
.query_engine
|
||||
.engine_context(query_ctx.clone())
|
||||
.engine_context(query_ctx)
|
||||
.new_plan_decoder()
|
||||
.context(NewPlanDecoderSnafu)?;
|
||||
|
||||
@@ -222,14 +227,11 @@ impl RegionServer {
|
||||
.context(DecodeLogicalPlanSnafu)?;
|
||||
|
||||
self.inner
|
||||
.handle_read(
|
||||
QueryRequest {
|
||||
header: request.header,
|
||||
region_id,
|
||||
plan,
|
||||
},
|
||||
query_ctx,
|
||||
)
|
||||
.handle_read(QueryRequest {
|
||||
header: request.header,
|
||||
region_id,
|
||||
plan,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -244,7 +246,6 @@ impl RegionServer {
|
||||
let ctx: Option<session::context::QueryContext> = request.header.as_ref().map(|h| h.into());
|
||||
|
||||
let provider = self.table_provider(request.region_id, ctx.as_ref()).await?;
|
||||
let query_ctx = Arc::new(ctx.unwrap_or_else(|| QueryContextBuilder::default().build()));
|
||||
|
||||
struct RegionDataSourceInjector {
|
||||
source: Arc<dyn TableSource>,
|
||||
@@ -273,7 +274,7 @@ impl RegionServer {
|
||||
.data;
|
||||
|
||||
self.inner
|
||||
.handle_read(QueryRequest { plan, ..request }, query_ctx)
|
||||
.handle_read(QueryRequest { plan, ..request })
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -587,14 +588,9 @@ impl FlightCraft for RegionServer {
|
||||
.as_ref()
|
||||
.map(|h| TracingContext::from_w3c(&h.tracing_context))
|
||||
.unwrap_or_default();
|
||||
let query_ctx = request
|
||||
.header
|
||||
.as_ref()
|
||||
.map(|h| Arc::new(QueryContext::from(h)))
|
||||
.unwrap_or(QueryContext::arc());
|
||||
|
||||
let result = self
|
||||
.handle_remote_read(request, query_ctx.clone())
|
||||
.handle_remote_read(request)
|
||||
.trace(tracing_context.attach(info_span!("RegionServer::handle_read")))
|
||||
.await?;
|
||||
|
||||
@@ -602,7 +598,6 @@ impl FlightCraft for RegionServer {
|
||||
result,
|
||||
tracing_context,
|
||||
self.flight_compression,
|
||||
query_ctx,
|
||||
));
|
||||
Ok(Response::new(stream))
|
||||
}
|
||||
@@ -973,8 +968,9 @@ impl RegionServerInner {
|
||||
request: RegionRequest,
|
||||
) -> Result<RegionResponse> {
|
||||
let request_type = request.request_type();
|
||||
let region_id_str = region_id.to_string();
|
||||
let _timer = crate::metrics::HANDLE_REGION_REQUEST_ELAPSED
|
||||
.with_label_values(&[request_type])
|
||||
.with_label_values(&[®ion_id_str, request_type])
|
||||
.start_timer();
|
||||
|
||||
let region_change = match &request {
|
||||
@@ -1014,7 +1010,7 @@ impl RegionServerInner {
|
||||
// Update metrics
|
||||
if matches!(region_change, RegionChange::Ingest) {
|
||||
crate::metrics::REGION_CHANGED_ROW_COUNT
|
||||
.with_label_values(&[request_type])
|
||||
.with_label_values(&[®ion_id_str, request_type])
|
||||
.inc_by(result.affected_rows as u64);
|
||||
}
|
||||
// Sets corresponding region status to ready.
|
||||
@@ -1182,13 +1178,16 @@ impl RegionServerInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn handle_read(
|
||||
&self,
|
||||
request: QueryRequest,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
pub async fn handle_read(&self, request: QueryRequest) -> Result<SendableRecordBatchStream> {
|
||||
// TODO(ruihang): add metrics and set trace id
|
||||
|
||||
// Build query context from gRPC header
|
||||
let query_ctx: QueryContextRef = request
|
||||
.header
|
||||
.as_ref()
|
||||
.map(|h| Arc::new(h.into()))
|
||||
.unwrap_or_else(|| QueryContextBuilder::default().build().into());
|
||||
|
||||
let result = self
|
||||
.query_engine
|
||||
.execute(request.plan, query_ctx)
|
||||
@@ -1303,9 +1302,7 @@ mod tests {
|
||||
use mito2::test_util::CreateRequestBuilder;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{
|
||||
PathType, RegionDropRequest, RegionOpenRequest, RegionTruncateRequest,
|
||||
};
|
||||
use store_api::region_request::{RegionDropRequest, RegionOpenRequest, RegionTruncateRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use super::*;
|
||||
@@ -1350,8 +1347,7 @@ mod tests {
|
||||
region_id,
|
||||
RegionRequest::Open(RegionOpenRequest {
|
||||
engine: engine_name.to_string(),
|
||||
table_dir: String::new(),
|
||||
path_type: PathType::Bare,
|
||||
region_dir: String::new(),
|
||||
options: Default::default(),
|
||||
skip_wal_replay: false,
|
||||
}),
|
||||
|
||||
@@ -16,14 +16,15 @@

use std::path::Path;
use std::sync::Arc;
use std::time::Duration;

use common_telemetry::info;
use common_telemetry::{info, warn};
use object_store::factory::new_raw_object_store;
use object_store::layers::{LruCacheLayer, RetryLayer};
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
use object_store::services::Fs;
use object_store::util::{clean_temp_dir, join_dir, with_instrument_layers, PrintDetailedError};
use object_store::util::{clean_temp_dir, join_dir, with_instrument_layers};
use object_store::{
    Access, ObjectStore, ObjectStoreBuilder, ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR,
    Access, Error, ObjectStore, ObjectStoreBuilder, ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR,
};
use snafu::prelude::*;

@@ -175,3 +176,12 @@ async fn build_cache_layer(
        Ok(None)
    }
}

struct PrintDetailedError;

// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
impl RetryInterceptor for PrintDetailedError {
    fn intercept(&self, err: &Error, dur: Duration) {
        warn!("Retry after {}s, error: {:#?}", dur.as_secs_f64(), err);
    }
}

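// --- Illustrative wiring sketch (not part of the diff): a retry interceptor like
// `PrintDetailedError` is typically attached through the retry layer's notify hook.
// The builder calls below follow OpenDAL's `Operator`/`RetryLayer` API and are an
// assumption; they do not appear in this change.
fn build_store_with_retry_logging() -> ObjectStore {
    ObjectStore::new(Fs::default())
        .unwrap()
        .layer(RetryLayer::new().with_notify(PrintDetailedError))
        .finish()
}
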
@@ -28,7 +28,7 @@ greptime-proto.workspace = true
|
||||
jsonb.workspace = true
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
ordered-float.workspace = true
|
||||
ordered-float = { version = "3.0", features = ["serde"] }
|
||||
paste.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -18,7 +18,6 @@ use std::sync::Arc;
|
||||
use common_datasource::file_format::Format;
|
||||
use object_store::ObjectStore;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::path_utils::region_name;
|
||||
use store_api::region_request::{RegionCreateRequest, RegionOpenRequest};
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -51,10 +50,7 @@ impl FileRegion {
|
||||
options: request.options,
|
||||
};
|
||||
|
||||
let region_dir = object_store::util::join_dir(
|
||||
&request.table_dir,
|
||||
®ion_name(region_id.table_id(), region_id.region_sequence()),
|
||||
);
|
||||
let region_dir = request.region_dir;
|
||||
let url = manifest.url()?;
|
||||
let file_options = manifest.file_options()?;
|
||||
let format = manifest.format()?;
|
||||
@@ -78,14 +74,11 @@ impl FileRegion {
|
||||
request: RegionOpenRequest,
|
||||
object_store: &ObjectStore,
|
||||
) -> Result<FileRegionRef> {
|
||||
let region_dir = object_store::util::join_dir(
|
||||
&request.table_dir,
|
||||
®ion_name(region_id.table_id(), region_id.region_sequence()),
|
||||
);
|
||||
let manifest = FileRegionManifest::load(region_id, ®ion_dir, object_store).await?;
|
||||
let manifest =
|
||||
FileRegionManifest::load(region_id, &request.region_dir, object_store).await?;
|
||||
|
||||
Ok(Arc::new(Self {
|
||||
region_dir,
|
||||
region_dir: request.region_dir,
|
||||
url: manifest.url()?,
|
||||
file_options: manifest.file_options()?,
|
||||
format: manifest.format()?,
|
||||
@@ -107,8 +100,6 @@ impl FileRegion {
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use store_api::region_request::PathType;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::test_util::{new_test_column_metadata, new_test_object_store, new_test_options};
|
||||
@@ -122,8 +113,7 @@ mod tests {
|
||||
column_metadatas: new_test_column_metadata(),
|
||||
primary_key: vec![1],
|
||||
options: new_test_options(),
|
||||
table_dir: "create_region_dir/".to_string(),
|
||||
path_type: PathType::Bare,
|
||||
region_dir: "create_region_dir/".to_string(),
|
||||
};
|
||||
let region_id = RegionId::new(1, 0);
|
||||
|
||||
@@ -131,7 +121,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(region.region_dir, "create_region_dir/1_0000000000/");
|
||||
assert_eq!(region.region_dir, "create_region_dir/");
|
||||
assert_eq!(region.url, "test");
|
||||
assert_eq!(region.file_options.files, vec!["1.csv"]);
|
||||
assert_matches!(region.format, Format::Csv { .. });
|
||||
@@ -140,7 +130,7 @@ mod tests {
|
||||
assert_eq!(region.metadata.primary_key, vec![1]);
|
||||
|
||||
assert!(object_store
|
||||
.exists("create_region_dir/1_0000000000/manifest/_file_manifest")
|
||||
.exists("create_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
@@ -161,8 +151,7 @@ mod tests {
|
||||
column_metadatas: new_test_column_metadata(),
|
||||
primary_key: vec![1],
|
||||
options: new_test_options(),
|
||||
table_dir: region_dir.clone(),
|
||||
path_type: PathType::Bare,
|
||||
region_dir: region_dir.clone(),
|
||||
};
|
||||
let region_id = RegionId::new(1, 0);
|
||||
|
||||
@@ -172,8 +161,7 @@ mod tests {
|
||||
|
||||
let request = RegionOpenRequest {
|
||||
engine: "file".to_string(),
|
||||
table_dir: region_dir,
|
||||
path_type: PathType::Bare,
|
||||
region_dir,
|
||||
options: HashMap::default(),
|
||||
skip_wal_replay: false,
|
||||
};
|
||||
@@ -182,7 +170,7 @@ mod tests {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(region.region_dir, "open_region_dir/1_0000000000/");
|
||||
assert_eq!(region.region_dir, "open_region_dir/");
|
||||
assert_eq!(region.url, "test");
|
||||
assert_eq!(region.file_options.files, vec!["1.csv"]);
|
||||
assert_matches!(region.format, Format::Csv { .. });
|
||||
@@ -201,8 +189,7 @@ mod tests {
|
||||
column_metadatas: new_test_column_metadata(),
|
||||
primary_key: vec![1],
|
||||
options: new_test_options(),
|
||||
table_dir: region_dir.clone(),
|
||||
path_type: PathType::Bare,
|
||||
region_dir: region_dir.clone(),
|
||||
};
|
||||
let region_id = RegionId::new(1, 0);
|
||||
|
||||
@@ -211,20 +198,19 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
assert!(object_store
|
||||
.exists("drop_region_dir/1_0000000000/manifest/_file_manifest")
|
||||
.exists("drop_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
FileRegion::drop(®ion, &object_store).await.unwrap();
|
||||
assert!(!object_store
|
||||
.exists("drop_region_dir/1_0000000000/manifest/_file_manifest")
|
||||
.exists("drop_region_dir/manifest/_file_manifest")
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
let request = RegionOpenRequest {
|
||||
engine: "file".to_string(),
|
||||
table_dir: region_dir,
|
||||
path_type: PathType::Bare,
|
||||
region_dir,
|
||||
options: HashMap::default(),
|
||||
skip_wal_replay: false,
|
||||
};
|
||||
|
||||
@@ -48,7 +48,6 @@ futures.workspace = true
|
||||
get-size2 = "0.1.2"
|
||||
greptime-proto.workspace = true
|
||||
http.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
itertools.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
|
||||
@@ -50,7 +50,6 @@ use crate::adapter::refill::RefillTask;
|
||||
use crate::adapter::table_source::ManagedTableSource;
|
||||
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
|
||||
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
|
||||
use crate::batching_mode::BatchingModeOptions;
|
||||
use crate::compute::ErrCollector;
|
||||
use crate::df_optimizer::sql_to_flow_plan;
|
||||
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
|
||||
@@ -85,14 +84,12 @@ pub const AUTO_CREATED_UPDATE_AT_TS_COL: &str = "update_at";
|
||||
#[serde(default)]
|
||||
pub struct FlowConfig {
|
||||
pub num_workers: usize,
|
||||
pub batching_mode: BatchingModeOptions,
|
||||
}
|
||||
|
||||
impl Default for FlowConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
num_workers: (common_config::utils::get_cpus() / 2).max(1),
|
||||
batching_mode: BatchingModeOptions::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -500,7 +497,7 @@ impl StreamingEngine {
|
||||
&self,
|
||||
schema: &RelationDesc,
|
||||
) -> Result<(Vec<String>, Vec<ColumnSchema>, bool), Error> {
|
||||
// TODO(discord9): consider remove buggy auto create by schema
|
||||
// TODO(discord9): condiser remove buggy auto create by schema
|
||||
|
||||
// TODO(discord9): use default key from schema
|
||||
let primary_keys = schema
|
||||
|
||||
@@ -41,6 +41,7 @@ use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use crate::adapter::{CreateFlowArgs, StreamingEngine};
|
||||
use crate::batching_mode::engine::BatchingEngine;
|
||||
use crate::batching_mode::{FRONTEND_SCAN_TIMEOUT, MIN_REFRESH_DURATION};
|
||||
use crate::engine::FlowEngine;
|
||||
use crate::error::{
|
||||
CreateFlowSnafu, ExternalSnafu, FlowNotFoundSnafu, FlowNotRecoveredSnafu,
|
||||
@@ -439,21 +440,13 @@ struct ConsistentCheckTask {
|
||||
impl ConsistentCheckTask {
|
||||
async fn start_check_task(engine: &Arc<FlowDualEngine>) -> Result<Self, Error> {
|
||||
let engine = engine.clone();
|
||||
let min_refresh_duration = engine
|
||||
.batching_engine()
|
||||
.batch_opts
|
||||
.experimental_min_refresh_duration;
|
||||
let frontend_scan_timeout = engine
|
||||
.batching_engine()
|
||||
.batch_opts
|
||||
.experimental_frontend_scan_timeout;
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(1);
|
||||
let (trigger_tx, mut trigger_rx) =
|
||||
tokio::sync::mpsc::channel::<(bool, bool, tokio::sync::oneshot::Sender<()>)>(10);
|
||||
let handle = common_runtime::spawn_global(async move {
|
||||
// first check if available frontend is found
|
||||
if let Err(err) = engine
|
||||
.wait_for_available_frontend(frontend_scan_timeout)
|
||||
.wait_for_available_frontend(FRONTEND_SCAN_TIMEOUT)
|
||||
.await
|
||||
{
|
||||
warn!("No frontend is available yet:\n {err:?}");
|
||||
@@ -466,9 +459,9 @@ impl ConsistentCheckTask {
|
||||
error!(
|
||||
"Failed to recover flows:\n {err:?}, retry {} in {}s",
|
||||
recover_retry,
|
||||
min_refresh_duration.as_secs()
|
||||
MIN_REFRESH_DURATION.as_secs()
|
||||
);
|
||||
tokio::time::sleep(min_refresh_duration).await;
|
||||
tokio::time::sleep(MIN_REFRESH_DURATION).await;
|
||||
}
|
||||
|
||||
engine.set_done_recovering();
|
||||
|
||||
@@ -16,8 +16,6 @@

use std::time::Duration;

use serde::{Deserialize, Serialize};

pub(crate) mod engine;
pub(crate) mod frontend_client;
mod state;
@@ -25,49 +23,27 @@ mod task;
mod time_window;
mod utils;

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct BatchingModeOptions {
    /// The default batching engine query timeout is 10 minutes
    #[serde(with = "humantime_serde")]
    pub query_timeout: Duration,
    /// will output a warn log for any query that runs for more that this threshold
    #[serde(with = "humantime_serde")]
    pub slow_query_threshold: Duration,
    /// The minimum duration between two queries execution by batching mode task
    #[serde(with = "humantime_serde")]
    pub experimental_min_refresh_duration: Duration,
    /// The gRPC connection timeout
    #[serde(with = "humantime_serde")]
    pub grpc_conn_timeout: Duration,
    /// The gRPC max retry number
    pub experimental_grpc_max_retries: u32,
    /// Flow wait for available frontend timeout,
    /// if failed to find available frontend after frontend_scan_timeout elapsed, return error
    /// which prevent flownode from starting
    #[serde(with = "humantime_serde")]
    pub experimental_frontend_scan_timeout: Duration,
    /// Frontend activity timeout
    /// if frontend is down(not sending heartbeat) for more than frontend_activity_timeout, it will be removed from the list that flownode use to connect
    #[serde(with = "humantime_serde")]
    pub experimental_frontend_activity_timeout: Duration,
    /// Maximum number of filters allowed in a single query
    pub experimental_max_filter_num_per_query: usize,
    /// Time window merge distance
    pub experimental_time_window_merge_threshold: usize,
}
/// TODO(discord9): make those constants configurable
/// The default batching engine query timeout is 10 minutes
pub const DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);

impl Default for BatchingModeOptions {
    fn default() -> Self {
        Self {
            query_timeout: Duration::from_secs(10 * 60),
            slow_query_threshold: Duration::from_secs(60),
            experimental_min_refresh_duration: Duration::new(5, 0),
            grpc_conn_timeout: Duration::from_secs(5),
            experimental_grpc_max_retries: 3,
            experimental_frontend_scan_timeout: Duration::from_secs(30),
            experimental_frontend_activity_timeout: Duration::from_secs(60),
            experimental_max_filter_num_per_query: 20,
            experimental_time_window_merge_threshold: 3,
        }
    }
}
/// will output a warn log for any query that runs for more that 1 minutes, and also every 1 minutes when that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);

/// The minimum duration between two queries execution by batching mode task
pub const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);

/// Grpc connection timeout
const GRPC_CONN_TIMEOUT: Duration = Duration::from_secs(5);

/// Grpc max retry number
const GRPC_MAX_RETRIES: u32 = 3;

/// Flow wait for available frontend timeout,
/// if failed to find available frontend after FRONTEND_SCAN_TIMEOUT elapsed, return error
/// which should prevent flownode from starting
pub const FRONTEND_SCAN_TIMEOUT: Duration = Duration::from_secs(30);

/// Frontend activity timeout
/// if frontend is down(not sending heartbeat) for more than FRONTEND_ACTIVITY_TIMEOUT, it will be removed from the list that flownode use to connect
pub const FRONTEND_ACTIVITY_TIMEOUT: Duration = Duration::from_secs(60);

|
||||
use tokio::sync::{oneshot, RwLock};
|
||||
|
||||
use crate::batching_mode::frontend_client::FrontendClient;
|
||||
use crate::batching_mode::task::{BatchingTask, TaskArgs};
|
||||
use crate::batching_mode::task::BatchingTask;
|
||||
use crate::batching_mode::time_window::{find_time_window_expr, TimeWindowExpr};
|
||||
use crate::batching_mode::utils::sql_to_df_plan;
|
||||
use crate::batching_mode::BatchingModeOptions;
|
||||
use crate::engine::FlowEngine;
|
||||
use crate::error::{
|
||||
ExternalSnafu, FlowAlreadyExistSnafu, FlowNotFoundSnafu, TableNotFoundMetaSnafu,
|
||||
@@ -58,9 +57,6 @@ pub struct BatchingEngine {
|
||||
table_meta: TableMetadataManagerRef,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
query_engine: QueryEngineRef,
|
||||
/// Batching mode options for control how batching mode query works
|
||||
///
|
||||
pub(crate) batch_opts: Arc<BatchingModeOptions>,
|
||||
}
|
||||
|
||||
impl BatchingEngine {
|
||||
@@ -70,7 +66,6 @@ impl BatchingEngine {
|
||||
flow_metadata_manager: FlowMetadataManagerRef,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
batch_opts: BatchingModeOptions,
|
||||
) -> Self {
|
||||
Self {
|
||||
tasks: Default::default(),
|
||||
@@ -80,7 +75,6 @@ impl BatchingEngine {
|
||||
table_meta,
|
||||
catalog_manager,
|
||||
query_engine,
|
||||
batch_opts: Arc::new(batch_opts),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -430,21 +424,18 @@ impl BatchingEngine {
|
||||
.unwrap_or("None".to_string())
|
||||
);
|
||||
|
||||
let task_args = TaskArgs {
|
||||
let task = BatchingTask::try_new(
|
||||
flow_id,
|
||||
query: &sql,
|
||||
&sql,
|
||||
plan,
|
||||
time_window_expr: phy_expr,
|
||||
phy_expr,
|
||||
expire_after,
|
||||
sink_table_name,
|
||||
source_table_names,
|
||||
query_ctx,
|
||||
catalog_manager: self.catalog_manager.clone(),
|
||||
shutdown_rx: rx,
|
||||
batch_opts: self.batch_opts.clone(),
|
||||
};
|
||||
|
||||
let task = BatchingTask::try_new(task_args)?;
|
||||
self.catalog_manager.clone(),
|
||||
rx,
|
||||
)?;
|
||||
|
||||
let task_inner = task.clone();
|
||||
let engine = self.query_engine.clone();
|
||||
|
||||
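One side of the hunk above builds a `TaskArgs` value and hands it to `BatchingTask::try_new`, while the other passes the same values positionally. As a generic illustration of the args-struct style only (all names below are hypothetical and unrelated to the actual `BatchingTask` API):

/// Hypothetical argument bundle; none of these names come from the diff.
struct QueryTaskArgs<'a> {
    sql: &'a str,
    max_retries: u32,
}

struct QueryTask;

impl QueryTask {
    /// Taking one args struct keeps call sites labelled and lets new fields
    /// be added later without touching every caller.
    fn try_new(args: QueryTaskArgs<'_>) -> Result<Self, String> {
        if args.sql.is_empty() {
            return Err("empty query".to_string());
        }
        let _ = args.max_retries;
        Ok(QueryTask)
    }
}

fn demo() -> Result<QueryTask, String> {
    QueryTask::try_new(QueryTaskArgs {
        sql: "SELECT 1",
        max_retries: 3,
    })
}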
@@ -38,7 +38,10 @@ use servers::query_handler::grpc::GrpcQueryHandler;
use session::context::{QueryContextBuilder, QueryContextRef};
use snafu::{OptionExt, ResultExt};

use crate::batching_mode::BatchingModeOptions;
use crate::batching_mode::{
    DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT, FRONTEND_ACTIVITY_TIMEOUT, GRPC_CONN_TIMEOUT,
    GRPC_MAX_RETRIES,
};
use crate::error::{ExternalSnafu, InvalidRequestSnafu, NoAvailableFrontendSnafu, UnexpectedSnafu};
use crate::{Error, FlowAuthHeader};

@@ -85,7 +88,6 @@ pub enum FrontendClient {
        chnl_mgr: ChannelManager,
        auth: Option<FlowAuthHeader>,
        query: QueryOptions,
        batch_opts: BatchingModeOptions,
    },
    Standalone {
        /// For the sake of simplicity, still use gRPC even in standalone mode
@@ -112,20 +114,18 @@ impl FrontendClient {
        meta_client: Arc<MetaClient>,
        auth: Option<FlowAuthHeader>,
        query: QueryOptions,
        batch_opts: BatchingModeOptions,
    ) -> Self {
        common_telemetry::info!("Frontend client build with auth={:?}", auth);
        Self::Distributed {
            meta_client,
            chnl_mgr: {
                let cfg = ChannelConfig::new()
                    .connect_timeout(batch_opts.grpc_conn_timeout)
                    .timeout(batch_opts.query_timeout);
                    .connect_timeout(GRPC_CONN_TIMEOUT)
                    .timeout(DEFAULT_BATCHING_ENGINE_QUERY_TIMEOUT);
                ChannelManager::with_config(cfg)
            },
            auth,
            query,
            batch_opts,
        }
    }

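Both variants in the hunk above configure the gRPC channel the same way and differ only in where the timeouts come from. A minimal sketch of that builder call in isolation, assuming the `ChannelConfig`/`ChannelManager` API used above and with purely illustrative timeout values:

use std::time::Duration;

fn build_channel_manager() -> ChannelManager {
    // Connection establishment and per-request timeouts are set on the
    // channel config before constructing the manager.
    let cfg = ChannelConfig::new()
        .connect_timeout(Duration::from_secs(5))
        .timeout(Duration::from_secs(10 * 60));
    ChannelManager::with_config(cfg)
}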
@@ -209,7 +209,6 @@ impl FrontendClient {
            chnl_mgr,
            auth,
            query: _,
            batch_opts,
        } = self
        else {
            return UnexpectedSnafu {
@@ -218,9 +217,9 @@ impl FrontendClient {
            .fail();
        };

        let mut interval = tokio::time::interval(batch_opts.grpc_conn_timeout);
        let mut interval = tokio::time::interval(GRPC_CONN_TIMEOUT);
        interval.tick().await;
        for retry in 0..batch_opts.experimental_grpc_max_retries {
        for retry in 0..GRPC_MAX_RETRIES {
            let mut frontends = self.scan_for_frontend().await?;
            let now_in_ms = SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
@@ -234,10 +233,7 @@ impl FrontendClient {
                .iter()
                // filter out frontends that have been down for more than 1 minute
                .filter(|(_, node_info)| {
                    node_info.last_activity_ts
                        + batch_opts
                            .experimental_frontend_activity_timeout
                            .as_millis() as i64
                    node_info.last_activity_ts + FRONTEND_ACTIVITY_TIMEOUT.as_millis() as i64
                        > now_in_ms
                })
            {
@@ -267,7 +263,7 @@ impl FrontendClient {
        }

        NoAvailableFrontendSnafu {
            timeout: batch_opts.grpc_conn_timeout,
            timeout: GRPC_CONN_TIMEOUT,
            context: "No available frontend found that is able to process query",
        }
        .fail()
@@ -350,9 +346,7 @@ impl FrontendClient {
        peer_desc: &mut Option<PeerDesc>,
    ) -> Result<u32, Error> {
        match self {
            FrontendClient::Distributed {
                query, batch_opts, ..
            } => {
            FrontendClient::Distributed { query, .. } => {
                let db = self.get_random_active_frontend(catalog, schema).await?;

                *peer_desc = Some(PeerDesc::Dist {
@@ -362,7 +356,7 @@ impl FrontendClient {
                db.database
                    .handle_with_retry(
                        req.clone(),
                        batch_opts.experimental_grpc_max_retries,
                        GRPC_MAX_RETRIES,
                        &[(QUERY_PARALLELISM_HINT, &query.parallelism.to_string())],
                    )
                    .await

@@ -28,6 +28,7 @@ use tokio::time::Instant;

use crate::batching_mode::task::BatchingTask;
use crate::batching_mode::time_window::TimeWindowExpr;
use crate::batching_mode::MIN_REFRESH_DURATION;
use crate::error::{DatatypesSnafu, InternalSnafu, TimeSnafu, UnexpectedSnafu};
use crate::metrics::{
    METRIC_FLOW_BATCHING_ENGINE_QUERY_WINDOW_CNT, METRIC_FLOW_BATCHING_ENGINE_QUERY_WINDOW_SIZE,
@@ -88,12 +89,10 @@ impl TaskState {
        &self,
        flow_id: FlowId,
        time_window_size: &Option<Duration>,
        min_refresh_duration: Duration,
        max_timeout: Option<Duration>,
        max_filter_num_per_query: usize,
    ) -> Instant {
        // = last query duration, capped to the range [max(min_run_interval, time_window_size), max_timeout], i.e. never more than `max_timeout`
        let lower = time_window_size.unwrap_or(min_refresh_duration);
        let lower = time_window_size.unwrap_or(MIN_REFRESH_DURATION);
        let next_duration = self.last_query_duration.max(lower);
        let next_duration = if let Some(max_timeout) = max_timeout {
            next_duration.min(max_timeout)
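Stripped of the surrounding state, the scheduling rule in this hunk is a clamp on the last query duration; a small self-contained sketch of that computation (function and variable names are mine, not the repository's):

use std::time::Duration;

/// Next wait = last query duration, clamped below by `lower` and above by
/// `max_timeout` when one is given.
fn next_wait(last_query: Duration, lower: Duration, max_timeout: Option<Duration>) -> Duration {
    let next = last_query.max(lower);
    match max_timeout {
        Some(cap) => next.min(cap),
        None => next,
    }
}

// e.g. next_wait(90s, 60s, Some(300s)) == 90s, while next_wait(10s, 60s, None) == 60s.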
@@ -105,7 +104,7 @@ impl TaskState {
        // compute how much time range can be handled in one query
        let max_query_update_range = (*time_window_size)
            .unwrap_or_default()
            .mul_f64(max_filter_num_per_query as f64);
            .mul_f64(DirtyTimeWindows::MAX_FILTER_NUM as f64);
        // if the dirty time range is more than one query can handle, execute immediately
        // to clean up dirty time windows faster
        if cur_dirty_window_size < max_query_update_range {
@@ -126,36 +125,11 @@ impl TaskState {

/// Keeps a record of dirty time windows, i.e. time windows that have had new data inserted
/// since the last query.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Default)]
pub struct DirtyTimeWindows {
    /// Windows as `start -> end`, non-overlapping;
    /// `end` is exclusive (and optional)
    windows: BTreeMap<Timestamp, Option<Timestamp>>,
    /// Maximum number of filters allowed in a single query
    max_filter_num_per_query: usize,
    /// Time window merge distance
    time_window_merge_threshold: usize,
}

impl DirtyTimeWindows {
    pub fn new(max_filter_num_per_query: usize, time_window_merge_threshold: usize) -> Self {
        Self {
            windows: BTreeMap::new(),
            max_filter_num_per_query,
            time_window_merge_threshold,
        }
    }
}

impl Default for DirtyTimeWindows {
    fn default() -> Self {
        Self {
            windows: BTreeMap::new(),
            max_filter_num_per_query: 20,
            time_window_merge_threshold: 3,
        }
    }
}

impl DirtyTimeWindows {
@@ -164,6 +138,9 @@ impl DirtyTimeWindows {
    /// TODO(discord9): make those configurable
    pub const MERGE_DIST: i32 = 3;

    /// Maximum number of filters allowed in a single query
    pub const MAX_FILTER_NUM: usize = 20;

    /// Add lower bounds to the dirty time windows. Upper bounds are ignored.
    ///
    /// # Arguments
@@ -257,7 +234,7 @@ impl DirtyTimeWindows {
        );
        self.merge_dirty_time_windows(window_size, expire_lower_bound)?;

        if self.windows.len() > self.max_filter_num_per_query {
        if self.windows.len() > Self::MAX_FILTER_NUM {
            let first_time_window = self.windows.first_key_value();
            let last_time_window = self.windows.last_key_value();

@@ -266,7 +243,7 @@ impl DirtyTimeWindows {
                "Flow id = {:?}, too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong. Time window expr={:?}, expire_after={:?}, first_time_window={:?}, last_time_window={:?}, the original query: {:?}",
                task_ctx.config.flow_id,
                self.windows.len(),
                self.max_filter_num_per_query,
                Self::MAX_FILTER_NUM,
                task_ctx.config.time_window_expr,
                task_ctx.config.expire_after,
                first_time_window,
@@ -277,7 +254,7 @@ impl DirtyTimeWindows {
            warn!("Flow id = {:?}, too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong. first_time_window={:?}, last_time_window={:?}",
                flow_id,
                self.windows.len(),
                self.max_filter_num_per_query,
                Self::MAX_FILTER_NUM,
                first_time_window,
                last_time_window
            )
@@ -483,7 +460,7 @@ impl DirtyTimeWindows {

        if lower_bound
            .sub(&prev_upper)
            .map(|dist| dist <= window_size * self.time_window_merge_threshold as i32)
            .map(|dist| dist <= window_size * Self::MERGE_DIST)
            .unwrap_or(false)
        {
            prev_tw.1 = Some(cur_upper);
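The condition at the end of this hunk is the whole merge heuristic: two adjacent dirty windows are merged when the gap between them is at most the window size times the merge distance. A standalone sketch of just that predicate (names are mine; the real code computes the gap from `Timestamp` differences as shown above):

use chrono::Duration;

/// Merge two windows when the gap between the previous window's end and the
/// next window's start is at most `window_size * merge_dist`.
fn should_merge(gap: Duration, window_size: Duration, merge_dist: i32) -> bool {
    gap <= window_size * merge_dist
}

// e.g. with a 5-minute window and merge_dist = 3, gaps of up to 15 minutes are merged.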
@@ -531,19 +508,18 @@ mod test {

    #[test]
    fn test_merge_dirty_time_windows() {
        let merge_dist = DirtyTimeWindows::default().time_window_merge_threshold;
        let testcases = vec![
            // just enough to merge
            (
                vec![
                    Timestamp::new_second(0),
                    Timestamp::new_second((1 + merge_dist as i64) * 5 * 60),
                    Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                ],
                (chrono::Duration::seconds(5 * 60), None),
                BTreeMap::from([(
                    Timestamp::new_second(0),
                    Some(Timestamp::new_second(
                        (2 + merge_dist as i64) * 5 * 60,
                        (2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60,
                    )),
                )]),
                Some(
@@ -554,7 +530,7 @@ mod test {
            (
                vec![
                    Timestamp::new_second(0),
                    Timestamp::new_second((2 + merge_dist as i64) * 5 * 60),
                    Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                ],
                (chrono::Duration::seconds(5 * 60), None),
                BTreeMap::from([
@@ -563,9 +539,9 @@ mod test {
                        Some(Timestamp::new_second(5 * 60)),
                    ),
                    (
                        Timestamp::new_second((2 + merge_dist as i64) * 5 * 60),
                        Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                        Some(Timestamp::new_second(
                            (3 + merge_dist as i64) * 5 * 60,
                            (3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60,
                        )),
                    ),
                ]),
@@ -577,13 +553,13 @@ mod test {
            (
                vec![
                    Timestamp::new_second(0),
                    Timestamp::new_second((merge_dist as i64) * 5 * 60),
                    Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                ],
                (chrono::Duration::seconds(5 * 60), None),
                BTreeMap::from([(
                    Timestamp::new_second(0),
                    Some(Timestamp::new_second(
                        (1 + merge_dist as i64) * 5 * 60,
                        (1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60,
                    )),
                )]),
                Some(
@@ -594,14 +570,14 @@ mod test {
            (
                vec![
                    Timestamp::new_second(0),
                    Timestamp::new_second((merge_dist as i64) * 3),
                    Timestamp::new_second((merge_dist as i64) * 3 * 2),
                    Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 3),
                    Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 3 * 2),
                ],
                (chrono::Duration::seconds(3), None),
                BTreeMap::from([(
                    Timestamp::new_second(0),
                    Some(Timestamp::new_second(
                        (merge_dist as i64) * 7
                        (DirtyTimeWindows::MERGE_DIST as i64) * 7
                    )),
                )]),
                Some(
@@ -670,12 +646,12 @@ mod test {
            (
                vec![
                    Timestamp::new_second(0),
                    Timestamp::new_second((merge_dist as i64) * 5 * 60),
                    Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
                ],
                (
                    chrono::Duration::seconds(5 * 60),
                    Some(Timestamp::new_second(
                        (merge_dist as i64) * 6 * 60,
                        (DirtyTimeWindows::MERGE_DIST as i64) * 6 * 60,
                    )),
                ),
                BTreeMap::from([]),
@@ -698,7 +674,7 @@ mod test {
                "ts",
                expire_lower_bound,
                window_size,
                dirty.max_filter_num_per_query,
                DirtyTimeWindows::MAX_FILTER_NUM,
                0,
                None,
            )