mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-23 06:30:05 +00:00
Compare commits
43 Commits
v0.15.4
...
poc/create
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7e79b4b2f6 | ||
|
|
4ad40af468 | ||
|
|
e4b048e788 | ||
|
|
ecbf372de3 | ||
|
|
3d81a17360 | ||
|
|
025cae3679 | ||
|
|
68409e28ea | ||
|
|
699406ae32 | ||
|
|
344006deca | ||
|
|
63803f2b43 | ||
|
|
cf62767b98 | ||
|
|
4e53c1531d | ||
|
|
892cb66c53 | ||
|
|
8b392477c8 | ||
|
|
905593dc16 | ||
|
|
6c04cb9b19 | ||
|
|
24da3367c1 | ||
|
|
80b14965a6 | ||
|
|
5da3f86d0c | ||
|
|
151273d1df | ||
|
|
b0289dbdde | ||
|
|
c51730a954 | ||
|
|
207709c727 | ||
|
|
deca8c44fa | ||
|
|
2edd861ce9 | ||
|
|
14f3a4ab05 | ||
|
|
34875c0346 | ||
|
|
1d07864b29 | ||
|
|
9be75361a4 | ||
|
|
9c1df68a5f | ||
|
|
0209461155 | ||
|
|
e728cb33fb | ||
|
|
cde7e11983 | ||
|
|
944b4b3e49 | ||
|
|
7953b090c0 | ||
|
|
7aa9af5ba6 | ||
|
|
7a9444c85b | ||
|
|
bb12be3310 | ||
|
|
24019334ee | ||
|
|
116d5cf82b | ||
|
|
90a3894564 | ||
|
|
39d3e0651d | ||
|
|
a49edc6ca6 |
133
Cargo.lock
generated
133
Cargo.lock
generated
@@ -1986,7 +1986,7 @@ dependencies = [
|
||||
"operator",
|
||||
"query",
|
||||
"rand 0.9.0",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"servers",
|
||||
@@ -2118,13 +2118,14 @@ dependencies = [
|
||||
"mito2",
|
||||
"moka",
|
||||
"nu-ansi-term",
|
||||
"object-store",
|
||||
"plugins",
|
||||
"prometheus",
|
||||
"prost 0.13.5",
|
||||
"query",
|
||||
"rand 0.9.0",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"rexpect",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -2220,6 +2221,7 @@ dependencies = [
|
||||
"humantime-serde",
|
||||
"meta-client",
|
||||
"num_cpus",
|
||||
"object-store",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
@@ -2332,6 +2334,7 @@ dependencies = [
|
||||
"datafusion",
|
||||
"datafusion-common",
|
||||
"datafusion-expr",
|
||||
"datafusion-functions-aggregate-common",
|
||||
"datatypes",
|
||||
"derive_more",
|
||||
"geo",
|
||||
@@ -2370,7 +2373,7 @@ dependencies = [
|
||||
"common-test-util",
|
||||
"common-version",
|
||||
"hyper 0.14.30",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
@@ -3762,7 +3765,7 @@ dependencies = [
|
||||
"prometheus",
|
||||
"prost 0.13.5",
|
||||
"query",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"servers",
|
||||
@@ -4735,6 +4738,7 @@ dependencies = [
|
||||
"log-store",
|
||||
"meta-client",
|
||||
"num_cpus",
|
||||
"object-store",
|
||||
"opentelemetry-proto 0.27.0",
|
||||
"operator",
|
||||
"otel-arrow-rust",
|
||||
@@ -5144,7 +5148,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=82fe5c6282f623c185b86f03e898ee8952e50cf9#82fe5c6282f623c185b86f03e898ee8952e50cf9"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=464226cf8a4a22696503536a123d0b9e318582f4#464226cf8a4a22696503536a123d0b9e318582f4"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"serde",
|
||||
@@ -6695,7 +6699,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.48.5",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7231,6 +7235,7 @@ dependencies = [
|
||||
name = "metric-engine"
|
||||
version = "0.15.0"
|
||||
dependencies = [
|
||||
"ahash 0.8.11",
|
||||
"api",
|
||||
"aquamarine",
|
||||
"async-stream",
|
||||
@@ -8096,14 +8101,21 @@ version = "0.15.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-macro",
|
||||
"common-telemetry",
|
||||
"common-test-util",
|
||||
"futures",
|
||||
"humantime-serde",
|
||||
"lazy_static",
|
||||
"md5",
|
||||
"moka",
|
||||
"opendal",
|
||||
"prometheus",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"snafu 0.8.5",
|
||||
"tokio",
|
||||
"uuid",
|
||||
]
|
||||
@@ -8238,7 +8250,7 @@ dependencies = [
|
||||
"prometheus",
|
||||
"quick-xml 0.36.2",
|
||||
"reqsign",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
@@ -8310,6 +8322,19 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-http"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f51189ce8be654f9b5f7e70e49967ed894e84a06fc35c6c042e64ac1fc5399e"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"http 0.2.12",
|
||||
"opentelemetry 0.21.0",
|
||||
"reqwest 0.11.27",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry-otlp"
|
||||
version = "0.14.0"
|
||||
@@ -8320,10 +8345,12 @@ dependencies = [
|
||||
"futures-core",
|
||||
"http 0.2.12",
|
||||
"opentelemetry 0.21.0",
|
||||
"opentelemetry-http",
|
||||
"opentelemetry-proto 0.4.0",
|
||||
"opentelemetry-semantic-conventions",
|
||||
"opentelemetry_sdk 0.21.2",
|
||||
"prost 0.11.9",
|
||||
"reqwest 0.11.27",
|
||||
"thiserror 1.0.64",
|
||||
"tokio",
|
||||
"tonic 0.9.2",
|
||||
@@ -10310,7 +10337,7 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"quick-xml 0.35.0",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"rsa",
|
||||
"rust-ini 0.21.1",
|
||||
"serde",
|
||||
@@ -10319,6 +10346,42 @@ dependencies = [
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.11.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2 0.3.26",
|
||||
"http 0.2.12",
|
||||
"http-body 0.4.6",
|
||||
"hyper 0.14.30",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper 0.1.2",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
"url",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
"winreg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.9"
|
||||
@@ -11169,6 +11232,7 @@ dependencies = [
|
||||
"common-base",
|
||||
"common-catalog",
|
||||
"common-config",
|
||||
"common-datasource",
|
||||
"common-error",
|
||||
"common-frontend",
|
||||
"common-grpc",
|
||||
@@ -11211,16 +11275,23 @@ dependencies = [
|
||||
"local-ip-address",
|
||||
"log-query",
|
||||
"loki-proto",
|
||||
"metric-engine",
|
||||
"mime_guess",
|
||||
"mito-codec",
|
||||
"mito2",
|
||||
"mysql_async",
|
||||
"notify",
|
||||
"object-pool",
|
||||
"object-store",
|
||||
"once_cell",
|
||||
"openmetrics-parser",
|
||||
"opensrv-mysql",
|
||||
"opentelemetry-proto 0.27.0",
|
||||
"operator",
|
||||
"otel-arrow-rust",
|
||||
"parking_lot 0.12.3",
|
||||
"parquet",
|
||||
"partition",
|
||||
"permutation",
|
||||
"pgwire",
|
||||
"pin-project",
|
||||
@@ -11234,7 +11305,7 @@ dependencies = [
|
||||
"quoted-string",
|
||||
"rand 0.9.0",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"rust-embed",
|
||||
"rustls",
|
||||
"rustls-pemfile",
|
||||
@@ -11678,7 +11749,7 @@ dependencies = [
|
||||
"local-ip-address",
|
||||
"mysql",
|
||||
"num_cpus",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sha2",
|
||||
@@ -12328,6 +12399,27 @@ dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation",
|
||||
"system-configuration-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration-sys"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "table"
|
||||
version = "0.15.0"
|
||||
@@ -12618,7 +12710,7 @@ dependencies = [
|
||||
"paste",
|
||||
"rand 0.9.0",
|
||||
"rand_chacha 0.9.0",
|
||||
"reqwest",
|
||||
"reqwest 0.12.9",
|
||||
"schemars",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -13774,12 +13866,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
version = "1.10.0"
|
||||
version = "1.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
|
||||
checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
|
||||
dependencies = [
|
||||
"getrandom 0.2.15",
|
||||
"rand 0.8.5",
|
||||
"getrandom 0.3.2",
|
||||
"js-sys",
|
||||
"rand 0.9.0",
|
||||
"serde",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
@@ -14501,6 +14594,16 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.50.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wit-bindgen-rt"
|
||||
version = "0.39.0"
|
||||
|
||||
@@ -121,6 +121,7 @@ datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "
|
||||
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-functions-aggregate-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "12c0381babd52c681043957e9d6ee083a03f7646" }
|
||||
@@ -134,7 +135,7 @@ etcd-client = "0.14"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "82fe5c6282f623c185b86f03e898ee8952e50cf9" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "464226cf8a4a22696503536a123d0b9e318582f4" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
|
||||
@@ -185,10 +185,11 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `slow_query` | -- | -- | The slow query log options. |
|
||||
@@ -288,10 +289,11 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `slow_query` | -- | -- | The slow query log options. |
|
||||
@@ -323,6 +325,7 @@
|
||||
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
|
||||
| `region_failure_detector_initialization_delay` | String | `10m` | Delay before initializing region failure detectors.<br/>This delay helps prevent premature initialization of region failure detectors in cases where<br/>cluster maintenance mode is enabled right after metasrv starts, especially when the cluster<br/>is not deployed via the recommended GreptimeDB Operator. Without this delay, early detector registration<br/>may trigger unnecessary region failovers during datanode startup. |
|
||||
| `allow_region_failover_on_local_wal` | Bool | `false` | Whether to allow region failover on local WAL.<br/>**This option is not recommended to be set to true, because it may lead to data loss during failover.** |
|
||||
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
|
||||
@@ -370,10 +373,11 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The metasrv can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
@@ -534,10 +538,11 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
|
||||
@@ -584,10 +589,11 @@
|
||||
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
|
||||
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
|
||||
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
|
||||
| `logging.otlp_endpoint` | String | `http://localhost:4318` | The OTLP tracing endpoint. |
|
||||
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
|
||||
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
|
||||
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
|
||||
| `logging.otlp_export_protocol` | String | `http` | The OTLP tracing export protocol. Can be `grpc`/`http`. |
|
||||
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
|
||||
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
|
||||
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
|
||||
|
||||
@@ -629,7 +629,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -640,6 +640,9 @@ log_format = "text"
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -83,7 +83,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -94,6 +94,9 @@ log_format = "text"
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -218,7 +218,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -229,6 +229,9 @@ log_format = "text"
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -43,6 +43,13 @@ use_memory_store = false
|
||||
## - Using shared storage (e.g., s3).
|
||||
enable_region_failover = false
|
||||
|
||||
## Delay before initializing region failure detectors.
|
||||
## This delay helps prevent premature initialization of region failure detectors in cases where
|
||||
## cluster maintenance mode is enabled right after metasrv starts, especially when the cluster
|
||||
## is not deployed via the recommended GreptimeDB Operator. Without this delay, early detector registration
|
||||
## may trigger unnecessary region failovers during datanode startup.
|
||||
region_failure_detector_initialization_delay = '10m'
|
||||
|
||||
## Whether to allow region failover on local WAL.
|
||||
## **This option is not recommended to be set to true, because it may lead to data loss during failover.**
|
||||
allow_region_failover_on_local_wal = false
|
||||
@@ -220,7 +227,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -231,6 +238,9 @@ log_format = "text"
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -720,7 +720,7 @@ level = "info"
|
||||
enable_otlp_tracing = false
|
||||
|
||||
## The OTLP tracing endpoint.
|
||||
otlp_endpoint = "http://localhost:4317"
|
||||
otlp_endpoint = "http://localhost:4318"
|
||||
|
||||
## Whether to append logs to stdout.
|
||||
append_stdout = true
|
||||
@@ -731,6 +731,9 @@ log_format = "text"
|
||||
## The maximum amount of log files.
|
||||
max_log_files = 720
|
||||
|
||||
## The OTLP tracing export protocol. Can be `grpc`/`http`.
|
||||
otlp_export_protocol = "http"
|
||||
|
||||
## The percentage of tracing will be sampled and exported.
|
||||
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
|
||||
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
|
||||
|
||||
@@ -31,6 +31,7 @@ excludes = [
|
||||
"src/operator/src/expr_helper/trigger.rs",
|
||||
"src/sql/src/statements/create/trigger.rs",
|
||||
"src/sql/src/statements/show/trigger.rs",
|
||||
"src/sql/src/statements/drop/trigger.rs",
|
||||
"src/sql/src/parsers/create_parser/trigger.rs",
|
||||
"src/sql/src/parsers/show_parser/trigger.rs",
|
||||
]
|
||||
|
||||
@@ -22,6 +22,7 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
|
||||
pub struct RegionResponse {
|
||||
pub affected_rows: AffectedRows,
|
||||
pub extensions: HashMap<String, Vec<u8>>,
|
||||
pub metadata: Vec<u8>,
|
||||
}
|
||||
|
||||
impl RegionResponse {
|
||||
@@ -29,6 +30,7 @@ impl RegionResponse {
|
||||
Self {
|
||||
affected_rows: region_response.affected_rows as _,
|
||||
extensions: region_response.extensions,
|
||||
metadata: region_response.metadata,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +39,16 @@ impl RegionResponse {
|
||||
Self {
|
||||
affected_rows,
|
||||
extensions: Default::default(),
|
||||
metadata: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates one response with metadata.
|
||||
pub fn from_metadata(metadata: Vec<u8>) -> Self {
|
||||
Self {
|
||||
affected_rows: 0,
|
||||
extensions: Default::default(),
|
||||
metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ mod information_memory_table;
|
||||
pub mod key_column_usage;
|
||||
mod partitions;
|
||||
mod procedure_info;
|
||||
mod process_list;
|
||||
pub mod process_list;
|
||||
pub mod region_peers;
|
||||
mod region_statistics;
|
||||
mod runtime_metrics;
|
||||
|
||||
@@ -39,14 +39,14 @@ use crate::process_manager::ProcessManagerRef;
|
||||
use crate::system_schema::information_schema::InformationTable;
|
||||
|
||||
/// Column names of `information_schema.process_list`
|
||||
const ID: &str = "id";
|
||||
const CATALOG: &str = "catalog";
|
||||
const SCHEMAS: &str = "schemas";
|
||||
const QUERY: &str = "query";
|
||||
const CLIENT: &str = "client";
|
||||
const FRONTEND: &str = "frontend";
|
||||
const START_TIMESTAMP: &str = "start_timestamp";
|
||||
const ELAPSED_TIME: &str = "elapsed_time";
|
||||
pub const ID: &str = "id";
|
||||
pub const CATALOG: &str = "catalog";
|
||||
pub const SCHEMAS: &str = "schemas";
|
||||
pub const QUERY: &str = "query";
|
||||
pub const CLIENT: &str = "client";
|
||||
pub const FRONTEND: &str = "frontend";
|
||||
pub const START_TIMESTAMP: &str = "start_timestamp";
|
||||
pub const ELAPSED_TIME: &str = "elapsed_time";
|
||||
|
||||
/// `information_schema.process_list` table implementation that tracks running
|
||||
/// queries in current cluster.
|
||||
|
||||
@@ -67,6 +67,7 @@ metric-engine.workspace = true
|
||||
mito2.workspace = true
|
||||
moka.workspace = true
|
||||
nu-ansi-term = "0.46"
|
||||
object-store.workspace = true
|
||||
plugins.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
|
||||
@@ -280,7 +280,7 @@ mod tests {
|
||||
|
||||
use common_config::ENV_VAR_SEP;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
|
||||
use object_store::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
|
||||
use servers::heartbeat_options::HeartbeatOptions;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -30,20 +30,16 @@ use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
|
||||
use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_meta::cache_invalidator::CacheInvalidatorRef;
|
||||
use common_meta::cluster::{NodeInfo, NodeStatus};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
|
||||
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
|
||||
use common_meta::ddl::flow_meta::FlowMetadataAllocator;
|
||||
use common_meta::ddl::table_meta::TableMetadataAllocator;
|
||||
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
|
||||
use common_meta::ddl_manager::DdlManager;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use common_meta::ddl_manager::TriggerDdlManagerRef;
|
||||
use common_meta::key::flow::flow_state::FlowStat;
|
||||
use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::region_keeper::MemoryRegionKeeper;
|
||||
use common_meta::region_registry::LeaderRegionRegistry;
|
||||
@@ -261,15 +257,34 @@ pub struct Instance {
|
||||
flownode: FlownodeInstance,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
wal_options_allocator: WalOptionsAllocatorRef,
|
||||
|
||||
// The components of standalone, which make it easier to expand based
|
||||
// on the components.
|
||||
#[cfg(feature = "enterprise")]
|
||||
components: Components,
|
||||
|
||||
// Keep the logging guard to prevent the worker from being dropped.
|
||||
_guard: Vec<WorkerGuard>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub struct Components {
|
||||
pub plugins: Plugins,
|
||||
pub kv_backend: KvBackendRef,
|
||||
pub frontend_client: Arc<FrontendClient>,
|
||||
pub catalog_manager: catalog::CatalogManagerRef,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
/// Find the socket addr of a server by its `name`.
|
||||
pub fn server_addr(&self, name: &str) -> Option<SocketAddr> {
|
||||
self.frontend.server_handlers().addr(name)
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub fn components(&self) -> &Components {
|
||||
&self.components
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -550,13 +565,14 @@ impl StartCommand {
|
||||
// actually make a connection
|
||||
let (frontend_client, frontend_instance_handler) =
|
||||
FrontendClient::from_empty_grpc_handler();
|
||||
let frontend_client = Arc::new(frontend_client);
|
||||
let flow_builder = FlownodeBuilder::new(
|
||||
flownode_options,
|
||||
plugins.clone(),
|
||||
table_metadata_manager.clone(),
|
||||
catalog_manager.clone(),
|
||||
flow_metadata_manager.clone(),
|
||||
Arc::new(frontend_client.clone()),
|
||||
frontend_client.clone(),
|
||||
);
|
||||
let flownode = flow_builder
|
||||
.build()
|
||||
@@ -594,28 +610,36 @@ impl StartCommand {
|
||||
.await
|
||||
.context(error::BuildWalOptionsAllocatorSnafu)?;
|
||||
let wal_options_allocator = Arc::new(wal_options_allocator);
|
||||
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
let table_metadata_allocator = Arc::new(TableMetadataAllocator::new(
|
||||
table_id_sequence,
|
||||
wal_options_allocator.clone(),
|
||||
));
|
||||
let flow_meta_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
|
||||
let flow_metadata_allocator = Arc::new(FlowMetadataAllocator::with_noop_peer_allocator(
|
||||
flow_id_sequence,
|
||||
));
|
||||
|
||||
let ddl_context = DdlContext {
|
||||
node_manager: node_manager.clone(),
|
||||
cache_invalidator: layered_cache_registry.clone(),
|
||||
memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
|
||||
leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
|
||||
table_metadata_manager: table_metadata_manager.clone(),
|
||||
table_metadata_allocator: table_metadata_allocator.clone(),
|
||||
flow_metadata_manager: flow_metadata_manager.clone(),
|
||||
flow_metadata_allocator: flow_metadata_allocator.clone(),
|
||||
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
|
||||
};
|
||||
let procedure_manager_c = procedure_manager.clone();
|
||||
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
#[cfg(feature = "enterprise")]
|
||||
let trigger_ddl_manager: Option<TriggerDdlManagerRef> = plugins.get();
|
||||
let ddl_task_executor = Self::create_ddl_task_executor(
|
||||
procedure_manager.clone(),
|
||||
node_manager.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
table_metadata_manager,
|
||||
table_meta_allocator,
|
||||
flow_metadata_manager,
|
||||
flow_meta_allocator,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager,
|
||||
)
|
||||
.await?;
|
||||
let ddl_manager = {
|
||||
let trigger_ddl_manager: Option<common_meta::ddl_manager::TriggerDdlManagerRef> =
|
||||
plugins.get();
|
||||
ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
|
||||
};
|
||||
let ddl_task_executor: ProcedureExecutorRef = Arc::new(ddl_manager);
|
||||
|
||||
let fe_instance = FrontendBuilder::new(
|
||||
fe_opts.clone(),
|
||||
@@ -658,7 +682,7 @@ impl StartCommand {
|
||||
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
|
||||
.context(error::ServersSnafu)?;
|
||||
|
||||
let servers = Services::new(opts, fe_instance.clone(), plugins)
|
||||
let servers = Services::new(opts, fe_instance.clone(), plugins.clone())
|
||||
.build()
|
||||
.context(error::StartFrontendSnafu)?;
|
||||
|
||||
@@ -669,51 +693,26 @@ impl StartCommand {
|
||||
export_metrics_task,
|
||||
};
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
let components = Components {
|
||||
plugins,
|
||||
kv_backend,
|
||||
frontend_client,
|
||||
catalog_manager,
|
||||
};
|
||||
|
||||
Ok(Instance {
|
||||
datanode,
|
||||
frontend,
|
||||
flownode,
|
||||
procedure_manager,
|
||||
wal_options_allocator,
|
||||
#[cfg(feature = "enterprise")]
|
||||
components,
|
||||
_guard: guard,
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create_ddl_task_executor(
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
node_manager: NodeManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
table_metadata_allocator: TableMetadataAllocatorRef,
|
||||
flow_metadata_manager: FlowMetadataManagerRef,
|
||||
flow_metadata_allocator: FlowMetadataAllocatorRef,
|
||||
#[cfg(feature = "enterprise")] trigger_ddl_manager: Option<TriggerDdlManagerRef>,
|
||||
) -> Result<ProcedureExecutorRef> {
|
||||
let procedure_executor: ProcedureExecutorRef = Arc::new(
|
||||
DdlManager::try_new(
|
||||
DdlContext {
|
||||
node_manager,
|
||||
cache_invalidator,
|
||||
memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
|
||||
leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
|
||||
table_metadata_manager,
|
||||
table_metadata_allocator,
|
||||
flow_metadata_manager,
|
||||
flow_metadata_allocator,
|
||||
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
|
||||
},
|
||||
procedure_manager,
|
||||
true,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager,
|
||||
)
|
||||
.context(error::InitDdlManagerSnafu)?,
|
||||
);
|
||||
|
||||
Ok(procedure_executor)
|
||||
}
|
||||
|
||||
pub async fn create_table_metadata_manager(
|
||||
kv_backend: KvBackendRef,
|
||||
) -> Result<TableMetadataManagerRef> {
|
||||
@@ -849,7 +848,7 @@ mod tests {
|
||||
use common_config::ENV_VAR_SEP;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{FileConfig, GcsConfig};
|
||||
use object_store::config::{FileConfig, GcsConfig};
|
||||
|
||||
use super::*;
|
||||
use crate::options::GlobalOptions;
|
||||
@@ -968,15 +967,15 @@ mod tests {
|
||||
|
||||
assert!(matches!(
|
||||
&dn_opts.storage.store,
|
||||
datanode::config::ObjectStoreConfig::File(FileConfig { .. })
|
||||
object_store::config::ObjectStoreConfig::File(FileConfig { .. })
|
||||
));
|
||||
assert_eq!(dn_opts.storage.providers.len(), 2);
|
||||
assert!(matches!(
|
||||
dn_opts.storage.providers[0],
|
||||
datanode::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
|
||||
object_store::config::ObjectStoreConfig::Gcs(GcsConfig { .. })
|
||||
));
|
||||
match &dn_opts.storage.providers[1] {
|
||||
datanode::config::ObjectStoreConfig::S3(s3_config) => {
|
||||
object_store::config::ObjectStoreConfig::S3(s3_config) => {
|
||||
assert_eq!(
|
||||
"SecretBox<alloc::string::String>([REDACTED])".to_string(),
|
||||
format!("{:?}", s3_config.access_key_id)
|
||||
|
||||
@@ -18,7 +18,7 @@ use cmd::options::GreptimeOptions;
|
||||
use cmd::standalone::StandaloneOptions;
|
||||
use common_config::{Configurable, DEFAULT_DATA_HOME};
|
||||
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
|
||||
use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_ENDPOINT};
|
||||
use common_telemetry::logging::{LoggingOptions, DEFAULT_LOGGING_DIR, DEFAULT_OTLP_HTTP_ENDPOINT};
|
||||
use common_wal::config::raft_engine::RaftEngineConfig;
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
|
||||
@@ -81,7 +81,7 @@ fn test_load_datanode_example_config() {
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -124,7 +124,7 @@ fn test_load_frontend_example_config() {
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -172,7 +172,7 @@ fn test_load_metasrv_example_config() {
|
||||
logging: LoggingOptions {
|
||||
dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
@@ -229,7 +229,7 @@ fn test_load_standalone_example_config() {
|
||||
logging: LoggingOptions {
|
||||
level: Some("info".to_string()),
|
||||
dir: format!("{}/{}", DEFAULT_DATA_HOME, DEFAULT_LOGGING_DIR),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_HTTP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
@@ -14,6 +14,7 @@ common-macro.workspace = true
|
||||
config.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
num_cpus.workspace = true
|
||||
object-store.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
serde_with.workspace = true
|
||||
|
||||
@@ -106,7 +106,7 @@ mod tests {
|
||||
use common_telemetry::logging::LoggingOptions;
|
||||
use common_test_util::temp_dir::create_named_temp_file;
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{ObjectStoreConfig, StorageConfig};
|
||||
use datanode::config::StorageConfig;
|
||||
use meta_client::MetaClientOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -212,7 +212,7 @@ mod tests {
|
||||
|
||||
// Check the configs from environment variables.
|
||||
match &opts.storage.store {
|
||||
ObjectStoreConfig::S3(s3_config) => {
|
||||
object_store::config::ObjectStoreConfig::S3(s3_config) => {
|
||||
assert_eq!(s3_config.bucket, "mybucket".to_string());
|
||||
}
|
||||
_ => panic!("unexpected store type"),
|
||||
|
||||
@@ -21,6 +21,7 @@ pub mod error;
|
||||
pub mod file_format;
|
||||
pub mod lister;
|
||||
pub mod object_store;
|
||||
pub mod parquet_writer;
|
||||
pub mod share_buffer;
|
||||
#[cfg(test)]
|
||||
pub mod test_util;
|
||||
|
||||
52
src/common/datasource/src/parquet_writer.rs
Normal file
52
src/common/datasource/src/parquet_writer.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use bytes::Bytes;
|
||||
use futures::future::BoxFuture;
|
||||
use object_store::Writer;
|
||||
use parquet::arrow::async_writer::AsyncFileWriter;
|
||||
use parquet::errors::ParquetError;
|
||||
|
||||
/// Bridges opendal [Writer] with parquet [AsyncFileWriter].
|
||||
pub struct AsyncWriter {
|
||||
inner: Writer,
|
||||
}
|
||||
|
||||
impl AsyncWriter {
|
||||
/// Create a [`AsyncWriter`] by given [`Writer`].
|
||||
pub fn new(writer: Writer) -> Self {
|
||||
Self { inner: writer }
|
||||
}
|
||||
}
|
||||
|
||||
impl AsyncFileWriter for AsyncWriter {
|
||||
fn write(&mut self, bs: Bytes) -> BoxFuture<'_, parquet::errors::Result<()>> {
|
||||
Box::pin(async move {
|
||||
self.inner
|
||||
.write(bs)
|
||||
.await
|
||||
.map_err(|err| ParquetError::External(Box::new(err)))
|
||||
})
|
||||
}
|
||||
|
||||
fn complete(&mut self) -> BoxFuture<'_, parquet::errors::Result<()>> {
|
||||
Box::pin(async move {
|
||||
self.inner
|
||||
.close()
|
||||
.await
|
||||
.map(|_| ())
|
||||
.map_err(|err| ParquetError::External(Box::new(err)))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -33,6 +33,7 @@ common-version.workspace = true
|
||||
datafusion.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
datafusion-functions-aggregate-common.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_more = { version = "1", default-features = false, features = ["display"] }
|
||||
geo = { version = "0.29", optional = true }
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
pub mod approximate;
|
||||
pub mod count_hash;
|
||||
#[cfg(feature = "geo")]
|
||||
pub mod geo;
|
||||
pub mod vector;
|
||||
|
||||
647
src/common/function/src/aggrs/count_hash.rs
Normal file
647
src/common/function/src/aggrs/count_hash.rs
Normal file
@@ -0,0 +1,647 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! `CountHash` / `count_hash` is a hash-based approximate distinct count function.
|
||||
//!
|
||||
//! It is a variant of `CountDistinct` that uses a hash function to approximate the
|
||||
//! distinct count.
|
||||
//! It is designed to be more efficient than `CountDistinct` for large datasets,
|
||||
//! but it is not as accurate, because different values may collide on the same hash.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ahash::RandomState;
|
||||
use datafusion_common::cast::as_list_array;
|
||||
use datafusion_common::error::Result;
|
||||
use datafusion_common::hash_utils::create_hashes;
|
||||
use datafusion_common::utils::SingleRowListArrayBuilder;
|
||||
use datafusion_common::{internal_err, not_impl_err, ScalarValue};
|
||||
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
|
||||
use datafusion_expr::utils::{format_state_name, AggregateOrderSensitivity};
|
||||
use datafusion_expr::{
|
||||
Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF,
|
||||
SetMonotonicity, Signature, TypeSignature, Volatility,
|
||||
};
|
||||
use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{
|
||||
Array, ArrayRef, AsArray, BooleanArray, Int64Array, ListArray, UInt64Array,
|
||||
};
|
||||
use datatypes::arrow::buffer::{OffsetBuffer, ScalarBuffer};
|
||||
use datatypes::arrow::datatypes::{DataType, Field};
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// Type of the per-value hash kept in the accumulators' hash sets.
type HashValueType = u64;
|
||||
|
||||
// Fixed seeds, passed to every `RandomState::with_seeds` call in this file so
// that all accumulators are built with the same hasher state.
// Source bytes, read from /dev/urandom:
// 4047821dc6144e4b2abddf23ad4171126a52eeecd26eff2191cf673b965a7875
|
||||
const RANDOM_SEED_0: u64 = 0x4047821dc6144e4b;
|
||||
const RANDOM_SEED_1: u64 = 0x2abddf23ad417112;
|
||||
const RANDOM_SEED_2: u64 = 0x6a52eeecd26eff21;
|
||||
const RANDOM_SEED_3: u64 = 0x91cf673b965a7875;
|
||||
|
||||
impl CountHash {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register_aggr(CountHash::udf_impl());
|
||||
}
|
||||
|
||||
pub fn udf_impl() -> AggregateUDF {
|
||||
AggregateUDF::new_from_impl(CountHash {
|
||||
signature: Signature::one_of(
|
||||
vec![TypeSignature::VariadicAny, TypeSignature::Nullary],
|
||||
Volatility::Immutable,
|
||||
),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CountHash {
|
||||
signature: Signature,
|
||||
}
|
||||
|
||||
impl AggregateUDFImpl for CountHash {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"count_hash"
|
||||
}
|
||||
|
||||
fn signature(&self) -> &Signature {
|
||||
&self.signature
|
||||
}
|
||||
|
||||
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
|
||||
Ok(DataType::Int64)
|
||||
}
|
||||
|
||||
fn is_nullable(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn state_fields(&self, args: StateFieldsArgs) -> Result<Vec<Field>> {
|
||||
Ok(vec![Field::new_list(
|
||||
format_state_name(args.name, "count_hash"),
|
||||
Field::new_list_field(DataType::UInt64, true),
|
||||
// For count_hash accumulator, null list item stands for an
|
||||
// empty value set (i.e., all NULL value so far for that group).
|
||||
true,
|
||||
)])
|
||||
}
|
||||
|
||||
fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
|
||||
if acc_args.exprs.len() > 1 {
|
||||
return not_impl_err!("count_hash with multiple arguments");
|
||||
}
|
||||
|
||||
Ok(Box::new(CountHashAccumulator {
|
||||
values: HashSet::default(),
|
||||
random_state: RandomState::with_seeds(
|
||||
RANDOM_SEED_0,
|
||||
RANDOM_SEED_1,
|
||||
RANDOM_SEED_2,
|
||||
RANDOM_SEED_3,
|
||||
),
|
||||
batch_hashes: vec![],
|
||||
}))
|
||||
}
|
||||
|
||||
fn aliases(&self) -> &[String] {
|
||||
&[]
|
||||
}
|
||||
|
||||
fn groups_accumulator_supported(&self, _args: AccumulatorArgs) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn create_groups_accumulator(
|
||||
&self,
|
||||
args: AccumulatorArgs,
|
||||
) -> Result<Box<dyn GroupsAccumulator>> {
|
||||
if args.exprs.len() > 1 {
|
||||
return not_impl_err!("count_hash with multiple arguments");
|
||||
}
|
||||
|
||||
Ok(Box::new(CountHashGroupAccumulator::new()))
|
||||
}
|
||||
|
||||
fn reverse_expr(&self) -> ReversedUDAF {
|
||||
ReversedUDAF::Identical
|
||||
}
|
||||
|
||||
fn order_sensitivity(&self) -> AggregateOrderSensitivity {
|
||||
AggregateOrderSensitivity::Insensitive
|
||||
}
|
||||
|
||||
fn default_value(&self, _data_type: &DataType) -> Result<ScalarValue> {
|
||||
Ok(ScalarValue::Int64(Some(0)))
|
||||
}
|
||||
|
||||
fn set_monotonicity(&self, _data_type: &DataType) -> SetMonotonicity {
|
||||
SetMonotonicity::Increasing
|
||||
}
|
||||
}
|
||||
|
||||
/// GroupsAccumulator for `count_hash` aggregate function
|
||||
#[derive(Debug)]
|
||||
pub struct CountHashGroupAccumulator {
|
||||
/// One HashSet per group to track distinct values
|
||||
distinct_sets: Vec<HashSet<HashValueType, RandomState>>,
|
||||
random_state: RandomState,
|
||||
batch_hashes: Vec<HashValueType>,
|
||||
}
|
||||
|
||||
impl Default for CountHashGroupAccumulator {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl CountHashGroupAccumulator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
distinct_sets: vec![],
|
||||
random_state: RandomState::with_seeds(
|
||||
RANDOM_SEED_0,
|
||||
RANDOM_SEED_1,
|
||||
RANDOM_SEED_2,
|
||||
RANDOM_SEED_3,
|
||||
),
|
||||
batch_hashes: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure_sets(&mut self, total_num_groups: usize) {
|
||||
if self.distinct_sets.len() < total_num_groups {
|
||||
self.distinct_sets
|
||||
.resize_with(total_num_groups, HashSet::default);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl GroupsAccumulator for CountHashGroupAccumulator {
|
||||
fn update_batch(
|
||||
&mut self,
|
||||
values: &[ArrayRef],
|
||||
group_indices: &[usize],
|
||||
opt_filter: Option<&BooleanArray>,
|
||||
total_num_groups: usize,
|
||||
) -> Result<()> {
|
||||
assert_eq!(values.len(), 1, "count_hash expects a single argument");
|
||||
self.ensure_sets(total_num_groups);
|
||||
|
||||
let array = &values[0];
|
||||
self.batch_hashes.clear();
|
||||
self.batch_hashes.resize(array.len(), 0);
|
||||
let hashes = create_hashes(
|
||||
&[ArrayRef::clone(array)],
|
||||
&self.random_state,
|
||||
&mut self.batch_hashes,
|
||||
)?;
|
||||
|
||||
// Use a pattern similar to accumulate_indices to process rows
|
||||
// that are not null and pass the filter
|
||||
let nulls = array.logical_nulls();
|
||||
|
||||
match (nulls.as_ref(), opt_filter) {
|
||||
(None, None) => {
|
||||
// No nulls, no filter - process all rows
|
||||
for (row_idx, &group_idx) in group_indices.iter().enumerate() {
|
||||
self.distinct_sets[group_idx].insert(hashes[row_idx]);
|
||||
}
|
||||
}
|
||||
(Some(nulls), None) => {
|
||||
// Has nulls, no filter
|
||||
for (row_idx, (&group_idx, is_valid)) in
|
||||
group_indices.iter().zip(nulls.iter()).enumerate()
|
||||
{
|
||||
if is_valid {
|
||||
self.distinct_sets[group_idx].insert(hashes[row_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
(None, Some(filter)) => {
|
||||
// No nulls, has filter
|
||||
for (row_idx, (&group_idx, filter_value)) in
|
||||
group_indices.iter().zip(filter.iter()).enumerate()
|
||||
{
|
||||
if let Some(true) = filter_value {
|
||||
self.distinct_sets[group_idx].insert(hashes[row_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
(Some(nulls), Some(filter)) => {
|
||||
// Has nulls and filter
|
||||
let iter = filter
|
||||
.iter()
|
||||
.zip(group_indices.iter())
|
||||
.zip(nulls.iter())
|
||||
.enumerate();
|
||||
|
||||
for (row_idx, ((filter_value, &group_idx), is_valid)) in iter {
|
||||
if is_valid && filter_value == Some(true) {
|
||||
self.distinct_sets[group_idx].insert(hashes[row_idx]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&mut self, emit_to: EmitTo) -> Result<ArrayRef> {
|
||||
let distinct_sets: Vec<HashSet<u64, RandomState>> =
|
||||
emit_to.take_needed(&mut self.distinct_sets);
|
||||
|
||||
let counts = distinct_sets
|
||||
.iter()
|
||||
.map(|set| set.len() as i64)
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Arc::new(Int64Array::from(counts)))
|
||||
}
|
||||
|
||||
fn merge_batch(
|
||||
&mut self,
|
||||
values: &[ArrayRef],
|
||||
group_indices: &[usize],
|
||||
_opt_filter: Option<&BooleanArray>,
|
||||
total_num_groups: usize,
|
||||
) -> Result<()> {
|
||||
assert_eq!(
|
||||
values.len(),
|
||||
1,
|
||||
"count_hash merge expects a single state array"
|
||||
);
|
||||
self.ensure_sets(total_num_groups);
|
||||
|
||||
let list_array = as_list_array(&values[0])?;
|
||||
|
||||
// For each group in the incoming batch
|
||||
for (i, &group_idx) in group_indices.iter().enumerate() {
|
||||
if i < list_array.len() {
|
||||
let inner_array = list_array.value(i);
|
||||
let inner_array = inner_array.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
// Add each value to our set for this group
|
||||
for j in 0..inner_array.len() {
|
||||
if !inner_array.is_null(j) {
|
||||
self.distinct_sets[group_idx].insert(inner_array.value(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn state(&mut self, emit_to: EmitTo) -> Result<Vec<ArrayRef>> {
|
||||
let distinct_sets: Vec<HashSet<u64, RandomState>> =
|
||||
emit_to.take_needed(&mut self.distinct_sets);
|
||||
|
||||
let mut offsets = Vec::with_capacity(distinct_sets.len() + 1);
|
||||
offsets.push(0);
|
||||
let mut curr_len = 0i32;
|
||||
|
||||
let mut value_iter = distinct_sets
|
||||
.into_iter()
|
||||
.flat_map(|set| {
|
||||
// build offset
|
||||
curr_len += set.len() as i32;
|
||||
offsets.push(curr_len);
|
||||
// convert into iter
|
||||
set.into_iter()
|
||||
})
|
||||
.peekable();
|
||||
let data_array: ArrayRef = if value_iter.peek().is_none() {
|
||||
arrow::array::new_empty_array(&DataType::UInt64) as _
|
||||
} else {
|
||||
Arc::new(UInt64Array::from_iter_values(value_iter))
|
||||
};
|
||||
let offset_buffer = OffsetBuffer::new(ScalarBuffer::from(offsets));
|
||||
|
||||
let list_array = ListArray::new(
|
||||
Arc::new(Field::new_list_field(DataType::UInt64, true)),
|
||||
offset_buffer,
|
||||
data_array,
|
||||
None,
|
||||
);
|
||||
|
||||
Ok(vec![Arc::new(list_array) as _])
|
||||
}
|
||||
|
||||
fn convert_to_state(
|
||||
&self,
|
||||
values: &[ArrayRef],
|
||||
opt_filter: Option<&BooleanArray>,
|
||||
) -> Result<Vec<ArrayRef>> {
|
||||
// For a single hash value per row, create a list array with that value
|
||||
assert_eq!(values.len(), 1, "count_hash expects a single argument");
|
||||
let values = ArrayRef::clone(&values[0]);
|
||||
|
||||
let offsets = OffsetBuffer::new(ScalarBuffer::from_iter(0..values.len() as i32 + 1));
|
||||
let nulls = filtered_null_mask(opt_filter, &values);
|
||||
let list_array = ListArray::new(
|
||||
Arc::new(Field::new_list_field(DataType::UInt64, true)),
|
||||
offsets,
|
||||
values,
|
||||
nulls,
|
||||
);
|
||||
|
||||
Ok(vec![Arc::new(list_array)])
|
||||
}
|
||||
|
||||
fn supports_convert_to_state(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
// Base size of the struct
|
||||
let mut size = size_of::<Self>();
|
||||
|
||||
// Size of the vector holding the HashSets
|
||||
size += size_of::<Vec<HashSet<HashValueType, RandomState>>>()
|
||||
+ self.distinct_sets.capacity() * size_of::<HashSet<HashValueType, RandomState>>();
|
||||
|
||||
// Estimate HashSet contents size more efficiently
|
||||
// Instead of iterating through all values which is expensive, use an approximation
|
||||
for set in &self.distinct_sets {
|
||||
// Base size of the HashSet
|
||||
size += set.capacity() * size_of::<HashValueType>();
|
||||
}
|
||||
|
||||
size
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct CountHashAccumulator {
|
||||
values: HashSet<HashValueType, RandomState>,
|
||||
random_state: RandomState,
|
||||
batch_hashes: Vec<HashValueType>,
|
||||
}
|
||||
|
||||
impl CountHashAccumulator {
|
||||
// calculating the size for fixed length values, taking first batch size *
|
||||
// number of batches.
|
||||
fn fixed_size(&self) -> usize {
|
||||
size_of_val(self) + (size_of::<HashValueType>() * self.values.capacity())
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for CountHashAccumulator {
|
||||
/// Returns the distinct values seen so far as (one element) ListArray.
|
||||
fn state(&mut self) -> Result<Vec<ScalarValue>> {
|
||||
let values = self.values.iter().cloned().collect::<Vec<_>>();
|
||||
let arr = Arc::new(UInt64Array::from(values)) as _;
|
||||
let list_scalar = SingleRowListArrayBuilder::new(arr).build_list_scalar();
|
||||
Ok(vec![list_scalar])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let arr = &values[0];
|
||||
if arr.data_type() == &DataType::Null {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
self.batch_hashes.clear();
|
||||
self.batch_hashes.resize(arr.len(), 0);
|
||||
let hashes = create_hashes(
|
||||
&[ArrayRef::clone(arr)],
|
||||
&self.random_state,
|
||||
&mut self.batch_hashes,
|
||||
)?;
|
||||
for hash in hashes.as_slice() {
|
||||
self.values.insert(*hash);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Merges multiple sets of distinct values into the current set.
|
||||
///
|
||||
/// The input to this function is a `ListArray` with **multiple** rows,
|
||||
/// where each row contains the values from a partial aggregate's phase (e.g.
|
||||
/// the result of calling `Self::state` on multiple accumulators).
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
assert_eq!(states.len(), 1, "array_agg states must be singleton!");
|
||||
let array = &states[0];
|
||||
let list_array = array.as_list::<i32>();
|
||||
for inner_array in list_array.iter() {
|
||||
let Some(inner_array) = inner_array else {
|
||||
return internal_err!(
|
||||
"Intermediate results of count_hash should always be non null"
|
||||
);
|
||||
};
|
||||
let hash_array = inner_array.as_any().downcast_ref::<UInt64Array>().unwrap();
|
||||
for i in 0..hash_array.len() {
|
||||
self.values.insert(hash_array.value(i));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&mut self) -> Result<ScalarValue> {
|
||||
Ok(ScalarValue::Int64(Some(self.values.len() as i64)))
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
self.fixed_size()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use datatypes::arrow::array::{Array, BooleanArray, Int32Array, Int64Array};

    use super::*;

    /// Builds a row accumulator hashing with the fixed `RANDOM_SEED_*` seeds.
    fn create_test_accumulator() -> CountHashAccumulator {
        let random_state = RandomState::with_seeds(
            RANDOM_SEED_0,
            RANDOM_SEED_1,
            RANDOM_SEED_2,
            RANDOM_SEED_3,
        );

        CountHashAccumulator {
            values: HashSet::default(),
            random_state,
            batch_hashes: Vec::new(),
        }
    }

    /// Builds an empty groups accumulator.
    fn create_test_group_accumulator() -> CountHashGroupAccumulator {
        CountHashGroupAccumulator::new()
    }

    /// Evaluates all groups of `acc` and returns their counts in group order.
    fn evaluate_all(acc: &mut CountHashGroupAccumulator) -> Result<Vec<i64>> {
        let evaluated = acc.evaluate(EmitTo::All)?;
        let counts = evaluated.as_any().downcast_ref::<Int64Array>().unwrap();
        Ok((0..counts.len()).map(|idx| counts.value(idx)).collect())
    }

    #[test]
    fn test_count_hash_accumulator() -> Result<()> {
        // (input rows, expected count), each case on a fresh accumulator:
        // some data, empty data, only nulls.
        let cases: Vec<(Vec<Option<i32>>, i64)> = vec![
            (vec![Some(1), Some(2), Some(3), Some(1), Some(2), None], 4),
            (vec![], 0),
            (vec![None, None, None], 1),
        ];

        for (rows, expected) in cases {
            let mut acc = create_test_accumulator();
            let array: ArrayRef = Arc::new(Int32Array::from(rows));
            acc.update_batch(&[array])?;
            assert_eq!(acc.evaluate()?, ScalarValue::Int64(Some(expected)));
        }

        Ok(())
    }

    #[test]
    fn test_count_hash_accumulator_merge() -> Result<()> {
        // Two independent accumulators whose inputs overlap in the value 3.
        let mut partial_states = Vec::new();
        for rows in [vec![Some(1), Some(2), Some(3)], vec![Some(3), Some(4), Some(5)]] {
            let mut acc = create_test_accumulator();
            let array: ArrayRef = Arc::new(Int32Array::from(rows));
            acc.update_batch(&[array])?;
            partial_states.push(acc.state()?);
        }

        // Merge both states, one after the other, into a new accumulator.
        let mut merged = create_test_accumulator();
        for state in &partial_states {
            let state_array = state[0].to_array()?;
            merged.merge_batch(&[state_array])?;
        }

        // Distinct values are {1, 2, 3, 4, 5}, so count is 5
        assert_eq!(merged.evaluate()?, ScalarValue::Int64(Some(5)));

        Ok(())
    }

    #[test]
    fn test_count_hash_group_accumulator() -> Result<()> {
        let mut acc = create_test_group_accumulator();
        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 1, 3, 2, 4, 5]));
        let group_indices = [0usize, 1, 0, 0, 1, 2, 0];

        acc.update_batch(&[values], &group_indices, None, 3)?;

        // Group 0: {1, 3, 5} -> 3
        // Group 1: {2} -> 1
        // Group 2: {4} -> 1
        assert_eq!(evaluate_all(&mut acc)?, vec![3, 1, 1]);

        Ok(())
    }

    #[test]
    fn test_count_hash_group_accumulator_with_filter() -> Result<()> {
        let mut acc = create_test_group_accumulator();
        let values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]));
        let group_indices = [0usize, 0, 1, 1, 2, 2];
        let filter = BooleanArray::from(vec![true, false, true, true, false, true]);

        acc.update_batch(&[values], &group_indices, Some(&filter), 3)?;

        // Group 0: {1} (2 is filtered out) -> 1
        // Group 1: {3, 4} -> 2
        // Group 2: {6} (5 is filtered out) -> 1
        assert_eq!(evaluate_all(&mut acc)?, vec![1, 2, 1]);

        Ok(())
    }

    #[test]
    fn test_count_hash_group_accumulator_merge() -> Result<()> {
        // Source accumulator: group 0 -> {1, 2}, group 1 -> {3, 4}
        let mut source = create_test_group_accumulator();
        let source_values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
        source.update_batch(&[source_values], &[0, 0, 1, 1], None, 2)?;
        let source_state = source.state(EmitTo::All)?;

        // Target accumulator: group 0 -> {1}, group 1 -> {3}, group 2 -> {5, 6}
        let mut target = create_test_group_accumulator();
        let target_values: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 1, 3]));
        target.update_batch(&[target_values], &[2, 2, 0, 1], None, 3)?;

        // Source group 0 goes into target group 0, source group 1 into
        // target group 2.
        target.merge_batch(&source_state, &[0, 2], None, 3)?;

        // Final state of the target:
        // Group 0: {1} U {1, 2} -> {1, 2}, count = 2
        // Group 1: {3}, count = 1
        // Group 2: {5, 6} U {3, 4} -> {3, 4, 5, 6}, count = 4
        assert_eq!(evaluate_all(&mut target)?, vec![2, 1, 4]);

        Ok(())
    }

    #[test]
    fn test_size() {
        // Just test it doesn't crash and returns a value.
        let acc = create_test_group_accumulator();
        assert!(acc.size() > 0);
    }
}
|
||||
@@ -21,6 +21,7 @@ use once_cell::sync::Lazy;
|
||||
|
||||
use crate::admin::AdminFunction;
|
||||
use crate::aggrs::approximate::ApproximateFunction;
|
||||
use crate::aggrs::count_hash::CountHash;
|
||||
use crate::aggrs::vector::VectorFunction as VectorAggrFunction;
|
||||
use crate::function::{AsyncFunctionRef, Function, FunctionRef};
|
||||
use crate::function_factory::ScalarFunctionFactory;
|
||||
@@ -144,6 +145,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
// Approximate functions
|
||||
ApproximateFunction::register(&function_registry);
|
||||
|
||||
// CountHash function
|
||||
CountHash::register(&function_registry);
|
||||
|
||||
Arc::new(function_registry)
|
||||
});
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ impl MockDatanodeHandler for () {
|
||||
Ok(RegionResponse {
|
||||
affected_rows: 0,
|
||||
extensions: Default::default(),
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -50,7 +50,11 @@ use crate::key::{DeserializedValueWithBytes, TableMetadataManagerRef};
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::rpc::ddl::trigger::CreateTriggerTask;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::rpc::ddl::trigger::DropTriggerTask;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::rpc::ddl::DdlTask::CreateTrigger;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use crate::rpc::ddl::DdlTask::DropTrigger;
|
||||
use crate::rpc::ddl::DdlTask::{
|
||||
AlterDatabase, AlterLogicalTables, AlterTable, CreateDatabase, CreateFlow, CreateLogicalTables,
|
||||
CreateTable, CreateView, DropDatabase, DropFlow, DropLogicalTables, DropTable, DropView,
|
||||
@@ -91,6 +95,14 @@ pub trait TriggerDdlManager: Send + Sync {
|
||||
query_context: QueryContext,
|
||||
) -> Result<SubmitDdlTaskResponse>;
|
||||
|
||||
async fn drop_trigger(
|
||||
&self,
|
||||
drop_trigger_task: DropTriggerTask,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
ddl_context: DdlContext,
|
||||
query_context: QueryContext,
|
||||
) -> Result<SubmitDdlTaskResponse>;
|
||||
|
||||
fn as_any(&self) -> &dyn std::any::Any;
|
||||
}
|
||||
|
||||
@@ -125,13 +137,12 @@ impl DdlManager {
|
||||
ddl_context: DdlContext,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
register_loaders: bool,
|
||||
#[cfg(feature = "enterprise")] trigger_ddl_manager: Option<TriggerDdlManagerRef>,
|
||||
) -> Result<Self> {
|
||||
let manager = Self {
|
||||
ddl_context,
|
||||
procedure_manager,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager,
|
||||
trigger_ddl_manager: None,
|
||||
};
|
||||
if register_loaders {
|
||||
manager.register_loaders()?;
|
||||
@@ -139,6 +150,15 @@ impl DdlManager {
|
||||
Ok(manager)
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
pub fn with_trigger_ddl_manager(
|
||||
mut self,
|
||||
trigger_ddl_manager: Option<TriggerDdlManagerRef>,
|
||||
) -> Self {
|
||||
self.trigger_ddl_manager = trigger_ddl_manager;
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns the [TableMetadataManagerRef].
|
||||
pub fn table_metadata_manager(&self) -> &TableMetadataManagerRef {
|
||||
&self.ddl_context.table_metadata_manager
|
||||
@@ -640,6 +660,28 @@ async fn handle_drop_flow_task(
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
async fn handle_drop_trigger_task(
|
||||
ddl_manager: &DdlManager,
|
||||
drop_trigger_task: DropTriggerTask,
|
||||
query_context: QueryContext,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let Some(m) = ddl_manager.trigger_ddl_manager.as_ref() else {
|
||||
return UnsupportedSnafu {
|
||||
operation: "drop trigger",
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
m.drop_trigger(
|
||||
drop_trigger_task,
|
||||
ddl_manager.procedure_manager.clone(),
|
||||
ddl_manager.ddl_context.clone(),
|
||||
query_context,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_drop_view_task(
|
||||
ddl_manager: &DdlManager,
|
||||
drop_view_task: DropViewTask,
|
||||
@@ -827,6 +869,11 @@ impl ProcedureExecutor for DdlManager {
|
||||
handle_create_flow_task(self, create_flow_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
|
||||
CreateView(create_view_task) => {
|
||||
handle_create_view_task(self, create_view_task).await
|
||||
}
|
||||
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
|
||||
#[cfg(feature = "enterprise")]
|
||||
CreateTrigger(create_trigger_task) => {
|
||||
handle_create_trigger_task(
|
||||
@@ -836,11 +883,11 @@ impl ProcedureExecutor for DdlManager {
|
||||
)
|
||||
.await
|
||||
}
|
||||
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
|
||||
CreateView(create_view_task) => {
|
||||
handle_create_view_task(self, create_view_task).await
|
||||
#[cfg(feature = "enterprise")]
|
||||
DropTrigger(drop_trigger_task) => {
|
||||
handle_drop_trigger_task(self, drop_trigger_task, request.query_context.into())
|
||||
.await
|
||||
}
|
||||
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
|
||||
}
|
||||
}
|
||||
.trace(span)
|
||||
@@ -964,8 +1011,6 @@ mod tests {
|
||||
},
|
||||
procedure_manager.clone(),
|
||||
true,
|
||||
#[cfg(feature = "enterprise")]
|
||||
None,
|
||||
);
|
||||
|
||||
let expected_loaders = vec![
|
||||
|
||||
@@ -48,6 +48,11 @@ impl TableRouteKey {
|
||||
pub fn new(table_id: TableId) -> Self {
|
||||
Self { table_id }
|
||||
}
|
||||
|
||||
/// Returns the range prefix of the table route key.
|
||||
pub fn range_prefix() -> Vec<u8> {
|
||||
format!("{}/", TABLE_ROUTE_PREFIX).into_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
|
||||
|
||||
@@ -69,6 +69,8 @@ pub enum DdlTask {
|
||||
AlterDatabase(AlterDatabaseTask),
|
||||
CreateFlow(CreateFlowTask),
|
||||
DropFlow(DropFlowTask),
|
||||
#[cfg(feature = "enterprise")]
|
||||
DropTrigger(trigger::DropTriggerTask),
|
||||
CreateView(CreateViewTask),
|
||||
DropView(DropViewTask),
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -259,6 +261,18 @@ impl TryFrom<Task> for DdlTask {
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
Task::DropTriggerTask(drop_trigger) => {
|
||||
#[cfg(feature = "enterprise")]
|
||||
return Ok(DdlTask::DropTrigger(drop_trigger.try_into()?));
|
||||
#[cfg(not(feature = "enterprise"))]
|
||||
{
|
||||
let _ = drop_trigger;
|
||||
crate::error::UnsupportedSnafu {
|
||||
operation: "drop trigger",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -311,6 +325,8 @@ impl TryFrom<SubmitDdlTaskRequest> for PbDdlTaskRequest {
|
||||
DdlTask::DropView(task) => Task::DropViewTask(task.into()),
|
||||
#[cfg(feature = "enterprise")]
|
||||
DdlTask::CreateTrigger(task) => Task::CreateTriggerTask(task.into()),
|
||||
#[cfg(feature = "enterprise")]
|
||||
DdlTask::DropTrigger(task) => Task::DropTriggerTask(task.into()),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::CreateTriggerTask as PbCreateTriggerTask;
|
||||
use api::v1::meta::{
|
||||
CreateTriggerTask as PbCreateTriggerTask, DropTriggerTask as PbDropTriggerTask,
|
||||
};
|
||||
use api::v1::notify_channel::ChannelType as PbChannelType;
|
||||
use api::v1::{
|
||||
CreateTriggerExpr, NotifyChannel as PbNotifyChannel, WebhookOptions as PbWebhookOptions,
|
||||
CreateTriggerExpr as PbCreateTriggerExpr, DropTriggerExpr as PbDropTriggerExpr,
|
||||
NotifyChannel as PbNotifyChannel, WebhookOptions as PbWebhookOptions,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
@@ -56,7 +59,7 @@ impl From<CreateTriggerTask> for PbCreateTriggerTask {
|
||||
.map(PbNotifyChannel::from)
|
||||
.collect();
|
||||
|
||||
let expr = CreateTriggerExpr {
|
||||
let expr = PbCreateTriggerExpr {
|
||||
catalog_name: task.catalog_name,
|
||||
trigger_name: task.trigger_name,
|
||||
create_if_not_exists: task.if_not_exists,
|
||||
@@ -139,17 +142,86 @@ impl TryFrom<PbNotifyChannel> for NotifyChannel {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DropTriggerTask {
|
||||
pub catalog_name: String,
|
||||
pub trigger_name: String,
|
||||
pub drop_if_exists: bool,
|
||||
}
|
||||
|
||||
impl From<DropTriggerTask> for PbDropTriggerTask {
|
||||
fn from(task: DropTriggerTask) -> Self {
|
||||
let expr = PbDropTriggerExpr {
|
||||
catalog_name: task.catalog_name,
|
||||
trigger_name: task.trigger_name,
|
||||
drop_if_exists: task.drop_if_exists,
|
||||
};
|
||||
|
||||
PbDropTriggerTask {
|
||||
drop_trigger: Some(expr),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<PbDropTriggerTask> for DropTriggerTask {
|
||||
type Error = error::Error;
|
||||
|
||||
fn try_from(task: PbDropTriggerTask) -> Result<Self> {
|
||||
let expr = task.drop_trigger.context(error::InvalidProtoMsgSnafu {
|
||||
err_msg: "expected drop_trigger",
|
||||
})?;
|
||||
|
||||
Ok(DropTriggerTask {
|
||||
catalog_name: expr.catalog_name,
|
||||
trigger_name: expr.trigger_name,
|
||||
drop_if_exists: expr.drop_if_exists,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl DdlTask {
|
||||
/// Creates a [`DdlTask`] to create a trigger.
|
||||
pub fn new_create_trigger(expr: CreateTriggerTask) -> Self {
|
||||
DdlTask::CreateTrigger(expr)
|
||||
}
|
||||
|
||||
/// Creates a [`DdlTask`] to drop a trigger.
|
||||
pub fn new_drop_trigger(expr: DropTriggerTask) -> Self {
|
||||
DdlTask::DropTrigger(expr)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_convert_drop_trigger_task() {
|
||||
let original = DropTriggerTask {
|
||||
catalog_name: "test_catalog".to_string(),
|
||||
trigger_name: "test_trigger".to_string(),
|
||||
drop_if_exists: true,
|
||||
};
|
||||
|
||||
let pb_task: PbDropTriggerTask = original.clone().into();
|
||||
|
||||
let expr = pb_task.drop_trigger.as_ref().unwrap();
|
||||
assert_eq!(expr.catalog_name, "test_catalog");
|
||||
assert_eq!(expr.trigger_name, "test_trigger");
|
||||
assert!(expr.drop_if_exists);
|
||||
|
||||
let round_tripped = DropTriggerTask::try_from(pb_task).unwrap();
|
||||
|
||||
assert_eq!(original.catalog_name, round_tripped.catalog_name);
|
||||
assert_eq!(original.trigger_name, round_tripped.trigger_name);
|
||||
assert_eq!(original.drop_if_exists, round_tripped.drop_if_exists);
|
||||
|
||||
// Test invalid case where drop_trigger is None
|
||||
let invalid_task = PbDropTriggerTask { drop_trigger: None };
|
||||
let result = DropTriggerTask::try_from(invalid_task);
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_create_trigger_task() {
|
||||
let original = CreateTriggerTask {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
pub mod columnar_value;
|
||||
pub mod error;
|
||||
mod function;
|
||||
pub mod function;
|
||||
pub mod logical_plan;
|
||||
pub mod prelude;
|
||||
pub mod request;
|
||||
|
||||
@@ -22,7 +22,7 @@ once_cell.workspace = true
|
||||
opentelemetry = { version = "0.21.0", default-features = false, features = [
|
||||
"trace",
|
||||
] }
|
||||
opentelemetry-otlp = { version = "0.14.0", features = ["tokio"] }
|
||||
opentelemetry-otlp = { version = "0.14.0", features = ["tokio", "http-proto", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.13.0"
|
||||
opentelemetry_sdk = { version = "0.21.0", features = ["rt-tokio"] }
|
||||
parking_lot.workspace = true
|
||||
|
||||
@@ -20,7 +20,7 @@ use std::time::Duration;
|
||||
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use opentelemetry::{global, KeyValue};
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use opentelemetry_otlp::{Protocol, SpanExporterBuilder, WithExportConfig};
|
||||
use opentelemetry_sdk::propagation::TraceContextPropagator;
|
||||
use opentelemetry_sdk::trace::Sampler;
|
||||
use opentelemetry_semantic_conventions::resource;
|
||||
@@ -36,7 +36,11 @@ use tracing_subscriber::{filter, EnvFilter, Registry};
|
||||
|
||||
use crate::tracing_sampler::{create_sampler, TracingSampleOptions};
|
||||
|
||||
pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317";
|
||||
/// The default endpoint when use gRPC exporter protocol.
|
||||
pub const DEFAULT_OTLP_GRPC_ENDPOINT: &str = "http://localhost:4317";
|
||||
|
||||
/// The default endpoint when use HTTP exporter protocol.
|
||||
pub const DEFAULT_OTLP_HTTP_ENDPOINT: &str = "http://localhost:4318";
|
||||
|
||||
/// The default logs directory.
|
||||
pub const DEFAULT_LOGGING_DIR: &str = "logs";
|
||||
@@ -67,11 +71,25 @@ pub struct LoggingOptions {
|
||||
/// Whether to enable tracing with OTLP. Default is false.
|
||||
pub enable_otlp_tracing: bool,
|
||||
|
||||
/// The endpoint of OTLP. Default is "http://localhost:4317".
|
||||
/// The endpoint of OTLP. Default is "http://localhost:4318".
|
||||
pub otlp_endpoint: Option<String>,
|
||||
|
||||
/// The tracing sample ratio.
|
||||
pub tracing_sample_ratio: Option<TracingSampleOptions>,
|
||||
|
||||
/// The protocol of OTLP export.
|
||||
pub otlp_export_protocol: Option<OtlpExportProtocol>,
|
||||
}
|
||||
|
||||
/// The protocol of OTLP export.
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum OtlpExportProtocol {
|
||||
/// GRPC protocol.
|
||||
Grpc,
|
||||
|
||||
/// HTTP protocol with binary protobuf.
|
||||
Http,
|
||||
}
|
||||
|
||||
/// The options of slow query.
|
||||
@@ -147,6 +165,7 @@ impl Default for LoggingOptions {
|
||||
append_stdout: true,
|
||||
// Rotation hourly, 24 files per day, keeps info log files of 30 days
|
||||
max_log_files: 720,
|
||||
otlp_export_protocol: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -388,22 +407,9 @@ pub fn init_global_logging(
|
||||
KeyValue::new(resource::PROCESS_PID, std::process::id().to_string()),
|
||||
]));
|
||||
|
||||
let exporter = opentelemetry_otlp::new_exporter().tonic().with_endpoint(
|
||||
opts.otlp_endpoint
|
||||
.as_ref()
|
||||
.map(|e| {
|
||||
if e.starts_with("http") {
|
||||
e.to_string()
|
||||
} else {
|
||||
format!("http://{}", e)
|
||||
}
|
||||
})
|
||||
.unwrap_or(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
);
|
||||
|
||||
let tracer = opentelemetry_otlp::new_pipeline()
|
||||
.tracing()
|
||||
.with_exporter(exporter)
|
||||
.with_exporter(build_otlp_exporter(opts))
|
||||
.with_trace_config(trace_config)
|
||||
.install_batch(opentelemetry_sdk::runtime::Tokio)
|
||||
.expect("otlp tracer install failed");
|
||||
@@ -421,6 +427,42 @@ pub fn init_global_logging(
|
||||
guards
|
||||
}
|
||||
|
||||
fn build_otlp_exporter(opts: &LoggingOptions) -> SpanExporterBuilder {
|
||||
let protocol = opts
|
||||
.otlp_export_protocol
|
||||
.clone()
|
||||
.unwrap_or(OtlpExportProtocol::Http);
|
||||
|
||||
let endpoint = opts
|
||||
.otlp_endpoint
|
||||
.as_ref()
|
||||
.map(|e| {
|
||||
if e.starts_with("http") {
|
||||
e.to_string()
|
||||
} else {
|
||||
format!("http://{}", e)
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|| match protocol {
|
||||
OtlpExportProtocol::Grpc => DEFAULT_OTLP_GRPC_ENDPOINT.to_string(),
|
||||
OtlpExportProtocol::Http => DEFAULT_OTLP_HTTP_ENDPOINT.to_string(),
|
||||
});
|
||||
|
||||
match protocol {
|
||||
OtlpExportProtocol::Grpc => SpanExporterBuilder::Tonic(
|
||||
opentelemetry_otlp::new_exporter()
|
||||
.tonic()
|
||||
.with_endpoint(endpoint),
|
||||
),
|
||||
OtlpExportProtocol::Http => SpanExporterBuilder::Http(
|
||||
opentelemetry_otlp::new_exporter()
|
||||
.http()
|
||||
.with_endpoint(endpoint)
|
||||
.with_protocol(Protocol::HttpBinary),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_slow_query_logger<S>(
|
||||
opts: &LoggingOptions,
|
||||
slow_query_opts: Option<&SlowQueryOptions>,
|
||||
|
||||
@@ -14,10 +14,7 @@
|
||||
|
||||
//! Datanode configurations
|
||||
|
||||
use core::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::secrets::{ExposeSecret, SecretString};
|
||||
use common_config::{Configurable, DEFAULT_DATA_HOME};
|
||||
pub use common_procedure::options::ProcedureConfig;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
@@ -27,6 +24,7 @@ use file_engine::config::EngineConfig as FileEngineConfig;
|
||||
use meta_client::MetaClientOptions;
|
||||
use metric_engine::config::EngineConfig as MetricEngineConfig;
|
||||
use mito2::config::MitoConfig;
|
||||
pub(crate) use object_store::config::ObjectStoreConfig;
|
||||
use query::options::QueryOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::export_metrics::ExportMetricsOption;
|
||||
@@ -36,53 +34,6 @@ use servers::http::HttpOptions;
|
||||
|
||||
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(5);
|
||||
|
||||
/// Object storage config
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ObjectStoreConfig {
|
||||
File(FileConfig),
|
||||
S3(S3Config),
|
||||
Oss(OssConfig),
|
||||
Azblob(AzblobConfig),
|
||||
Gcs(GcsConfig),
|
||||
}
|
||||
|
||||
impl ObjectStoreConfig {
|
||||
/// Returns the object storage type name, such as `S3`, `Oss` etc.
|
||||
pub fn provider_name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::File(_) => "File",
|
||||
Self::S3(_) => "S3",
|
||||
Self::Oss(_) => "Oss",
|
||||
Self::Azblob(_) => "Azblob",
|
||||
Self::Gcs(_) => "Gcs",
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when it's a remote object storage such as AWS s3 etc.
|
||||
pub fn is_object_storage(&self) -> bool {
|
||||
!matches!(self, Self::File(_))
|
||||
}
|
||||
|
||||
/// Returns the object storage configuration name, return the provider name if it's empty.
|
||||
pub fn config_name(&self) -> &str {
|
||||
let name = match self {
|
||||
// file storage doesn't support name
|
||||
Self::File(_) => self.provider_name(),
|
||||
Self::S3(s3) => &s3.name,
|
||||
Self::Oss(oss) => &oss.name,
|
||||
Self::Azblob(az) => &az.name,
|
||||
Self::Gcs(gcs) => &gcs.name,
|
||||
};
|
||||
|
||||
if name.trim().is_empty() {
|
||||
return self.provider_name();
|
||||
}
|
||||
|
||||
name
|
||||
}
|
||||
}
|
||||
|
||||
/// Storage engine config
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(default)]
|
||||
@@ -112,252 +63,6 @@ impl Default for StorageConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Default, Deserialize, Eq, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct FileConfig {}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct ObjectStorageCacheConfig {
|
||||
/// The local file cache directory
|
||||
pub cache_path: Option<String>,
|
||||
/// The cache capacity in bytes
|
||||
pub cache_capacity: Option<ReadableSize>,
|
||||
}
|
||||
|
||||
/// The http client options to the storage.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct HttpClientConfig {
|
||||
/// The maximum idle connection per host allowed in the pool.
|
||||
pub(crate) pool_max_idle_per_host: u32,
|
||||
|
||||
/// The timeout for only the connect phase of a http client.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) connect_timeout: Duration,
|
||||
|
||||
/// The total request timeout, applied from when the request starts connecting until the response body has finished.
|
||||
/// Also considered a total deadline.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) timeout: Duration,
|
||||
|
||||
/// The timeout for idle sockets being kept-alive.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) pool_idle_timeout: Duration,
|
||||
|
||||
/// Skip SSL certificate validation (insecure)
|
||||
pub skip_ssl_validation: bool,
|
||||
}
|
||||
|
||||
impl Default for HttpClientConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
pool_max_idle_per_host: 1024,
|
||||
connect_timeout: Duration::from_secs(30),
|
||||
timeout: Duration::from_secs(30),
|
||||
pool_idle_timeout: Duration::from_secs(90),
|
||||
skip_ssl_validation: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct S3Config {
|
||||
pub name: String,
|
||||
pub bucket: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_id: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub secret_access_key: SecretString,
|
||||
pub endpoint: Option<String>,
|
||||
pub region: Option<String>,
|
||||
/// Enable virtual host style so that opendal will send API requests in virtual host style instead of path style.
|
||||
/// By default, opendal will send API to https://s3.us-east-1.amazonaws.com/bucket_name
|
||||
/// Enabled, opendal will send API to https://bucket_name.s3.us-east-1.amazonaws.com
|
||||
pub enable_virtual_host_style: bool,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for S3Config {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.bucket == other.bucket
|
||||
&& self.root == other.root
|
||||
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
|
||||
&& self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.region == other.region
|
||||
&& self.enable_virtual_host_style == other.enable_virtual_host_style
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct OssConfig {
|
||||
pub name: String,
|
||||
pub bucket: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_id: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_secret: SecretString,
|
||||
pub endpoint: String,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for OssConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.bucket == other.bucket
|
||||
&& self.root == other.root
|
||||
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
|
||||
&& self.access_key_secret.expose_secret() == other.access_key_secret.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct AzblobConfig {
|
||||
pub name: String,
|
||||
pub container: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub account_name: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub account_key: SecretString,
|
||||
pub endpoint: String,
|
||||
pub sas_token: Option<String>,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for AzblobConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.container == other.container
|
||||
&& self.root == other.root
|
||||
&& self.account_name.expose_secret() == other.account_name.expose_secret()
|
||||
&& self.account_key.expose_secret() == other.account_key.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.sas_token == other.sas_token
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct GcsConfig {
|
||||
pub name: String,
|
||||
pub root: String,
|
||||
pub bucket: String,
|
||||
pub scope: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub credential_path: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub credential: SecretString,
|
||||
pub endpoint: String,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for GcsConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.root == other.root
|
||||
&& self.bucket == other.bucket
|
||||
&& self.scope == other.scope
|
||||
&& self.credential_path.expose_secret() == other.credential_path.expose_secret()
|
||||
&& self.credential.expose_secret() == other.credential.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for S3Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
bucket: String::default(),
|
||||
root: String::default(),
|
||||
access_key_id: SecretString::from(String::default()),
|
||||
secret_access_key: SecretString::from(String::default()),
|
||||
enable_virtual_host_style: false,
|
||||
endpoint: Option::default(),
|
||||
region: Option::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for OssConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
bucket: String::default(),
|
||||
root: String::default(),
|
||||
access_key_id: SecretString::from(String::default()),
|
||||
access_key_secret: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for AzblobConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
container: String::default(),
|
||||
root: String::default(),
|
||||
account_name: SecretString::from(String::default()),
|
||||
account_key: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
sas_token: Option::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GcsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
root: String::default(),
|
||||
bucket: String::default(),
|
||||
scope: String::default(),
|
||||
credential_path: SecretString::from(String::default()),
|
||||
credential: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ObjectStoreConfig {
|
||||
fn default() -> Self {
|
||||
ObjectStoreConfig::File(FileConfig {})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct DatanodeOptions {
|
||||
@@ -467,37 +172,6 @@ mod tests {
|
||||
let _parsed: DatanodeOptions = toml::from_str(&toml_string).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_config_name() {
|
||||
let object_store_config = ObjectStoreConfig::default();
|
||||
assert_eq!("File", object_store_config.config_name());
|
||||
|
||||
let s3_config = ObjectStoreConfig::S3(S3Config::default());
|
||||
assert_eq!("S3", s3_config.config_name());
|
||||
assert_eq!("S3", s3_config.provider_name());
|
||||
|
||||
let s3_config = ObjectStoreConfig::S3(S3Config {
|
||||
name: "test".to_string(),
|
||||
..Default::default()
|
||||
});
|
||||
assert_eq!("test", s3_config.config_name());
|
||||
assert_eq!("S3", s3_config.provider_name());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_object_storage() {
|
||||
let store = ObjectStoreConfig::default();
|
||||
assert!(!store.is_object_storage());
|
||||
let s3_config = ObjectStoreConfig::S3(S3Config::default());
|
||||
assert!(s3_config.is_object_storage());
|
||||
let oss_config = ObjectStoreConfig::Oss(OssConfig::default());
|
||||
assert!(oss_config.is_object_storage());
|
||||
let gcs_config = ObjectStoreConfig::Gcs(GcsConfig::default());
|
||||
assert!(gcs_config.is_object_storage());
|
||||
let azblob_config = ObjectStoreConfig::Azblob(AzblobConfig::default());
|
||||
assert!(azblob_config.is_object_storage());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_secstr() {
|
||||
let toml_str = r#"
|
||||
|
||||
@@ -142,14 +142,6 @@ pub enum Error {
|
||||
source: Box<log_store::error::Error>,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to init backend"))]
|
||||
InitBackend {
|
||||
#[snafu(source)]
|
||||
error: object_store::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid SQL, error: {}", msg))]
|
||||
InvalidSql { msg: String },
|
||||
|
||||
@@ -387,6 +379,29 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize json"))]
|
||||
SerializeJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed object store operation"))]
|
||||
ObjectStore {
|
||||
source: object_store::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build cache store"))]
|
||||
BuildCacheStore {
|
||||
#[snafu(source)]
|
||||
error: object_store::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -439,8 +454,6 @@ impl ErrorExt for Error {
|
||||
|
||||
StartServer { source, .. } | ShutdownServer { source, .. } => source.status_code(),
|
||||
|
||||
InitBackend { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
OpenLogStore { source, .. } => source.status_code(),
|
||||
MetaClientInit { source, .. } => source.status_code(),
|
||||
UnsupportedOutput { .. } => StatusCode::Unsupported,
|
||||
@@ -457,6 +470,10 @@ impl ErrorExt for Error {
|
||||
StatusCode::RegionBusy
|
||||
}
|
||||
MissingCache { .. } => StatusCode::Internal,
|
||||
SerializeJson { .. } => StatusCode::Internal,
|
||||
|
||||
ObjectStore { source, .. } => source.status_code(),
|
||||
BuildCacheStore { .. } => StatusCode::StorageUnavailable,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,12 +20,14 @@ use std::time::Duration;
|
||||
|
||||
use api::region::RegionResponse;
|
||||
use api::v1::region::sync_request::ManifestInfo;
|
||||
use api::v1::region::{region_request, RegionResponse as RegionResponseV1, SyncRequest};
|
||||
use api::v1::region::{
|
||||
region_request, ListMetadataRequest, RegionResponse as RegionResponseV1, SyncRequest,
|
||||
};
|
||||
use api::v1::{ResponseHeader, Status};
|
||||
use arrow_flight::{FlightData, Ticket};
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::request::QueryRequest;
|
||||
use common_query::OutputData;
|
||||
@@ -47,6 +49,7 @@ pub use query::dummy_catalog::{
|
||||
DummyCatalogList, DummyTableProviderFactory, TableProviderFactoryRef,
|
||||
};
|
||||
use query::QueryEngineRef;
|
||||
use serde_json;
|
||||
use servers::error::{self as servers_error, ExecuteGrpcRequestSnafu, Result as ServerResult};
|
||||
use servers::grpc::flight::{FlightCraft, FlightRecordBatchStream, TonicStream};
|
||||
use servers::grpc::region_server::RegionServerHandler;
|
||||
@@ -71,10 +74,10 @@ use tonic::{Request, Response, Result as TonicResult};
|
||||
use crate::error::{
|
||||
self, BuildRegionRequestsSnafu, ConcurrentQueryLimiterClosedSnafu,
|
||||
ConcurrentQueryLimiterTimeoutSnafu, DataFusionSnafu, DecodeLogicalPlanSnafu,
|
||||
ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, HandleBatchDdlRequestSnafu,
|
||||
HandleBatchOpenRequestSnafu, HandleRegionRequestSnafu, NewPlanDecoderSnafu,
|
||||
RegionEngineNotFoundSnafu, RegionNotFoundSnafu, RegionNotReadySnafu, Result,
|
||||
StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
|
||||
ExecuteLogicalPlanSnafu, FindLogicalRegionsSnafu, GetRegionMetadataSnafu,
|
||||
HandleBatchDdlRequestSnafu, HandleBatchOpenRequestSnafu, HandleRegionRequestSnafu,
|
||||
NewPlanDecoderSnafu, RegionEngineNotFoundSnafu, RegionNotFoundSnafu, RegionNotReadySnafu,
|
||||
Result, SerializeJsonSnafu, StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu,
|
||||
};
|
||||
use crate::event_listener::RegionServerEventListenerRef;
|
||||
|
||||
@@ -138,12 +141,12 @@ impl RegionServer {
|
||||
|
||||
/// Finds the region's engine by its id. If the region is not ready, returns `None`.
|
||||
pub fn find_engine(&self, region_id: RegionId) -> Result<Option<RegionEngineRef>> {
|
||||
self.inner
|
||||
.get_engine(region_id, &RegionChange::None)
|
||||
.map(|x| match x {
|
||||
CurrentEngine::Engine(engine) => Some(engine),
|
||||
CurrentEngine::EarlyReturn(_) => None,
|
||||
})
|
||||
match self.inner.get_engine(region_id, &RegionChange::None) {
|
||||
Ok(CurrentEngine::Engine(engine)) => Ok(Some(engine)),
|
||||
Ok(CurrentEngine::EarlyReturn(_)) => Ok(None),
|
||||
Err(error::Error::RegionNotFound { .. }) => Ok(None),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
@@ -412,6 +415,7 @@ impl RegionServer {
|
||||
Ok(RegionResponse {
|
||||
affected_rows,
|
||||
extensions,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -441,6 +445,7 @@ impl RegionServer {
|
||||
Ok(RegionResponse {
|
||||
affected_rows,
|
||||
extensions,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -473,6 +478,48 @@ impl RegionServer {
|
||||
.map(|_| RegionResponse::new(AffectedRows::default()))
|
||||
}
|
||||
|
||||
/// Handles the ListMetadata request and retrieves metadata for specified regions.
|
||||
///
|
||||
/// Returns the results as a JSON-serialized list in the [RegionResponse]. It serializes
|
||||
/// non-existing regions as `null`.
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn handle_list_metadata_request(
|
||||
&self,
|
||||
request: &ListMetadataRequest,
|
||||
) -> Result<RegionResponse> {
|
||||
let mut region_metadatas = Vec::new();
|
||||
// Collect metadata for each region
|
||||
for region_id in &request.region_ids {
|
||||
let region_id = RegionId::from_u64(*region_id);
|
||||
// Get the engine.
|
||||
let Some(engine) = self.find_engine(region_id)? else {
|
||||
region_metadatas.push(None);
|
||||
continue;
|
||||
};
|
||||
|
||||
match engine.get_metadata(region_id).await {
|
||||
Ok(metadata) => region_metadatas.push(Some(metadata)),
|
||||
Err(err) => {
|
||||
if err.status_code() == StatusCode::RegionNotFound {
|
||||
region_metadatas.push(None);
|
||||
} else {
|
||||
Err(err).with_context(|_| GetRegionMetadataSnafu {
|
||||
engine: engine.name(),
|
||||
region_id,
|
||||
})?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serialize metadata to JSON
|
||||
let json_result = serde_json::to_vec(®ion_metadatas).context(SerializeJsonSnafu)?;
|
||||
|
||||
let response = RegionResponse::from_metadata(json_result);
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Sync region manifest and registers new opened logical regions.
|
||||
pub async fn sync_region(
|
||||
&self,
|
||||
@@ -504,6 +551,10 @@ impl RegionServerHandler for RegionServer {
|
||||
region_request::Body::Sync(sync_request) => {
|
||||
self.handle_sync_region_request(sync_request).await
|
||||
}
|
||||
region_request::Body::ListMetadata(list_metadata_request) => {
|
||||
self.handle_list_metadata_request(list_metadata_request)
|
||||
.await
|
||||
}
|
||||
_ => self.handle_requests_in_serial(request).await,
|
||||
}
|
||||
.map_err(BoxedError::new)
|
||||
@@ -518,6 +569,7 @@ impl RegionServerHandler for RegionServer {
|
||||
}),
|
||||
affected_rows: response.affected_rows as _,
|
||||
extensions: response.extensions,
|
||||
metadata: response.metadata,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -897,6 +949,7 @@ impl RegionServerInner {
|
||||
Ok(RegionResponse {
|
||||
affected_rows: result.affected_rows,
|
||||
extensions: result.extensions,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -967,6 +1020,7 @@ impl RegionServerInner {
|
||||
Ok(RegionResponse {
|
||||
affected_rows: result.affected_rows,
|
||||
extensions: result.extensions,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
Err(err) => {
|
||||
@@ -1242,8 +1296,11 @@ mod tests {
|
||||
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use common_error::ext::ErrorExt;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use mito2::test_util::CreateRequestBuilder;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder};
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{RegionDropRequest, RegionOpenRequest, RegionTruncateRequest};
|
||||
use store_api::storage::RegionId;
|
||||
@@ -1605,4 +1662,175 @@ mod tests {
|
||||
let forth_query = p.acquire().await;
|
||||
assert!(forth_query.is_ok());
|
||||
}
|
||||
|
||||
fn mock_region_metadata(region_id: RegionId) -> RegionMetadata {
|
||||
let mut metadata_builder = RegionMetadataBuilder::new(region_id);
|
||||
metadata_builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: datatypes::schema::ColumnSchema::new(
|
||||
"timestamp",
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 0,
|
||||
});
|
||||
metadata_builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: datatypes::schema::ColumnSchema::new(
|
||||
"file",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 1,
|
||||
});
|
||||
metadata_builder.push_column_metadata(ColumnMetadata {
|
||||
column_schema: datatypes::schema::ColumnSchema::new(
|
||||
"message",
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
});
|
||||
metadata_builder.primary_key(vec![1]);
|
||||
metadata_builder.build().unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_list_metadata_request() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut mock_region_server = mock_region_server();
|
||||
let region_id_1 = RegionId::new(1, 0);
|
||||
let region_id_2 = RegionId::new(2, 0);
|
||||
|
||||
let metadata_1 = mock_region_metadata(region_id_1);
|
||||
let metadata_2 = mock_region_metadata(region_id_2);
|
||||
let metadatas = vec![Some(metadata_1.clone()), Some(metadata_2.clone())];
|
||||
|
||||
let metadata_1 = Arc::new(metadata_1);
|
||||
let metadata_2 = Arc::new(metadata_2);
|
||||
let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
|
||||
MITO_ENGINE_NAME,
|
||||
Box::new(move |region_id| {
|
||||
if region_id == region_id_1 {
|
||||
Ok(metadata_1.clone())
|
||||
} else if region_id == region_id_2 {
|
||||
Ok(metadata_2.clone())
|
||||
} else {
|
||||
error::RegionNotFoundSnafu { region_id }.fail()
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
mock_region_server.register_engine(engine.clone());
|
||||
mock_region_server
|
||||
.inner
|
||||
.region_map
|
||||
.insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
|
||||
mock_region_server
|
||||
.inner
|
||||
.region_map
|
||||
.insert(region_id_2, RegionEngineWithStatus::Ready(engine.clone()));
|
||||
|
||||
// All regions exist.
|
||||
let list_metadata_request = ListMetadataRequest {
|
||||
region_ids: vec![region_id_1.as_u64(), region_id_2.as_u64()],
|
||||
};
|
||||
let response = mock_region_server
|
||||
.handle_list_metadata_request(&list_metadata_request)
|
||||
.await
|
||||
.unwrap();
|
||||
let decoded_metadata: Vec<Option<RegionMetadata>> =
|
||||
serde_json::from_slice(&response.metadata).unwrap();
|
||||
assert_eq!(metadatas, decoded_metadata);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_list_metadata_not_found() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut mock_region_server = mock_region_server();
|
||||
let region_id_1 = RegionId::new(1, 0);
|
||||
let region_id_2 = RegionId::new(2, 0);
|
||||
|
||||
let metadata_1 = mock_region_metadata(region_id_1);
|
||||
let metadatas = vec![Some(metadata_1.clone()), None];
|
||||
|
||||
let metadata_1 = Arc::new(metadata_1);
|
||||
let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
|
||||
MITO_ENGINE_NAME,
|
||||
Box::new(move |region_id| {
|
||||
if region_id == region_id_1 {
|
||||
Ok(metadata_1.clone())
|
||||
} else {
|
||||
error::RegionNotFoundSnafu { region_id }.fail()
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
mock_region_server.register_engine(engine.clone());
|
||||
mock_region_server
|
||||
.inner
|
||||
.region_map
|
||||
.insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
|
||||
|
||||
// Not in region map.
|
||||
let list_metadata_request = ListMetadataRequest {
|
||||
region_ids: vec![region_id_1.as_u64(), region_id_2.as_u64()],
|
||||
};
|
||||
let response = mock_region_server
|
||||
.handle_list_metadata_request(&list_metadata_request)
|
||||
.await
|
||||
.unwrap();
|
||||
let decoded_metadata: Vec<Option<RegionMetadata>> =
|
||||
serde_json::from_slice(&response.metadata).unwrap();
|
||||
assert_eq!(metadatas, decoded_metadata);
|
||||
|
||||
// Not in region engine.
|
||||
mock_region_server
|
||||
.inner
|
||||
.region_map
|
||||
.insert(region_id_2, RegionEngineWithStatus::Ready(engine.clone()));
|
||||
let response = mock_region_server
|
||||
.handle_list_metadata_request(&list_metadata_request)
|
||||
.await
|
||||
.unwrap();
|
||||
let decoded_metadata: Vec<Option<RegionMetadata>> =
|
||||
serde_json::from_slice(&response.metadata).unwrap();
|
||||
assert_eq!(metadatas, decoded_metadata);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_handle_list_metadata_failed() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let mut mock_region_server = mock_region_server();
|
||||
let region_id_1 = RegionId::new(1, 0);
|
||||
|
||||
let (engine, _) = MockRegionEngine::with_metadata_mock_fn(
|
||||
MITO_ENGINE_NAME,
|
||||
Box::new(move |region_id| {
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!("Failed to get region {region_id}"),
|
||||
}
|
||||
.fail()
|
||||
}),
|
||||
);
|
||||
|
||||
mock_region_server.register_engine(engine.clone());
|
||||
mock_region_server
|
||||
.inner
|
||||
.region_map
|
||||
.insert(region_id_1, RegionEngineWithStatus::Ready(engine.clone()));
|
||||
|
||||
// Failed to get.
|
||||
let list_metadata_request = ListMetadataRequest {
|
||||
region_ids: vec![region_id_1.as_u64()],
|
||||
};
|
||||
mock_region_server
|
||||
.handle_list_metadata_request(&list_metadata_request)
|
||||
.await
|
||||
.unwrap_err();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,45 +14,22 @@
|
||||
|
||||
//! object storage utilities
|
||||
|
||||
mod azblob;
|
||||
pub mod fs;
|
||||
mod gcs;
|
||||
mod oss;
|
||||
mod s3;
|
||||
use std::path;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_telemetry::{info, warn};
|
||||
use mito2::access_layer::{ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR};
|
||||
use object_store::factory::new_raw_object_store;
|
||||
use object_store::layers::{LruCacheLayer, RetryInterceptor, RetryLayer};
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, normalize_dir, with_instrument_layers};
|
||||
use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder};
|
||||
use object_store::util::{clean_temp_dir, join_dir, with_instrument_layers};
|
||||
use object_store::{
|
||||
Access, Error, ObjectStore, ObjectStoreBuilder, ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR,
|
||||
};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
|
||||
use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result};
|
||||
|
||||
pub(crate) async fn new_raw_object_store(
|
||||
store: &ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let data_home = normalize_dir(data_home);
|
||||
let object_store = match store {
|
||||
ObjectStoreConfig::File(file_config) => {
|
||||
fs::new_fs_object_store(&data_home, file_config).await
|
||||
}
|
||||
ObjectStoreConfig::S3(s3_config) => s3::new_s3_object_store(s3_config).await,
|
||||
ObjectStoreConfig::Oss(oss_config) => oss::new_oss_object_store(oss_config).await,
|
||||
ObjectStoreConfig::Azblob(azblob_config) => {
|
||||
azblob::new_azblob_object_store(azblob_config).await
|
||||
}
|
||||
ObjectStoreConfig::Gcs(gcs_config) => gcs::new_gcs_object_store(gcs_config).await,
|
||||
}?;
|
||||
Ok(object_store)
|
||||
}
|
||||
use crate::config::{ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE};
|
||||
use crate::error::{self, CreateDirSnafu, Result};
|
||||
|
||||
fn with_retry_layers(object_store: ObjectStore) -> ObjectStore {
|
||||
object_store.layer(
|
||||
@@ -66,7 +43,9 @@ pub(crate) async fn new_object_store_without_cache(
|
||||
store: &ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let object_store = new_raw_object_store(store, data_home).await?;
|
||||
let object_store = new_raw_object_store(store, data_home)
|
||||
.await
|
||||
.context(error::ObjectStoreSnafu)?;
|
||||
// Enable retry layer and cache layer for non-fs object storages
|
||||
let object_store = if store.is_object_storage() {
|
||||
// Adds retry layer
|
||||
@@ -83,7 +62,9 @@ pub(crate) async fn new_object_store(
|
||||
store: ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let object_store = new_raw_object_store(&store, data_home).await?;
|
||||
let object_store = new_raw_object_store(&store, data_home)
|
||||
.await
|
||||
.context(error::ObjectStoreSnafu)?;
|
||||
// Enable retry layer and cache layer for non-fs object storages
|
||||
let object_store = if store.is_object_storage() {
|
||||
let object_store = if let Some(cache_layer) = build_cache_layer(&store, data_home).await? {
|
||||
@@ -170,20 +151,20 @@ async fn build_cache_layer(
|
||||
&& !path.trim().is_empty()
|
||||
{
|
||||
let atomic_temp_dir = join_dir(path, ATOMIC_WRITE_DIR);
|
||||
clean_temp_dir(&atomic_temp_dir)?;
|
||||
clean_temp_dir(&atomic_temp_dir).context(error::ObjectStoreSnafu)?;
|
||||
|
||||
// Compatible code. Remove this after a major release.
|
||||
let old_atomic_temp_dir = join_dir(path, OLD_ATOMIC_WRITE_DIR);
|
||||
clean_temp_dir(&old_atomic_temp_dir)?;
|
||||
clean_temp_dir(&old_atomic_temp_dir).context(error::ObjectStoreSnafu)?;
|
||||
|
||||
let cache_store = Fs::default()
|
||||
.root(path)
|
||||
.atomic_write_dir(&atomic_temp_dir)
|
||||
.build()
|
||||
.context(error::InitBackendSnafu)?;
|
||||
.context(error::BuildCacheStoreSnafu)?;
|
||||
|
||||
let cache_layer = LruCacheLayer::new(Arc::new(cache_store), cache_capacity.0 as usize)
|
||||
.context(error::InitBackendSnafu)?;
|
||||
.context(error::BuildCacheStoreSnafu)?;
|
||||
cache_layer.recover_cache(false).await;
|
||||
info!(
|
||||
"Enabled local object storage cache, path: {}, capacity: {}.",
|
||||
@@ -196,31 +177,6 @@ async fn build_cache_layer(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn clean_temp_dir(dir: &str) -> Result<()> {
|
||||
if path::Path::new(&dir).exists() {
|
||||
info!("Begin to clean temp storage directory: {}", dir);
|
||||
std::fs::remove_dir_all(dir).context(error::RemoveDirSnafu { dir })?;
|
||||
info!("Cleaned temp storage directory: {}", dir);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result<HttpClient> {
|
||||
if config.skip_ssl_validation {
|
||||
common_telemetry::warn!("Skipping SSL validation for object storage HTTP client. Please ensure the environment is trusted.");
|
||||
}
|
||||
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.pool_max_idle_per_host(config.pool_max_idle_per_host as usize)
|
||||
.connect_timeout(config.connect_timeout)
|
||||
.pool_idle_timeout(config.pool_idle_timeout)
|
||||
.timeout(config.timeout)
|
||||
.danger_accept_invalid_certs(config.skip_ssl_validation)
|
||||
.build()
|
||||
.context(BuildHttpClientSnafu)?;
|
||||
Ok(HttpClient::with(client))
|
||||
}
|
||||
struct PrintDetailedError;
|
||||
|
||||
// PrintDetailedError is a retry interceptor that prints error in Debug format in retrying.
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::secrets::ExposeSecret;
|
||||
use common_telemetry::info;
|
||||
use object_store::services::Azblob;
|
||||
use object_store::{util, ObjectStore};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::AzblobConfig;
|
||||
use crate::error::{self, Result};
|
||||
use crate::store::build_http_client;
|
||||
|
||||
pub(crate) async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&azblob_config.root);
|
||||
|
||||
info!(
|
||||
"The azure storage container is: {}, root is: {}",
|
||||
azblob_config.container, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&azblob_config.http_client)?;
|
||||
|
||||
let mut builder = Azblob::default()
|
||||
.root(&root)
|
||||
.container(&azblob_config.container)
|
||||
.endpoint(&azblob_config.endpoint)
|
||||
.account_name(azblob_config.account_name.expose_secret())
|
||||
.account_key(azblob_config.account_key.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
if let Some(token) = &azblob_config.sas_token {
|
||||
builder = builder.sas_token(token);
|
||||
};
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
@@ -1,53 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::{fs, path};
|
||||
|
||||
use common_telemetry::info;
|
||||
use mito2::access_layer::{ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR};
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::join_dir;
|
||||
use object_store::ObjectStore;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::FileConfig;
|
||||
use crate::error::{self, Result};
|
||||
use crate::store;
|
||||
|
||||
/// A helper function to create a file system object store.
|
||||
pub async fn new_fs_object_store(
|
||||
data_home: &str,
|
||||
_file_config: &FileConfig,
|
||||
) -> Result<ObjectStore> {
|
||||
fs::create_dir_all(path::Path::new(&data_home))
|
||||
.context(error::CreateDirSnafu { dir: data_home })?;
|
||||
info!("The file storage home is: {}", data_home);
|
||||
|
||||
let atomic_write_dir = join_dir(data_home, ATOMIC_WRITE_DIR);
|
||||
store::clean_temp_dir(&atomic_write_dir)?;
|
||||
|
||||
// Compatible code. Remove this after a major release.
|
||||
let old_atomic_temp_dir = join_dir(data_home, OLD_ATOMIC_WRITE_DIR);
|
||||
store::clean_temp_dir(&old_atomic_temp_dir)?;
|
||||
|
||||
let builder = Fs::default()
|
||||
.root(data_home)
|
||||
.atomic_write_dir(&atomic_write_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish();
|
||||
|
||||
Ok(object_store)
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::secrets::ExposeSecret;
|
||||
use common_telemetry::info;
|
||||
use object_store::services::Gcs;
|
||||
use object_store::{util, ObjectStore};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::GcsConfig;
|
||||
use crate::error::{self, Result};
|
||||
use crate::store::build_http_client;
|
||||
|
||||
pub(crate) async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&gcs_config.root);
|
||||
info!(
|
||||
"The gcs storage bucket is: {}, root is: {}",
|
||||
gcs_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&gcs_config.http_client);
|
||||
|
||||
let builder = Gcs::default()
|
||||
.root(&root)
|
||||
.bucket(&gcs_config.bucket)
|
||||
.scope(&gcs_config.scope)
|
||||
.credential_path(gcs_config.credential_path.expose_secret())
|
||||
.credential(gcs_config.credential.expose_secret())
|
||||
.endpoint(&gcs_config.endpoint)
|
||||
.http_client(client?);
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::secrets::ExposeSecret;
|
||||
use common_telemetry::info;
|
||||
use object_store::services::Oss;
|
||||
use object_store::{util, ObjectStore};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::OssConfig;
|
||||
use crate::error::{self, Result};
|
||||
use crate::store::build_http_client;
|
||||
|
||||
pub(crate) async fn new_oss_object_store(oss_config: &OssConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&oss_config.root);
|
||||
info!(
|
||||
"The oss storage bucket is: {}, root is: {}",
|
||||
oss_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&oss_config.http_client)?;
|
||||
|
||||
let builder = Oss::default()
|
||||
.root(&root)
|
||||
.bucket(&oss_config.bucket)
|
||||
.endpoint(&oss_config.endpoint)
|
||||
.access_key_id(oss_config.access_key_id.expose_secret())
|
||||
.access_key_secret(oss_config.access_key_secret.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
@@ -1,55 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_base::secrets::ExposeSecret;
|
||||
use common_telemetry::info;
|
||||
use object_store::services::S3;
|
||||
use object_store::{util, ObjectStore};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::S3Config;
|
||||
use crate::error::{self, Result};
|
||||
use crate::store::build_http_client;
|
||||
|
||||
pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&s3_config.root);
|
||||
|
||||
info!(
|
||||
"The s3 storage bucket is: {}, root is: {}",
|
||||
s3_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&s3_config.http_client)?;
|
||||
|
||||
let mut builder = S3::default()
|
||||
.root(&root)
|
||||
.bucket(&s3_config.bucket)
|
||||
.access_key_id(s3_config.access_key_id.expose_secret())
|
||||
.secret_access_key(s3_config.secret_access_key.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
if s3_config.endpoint.is_some() {
|
||||
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
|
||||
}
|
||||
if s3_config.region.is_some() {
|
||||
builder = builder.region(s3_config.region.as_ref().unwrap());
|
||||
}
|
||||
if s3_config.enable_virtual_host_style {
|
||||
builder = builder.enable_virtual_host_style();
|
||||
}
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
@@ -108,11 +108,15 @@ pub type MockRequestHandler =
|
||||
pub type MockSetReadonlyGracefullyHandler =
|
||||
Box<dyn Fn(RegionId) -> Result<SetRegionRoleStateResponse, Error> + Send + Sync>;
|
||||
|
||||
pub type MockGetMetadataHandler =
|
||||
Box<dyn Fn(RegionId) -> Result<RegionMetadataRef, Error> + Send + Sync>;
|
||||
|
||||
pub struct MockRegionEngine {
|
||||
sender: Sender<(RegionId, RegionRequest)>,
|
||||
pub(crate) handle_request_delay: Option<Duration>,
|
||||
pub(crate) handle_request_mock_fn: Option<MockRequestHandler>,
|
||||
pub(crate) handle_set_readonly_gracefully_mock_fn: Option<MockSetReadonlyGracefullyHandler>,
|
||||
pub(crate) handle_get_metadata_mock_fn: Option<MockGetMetadataHandler>,
|
||||
pub(crate) mock_role: Option<Option<RegionRole>>,
|
||||
engine: String,
|
||||
}
|
||||
@@ -127,6 +131,7 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: None,
|
||||
handle_set_readonly_gracefully_mock_fn: None,
|
||||
handle_get_metadata_mock_fn: None,
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
}),
|
||||
@@ -146,6 +151,27 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: Some(mock_fn),
|
||||
handle_set_readonly_gracefully_mock_fn: None,
|
||||
handle_get_metadata_mock_fn: None,
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
}),
|
||||
rx,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn with_metadata_mock_fn(
|
||||
engine: &str,
|
||||
mock_fn: MockGetMetadataHandler,
|
||||
) -> (Arc<Self>, Receiver<(RegionId, RegionRequest)>) {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(8);
|
||||
|
||||
(
|
||||
Arc::new(Self {
|
||||
handle_request_delay: None,
|
||||
sender: tx,
|
||||
handle_request_mock_fn: None,
|
||||
handle_set_readonly_gracefully_mock_fn: None,
|
||||
handle_get_metadata_mock_fn: Some(mock_fn),
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
}),
|
||||
@@ -166,6 +192,7 @@ impl MockRegionEngine {
|
||||
sender: tx,
|
||||
handle_request_mock_fn: None,
|
||||
handle_set_readonly_gracefully_mock_fn: None,
|
||||
handle_get_metadata_mock_fn: None,
|
||||
mock_role: None,
|
||||
engine: engine.to_string(),
|
||||
};
|
||||
@@ -208,7 +235,11 @@ impl RegionEngine for MockRegionEngine {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn get_metadata(&self, _region_id: RegionId) -> Result<RegionMetadataRef, BoxedError> {
|
||||
async fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef, BoxedError> {
|
||||
if let Some(mock_fn) = &self.handle_get_metadata_mock_fn {
|
||||
return mock_fn(region_id).map_err(BoxedError::new);
|
||||
};
|
||||
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::cache::{LayeredCacheRegistryRef, TableFlownodeSetCacheRef, TableRouteCacheRef};
|
||||
use common_meta::ddl::ProcedureExecutorRef;
|
||||
use common_meta::key::flow::FlowMetadataManagerRef;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::node_manager::{Flownode, NodeManagerRef};
|
||||
use common_query::Output;
|
||||
@@ -37,6 +37,7 @@ use greptime_proto::v1::flow::{flow_server, FlowRequest, FlowResponse, InsertReq
|
||||
use itertools::Itertools;
|
||||
use operator::delete::Deleter;
|
||||
use operator::insert::Inserter;
|
||||
use operator::schema_helper::SchemaHelper;
|
||||
use operator::statement::StatementExecutor;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use query::{QueryEngine, QueryEngineFactory};
|
||||
@@ -546,8 +547,14 @@ impl FrontendInvoker {
|
||||
name: TABLE_FLOWNODE_SET_CACHE_NAME,
|
||||
})?;
|
||||
|
||||
let inserter = Arc::new(Inserter::new(
|
||||
let schema_helper = SchemaHelper::new(
|
||||
catalog_manager.clone(),
|
||||
Arc::new(TableMetadataManager::new(kv_backend.clone())),
|
||||
procedure_executor.clone(),
|
||||
layered_cache_registry.clone(),
|
||||
);
|
||||
let inserter = Arc::new(Inserter::new(
|
||||
schema_helper,
|
||||
partition_manager.clone(),
|
||||
node_manager.clone(),
|
||||
table_flownode_cache,
|
||||
@@ -588,7 +595,7 @@ impl FrontendInvoker {
|
||||
.start_timer();
|
||||
|
||||
self.inserter
|
||||
.handle_row_inserts(requests, ctx, &self.statement_executor, false, false)
|
||||
.handle_row_inserts(requests, ctx, false, false)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(common_frontend::error::ExternalSnafu)
|
||||
|
||||
@@ -49,6 +49,7 @@ log-query.workspace = true
|
||||
log-store.workspace = true
|
||||
meta-client.workspace = true
|
||||
num_cpus.workspace = true
|
||||
object-store.workspace = true
|
||||
opentelemetry-proto.workspace = true
|
||||
operator.workspace = true
|
||||
otel-arrow-rust.workspace = true
|
||||
|
||||
@@ -19,6 +19,7 @@ use common_config::config::Configurable;
|
||||
use common_options::datanode::DatanodeClientOptions;
|
||||
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, TracingOptions};
|
||||
use meta_client::MetaClientOptions;
|
||||
use object_store::config::ObjectStoreConfig;
|
||||
use query::options::QueryOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
|
||||
@@ -62,6 +63,7 @@ pub struct FrontendOptions {
|
||||
pub query: QueryOptions,
|
||||
pub max_in_flight_write_bytes: Option<ReadableSize>,
|
||||
pub slow_query: Option<SlowQueryOptions>,
|
||||
pub store: ObjectStoreConfig,
|
||||
}
|
||||
|
||||
impl Default for FrontendOptions {
|
||||
@@ -88,6 +90,7 @@ impl Default for FrontendOptions {
|
||||
query: QueryOptions::default(),
|
||||
max_in_flight_write_bytes: None,
|
||||
slow_query: Some(SlowQueryOptions::default()),
|
||||
store: ObjectStoreConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -116,8 +119,7 @@ impl Frontend {
|
||||
if let Some(t) = self.export_metrics_task.as_ref() {
|
||||
if t.send_by_handler {
|
||||
let inserter = self.instance.inserter().clone();
|
||||
let statement_executor = self.instance.statement_executor().clone();
|
||||
let handler = ExportMetricHandler::new_handler(inserter, statement_executor);
|
||||
let handler = ExportMetricHandler::new_handler(inserter);
|
||||
t.start(Some(handler)).context(error::StartServerSnafu)?
|
||||
} else {
|
||||
t.start(None).context(error::StartServerSnafu)?;
|
||||
|
||||
@@ -39,6 +39,7 @@ use common_config::KvBackendConfig;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::state_store::KvStateStore;
|
||||
use common_procedure::local::{LocalManager, ManagerConfig};
|
||||
use common_procedure::options::ProcedureConfig;
|
||||
@@ -49,7 +50,9 @@ use datafusion_expr::LogicalPlan;
|
||||
use log_store::raft_engine::RaftEngineBackend;
|
||||
use operator::delete::DeleterRef;
|
||||
use operator::insert::InserterRef;
|
||||
use operator::schema_helper::SchemaHelper;
|
||||
use operator::statement::{StatementExecutor, StatementExecutorRef};
|
||||
use partition::manager::PartitionRuleManagerRef;
|
||||
use pipeline::pipeline_operator::PipelineOperator;
|
||||
use prometheus::HistogramTimer;
|
||||
use promql_parser::label::Matcher;
|
||||
@@ -58,6 +61,7 @@ use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
|
||||
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::QueryEngineRef;
|
||||
use servers::access_layer::AccessLayerFactory;
|
||||
use servers::error as server_error;
|
||||
use servers::error::{AuthSnafu, ExecuteQuerySnafu, ParsePromQLSnafu};
|
||||
use servers::interceptor::{
|
||||
@@ -100,6 +104,7 @@ pub struct Instance {
|
||||
slow_query_recorder: Option<SlowQueryRecorder>,
|
||||
limiter: Option<LimiterRef>,
|
||||
process_manager: ProcessManagerRef,
|
||||
access_layer_factory: AccessLayerFactory,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
@@ -161,6 +166,27 @@ impl Instance {
|
||||
pub fn process_manager(&self) -> &ProcessManagerRef {
|
||||
&self.process_manager
|
||||
}
|
||||
|
||||
pub fn create_schema_helper(&self) -> SchemaHelper {
|
||||
SchemaHelper::new(
|
||||
self.catalog_manager.clone(),
|
||||
self.table_metadata_manager.clone(),
|
||||
self.statement_executor.procedure_executor().clone(),
|
||||
self.statement_executor.cache_invalidator().clone(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the partition rule manager exposed by this instance's inserter.
pub fn partition_manager(&self) -> &PartitionRuleManagerRef {
|
||||
self.inserter.partition_manager()
|
||||
}
|
||||
|
||||
/// Returns the node manager exposed by this instance's inserter.
pub fn node_manager(&self) -> &NodeManagerRef {
|
||||
self.inserter.node_manager()
|
||||
}
|
||||
|
||||
/// Returns a reference to the [`AccessLayerFactory`] stored on this instance.
pub fn access_layer_factory(&self) -> &AccessLayerFactory {
|
||||
&self.access_layer_factory
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_stmt(sql: &str, dialect: &(dyn Dialect + Send + Sync)) -> Result<Vec<Statement>> {
|
||||
@@ -513,6 +539,8 @@ pub fn check_permission(
|
||||
| Statement::AlterDatabase(_)
|
||||
| Statement::DropFlow(_)
|
||||
| Statement::Use(_) => {}
|
||||
#[cfg(feature = "enterprise")]
|
||||
Statement::DropTrigger(_) => {}
|
||||
Statement::ShowCreateDatabase(stmt) => {
|
||||
validate_database(&stmt.database_name, query_ctx)?;
|
||||
}
|
||||
@@ -616,6 +644,8 @@ pub fn check_permission(
|
||||
Statement::FetchCursor(_) | Statement::CloseCursor(_) => {}
|
||||
// User can only kill process in their own catalog.
|
||||
Statement::Kill(_) => {}
|
||||
// SHOW PROCESSLIST
|
||||
Statement::ShowProcesslist(_) => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -30,12 +30,14 @@ use operator::flow::FlowServiceOperator;
|
||||
use operator::insert::Inserter;
|
||||
use operator::procedure::ProcedureServiceOperator;
|
||||
use operator::request::Requester;
|
||||
use operator::schema_helper::SchemaHelper;
|
||||
use operator::statement::{StatementExecutor, StatementExecutorRef};
|
||||
use operator::table::TableMutationOperator;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use pipeline::pipeline_operator::PipelineOperator;
|
||||
use query::region_query::RegionQueryHandlerFactoryRef;
|
||||
use query::QueryEngineFactory;
|
||||
use servers::access_layer::AccessLayerFactory;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -130,8 +132,15 @@ impl FrontendBuilder {
|
||||
name: TABLE_FLOWNODE_SET_CACHE_NAME,
|
||||
})?;
|
||||
|
||||
let inserter = Arc::new(Inserter::new(
|
||||
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend.clone()));
|
||||
let schema_helper = SchemaHelper::new(
|
||||
self.catalog_manager.clone(),
|
||||
table_metadata_manager.clone(),
|
||||
self.procedure_executor.clone(),
|
||||
local_cache_invalidator.clone(),
|
||||
);
|
||||
let inserter = Arc::new(Inserter::new(
|
||||
schema_helper,
|
||||
partition_manager.clone(),
|
||||
node_manager.clone(),
|
||||
table_flownode_cache,
|
||||
@@ -176,7 +185,7 @@ impl FrontendBuilder {
|
||||
self.catalog_manager.clone(),
|
||||
query_engine.clone(),
|
||||
self.procedure_executor,
|
||||
kv_backend.clone(),
|
||||
kv_backend,
|
||||
local_cache_invalidator,
|
||||
inserter.clone(),
|
||||
table_route_cache,
|
||||
@@ -211,6 +220,7 @@ impl FrontendBuilder {
|
||||
Arc::new(Limiter::new(max_in_flight_write_bytes.as_bytes()))
|
||||
});
|
||||
|
||||
let access_layer_factory = AccessLayerFactory::new(&self.options.store).await.unwrap();
|
||||
Ok(Instance {
|
||||
catalog_manager: self.catalog_manager,
|
||||
pipeline_operator,
|
||||
@@ -219,10 +229,11 @@ impl FrontendBuilder {
|
||||
plugins,
|
||||
inserter,
|
||||
deleter,
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend)),
|
||||
table_metadata_manager,
|
||||
slow_query_recorder,
|
||||
limiter,
|
||||
process_manager,
|
||||
access_layer_factory,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -408,7 +408,7 @@ impl Instance {
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
self.inserter
|
||||
.handle_column_inserts(requests, ctx, self.statement_executor.as_ref())
|
||||
.handle_column_inserts(requests, ctx)
|
||||
.await
|
||||
.context(TableOperationSnafu)
|
||||
}
|
||||
@@ -422,13 +422,7 @@ impl Instance {
|
||||
is_single_value: bool,
|
||||
) -> Result<Output> {
|
||||
self.inserter
|
||||
.handle_row_inserts(
|
||||
requests,
|
||||
ctx,
|
||||
self.statement_executor.as_ref(),
|
||||
accommodate_existing_schema,
|
||||
is_single_value,
|
||||
)
|
||||
.handle_row_inserts(requests, ctx, accommodate_existing_schema, is_single_value)
|
||||
.await
|
||||
.context(TableOperationSnafu)
|
||||
}
|
||||
@@ -441,10 +435,7 @@ impl Instance {
|
||||
) -> Result<Output> {
|
||||
self.inserter
|
||||
.handle_last_non_null_inserts(
|
||||
requests,
|
||||
ctx,
|
||||
self.statement_executor.as_ref(),
|
||||
true,
|
||||
requests, ctx, true,
|
||||
// Influx protocol may write multiple fields (values).
|
||||
false,
|
||||
)
|
||||
@@ -460,7 +451,7 @@ impl Instance {
|
||||
physical_table: String,
|
||||
) -> Result<Output> {
|
||||
self.inserter
|
||||
.handle_metric_row_inserts(requests, ctx, &self.statement_executor, physical_table)
|
||||
.handle_metric_row_inserts(requests, ctx, physical_table)
|
||||
.await
|
||||
.context(TableOperationSnafu)
|
||||
}
|
||||
|
||||
@@ -135,7 +135,7 @@ impl Instance {
|
||||
};
|
||||
|
||||
self.inserter
|
||||
.handle_log_inserts(log, ctx, self.statement_executor.as_ref())
|
||||
.handle_log_inserts(log, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)
|
||||
@@ -157,7 +157,7 @@ impl Instance {
|
||||
};
|
||||
|
||||
self.inserter
|
||||
.handle_trace_inserts(rows, ctx, self.statement_executor.as_ref())
|
||||
.handle_trace_inserts(rows, ctx)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteGrpcRequestSnafu)
|
||||
|
||||
@@ -28,7 +28,6 @@ use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_telemetry::{debug, tracing};
|
||||
use operator::insert::InserterRef;
|
||||
use operator::statement::StatementExecutor;
|
||||
use prost::Message;
|
||||
use servers::error::{self, AuthSnafu, InFlightWriteBytesExceededSnafu, Result as ServerResult};
|
||||
use servers::http::header::{collect_plan_metrics, CONTENT_ENCODING_SNAPPY, CONTENT_TYPE_PROTOBUF};
|
||||
@@ -271,18 +270,11 @@ impl PromStoreProtocolHandler for Instance {
|
||||
/// so only implement `PromStoreProtocolHandler::write` method.
|
||||
pub struct ExportMetricHandler {
|
||||
inserter: InserterRef,
|
||||
statement_executor: Arc<StatementExecutor>,
|
||||
}
|
||||
|
||||
impl ExportMetricHandler {
|
||||
pub fn new_handler(
|
||||
inserter: InserterRef,
|
||||
statement_executor: Arc<StatementExecutor>,
|
||||
) -> PromStoreProtocolHandlerRef {
|
||||
Arc::new(Self {
|
||||
inserter,
|
||||
statement_executor,
|
||||
})
|
||||
/// Wraps `inserter` in an [`ExportMetricHandler`] and returns it behind an
/// `Arc` as a [`PromStoreProtocolHandlerRef`].
pub fn new_handler(inserter: InserterRef) -> PromStoreProtocolHandlerRef {
|
||||
Arc::new(Self { inserter })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,12 +287,7 @@ impl PromStoreProtocolHandler for ExportMetricHandler {
|
||||
_: bool,
|
||||
) -> ServerResult<Output> {
|
||||
self.inserter
|
||||
.handle_metric_row_inserts(
|
||||
request,
|
||||
ctx,
|
||||
&self.statement_executor,
|
||||
GREPTIME_PHYSICAL_TABLE.to_string(),
|
||||
)
|
||||
.handle_metric_row_inserts(request, ctx, GREPTIME_PHYSICAL_TABLE.to_string())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ExecuteGrpcQuerySnafu)
|
||||
|
||||
@@ -24,6 +24,7 @@ use servers::grpc::frontend_grpc_handler::FrontendGrpcHandler;
|
||||
use servers::grpc::greptime_handler::GreptimeRequestHandler;
|
||||
use servers::grpc::{GrpcOptions, GrpcServer};
|
||||
use servers::http::event::LogValidatorRef;
|
||||
use servers::http::prom_store::{PromBulkState, PromStoreState};
|
||||
use servers::http::{HttpServer, HttpServerBuilder};
|
||||
use servers::interceptor::LogIngestInterceptorRef;
|
||||
use servers::metrics_handler::MetricsHandler;
|
||||
@@ -95,13 +96,30 @@ where
|
||||
}
|
||||
|
||||
if opts.prom_store.enable {
|
||||
let bulk_state = if opts.prom_store.bulk_mode {
|
||||
let mut state = PromBulkState {
|
||||
schema_helper: self.instance.create_schema_helper(),
|
||||
partition_manager: self.instance.partition_manager().clone(),
|
||||
node_manager: self.instance.node_manager().clone(),
|
||||
access_layer_factory: self.instance.access_layer_factory().clone(),
|
||||
tx: None,
|
||||
};
|
||||
state.start_background_task();
|
||||
Some(state)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let state = PromStoreState {
|
||||
prom_store_handler: self.instance.clone(),
|
||||
pipeline_handler: Some(self.instance.clone()),
|
||||
prom_store_with_metric_engine: opts.prom_store.with_metric_engine,
|
||||
prom_validation_mode: opts.http.prom_validation_mode,
|
||||
bulk_state,
|
||||
};
|
||||
|
||||
builder = builder
|
||||
.with_prom_handler(
|
||||
self.instance.clone(),
|
||||
Some(self.instance.clone()),
|
||||
opts.prom_store.with_metric_engine,
|
||||
opts.http.prom_validation_mode,
|
||||
)
|
||||
.with_prom_handler(state)
|
||||
.with_prometheus_handler(self.instance.clone());
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ use serde::{Deserialize, Serialize};
|
||||
/// Options controlling the prom store protocol support.
pub struct PromStoreOptions {
|
||||
/// Whether the prom store handlers are registered on the HTTP server.
/// Defaults to `true`.
pub enable: bool,
|
||||
/// Forwarded to the prom store handler state as its "with metric engine"
/// flag. Defaults to `true`.
/// NOTE(review): presumably selects writing through the metric engine — confirm.
pub with_metric_engine: bool,
|
||||
/// Whether a bulk state (with its background task) is created for the
/// prom store handler. Defaults to `false`.
pub bulk_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for PromStoreOptions {
|
||||
@@ -25,6 +26,7 @@ impl Default for PromStoreOptions {
|
||||
Self {
|
||||
enable: true,
|
||||
with_metric_engine: true,
|
||||
bulk_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -37,6 +39,7 @@ mod tests {
|
||||
fn test_prom_store_options() {
|
||||
let default = PromStoreOptions::default();
|
||||
assert!(default.enable);
|
||||
assert!(default.with_metric_engine)
|
||||
assert!(default.with_metric_engine);
|
||||
assert!(!default.bulk_mode);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -233,7 +233,7 @@ impl SlowQueryEventHandler {
|
||||
.into();
|
||||
|
||||
self.inserter
|
||||
.handle_row_inserts(requests, query_ctx, &self.statement_executor, false, false)
|
||||
.handle_row_inserts(requests, query_ctx, false, false)
|
||||
.await
|
||||
.context(TableOperationSnafu)?;
|
||||
|
||||
|
||||
@@ -54,14 +54,6 @@ pub enum Error {
|
||||
peer_id: u64,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to lookup peer: {}", peer_id))]
|
||||
LookupPeer {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
peer_id: u64,
|
||||
},
|
||||
|
||||
#[snafu(display("Another migration procedure is running for region: {}", region_id))]
|
||||
MigrationRunning {
|
||||
#[snafu(implicit)]
|
||||
@@ -1033,7 +1025,6 @@ impl ErrorExt for Error {
|
||||
}
|
||||
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
Error::LookupPeer { source, .. } => source.status_code(),
|
||||
Error::NoEnoughAvailableNode { .. } => StatusCode::RuntimeResourcesExhausted,
|
||||
|
||||
#[cfg(feature = "pg_kvbackend")]
|
||||
|
||||
@@ -110,6 +110,14 @@ pub struct MetasrvOptions {
|
||||
pub use_memory_store: bool,
|
||||
/// Whether to enable region failover.
|
||||
pub enable_region_failover: bool,
|
||||
/// Delay before initializing region failure detectors.
|
||||
///
|
||||
/// This delay helps prevent premature initialization of region failure detectors in cases where
|
||||
/// cluster maintenance mode is enabled right after metasrv starts, especially when the cluster
|
||||
/// is not deployed via the recommended GreptimeDB Operator. Without this delay, early detector registration
|
||||
/// may trigger unnecessary region failovers during datanode startup.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub region_failure_detector_initialization_delay: Duration,
|
||||
/// Whether to allow region failover on local WAL.
|
||||
///
|
||||
/// If it's true, the region failover will be allowed even if the local WAL is used.
|
||||
@@ -219,6 +227,7 @@ impl Default for MetasrvOptions {
|
||||
selector: SelectorType::default(),
|
||||
use_memory_store: false,
|
||||
enable_region_failover: false,
|
||||
region_failure_detector_initialization_delay: Duration::from_secs(10 * 60),
|
||||
allow_region_failover_on_local_wal: false,
|
||||
grpc: GrpcOptions {
|
||||
bind_addr: format!("127.0.0.1:{}", DEFAULT_METASRV_ADDR_PORT),
|
||||
|
||||
@@ -64,7 +64,7 @@ use crate::procedure::wal_prune::manager::{WalPruneManager, WalPruneTicker};
|
||||
use crate::procedure::wal_prune::Context as WalPruneContext;
|
||||
use crate::region::supervisor::{
|
||||
HeartbeatAcceptor, RegionFailureDetectorControl, RegionSupervisor, RegionSupervisorSelector,
|
||||
RegionSupervisorTicker, DEFAULT_TICK_INTERVAL,
|
||||
RegionSupervisorTicker, DEFAULT_INITIALIZATION_RETRY_PERIOD, DEFAULT_TICK_INTERVAL,
|
||||
};
|
||||
use crate::selector::lease_based::LeaseBasedSelector;
|
||||
use crate::selector::round_robin::RoundRobinSelector;
|
||||
@@ -299,6 +299,8 @@ impl MetasrvBuilder {
|
||||
Arc::new(RegionFailureDetectorControl::new(tx.clone())) as _,
|
||||
Some(Arc::new(RegionSupervisorTicker::new(
|
||||
DEFAULT_TICK_INTERVAL,
|
||||
options.region_failure_detector_initialization_delay,
|
||||
DEFAULT_INITIALIZATION_RETRY_PERIOD,
|
||||
tx.clone(),
|
||||
))),
|
||||
)
|
||||
@@ -341,6 +343,7 @@ impl MetasrvBuilder {
|
||||
region_migration_manager.clone(),
|
||||
maintenance_mode_manager.clone(),
|
||||
peer_lookup_service.clone(),
|
||||
leader_cached_kv_backend.clone(),
|
||||
);
|
||||
|
||||
Some(RegionFailureHandler::new(
|
||||
@@ -353,30 +356,28 @@ impl MetasrvBuilder {
|
||||
|
||||
let leader_region_registry = Arc::new(LeaderRegionRegistry::default());
|
||||
|
||||
let ddl_context = DdlContext {
|
||||
node_manager,
|
||||
cache_invalidator: cache_invalidator.clone(),
|
||||
memory_region_keeper: memory_region_keeper.clone(),
|
||||
leader_region_registry: leader_region_registry.clone(),
|
||||
table_metadata_manager: table_metadata_manager.clone(),
|
||||
table_metadata_allocator: table_metadata_allocator.clone(),
|
||||
flow_metadata_manager: flow_metadata_manager.clone(),
|
||||
flow_metadata_allocator: flow_metadata_allocator.clone(),
|
||||
region_failure_detector_controller,
|
||||
};
|
||||
let procedure_manager_c = procedure_manager.clone();
|
||||
let ddl_manager = DdlManager::try_new(ddl_context, procedure_manager_c, true)
|
||||
.context(error::InitDdlManagerSnafu)?;
|
||||
#[cfg(feature = "enterprise")]
|
||||
let trigger_ddl_manager = plugins
|
||||
.as_ref()
|
||||
.and_then(|plugins| plugins.get::<common_meta::ddl_manager::TriggerDdlManagerRef>());
|
||||
let ddl_manager = Arc::new(
|
||||
DdlManager::try_new(
|
||||
DdlContext {
|
||||
node_manager,
|
||||
cache_invalidator: cache_invalidator.clone(),
|
||||
memory_region_keeper: memory_region_keeper.clone(),
|
||||
leader_region_registry: leader_region_registry.clone(),
|
||||
table_metadata_manager: table_metadata_manager.clone(),
|
||||
table_metadata_allocator: table_metadata_allocator.clone(),
|
||||
flow_metadata_manager: flow_metadata_manager.clone(),
|
||||
flow_metadata_allocator: flow_metadata_allocator.clone(),
|
||||
region_failure_detector_controller,
|
||||
},
|
||||
procedure_manager.clone(),
|
||||
true,
|
||||
#[cfg(feature = "enterprise")]
|
||||
trigger_ddl_manager,
|
||||
)
|
||||
.context(error::InitDdlManagerSnafu)?,
|
||||
);
|
||||
let ddl_manager = {
|
||||
let trigger_ddl_manager = plugins.as_ref().and_then(|plugins| {
|
||||
plugins.get::<common_meta::ddl_manager::TriggerDdlManagerRef>()
|
||||
});
|
||||
ddl_manager.with_trigger_ddl_manager(trigger_ddl_manager)
|
||||
};
|
||||
let ddl_manager = Arc::new(ddl_manager);
|
||||
|
||||
// remote WAL prune ticker and manager
|
||||
let wal_prune_ticker = if is_remote_wal && options.wal.enable_active_wal_pruning() {
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::RegionRoute;
|
||||
use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId};
|
||||
use common_telemetry::{error, info};
|
||||
use common_telemetry::{error, info, warn};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
use table::table_name::TableName;
|
||||
@@ -253,10 +253,12 @@ impl RegionMigrationManager {
|
||||
}
|
||||
|
||||
/// Throws an error if `leader_peer` is not the `from_peer`.
|
||||
///
|
||||
/// If `from_peer` is unknown, use the leader peer as the `from_peer`.
|
||||
fn verify_region_leader_peer(
|
||||
&self,
|
||||
region_route: &RegionRoute,
|
||||
task: &RegionMigrationProcedureTask,
|
||||
task: &mut RegionMigrationProcedureTask,
|
||||
) -> Result<()> {
|
||||
let leader_peer = region_route
|
||||
.leader_peer
|
||||
@@ -275,6 +277,15 @@ impl RegionMigrationManager {
|
||||
}
|
||||
);
|
||||
|
||||
if task.from_peer.addr.is_empty() {
|
||||
warn!(
|
||||
"The `from_peer` is unknown, use the leader peer({}) as the `from_peer`, region: {}",
|
||||
leader_peer, task.region_id
|
||||
);
|
||||
// The peer id is the same as the leader peer id.
|
||||
task.from_peer = leader_peer.clone();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -300,7 +311,7 @@ impl RegionMigrationManager {
|
||||
/// Submits a new region migration procedure.
|
||||
pub async fn submit_procedure(
|
||||
&self,
|
||||
task: RegionMigrationProcedureTask,
|
||||
mut task: RegionMigrationProcedureTask,
|
||||
) -> Result<Option<ProcedureId>> {
|
||||
let Some(guard) = self.insert_running_procedure(&task) else {
|
||||
return error::MigrationRunningSnafu {
|
||||
@@ -333,7 +344,7 @@ impl RegionMigrationManager {
|
||||
.fail();
|
||||
}
|
||||
|
||||
self.verify_region_leader_peer(®ion_route, &task)?;
|
||||
self.verify_region_leader_peer(®ion_route, &mut task)?;
|
||||
self.verify_region_follower_peers(®ion_route, &task)?;
|
||||
let table_info = self.retrieve_table_info(region_id).await?;
|
||||
let TableName {
|
||||
@@ -341,12 +352,6 @@ impl RegionMigrationManager {
|
||||
schema_name,
|
||||
..
|
||||
} = table_info.table_name();
|
||||
METRIC_META_REGION_MIGRATION_DATANODES
|
||||
.with_label_values(&["src", &task.from_peer.id.to_string()])
|
||||
.inc();
|
||||
METRIC_META_REGION_MIGRATION_DATANODES
|
||||
.with_label_values(&["desc", &task.to_peer.id.to_string()])
|
||||
.inc();
|
||||
let RegionMigrationProcedureTask {
|
||||
region_id,
|
||||
from_peer,
|
||||
@@ -377,6 +382,12 @@ impl RegionMigrationManager {
|
||||
return;
|
||||
}
|
||||
};
|
||||
METRIC_META_REGION_MIGRATION_DATANODES
|
||||
.with_label_values(&["src", &task.from_peer.id.to_string()])
|
||||
.inc();
|
||||
METRIC_META_REGION_MIGRATION_DATANODES
|
||||
.with_label_values(&["desc", &task.to_peer.id.to_string()])
|
||||
.inc();
|
||||
|
||||
if let Err(e) = watcher::wait(watcher).await {
|
||||
error!(e; "Failed to wait region migration procedure {procedure_id} for {task}");
|
||||
|
||||
@@ -103,6 +103,7 @@ pub mod mock {
|
||||
}),
|
||||
affected_rows: 0,
|
||||
extensions: Default::default(),
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,23 +15,30 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_meta::datanode::Stat;
|
||||
use common_meta::ddl::{DetectingRegion, RegionFailureDetectorController};
|
||||
use common_meta::key::maintenance::MaintenanceModeManagerRef;
|
||||
use common_meta::key::table_route::{TableRouteKey, TableRouteValue};
|
||||
use common_meta::key::{MetadataKey, MetadataValue};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::leadership_notifier::LeadershipChangeListener;
|
||||
use common_meta::peer::{Peer, PeerLookupServiceRef};
|
||||
use common_meta::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE};
|
||||
use common_meta::rpc::store::RangeRequest;
|
||||
use common_meta::DatanodeId;
|
||||
use common_runtime::JoinHandle;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_time::util::current_time_millis;
|
||||
use error::Error::{LeaderPeerChanged, MigrationRunning, RegionMigrated, TableRouteNotFound};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
use tokio::sync::mpsc::{Receiver, Sender};
|
||||
use tokio::time::{interval, MissedTickBehavior};
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time::{interval, interval_at, MissedTickBehavior};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::failure_detector::PhiAccrualFailureDetectorOptions;
|
||||
@@ -70,6 +77,9 @@ impl From<&Stat> for DatanodeHeartbeat {
|
||||
///
|
||||
/// Variants:
|
||||
/// - `Tick`: This event is used to trigger region failure detection periodically.
|
||||
/// - `InitializeAllRegions`: This event is used to initialize all region failure detectors.
|
||||
/// - `RegisterFailureDetectors`: This event is used to register failure detectors for regions.
|
||||
/// - `DeregisterFailureDetectors`: This event is used to deregister failure detectors for regions.
|
||||
/// - `HeartbeatArrived`: This event indicates that the metasrv has received a [`DatanodeHeartbeat`] from a datanode.
|
||||
/// - `Clear`: This event is used to reset the state of the supervisor, typically used
|
||||
/// when a system-wide reset or reinitialization is needed.
|
||||
@@ -78,6 +88,7 @@ impl From<&Stat> for DatanodeHeartbeat {
|
||||
/// of the supervisor during tests.
|
||||
pub(crate) enum Event {
|
||||
Tick,
|
||||
InitializeAllRegions(tokio::sync::oneshot::Sender<()>),
|
||||
RegisterFailureDetectors(Vec<DetectingRegion>),
|
||||
DeregisterFailureDetectors(Vec<DetectingRegion>),
|
||||
HeartbeatArrived(DatanodeHeartbeat),
|
||||
@@ -102,6 +113,7 @@ impl Debug for Event {
|
||||
Self::Tick => write!(f, "Tick"),
|
||||
Self::HeartbeatArrived(arg0) => f.debug_tuple("HeartbeatArrived").field(arg0).finish(),
|
||||
Self::Clear => write!(f, "Clear"),
|
||||
Self::InitializeAllRegions(_) => write!(f, "InspectAndRegisterRegions"),
|
||||
Self::RegisterFailureDetectors(arg0) => f
|
||||
.debug_tuple("RegisterFailureDetectors")
|
||||
.field(arg0)
|
||||
@@ -127,6 +139,12 @@ pub struct RegionSupervisorTicker {
|
||||
/// The interval of tick.
|
||||
tick_interval: Duration,
|
||||
|
||||
/// The delay before initializing all region failure detectors.
|
||||
initialization_delay: Duration,
|
||||
|
||||
/// The retry period for initializing all region failure detectors.
|
||||
initialization_retry_period: Duration,
|
||||
|
||||
/// Sends [Event]s.
|
||||
sender: Sender<Event>,
|
||||
}
|
||||
@@ -149,10 +167,21 @@ impl LeadershipChangeListener for RegionSupervisorTicker {
|
||||
}
|
||||
|
||||
impl RegionSupervisorTicker {
|
||||
pub(crate) fn new(tick_interval: Duration, sender: Sender<Event>) -> Self {
|
||||
pub(crate) fn new(
|
||||
tick_interval: Duration,
|
||||
initialization_delay: Duration,
|
||||
initialization_retry_period: Duration,
|
||||
sender: Sender<Event>,
|
||||
) -> Self {
|
||||
info!(
|
||||
"RegionSupervisorTicker is created, tick_interval: {:?}, initialization_delay: {:?}, initialization_retry_period: {:?}",
|
||||
tick_interval, initialization_delay, initialization_retry_period
|
||||
);
|
||||
Self {
|
||||
tick_handle: Mutex::new(None),
|
||||
tick_interval,
|
||||
initialization_delay,
|
||||
initialization_retry_period,
|
||||
sender,
|
||||
}
|
||||
}
|
||||
@@ -163,15 +192,39 @@ impl RegionSupervisorTicker {
|
||||
if handle.is_none() {
|
||||
let sender = self.sender.clone();
|
||||
let tick_interval = self.tick_interval;
|
||||
let initialization_delay = self.initialization_delay;
|
||||
|
||||
let mut initialization_interval = interval_at(
|
||||
tokio::time::Instant::now() + initialization_delay,
|
||||
self.initialization_retry_period,
|
||||
);
|
||||
initialization_interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
|
||||
common_runtime::spawn_global(async move {
|
||||
loop {
|
||||
initialization_interval.tick().await;
|
||||
let (tx, rx) = oneshot::channel();
|
||||
if sender.send(Event::InitializeAllRegions(tx)).await.is_err() {
|
||||
info!("EventReceiver is dropped, region failure detectors initialization loop is stopped");
|
||||
break;
|
||||
}
|
||||
if rx.await.is_ok() {
|
||||
info!("All region failure detectors are initialized.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let sender = self.sender.clone();
|
||||
let ticker_loop = tokio::spawn(async move {
|
||||
let mut interval = interval(tick_interval);
|
||||
interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
|
||||
let mut tick_interval = interval(tick_interval);
|
||||
tick_interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
|
||||
|
||||
if let Err(err) = sender.send(Event::Clear).await {
|
||||
warn!(err; "EventReceiver is dropped, failed to send Event::Clear");
|
||||
return;
|
||||
}
|
||||
loop {
|
||||
interval.tick().await;
|
||||
tick_interval.tick().await;
|
||||
if sender.send(Event::Tick).await.is_err() {
|
||||
info!("EventReceiver is dropped, tick loop is stopped");
|
||||
break;
|
||||
@@ -202,6 +255,8 @@ pub type RegionSupervisorRef = Arc<RegionSupervisor>;
|
||||
|
||||
/// The default tick interval.
|
||||
pub const DEFAULT_TICK_INTERVAL: Duration = Duration::from_secs(1);
|
||||
/// The default initialization retry period.
|
||||
pub const DEFAULT_INITIALIZATION_RETRY_PERIOD: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Selector for region supervisor.
|
||||
pub enum RegionSupervisorSelector {
|
||||
@@ -228,6 +283,8 @@ pub struct RegionSupervisor {
|
||||
maintenance_mode_manager: MaintenanceModeManagerRef,
|
||||
/// Peer lookup service
|
||||
peer_lookup: PeerLookupServiceRef,
|
||||
/// The kv backend.
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
/// Controller for managing failure detectors for regions.
|
||||
@@ -290,6 +347,7 @@ impl RegionSupervisor {
|
||||
tokio::sync::mpsc::channel(1024)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn new(
|
||||
event_receiver: Receiver<Event>,
|
||||
options: PhiAccrualFailureDetectorOptions,
|
||||
@@ -298,6 +356,7 @@ impl RegionSupervisor {
|
||||
region_migration_manager: RegionMigrationManagerRef,
|
||||
maintenance_mode_manager: MaintenanceModeManagerRef,
|
||||
peer_lookup: PeerLookupServiceRef,
|
||||
kv_backend: KvBackendRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
failure_detector: RegionFailureDetector::new(options),
|
||||
@@ -308,6 +367,7 @@ impl RegionSupervisor {
|
||||
region_migration_manager,
|
||||
maintenance_mode_manager,
|
||||
peer_lookup,
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -315,6 +375,26 @@ impl RegionSupervisor {
|
||||
pub(crate) async fn run(&mut self) {
|
||||
while let Some(event) = self.receiver.recv().await {
|
||||
match event {
|
||||
Event::InitializeAllRegions(sender) => {
|
||||
match self.is_maintenance_mode_enabled().await {
|
||||
Ok(false) => {}
|
||||
Ok(true) => {
|
||||
warn!("Skipping initialize all regions since maintenance mode is enabled.");
|
||||
continue;
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to check maintenance mode during initialize all regions.");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(err) = self.initialize_all().await {
|
||||
error!(err; "Failed to initialize all regions.");
|
||||
} else {
|
||||
// Ignore the error.
|
||||
let _ = sender.send(());
|
||||
}
|
||||
}
|
||||
Event::Tick => {
|
||||
let regions = self.detect_region_failure();
|
||||
self.handle_region_failures(regions).await;
|
||||
@@ -336,6 +416,59 @@ impl RegionSupervisor {
|
||||
info!("RegionSupervisor is stopped!");
|
||||
}
|
||||
|
||||
async fn initialize_all(&self) -> Result<()> {
|
||||
let now = Instant::now();
|
||||
let regions = self.regions();
|
||||
let req = RangeRequest::new().with_prefix(TableRouteKey::range_prefix());
|
||||
let stream = PaginationStream::new(self.kv_backend.clone(), req, DEFAULT_PAGE_SIZE, |kv| {
|
||||
TableRouteKey::from_bytes(&kv.key).map(|v| (v.table_id, kv.value))
|
||||
})
|
||||
.into_stream();
|
||||
|
||||
let mut stream = stream
|
||||
.map_ok(|(_, value)| {
|
||||
TableRouteValue::try_from_raw_value(&value)
|
||||
.context(error::TableMetadataManagerSnafu)
|
||||
})
|
||||
.boxed();
|
||||
let mut detecting_regions = Vec::new();
|
||||
while let Some(route) = stream
|
||||
.try_next()
|
||||
.await
|
||||
.context(error::TableMetadataManagerSnafu)?
|
||||
{
|
||||
let route = route?;
|
||||
if !route.is_physical() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let physical_table_route = route.into_physical_table_route();
|
||||
physical_table_route
|
||||
.region_routes
|
||||
.iter()
|
||||
.for_each(|region_route| {
|
||||
if !regions.contains(®ion_route.region.id) {
|
||||
if let Some(leader_peer) = ®ion_route.leader_peer {
|
||||
detecting_regions.push((leader_peer.id, region_route.region.id));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
let num_detecting_regions = detecting_regions.len();
|
||||
if !detecting_regions.is_empty() {
|
||||
self.register_failure_detectors(detecting_regions).await;
|
||||
}
|
||||
|
||||
info!(
|
||||
"Initialize {} region failure detectors, elapsed: {:?}",
|
||||
num_detecting_regions,
|
||||
now.elapsed()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn register_failure_detectors(&self, detecting_regions: Vec<DetectingRegion>) {
|
||||
let ts_millis = current_time_millis();
|
||||
for region in detecting_regions {
|
||||
@@ -497,12 +630,10 @@ impl RegionSupervisor {
|
||||
.peer_lookup
|
||||
.datanode(from_peer_id)
|
||||
.await
|
||||
.context(error::LookupPeerSnafu {
|
||||
peer_id: from_peer_id,
|
||||
})?
|
||||
.context(error::PeerUnavailableSnafu {
|
||||
peer_id: from_peer_id,
|
||||
})?;
|
||||
.ok()
|
||||
.flatten()
|
||||
.unwrap_or_else(|| Peer::empty(from_peer_id));
|
||||
|
||||
let region_peers = self
|
||||
.select_peers(from_peer_id, regions, failed_datanodes)
|
||||
.await?;
|
||||
@@ -599,6 +730,14 @@ impl RegionSupervisor {
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Returns all regions that registered in the failure detector.
|
||||
fn regions(&self) -> HashSet<RegionId> {
|
||||
self.failure_detector
|
||||
.iter()
|
||||
.map(|e| e.region_ident().1)
|
||||
.collect::<HashSet<_>>()
|
||||
}
|
||||
|
||||
/// Updates the state of corresponding failure detectors.
|
||||
fn on_heartbeat_arrived(&self, heartbeat: DatanodeHeartbeat) {
|
||||
for region_id in heartbeat.regions {
|
||||
@@ -618,13 +757,22 @@ impl RegionSupervisor {
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
|
||||
use common_meta::ddl::test_util::{
|
||||
test_create_logical_table_task, test_create_physical_table_task,
|
||||
};
|
||||
use common_meta::ddl::RegionFailureDetectorController;
|
||||
use common_meta::key::maintenance;
|
||||
use common_meta::key::table_route::{
|
||||
LogicalTableRouteValue, PhysicalTableRouteValue, TableRouteValue,
|
||||
};
|
||||
use common_meta::key::{maintenance, TableMetadataManager};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::router::{Region, RegionRoute};
|
||||
use common_meta::test_util::NoopPeerLookupService;
|
||||
use common_telemetry::info;
|
||||
use common_time::util::current_time_millis;
|
||||
use rand::Rng;
|
||||
use store_api::storage::RegionId;
|
||||
@@ -654,6 +802,7 @@ pub(crate) mod tests {
|
||||
Arc::new(maintenance::MaintenanceModeManager::new(env.kv_backend()));
|
||||
let peer_lookup = Arc::new(NoopPeerLookupService);
|
||||
let (tx, rx) = RegionSupervisor::channel();
|
||||
let kv_backend = env.kv_backend();
|
||||
|
||||
(
|
||||
RegionSupervisor::new(
|
||||
@@ -664,6 +813,7 @@ pub(crate) mod tests {
|
||||
region_migration_manager,
|
||||
maintenance_mode_manager,
|
||||
peer_lookup,
|
||||
kv_backend,
|
||||
),
|
||||
tx,
|
||||
)
|
||||
@@ -748,6 +898,8 @@ pub(crate) mod tests {
|
||||
let ticker = RegionSupervisorTicker {
|
||||
tick_handle: Mutex::new(None),
|
||||
tick_interval: Duration::from_millis(10),
|
||||
initialization_delay: Duration::from_millis(100),
|
||||
initialization_retry_period: Duration::from_millis(100),
|
||||
sender: tx,
|
||||
};
|
||||
// It's ok if we start the ticker again.
|
||||
@@ -757,11 +909,116 @@ pub(crate) mod tests {
|
||||
ticker.stop();
|
||||
assert!(!rx.is_empty());
|
||||
while let Ok(event) = rx.try_recv() {
|
||||
assert_matches!(event, Event::Tick | Event::Clear);
|
||||
assert_matches!(
|
||||
event,
|
||||
Event::Tick | Event::Clear | Event::InitializeAllRegions(_)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_initialize_all_regions_event_handling() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (tx, mut rx) = tokio::sync::mpsc::channel(128);
|
||||
let ticker = RegionSupervisorTicker {
|
||||
tick_handle: Mutex::new(None),
|
||||
tick_interval: Duration::from_millis(1000),
|
||||
initialization_delay: Duration::from_millis(50),
|
||||
initialization_retry_period: Duration::from_millis(50),
|
||||
sender: tx,
|
||||
};
|
||||
ticker.start();
|
||||
sleep(Duration::from_millis(60)).await;
|
||||
let handle = tokio::spawn(async move {
|
||||
let mut counter = 0;
|
||||
while let Some(event) = rx.recv().await {
|
||||
if let Event::InitializeAllRegions(tx) = event {
|
||||
if counter == 0 {
|
||||
// Ignore the first event
|
||||
counter += 1;
|
||||
continue;
|
||||
}
|
||||
tx.send(()).unwrap();
|
||||
info!("Responded initialize all regions event");
|
||||
break;
|
||||
}
|
||||
}
|
||||
rx
|
||||
});
|
||||
|
||||
let rx = handle.await.unwrap();
|
||||
for _ in 0..3 {
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
assert!(rx.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_initialize_all_regions() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (mut supervisor, sender) = new_test_supervisor();
|
||||
let table_metadata_manager = TableMetadataManager::new(supervisor.kv_backend.clone());
|
||||
|
||||
// Create a physical table metadata
|
||||
let table_id = 1024;
|
||||
let mut create_physical_table_task = test_create_physical_table_task("my_physical_table");
|
||||
create_physical_table_task.set_table_id(table_id);
|
||||
let table_info = create_physical_table_task.table_info;
|
||||
let table_route = PhysicalTableRouteValue::new(vec![RegionRoute {
|
||||
region: Region {
|
||||
id: RegionId::new(table_id, 0),
|
||||
..Default::default()
|
||||
},
|
||||
leader_peer: Some(Peer::empty(1)),
|
||||
..Default::default()
|
||||
}]);
|
||||
let table_route_value = TableRouteValue::Physical(table_route);
|
||||
table_metadata_manager
|
||||
.create_table_metadata(table_info, table_route_value, HashMap::new())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create a logical table metadata
|
||||
let logical_table_id = 1025;
|
||||
let mut test_create_logical_table_task = test_create_logical_table_task("my_logical_table");
|
||||
test_create_logical_table_task.set_table_id(logical_table_id);
|
||||
let table_info = test_create_logical_table_task.table_info;
|
||||
let table_route = LogicalTableRouteValue::new(1024, vec![RegionId::new(1025, 0)]);
|
||||
let table_route_value = TableRouteValue::Logical(table_route);
|
||||
table_metadata_manager
|
||||
.create_table_metadata(table_info, table_route_value, HashMap::new())
|
||||
.await
|
||||
.unwrap();
|
||||
tokio::spawn(async move { supervisor.run().await });
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send(Event::InitializeAllRegions(tx)).await.unwrap();
|
||||
assert!(rx.await.is_ok());
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send(Event::Dump(tx)).await.unwrap();
|
||||
let detector = rx.await.unwrap();
|
||||
assert_eq!(detector.len(), 1);
|
||||
assert!(detector.contains(&(1, RegionId::new(1024, 0))));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_initialize_all_regions_with_maintenance_mode() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (mut supervisor, sender) = new_test_supervisor();
|
||||
|
||||
supervisor
|
||||
.maintenance_mode_manager
|
||||
.set_maintenance_mode()
|
||||
.await
|
||||
.unwrap();
|
||||
tokio::spawn(async move { supervisor.run().await });
|
||||
let (tx, rx) = oneshot::channel();
|
||||
sender.send(Event::InitializeAllRegions(tx)).await.unwrap();
|
||||
// The sender is dropped, so the receiver will receive an error.
|
||||
assert!(rx.await.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_failure_detector_controller() {
|
||||
let (mut supervisor, sender) = new_test_supervisor();
|
||||
|
||||
@@ -8,6 +8,7 @@ license.workspace = true
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
ahash.workspace = true
|
||||
api.workspace = true
|
||||
aquamarine.workspace = true
|
||||
async-stream.workspace = true
|
||||
|
||||
@@ -158,6 +158,7 @@ impl RegionEngine for MetricEngine {
|
||||
Ok(RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: extension_return_value,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
BatchRegionDdlRequest::Alter(requests) => {
|
||||
@@ -171,6 +172,7 @@ impl RegionEngine for MetricEngine {
|
||||
Ok(RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: extension_return_value,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
BatchRegionDdlRequest::Drop(requests) => {
|
||||
@@ -243,6 +245,7 @@ impl RegionEngine for MetricEngine {
|
||||
result.map_err(BoxedError::new).map(|rows| RegionResponse {
|
||||
affected_rows: rows,
|
||||
extensions: extension_return_value,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -439,6 +442,7 @@ impl MetricEngine {
|
||||
Ok(RegionResponse {
|
||||
affected_rows,
|
||||
extensions,
|
||||
metadata: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ impl MetricEngineInner {
|
||||
fn modify_rows(
|
||||
&self,
|
||||
physical_region_id: RegionId,
|
||||
table_id: TableId,
|
||||
logical_table_id: TableId,
|
||||
rows: &mut Rows,
|
||||
encoding: PrimaryKeyEncoding,
|
||||
) -> Result<()> {
|
||||
@@ -163,7 +163,9 @@ impl MetricEngineInner {
|
||||
.physical_columns();
|
||||
RowsIter::new(input, name_to_id)
|
||||
};
|
||||
let output = self.row_modifier.modify_rows(iter, table_id, encoding)?;
|
||||
let output = self
|
||||
.row_modifier
|
||||
.modify_rows(iter, logical_table_id, encoding)?;
|
||||
*rows = output;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ const TSID_HASH_SEED: u32 = 846793005;
|
||||
///
|
||||
/// - For [`PrimaryKeyEncoding::Dense`] encoding,
|
||||
/// it adds two columns(`__table_id`, `__tsid`) to the row.
|
||||
pub(crate) struct RowModifier {
|
||||
pub struct RowModifier {
|
||||
codec: SparsePrimaryKeyCodec,
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ impl RowModifier {
|
||||
}
|
||||
|
||||
/// Modify rows with the given primary key encoding.
|
||||
pub(crate) fn modify_rows(
|
||||
pub fn modify_rows(
|
||||
&self,
|
||||
iter: RowsIter,
|
||||
table_id: TableId,
|
||||
@@ -74,7 +74,7 @@ impl RowModifier {
|
||||
|
||||
let mut buffer = vec![];
|
||||
for mut iter in iter.iter_mut() {
|
||||
let (table_id, tsid) = self.fill_internal_columns(table_id, &iter);
|
||||
let (table_id, tsid) = Self::fill_internal_columns(table_id, &iter);
|
||||
let mut values = Vec::with_capacity(num_output_column);
|
||||
buffer.clear();
|
||||
let internal_columns = [
|
||||
@@ -135,7 +135,7 @@ impl RowModifier {
|
||||
options: None,
|
||||
});
|
||||
for iter in iter.iter_mut() {
|
||||
let (table_id, tsid) = self.fill_internal_columns(table_id, &iter);
|
||||
let (table_id, tsid) = Self::fill_internal_columns(table_id, &iter);
|
||||
iter.row.values.push(table_id);
|
||||
iter.row.values.push(tsid);
|
||||
}
|
||||
@@ -144,7 +144,7 @@ impl RowModifier {
|
||||
}
|
||||
|
||||
/// Fills internal columns of a row with table name and a hash of tag values.
|
||||
fn fill_internal_columns(&self, table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) {
|
||||
pub fn fill_internal_columns(table_id: TableId, iter: &RowIter<'_>) -> (Value, Value) {
|
||||
let mut hasher = TsidGenerator::default();
|
||||
for (name, value) in iter.primary_keys_with_name() {
|
||||
// The type is checked before. So only null is ignored.
|
||||
@@ -264,7 +264,7 @@ impl IterIndex {
|
||||
}
|
||||
|
||||
/// Iterator of rows.
|
||||
pub(crate) struct RowsIter {
|
||||
pub struct RowsIter {
|
||||
rows: Rows,
|
||||
index: IterIndex,
|
||||
}
|
||||
@@ -276,7 +276,7 @@ impl RowsIter {
|
||||
}
|
||||
|
||||
/// Returns the iterator of rows.
|
||||
fn iter_mut(&mut self) -> impl Iterator<Item = RowIter> {
|
||||
pub fn iter_mut(&mut self) -> impl Iterator<Item = RowIter> {
|
||||
self.rows.rows.iter_mut().map(|row| RowIter {
|
||||
row,
|
||||
index: &self.index,
|
||||
@@ -290,10 +290,22 @@ impl RowsIter {
|
||||
.iter()
|
||||
.map(|idx| std::mem::take(&mut self.rows.schema[idx.index]))
|
||||
}
|
||||
|
||||
pub fn num_rows(&self) -> usize {
|
||||
self.rows.rows.len()
|
||||
}
|
||||
|
||||
pub fn num_columns(&self) -> usize {
|
||||
self.rows.schema.len()
|
||||
}
|
||||
|
||||
pub fn num_primary_keys(&self) -> usize {
|
||||
self.index.num_primary_key_column
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator of a row.
|
||||
struct RowIter<'a> {
|
||||
pub struct RowIter<'a> {
|
||||
row: &'a mut Row,
|
||||
index: &'a IterIndex,
|
||||
schema: &'a Vec<ColumnSchema>,
|
||||
@@ -313,7 +325,7 @@ impl RowIter<'_> {
|
||||
}
|
||||
|
||||
/// Returns the primary keys.
|
||||
fn primary_keys(&self) -> impl Iterator<Item = (ColumnId, ValueRef)> {
|
||||
pub fn primary_keys(&self) -> impl Iterator<Item = (ColumnId, ValueRef)> {
|
||||
self.index.indices[..self.index.num_primary_key_column]
|
||||
.iter()
|
||||
.map(|idx| {
|
||||
@@ -333,6 +345,13 @@ impl RowIter<'_> {
|
||||
.iter()
|
||||
.map(|idx| std::mem::take(&mut self.row.values[idx.index]))
|
||||
}
|
||||
|
||||
/// Returns value at given offset.
|
||||
/// # Panics
|
||||
/// Panics if offset out-of-bound
|
||||
pub fn value_at(&self, idx: usize) -> &Value {
|
||||
&self.row.values[idx]
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -476,7 +495,6 @@ mod tests {
|
||||
#[test]
|
||||
fn test_fill_internal_columns() {
|
||||
let name_to_column_id = test_name_to_column_id();
|
||||
let encoder = RowModifier::new();
|
||||
let table_id = 1025;
|
||||
let schema = test_schema();
|
||||
let row = test_row("greptimedb", "127.0.0.1");
|
||||
@@ -486,7 +504,7 @@ mod tests {
|
||||
};
|
||||
let mut rows_iter = RowsIter::new(rows, &name_to_column_id);
|
||||
let row_iter = rows_iter.iter_mut().next().unwrap();
|
||||
let (encoded_table_id, tsid) = encoder.fill_internal_columns(table_id, &row_iter);
|
||||
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
|
||||
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
|
||||
assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into());
|
||||
|
||||
@@ -514,7 +532,7 @@ mod tests {
|
||||
};
|
||||
let mut rows_iter = RowsIter::new(rows, &name_to_column_id);
|
||||
let row_iter = rows_iter.iter_mut().next().unwrap();
|
||||
let (encoded_table_id, tsid) = encoder.fill_internal_columns(table_id, &row_iter);
|
||||
let (encoded_table_id, tsid) = RowModifier::fill_internal_columns(table_id, &row_iter);
|
||||
assert_eq!(encoded_table_id, ValueData::U32Value(1025).into());
|
||||
assert_eq!(tsid, ValueData::U64Value(9442261431637846000).into());
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::sync::Arc;
|
||||
|
||||
use object_store::services::Fs;
|
||||
use object_store::util::{join_dir, with_instrument_layers};
|
||||
use object_store::{ErrorKind, ObjectStore};
|
||||
use object_store::{ErrorKind, ObjectStore, ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR};
|
||||
use smallvec::SmallVec;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
@@ -42,10 +42,6 @@ pub type AccessLayerRef = Arc<AccessLayer>;
|
||||
/// SST write results.
|
||||
pub type SstInfoArray = SmallVec<[SstInfo; 2]>;
|
||||
|
||||
pub const ATOMIC_WRITE_DIR: &str = "tmp/";
|
||||
/// For compatibility. Remove this after a major version release.
|
||||
pub const OLD_ATOMIC_WRITE_DIR: &str = ".tmp/";
|
||||
|
||||
/// A layer to access SST files under the same directory.
|
||||
pub struct AccessLayer {
|
||||
region_dir: String,
|
||||
|
||||
3
src/mito2/src/cache/write_cache.rs
vendored
3
src/mito2/src/cache/write_cache.rs
vendored
@@ -430,9 +430,10 @@ impl UploadTracker {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_test_util::temp_dir::create_temp_dir;
|
||||
use object_store::ATOMIC_WRITE_DIR;
|
||||
|
||||
use super::*;
|
||||
use crate::access_layer::{OperationType, ATOMIC_WRITE_DIR};
|
||||
use crate::access_layer::OperationType;
|
||||
use crate::cache::test_util::new_fs_store;
|
||||
use crate::cache::{CacheManager, CacheStrategy};
|
||||
use crate::error::InvalidBatchSnafu;
|
||||
|
||||
@@ -80,7 +80,6 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::logstore::LogStore;
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::metric_engine_consts::MANIFEST_INFO_EXTENSION_KEY;
|
||||
use store_api::region_engine::{
|
||||
@@ -89,6 +88,7 @@ use store_api::region_engine::{
|
||||
};
|
||||
use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
|
||||
use store_api::storage::{RegionId, ScanRequest, SequenceNumber};
|
||||
use store_api::ManifestVersion;
|
||||
use tokio::sync::{oneshot, Semaphore};
|
||||
|
||||
use crate::cache::CacheStrategy;
|
||||
@@ -101,6 +101,7 @@ use crate::manifest::action::RegionEdit;
|
||||
use crate::memtable::MemtableStats;
|
||||
use crate::metrics::HANDLE_REQUEST_ELAPSED;
|
||||
use crate::read::scan_region::{ScanRegion, Scanner};
|
||||
use crate::read::stream::ScanBatchStream;
|
||||
use crate::region::MitoRegionRef;
|
||||
use crate::request::{RegionEditRequest, WorkerRequest};
|
||||
use crate::sst::file::FileMeta;
|
||||
@@ -183,6 +184,18 @@ impl MitoEngine {
|
||||
.await
|
||||
}
|
||||
|
||||
/// Scan [`Batch`]es by [`ScanRequest`].
|
||||
pub async fn scan_batch(
|
||||
&self,
|
||||
region_id: RegionId,
|
||||
request: ScanRequest,
|
||||
filter_deleted: bool,
|
||||
) -> Result<ScanBatchStream> {
|
||||
let mut scan_region = self.scan_region(region_id, request)?;
|
||||
scan_region.set_filter_deleted(filter_deleted);
|
||||
scan_region.scanner().await?.scan_batch()
|
||||
}
|
||||
|
||||
/// Returns a scanner to scan for `request`.
|
||||
async fn scanner(&self, region_id: RegionId, request: ScanRequest) -> Result<Scanner> {
|
||||
self.scan_region(region_id, request)?.scanner().await
|
||||
|
||||
@@ -28,8 +28,8 @@ use object_store::ErrorKind;
|
||||
use prost::DecodeError;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::ManifestVersion;
|
||||
use tokio::time::error::Elapsed;
|
||||
|
||||
use crate::cache::file_cache::FileType;
|
||||
|
||||
@@ -19,9 +19,9 @@ use std::time::Duration;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::{RegionId, SequenceNumber};
|
||||
use store_api::ManifestVersion;
|
||||
use strum::Display;
|
||||
|
||||
use crate::error::{RegionMetadataNotFoundSnafu, Result, SerdeJsonSnafu, Utf8Snafu};
|
||||
|
||||
@@ -17,8 +17,8 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::{error, info};
|
||||
use store_api::manifest::{ManifestVersion, MIN_VERSION};
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::{ManifestVersion, MIN_VERSION};
|
||||
|
||||
use crate::manifest::action::{RegionCheckpoint, RegionManifest};
|
||||
use crate::manifest::manager::RegionManifestOptions;
|
||||
|
||||
@@ -20,8 +20,8 @@ use common_telemetry::{debug, info};
|
||||
use futures::TryStreamExt;
|
||||
use object_store::ObjectStore;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::manifest::{ManifestVersion, MAX_VERSION, MIN_VERSION};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::{ManifestVersion, MAX_VERSION, MIN_VERSION};
|
||||
|
||||
use crate::error::{
|
||||
self, InstallManifestToSnafu, NoCheckpointSnafu, NoManifestsSnafu, RegionStoppedSnafu, Result,
|
||||
|
||||
@@ -28,8 +28,8 @@ use object_store::{util, Entry, ErrorKind, Lister, ObjectStore};
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::ManifestVersion;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use crate::error::{
|
||||
|
||||
@@ -21,11 +21,12 @@ pub mod merge;
|
||||
pub mod plain_batch;
|
||||
pub mod projection;
|
||||
pub(crate) mod prune;
|
||||
pub(crate) mod range;
|
||||
pub(crate) mod scan_region;
|
||||
pub(crate) mod scan_util;
|
||||
pub mod range;
|
||||
pub mod scan_region;
|
||||
pub mod scan_util;
|
||||
pub(crate) mod seq_scan;
|
||||
pub(crate) mod series_scan;
|
||||
pub mod series_scan;
|
||||
pub mod stream;
|
||||
pub(crate) mod unordered_scan;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
@@ -41,12 +42,14 @@ use datatypes::arrow::array::{Array, ArrayRef, UInt64Array};
|
||||
use datatypes::arrow::compute::SortOptions;
|
||||
use datatypes::arrow::row::{RowConverter, SortField};
|
||||
use datatypes::prelude::{ConcreteDataType, DataType, ScalarVector};
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::{Value, ValueRef};
|
||||
use datatypes::vectors::{
|
||||
BooleanVector, Helper, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, UInt32Vector, UInt64Vector, UInt8Vector,
|
||||
Vector, VectorRef,
|
||||
TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampSecondVector,
|
||||
UInt32Vector, UInt64Vector, UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder, Vector,
|
||||
VectorRef,
|
||||
};
|
||||
use futures::stream::BoxStream;
|
||||
use futures::TryStreamExt;
|
||||
@@ -161,6 +164,19 @@ impl Batch {
|
||||
self.sequences.len()
|
||||
}
|
||||
|
||||
/// Create an empty [`Batch`].
|
||||
pub(crate) fn empty() -> Self {
|
||||
Self {
|
||||
primary_key: vec![],
|
||||
pk_values: None,
|
||||
timestamps: Arc::new(TimestampMillisecondVectorBuilder::with_capacity(0).finish()),
|
||||
sequences: Arc::new(UInt64VectorBuilder::with_capacity(0).finish()),
|
||||
op_types: Arc::new(UInt8VectorBuilder::with_capacity(0).finish()),
|
||||
fields: vec![],
|
||||
fields_idx: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the number of rows in the batch is 0.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.num_rows() == 0
|
||||
@@ -1011,8 +1027,6 @@ pub(crate) struct ScannerMetrics {
|
||||
build_reader_cost: Duration,
|
||||
/// Duration to scan data.
|
||||
scan_cost: Duration,
|
||||
/// Duration to convert batches.
|
||||
convert_cost: Duration,
|
||||
/// Duration while waiting for `yield`.
|
||||
yield_cost: Duration,
|
||||
/// Number of batches returned.
|
||||
@@ -1048,7 +1062,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_empty_batch() {
|
||||
let batch = new_batch(&[], &[], &[], &[]);
|
||||
let batch = Batch::empty();
|
||||
assert!(batch.is_empty());
|
||||
assert_eq!(None, batch.first_timestamp());
|
||||
assert_eq!(None, batch.last_timestamp());
|
||||
assert_eq!(None, batch.first_sequence());
|
||||
|
||||
@@ -30,7 +30,7 @@ use datafusion_common::Column;
|
||||
use datafusion_expr::utils::expr_to_columns;
|
||||
use datafusion_expr::Expr;
|
||||
use smallvec::SmallVec;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
|
||||
use store_api::region_engine::{PartitionRange, RegionScannerRef};
|
||||
use store_api::storage::{RegionId, ScanRequest, TimeSeriesDistribution, TimeSeriesRowSelector};
|
||||
use table::predicate::{build_time_range_predicate, Predicate};
|
||||
@@ -48,6 +48,7 @@ use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::range::{FileRangeBuilder, MemRangeBuilder, RangeMeta, RowGroupIndex};
|
||||
use crate::read::seq_scan::SeqScan;
|
||||
use crate::read::series_scan::SeriesScan;
|
||||
use crate::read::stream::ScanBatchStream;
|
||||
use crate::read::unordered_scan::UnorderedScan;
|
||||
use crate::read::{Batch, Source};
|
||||
use crate::region::options::MergeMode;
|
||||
@@ -82,6 +83,15 @@ impl Scanner {
|
||||
Scanner::Series(series_scan) => series_scan.build_stream().await,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a stream of [`Batch`] by this scanner.
|
||||
pub(crate) fn scan_batch(&self) -> Result<ScanBatchStream> {
|
||||
match self {
|
||||
Scanner::Seq(x) => x.scan_all_partitions(),
|
||||
Scanner::Unordered(x) => x.scan_all_partitions(),
|
||||
Scanner::Series(x) => x.scan_all_partitions(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -259,7 +269,6 @@ impl ScanRegion {
|
||||
self
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn set_filter_deleted(&mut self, filter_deleted: bool) {
|
||||
self.filter_deleted = filter_deleted;
|
||||
}
|
||||
@@ -897,6 +906,10 @@ impl ScanInput {
|
||||
pub(crate) fn num_files(&self) -> usize {
|
||||
self.files.len()
|
||||
}
|
||||
|
||||
/// Returns the region metadata exposed by the projection mapper of this input.
pub fn region_metadata(&self) -> &RegionMetadataRef {
    let mapper = &self.mapper;
    mapper.metadata()
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -45,8 +45,6 @@ struct ScanMetricsSet {
|
||||
build_reader_cost: Duration,
|
||||
/// Duration to scan data.
|
||||
scan_cost: Duration,
|
||||
/// Duration to convert batches.
|
||||
convert_cost: Duration,
|
||||
/// Duration while waiting for `yield`.
|
||||
yield_cost: Duration,
|
||||
/// Duration of the scan.
|
||||
@@ -111,7 +109,6 @@ impl fmt::Debug for ScanMetricsSet {
|
||||
prepare_scan_cost,
|
||||
build_reader_cost,
|
||||
scan_cost,
|
||||
convert_cost,
|
||||
yield_cost,
|
||||
total_cost,
|
||||
num_rows,
|
||||
@@ -145,7 +142,6 @@ impl fmt::Debug for ScanMetricsSet {
|
||||
"{{\"prepare_scan_cost\":\"{prepare_scan_cost:?}\", \
|
||||
\"build_reader_cost\":\"{build_reader_cost:?}\", \
|
||||
\"scan_cost\":\"{scan_cost:?}\", \
|
||||
\"convert_cost\":\"{convert_cost:?}\", \
|
||||
\"yield_cost\":\"{yield_cost:?}\", \
|
||||
\"total_cost\":\"{total_cost:?}\", \
|
||||
\"num_rows\":{num_rows}, \
|
||||
@@ -188,7 +184,6 @@ impl ScanMetricsSet {
|
||||
prepare_scan_cost,
|
||||
build_reader_cost,
|
||||
scan_cost,
|
||||
convert_cost,
|
||||
yield_cost,
|
||||
num_batches,
|
||||
num_rows,
|
||||
@@ -199,7 +194,6 @@ impl ScanMetricsSet {
|
||||
self.prepare_scan_cost += *prepare_scan_cost;
|
||||
self.build_reader_cost += *build_reader_cost;
|
||||
self.scan_cost += *scan_cost;
|
||||
self.convert_cost += *convert_cost;
|
||||
self.yield_cost += *yield_cost;
|
||||
self.num_rows += *num_rows;
|
||||
self.num_batches += *num_batches;
|
||||
@@ -274,9 +268,6 @@ impl ScanMetricsSet {
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["build_reader"])
|
||||
.observe(self.build_reader_cost.as_secs_f64());
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["convert_rb"])
|
||||
.observe(self.convert_cost.as_secs_f64());
|
||||
READ_STAGE_ELAPSED
|
||||
.with_label_values(&["scan"])
|
||||
.observe(self.scan_cost.as_secs_f64());
|
||||
@@ -348,6 +339,8 @@ struct PartitionMetricsInner {
|
||||
scan_cost: Time,
|
||||
/// Duration while waiting for `yield`.
|
||||
yield_cost: Time,
|
||||
/// Duration to convert [`Batch`]es.
|
||||
convert_cost: Time,
|
||||
}
|
||||
|
||||
impl PartitionMetricsInner {
|
||||
@@ -367,8 +360,8 @@ impl Drop for PartitionMetricsInner {
|
||||
self.in_progress_scan.dec();
|
||||
|
||||
debug!(
|
||||
"{} finished, region_id: {}, partition: {}, metrics: {:?}",
|
||||
self.scanner_type, self.region_id, self.partition, metrics
|
||||
"{} finished, region_id: {}, partition: {}, scan_metrics: {:?}, convert_batch_costs: {}",
|
||||
self.scanner_type, self.region_id, self.partition, metrics, self.convert_cost,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -400,7 +393,7 @@ impl PartitionMetricsList {
|
||||
|
||||
/// Metrics while reading a partition.
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct PartitionMetrics(Arc<PartitionMetricsInner>);
|
||||
pub struct PartitionMetrics(Arc<PartitionMetricsInner>);
|
||||
|
||||
impl PartitionMetrics {
|
||||
pub(crate) fn new(
|
||||
@@ -427,6 +420,7 @@ impl PartitionMetrics {
|
||||
.subset_time("build_reader_cost", partition),
|
||||
scan_cost: MetricBuilder::new(metrics_set).subset_time("scan_cost", partition),
|
||||
yield_cost: MetricBuilder::new(metrics_set).subset_time("yield_cost", partition),
|
||||
convert_cost: MetricBuilder::new(metrics_set).subset_time("convert_cost", partition),
|
||||
};
|
||||
Self(Arc::new(inner))
|
||||
}
|
||||
@@ -441,7 +435,7 @@ impl PartitionMetrics {
|
||||
metrics.num_mem_ranges += num;
|
||||
}
|
||||
|
||||
pub(crate) fn inc_num_file_ranges(&self, num: usize) {
|
||||
pub fn inc_num_file_ranges(&self, num: usize) {
|
||||
let mut metrics = self.0.metrics.lock().unwrap();
|
||||
metrics.num_file_ranges += num;
|
||||
}
|
||||
@@ -454,6 +448,10 @@ impl PartitionMetrics {
|
||||
metrics.build_reader_cost += cost;
|
||||
}
|
||||
|
||||
pub(crate) fn inc_convert_batch_cost(&self, cost: Duration) {
|
||||
self.0.convert_cost.add_duration(cost);
|
||||
}
|
||||
|
||||
/// Merges [ScannerMetrics], `build_reader_cost`, `scan_cost` and `yield_cost`.
|
||||
pub(crate) fn merge_metrics(&self, metrics: &ScannerMetrics) {
|
||||
self.0
|
||||
|
||||
@@ -20,14 +20,14 @@ use std::time::Instant;
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::util::ChainedRecordBatchStream;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use common_telemetry::tracing;
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use snafu::ResultExt;
|
||||
use futures::StreamExt;
|
||||
use snafu::ensure;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{PartitionRange, PrepareRequest, RegionScanner, ScannerProperties};
|
||||
use store_api::storage::TimeSeriesRowSelector;
|
||||
@@ -42,7 +42,8 @@ use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{
|
||||
scan_file_ranges, scan_mem_ranges, PartitionMetrics, PartitionMetricsList,
|
||||
};
|
||||
use crate::read::{BatchReader, BoxedBatchReader, ScannerMetrics, Source};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, BatchReader, BoxedBatchReader, ScannerMetrics, Source};
|
||||
use crate::region::options::MergeMode;
|
||||
|
||||
/// Scans a region and returns rows in a sorted sequence.
|
||||
@@ -93,6 +94,20 @@ impl SeqScan {
|
||||
Ok(Box::pin(aggr_stream))
|
||||
}
|
||||
|
||||
/// Scan [`Batch`] in all partitions one by one.
|
||||
pub(crate) fn scan_all_partitions(&self) -> Result<ScanBatchStream> {
|
||||
let metrics_set = ExecutionPlanMetricsSet::new();
|
||||
|
||||
let streams = (0..self.properties.partitions.len())
|
||||
.map(|partition| {
|
||||
let metrics = self.new_partition_metrics(&metrics_set, partition);
|
||||
self.scan_batch_in_partition(partition, metrics)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Box::pin(futures::stream::iter(streams).flatten()))
|
||||
}
|
||||
|
||||
/// Builds a [BoxedBatchReader] from sequential scan for compaction.
|
||||
///
|
||||
/// # Panics
|
||||
@@ -196,23 +211,40 @@ impl SeqScan {
|
||||
&self,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
if partition >= self.properties.partitions.len() {
|
||||
return Err(BoxedError::new(
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
.build(),
|
||||
));
|
||||
}
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let metrics = self.new_partition_metrics(metrics_set, partition);
|
||||
|
||||
let batch_stream = self.scan_batch_in_partition(partition, metrics.clone())?;
|
||||
|
||||
let input = &self.stream_ctx.input;
|
||||
let record_batch_stream = ConvertBatchStream::new(
|
||||
batch_stream,
|
||||
input.mapper.clone(),
|
||||
input.cache_strategy.clone(),
|
||||
metrics,
|
||||
);
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamWrapper::new(
|
||||
input.mapper.output_schema(),
|
||||
Box::pin(record_batch_stream),
|
||||
)))
|
||||
}
|
||||
|
||||
fn scan_batch_in_partition(
|
||||
&self,
|
||||
partition: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
) -> Result<ScanBatchStream> {
|
||||
ensure!(
|
||||
partition < self.properties.partitions.len(),
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
);
|
||||
|
||||
if self.properties.partitions[partition].is_empty() {
|
||||
return Ok(Box::pin(RecordBatchStreamWrapper::new(
|
||||
self.stream_ctx.input.mapper.output_schema(),
|
||||
common_recordbatch::EmptyRecordBatchStream::new(
|
||||
self.stream_ctx.input.mapper.output_schema(),
|
||||
),
|
||||
)));
|
||||
return Ok(Box::pin(futures::stream::empty()));
|
||||
}
|
||||
|
||||
let stream_ctx = self.stream_ctx.clone();
|
||||
@@ -220,7 +252,6 @@ impl SeqScan {
|
||||
let partition_ranges = self.properties.partitions[partition].clone();
|
||||
let compaction = self.compaction;
|
||||
let distinguish_range = self.properties.distinguish_partition_range;
|
||||
let part_metrics = self.new_partition_metrics(metrics_set, partition);
|
||||
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
@@ -245,21 +276,13 @@ impl SeqScan {
|
||||
let mut fetch_start = Instant::now();
|
||||
let mut reader =
|
||||
Self::build_reader_from_sources(&stream_ctx, sources, semaphore.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let cache = &stream_ctx.input.cache_strategy;
|
||||
.await?;
|
||||
#[cfg(debug_assertions)]
|
||||
let mut checker = crate::read::BatchChecker::default()
|
||||
.with_start(Some(part_range.start))
|
||||
.with_end(Some(part_range.end));
|
||||
|
||||
while let Some(batch) = reader
|
||||
.next_batch()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?
|
||||
{
|
||||
while let Some(batch) = reader.next_batch().await? {
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
@@ -278,11 +301,8 @@ impl SeqScan {
|
||||
&batch,
|
||||
);
|
||||
|
||||
let convert_start = Instant::now();
|
||||
let record_batch = stream_ctx.input.mapper.convert(&batch, cache)?;
|
||||
metrics.convert_cost += convert_start.elapsed();
|
||||
let yield_start = Instant::now();
|
||||
yield record_batch;
|
||||
yield ScanBatch::Normal(batch);
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
|
||||
fetch_start = Instant::now();
|
||||
@@ -292,7 +312,7 @@ impl SeqScan {
|
||||
// The query engine can use this to optimize some queries.
|
||||
if distinguish_range {
|
||||
let yield_start = Instant::now();
|
||||
yield stream_ctx.input.mapper.empty_record_batch();
|
||||
yield ScanBatch::Normal(Batch::empty());
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
@@ -302,13 +322,7 @@ impl SeqScan {
|
||||
|
||||
part_metrics.on_finish();
|
||||
};
|
||||
|
||||
let stream = Box::pin(RecordBatchStreamWrapper::new(
|
||||
self.stream_ctx.input.mapper.output_schema(),
|
||||
Box::pin(stream),
|
||||
));
|
||||
|
||||
Ok(stream)
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
fn new_semaphore(&self) -> Option<Arc<Semaphore>> {
|
||||
@@ -368,6 +382,7 @@ impl RegionScanner for SeqScan {
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
self.scan_partition_impl(metrics_set, partition)
|
||||
.map_err(BoxedError::new)
|
||||
}
|
||||
|
||||
fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
|
||||
|
||||
@@ -20,13 +20,12 @@ use std::time::{Duration, Instant};
|
||||
|
||||
use async_stream::try_stream;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::util::ChainedRecordBatchStream;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
|
||||
use datatypes::compute::concat_batches;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures::StreamExt;
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
@@ -36,13 +35,14 @@ use tokio::sync::mpsc::{self, Receiver, Sender};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use crate::error::{
|
||||
ComputeArrowSnafu, Error, InvalidSenderSnafu, PartitionOutOfRangeSnafu, Result,
|
||||
ScanMultiTimesSnafu, ScanSeriesSnafu,
|
||||
Error, InvalidSenderSnafu, PartitionOutOfRangeSnafu, Result, ScanMultiTimesSnafu,
|
||||
ScanSeriesSnafu,
|
||||
};
|
||||
use crate::read::range::RangeBuilderList;
|
||||
use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{PartitionMetrics, PartitionMetricsList, SeriesDistributorMetrics};
|
||||
use crate::read::seq_scan::{build_sources, SeqScan};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, ScannerMetrics};
|
||||
|
||||
/// Timeout to send a batch to a sender.
|
||||
@@ -89,71 +89,65 @@ impl SeriesScan {
|
||||
&self,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
if partition >= self.properties.num_partitions() {
|
||||
return Err(BoxedError::new(
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.num_partitions(),
|
||||
}
|
||||
.build(),
|
||||
));
|
||||
}
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let metrics =
|
||||
new_partition_metrics(&self.stream_ctx, metrics_set, partition, &self.metrics_list);
|
||||
|
||||
let batch_stream = self.scan_batch_in_partition(partition, metrics.clone(), metrics_set)?;
|
||||
|
||||
let input = &self.stream_ctx.input;
|
||||
let record_batch_stream = ConvertBatchStream::new(
|
||||
batch_stream,
|
||||
input.mapper.clone(),
|
||||
input.cache_strategy.clone(),
|
||||
metrics,
|
||||
);
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamWrapper::new(
|
||||
input.mapper.output_schema(),
|
||||
Box::pin(record_batch_stream),
|
||||
)))
|
||||
}
|
||||
|
||||
fn scan_batch_in_partition(
|
||||
&self,
|
||||
partition: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
) -> Result<ScanBatchStream> {
|
||||
ensure!(
|
||||
partition < self.properties.num_partitions(),
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.num_partitions(),
|
||||
}
|
||||
);
|
||||
|
||||
self.maybe_start_distributor(metrics_set, &self.metrics_list);
|
||||
|
||||
let part_metrics =
|
||||
new_partition_metrics(&self.stream_ctx, metrics_set, partition, &self.metrics_list);
|
||||
let mut receiver = self.take_receiver(partition).map_err(BoxedError::new)?;
|
||||
let stream_ctx = self.stream_ctx.clone();
|
||||
|
||||
let mut receiver = self.take_receiver(partition)?;
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
|
||||
let cache = &stream_ctx.input.cache_strategy;
|
||||
let mut df_record_batches = Vec::new();
|
||||
let mut fetch_start = Instant::now();
|
||||
while let Some(result) = receiver.recv().await {
|
||||
while let Some(series) = receiver.recv().await {
|
||||
let series = series?;
|
||||
|
||||
let mut metrics = ScannerMetrics::default();
|
||||
let series = result.map_err(BoxedError::new).context(ExternalSnafu)?;
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
fetch_start = Instant::now();
|
||||
|
||||
let convert_start = Instant::now();
|
||||
df_record_batches.reserve(series.batches.len());
|
||||
for batch in series.batches {
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
|
||||
let record_batch = stream_ctx.input.mapper.convert(&batch, cache)?;
|
||||
df_record_batches.push(record_batch.into_df_record_batch());
|
||||
}
|
||||
|
||||
let output_schema = stream_ctx.input.mapper.output_schema();
|
||||
let df_record_batch =
|
||||
concat_batches(output_schema.arrow_schema(), &df_record_batches)
|
||||
.context(ComputeArrowSnafu)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
df_record_batches.clear();
|
||||
let record_batch =
|
||||
RecordBatch::try_from_df_record_batch(output_schema, df_record_batch)?;
|
||||
metrics.convert_cost += convert_start.elapsed();
|
||||
metrics.num_batches += series.batches.len();
|
||||
metrics.num_rows += series.batches.iter().map(|x| x.num_rows()).sum::<usize>();
|
||||
|
||||
let yield_start = Instant::now();
|
||||
yield record_batch;
|
||||
yield ScanBatch::Series(series);
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
|
||||
part_metrics.merge_metrics(&metrics);
|
||||
}
|
||||
};
|
||||
|
||||
let stream = Box::pin(RecordBatchStreamWrapper::new(
|
||||
self.stream_ctx.input.mapper.output_schema(),
|
||||
Box::pin(stream),
|
||||
));
|
||||
|
||||
Ok(stream)
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
/// Takes the receiver for the partition.
|
||||
@@ -201,6 +195,26 @@ impl SeriesScan {
|
||||
let chained_stream = ChainedRecordBatchStream::new(streams).map_err(BoxedError::new)?;
|
||||
Ok(Box::pin(chained_stream))
|
||||
}
|
||||
|
||||
/// Scan [`Batch`] in all partitions one by one.
|
||||
pub(crate) fn scan_all_partitions(&self) -> Result<ScanBatchStream> {
|
||||
let metrics_set = ExecutionPlanMetricsSet::new();
|
||||
|
||||
let streams = (0..self.properties.partitions.len())
|
||||
.map(|partition| {
|
||||
let metrics = new_partition_metrics(
|
||||
&self.stream_ctx,
|
||||
&metrics_set,
|
||||
partition,
|
||||
&self.metrics_list,
|
||||
);
|
||||
|
||||
self.scan_batch_in_partition(partition, metrics, &metrics_set)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Box::pin(futures::stream::iter(streams).flatten()))
|
||||
}
|
||||
}
|
||||
|
||||
fn new_channel_list(num_partitions: usize) -> (SenderList, ReceiverList) {
|
||||
@@ -232,6 +246,7 @@ impl RegionScanner for SeriesScan {
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
self.scan_partition_impl(metrics_set, partition)
|
||||
.map_err(BoxedError::new)
|
||||
}
|
||||
|
||||
fn prepare(&mut self, request: PrepareRequest) -> Result<(), BoxedError> {
|
||||
@@ -393,8 +408,8 @@ impl SeriesDistributor {
|
||||
|
||||
/// Batches of the same series.
|
||||
#[derive(Default)]
|
||||
struct SeriesBatch {
|
||||
batches: SmallVec<[Batch; 4]>,
|
||||
pub struct SeriesBatch {
|
||||
pub batches: SmallVec<[Batch; 4]>,
|
||||
}
|
||||
|
||||
impl SeriesBatch {
|
||||
|
||||
120
src/mito2/src/read/stream.rs
Normal file
120
src/mito2/src/read/stream.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
use std::time::Instant;
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::error::{ArrowComputeSnafu, ExternalSnafu};
|
||||
use common_recordbatch::{DfRecordBatch, RecordBatch};
|
||||
use datatypes::compute;
|
||||
use futures::stream::BoxStream;
|
||||
use futures::{Stream, StreamExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::cache::CacheStrategy;
|
||||
use crate::error::Result;
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::scan_util::PartitionMetrics;
|
||||
use crate::read::series_scan::SeriesBatch;
|
||||
use crate::read::Batch;
|
||||
|
||||
/// All kinds of [`Batch`]es to produce in scanner.
|
||||
pub enum ScanBatch {
|
||||
Normal(Batch),
|
||||
Series(SeriesBatch),
|
||||
}
|
||||
|
||||
/// A boxed `'static` stream of [`ScanBatch`] results.
pub type ScanBatchStream = BoxStream<'static, Result<ScanBatch>>;
|
||||
|
||||
/// A stream that takes [`ScanBatch`]es and produces (converts them to) [`RecordBatch`]es.
|
||||
pub(crate) struct ConvertBatchStream {
|
||||
inner: ScanBatchStream,
|
||||
projection_mapper: Arc<ProjectionMapper>,
|
||||
cache_strategy: CacheStrategy,
|
||||
partition_metrics: PartitionMetrics,
|
||||
buffer: Vec<DfRecordBatch>,
|
||||
}
|
||||
|
||||
impl ConvertBatchStream {
|
||||
pub(crate) fn new(
|
||||
inner: ScanBatchStream,
|
||||
projection_mapper: Arc<ProjectionMapper>,
|
||||
cache_strategy: CacheStrategy,
|
||||
partition_metrics: PartitionMetrics,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
projection_mapper,
|
||||
cache_strategy,
|
||||
partition_metrics,
|
||||
buffer: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn convert(&mut self, batch: ScanBatch) -> common_recordbatch::error::Result<RecordBatch> {
|
||||
match batch {
|
||||
ScanBatch::Normal(batch) => {
|
||||
if batch.is_empty() {
|
||||
Ok(self.projection_mapper.empty_record_batch())
|
||||
} else {
|
||||
self.projection_mapper.convert(&batch, &self.cache_strategy)
|
||||
}
|
||||
}
|
||||
ScanBatch::Series(series) => {
|
||||
self.buffer.clear();
|
||||
self.buffer.reserve(series.batches.len());
|
||||
|
||||
for batch in series.batches {
|
||||
let record_batch = self
|
||||
.projection_mapper
|
||||
.convert(&batch, &self.cache_strategy)?;
|
||||
self.buffer.push(record_batch.into_df_record_batch());
|
||||
}
|
||||
|
||||
let output_schema = self.projection_mapper.output_schema();
|
||||
let record_batch =
|
||||
compute::concat_batches(output_schema.arrow_schema(), &self.buffer)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
|
||||
RecordBatch::try_from_df_record_batch(output_schema, record_batch)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Stream for ConvertBatchStream {
|
||||
type Item = common_recordbatch::error::Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let batch = futures::ready!(self.inner.poll_next_unpin(cx));
|
||||
let Some(batch) = batch else {
|
||||
return Poll::Ready(None);
|
||||
};
|
||||
|
||||
let record_batch = match batch {
|
||||
Ok(batch) => {
|
||||
let start = Instant::now();
|
||||
let record_batch = self.convert(batch);
|
||||
self.partition_metrics
|
||||
.inc_convert_batch_cost(start.elapsed());
|
||||
record_batch
|
||||
}
|
||||
Err(e) => Err(BoxedError::new(e)).context(ExternalSnafu),
|
||||
};
|
||||
Poll::Ready(Some(record_batch))
|
||||
}
|
||||
}
|
||||
@@ -20,13 +20,12 @@ use std::time::Instant;
|
||||
|
||||
use async_stream::{stream, try_stream};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::error::ExternalSnafu;
|
||||
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
|
||||
use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
|
||||
use datafusion::physical_plan::{DisplayAs, DisplayFormatType};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use futures::{Stream, StreamExt};
|
||||
use snafu::ResultExt;
|
||||
use snafu::ensure;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{PrepareRequest, RegionScanner, ScannerProperties};
|
||||
|
||||
@@ -36,6 +35,7 @@ use crate::read::scan_region::{ScanInput, StreamContext};
|
||||
use crate::read::scan_util::{
|
||||
scan_file_ranges, scan_mem_ranges, PartitionMetrics, PartitionMetricsList,
|
||||
};
|
||||
use crate::read::stream::{ConvertBatchStream, ScanBatch, ScanBatchStream};
|
||||
use crate::read::{Batch, ScannerMetrics};
|
||||
|
||||
/// Scans a region without providing any output ordering guarantee.
|
||||
@@ -124,21 +124,25 @@ impl UnorderedScan {
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_partition_impl(
|
||||
&self,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
if partition >= self.properties.partitions.len() {
|
||||
return Err(BoxedError::new(
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
.build(),
|
||||
));
|
||||
}
|
||||
/// Scan [`Batch`] in all partitions one by one.
|
||||
pub(crate) fn scan_all_partitions(&self) -> Result<ScanBatchStream> {
|
||||
let metrics_set = ExecutionPlanMetricsSet::new();
|
||||
|
||||
let streams = (0..self.properties.partitions.len())
|
||||
.map(|partition| {
|
||||
let metrics = self.partition_metrics(partition, &metrics_set);
|
||||
self.scan_batch_in_partition(partition, metrics)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(Box::pin(futures::stream::iter(streams).flatten()))
|
||||
}
|
||||
|
||||
fn partition_metrics(
|
||||
&self,
|
||||
partition: usize,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
) -> PartitionMetrics {
|
||||
let part_metrics = PartitionMetrics::new(
|
||||
self.stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
@@ -147,6 +151,45 @@ impl UnorderedScan {
|
||||
metrics_set,
|
||||
);
|
||||
self.metrics_list.set(partition, part_metrics.clone());
|
||||
part_metrics
|
||||
}
|
||||
|
||||
fn scan_partition_impl(
|
||||
&self,
|
||||
metrics_set: &ExecutionPlanMetricsSet,
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let metrics = self.partition_metrics(partition, metrics_set);
|
||||
|
||||
let batch_stream = self.scan_batch_in_partition(partition, metrics.clone())?;
|
||||
|
||||
let input = &self.stream_ctx.input;
|
||||
let record_batch_stream = ConvertBatchStream::new(
|
||||
batch_stream,
|
||||
input.mapper.clone(),
|
||||
input.cache_strategy.clone(),
|
||||
metrics,
|
||||
);
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamWrapper::new(
|
||||
input.mapper.output_schema(),
|
||||
Box::pin(record_batch_stream),
|
||||
)))
|
||||
}
|
||||
|
||||
fn scan_batch_in_partition(
|
||||
&self,
|
||||
partition: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
) -> Result<ScanBatchStream> {
|
||||
ensure!(
|
||||
partition < self.properties.partitions.len(),
|
||||
PartitionOutOfRangeSnafu {
|
||||
given: partition,
|
||||
all: self.properties.partitions.len(),
|
||||
}
|
||||
);
|
||||
|
||||
let stream_ctx = self.stream_ctx.clone();
|
||||
let part_ranges = self.properties.partitions[partition].clone();
|
||||
let distinguish_range = self.properties.distinguish_partition_range;
|
||||
@@ -154,7 +197,6 @@ impl UnorderedScan {
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
|
||||
let cache = &stream_ctx.input.cache_strategy;
|
||||
let range_builder_list = Arc::new(RangeBuilderList::new(
|
||||
stream_ctx.input.num_memtables(),
|
||||
stream_ctx.input.num_files(),
|
||||
@@ -175,7 +217,7 @@ impl UnorderedScan {
|
||||
range_builder_list.clone(),
|
||||
);
|
||||
for await batch in stream {
|
||||
let batch = batch.map_err(BoxedError::new).context(ExternalSnafu)?;
|
||||
let batch = batch?;
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
metrics.num_batches += 1;
|
||||
metrics.num_rows += batch.num_rows();
|
||||
@@ -194,11 +236,8 @@ impl UnorderedScan {
|
||||
&batch,
|
||||
);
|
||||
|
||||
let convert_start = Instant::now();
|
||||
let record_batch = stream_ctx.input.mapper.convert(&batch, cache)?;
|
||||
metrics.convert_cost += convert_start.elapsed();
|
||||
let yield_start = Instant::now();
|
||||
yield record_batch;
|
||||
yield ScanBatch::Normal(batch);
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
|
||||
fetch_start = Instant::now();
|
||||
@@ -208,22 +247,15 @@ impl UnorderedScan {
|
||||
// The query engine can use this to optimize some queries.
|
||||
if distinguish_range {
|
||||
let yield_start = Instant::now();
|
||||
yield stream_ctx.input.mapper.empty_record_batch();
|
||||
yield ScanBatch::Normal(Batch::empty());
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
part_metrics.merge_metrics(&metrics);
|
||||
}
|
||||
|
||||
part_metrics.on_finish();
|
||||
};
|
||||
let stream = Box::pin(RecordBatchStreamWrapper::new(
|
||||
self.stream_ctx.input.mapper.output_schema(),
|
||||
Box::pin(stream),
|
||||
));
|
||||
|
||||
Ok(stream)
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -251,6 +283,7 @@ impl RegionScanner for UnorderedScan {
|
||||
partition: usize,
|
||||
) -> Result<SendableRecordBatchStream, BoxedError> {
|
||||
self.scan_partition_impl(metrics_set, partition)
|
||||
.map_err(BoxedError::new)
|
||||
}
|
||||
|
||||
fn has_predicate(&self) -> bool {
|
||||
|
||||
@@ -28,12 +28,12 @@ use crossbeam_utils::atomic::AtomicCell;
|
||||
use snafu::{ensure, OptionExt};
|
||||
use store_api::codec::PrimaryKeyEncoding;
|
||||
use store_api::logstore::provider::Provider;
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::region_engine::{
|
||||
RegionManifestInfo, RegionRole, RegionStatistic, SettableRegionRoleState,
|
||||
};
|
||||
use store_api::storage::{RegionId, SequenceNumber};
|
||||
use store_api::ManifestVersion;
|
||||
|
||||
use crate::access_layer::AccessLayerRef;
|
||||
use crate::error::{
|
||||
|
||||
@@ -31,7 +31,6 @@ use prost::Message;
|
||||
use smallvec::SmallVec;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::codec::{infer_primary_key_encoding_from_hint, PrimaryKeyEncoding};
|
||||
use store_api::manifest::ManifestVersion;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
|
||||
use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState};
|
||||
use store_api::region_request::{
|
||||
@@ -40,6 +39,7 @@ use store_api::region_request::{
|
||||
RegionOpenRequest, RegionRequest, RegionTruncateRequest,
|
||||
};
|
||||
use store_api::storage::{RegionId, SequenceNumber};
|
||||
use store_api::ManifestVersion;
|
||||
use tokio::sync::oneshot::{self, Receiver, Sender};
|
||||
|
||||
use crate::error::{
|
||||
|
||||
@@ -41,7 +41,7 @@ pub const PARQUET_METADATA_KEY: &str = "greptime:metadata";
|
||||
/// Default batch size to read parquet files.
|
||||
pub(crate) const DEFAULT_READ_BATCH_SIZE: usize = 1024;
|
||||
/// Default row group size for parquet files.
|
||||
pub(crate) const DEFAULT_ROW_GROUP_SIZE: usize = 100 * DEFAULT_READ_BATCH_SIZE;
|
||||
pub const DEFAULT_ROW_GROUP_SIZE: usize = 100 * DEFAULT_READ_BATCH_SIZE;
|
||||
|
||||
/// Parquet write options.
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -12,8 +12,12 @@ services-memory = ["opendal/services-memory"]
|
||||
|
||||
[dependencies]
|
||||
bytes.workspace = true
|
||||
common-base.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
lazy_static.workspace = true
|
||||
md5 = "0.7"
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
@@ -28,6 +32,9 @@ opendal = { version = "0.52", features = [
|
||||
"services-s3",
|
||||
] }
|
||||
prometheus.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
|
||||
348
src/object-store/src/config.rs
Normal file
348
src/object-store/src/config.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::secrets::{ExposeSecret, SecretString};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Object storage config
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(tag = "type")]
|
||||
pub enum ObjectStoreConfig {
|
||||
File(FileConfig),
|
||||
S3(S3Config),
|
||||
Oss(OssConfig),
|
||||
Azblob(AzblobConfig),
|
||||
Gcs(GcsConfig),
|
||||
}
|
||||
|
||||
impl Default for ObjectStoreConfig {
|
||||
fn default() -> Self {
|
||||
ObjectStoreConfig::File(FileConfig {})
|
||||
}
|
||||
}
|
||||
|
||||
impl ObjectStoreConfig {
|
||||
/// Returns the object storage type name, such as `S3`, `Oss` etc.
|
||||
pub fn provider_name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::File(_) => "File",
|
||||
Self::S3(_) => "S3",
|
||||
Self::Oss(_) => "Oss",
|
||||
Self::Azblob(_) => "Azblob",
|
||||
Self::Gcs(_) => "Gcs",
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when it's a remote object storage such as AWS s3 etc.
|
||||
pub fn is_object_storage(&self) -> bool {
|
||||
!matches!(self, Self::File(_))
|
||||
}
|
||||
|
||||
/// Returns the object storage configuration name, return the provider name if it's empty.
|
||||
pub fn config_name(&self) -> &str {
|
||||
let name = match self {
|
||||
// file storage doesn't support name
|
||||
Self::File(_) => self.provider_name(),
|
||||
Self::S3(s3) => &s3.name,
|
||||
Self::Oss(oss) => &oss.name,
|
||||
Self::Azblob(az) => &az.name,
|
||||
Self::Gcs(gcs) => &gcs.name,
|
||||
};
|
||||
|
||||
if name.trim().is_empty() {
|
||||
return self.provider_name();
|
||||
}
|
||||
|
||||
name
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Default, Deserialize, Eq, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct FileConfig {}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct S3Config {
|
||||
pub name: String,
|
||||
pub bucket: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_id: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub secret_access_key: SecretString,
|
||||
pub endpoint: Option<String>,
|
||||
pub region: Option<String>,
|
||||
/// Enable virtual host style so that opendal will send API requests in virtual host style instead of path style.
|
||||
/// By default, opendal will send API to https://s3.us-east-1.amazonaws.com/bucket_name
|
||||
/// Enabled, opendal will send API to https://bucket_name.s3.us-east-1.amazonaws.com
|
||||
pub enable_virtual_host_style: bool,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl Default for S3Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
bucket: String::default(),
|
||||
root: String::default(),
|
||||
access_key_id: SecretString::from(String::default()),
|
||||
secret_access_key: SecretString::from(String::default()),
|
||||
enable_virtual_host_style: false,
|
||||
endpoint: Option::default(),
|
||||
region: Option::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for S3Config {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.bucket == other.bucket
|
||||
&& self.root == other.root
|
||||
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
|
||||
&& self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.region == other.region
|
||||
&& self.enable_virtual_host_style == other.enable_virtual_host_style
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct OssConfig {
|
||||
pub name: String,
|
||||
pub bucket: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_id: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub access_key_secret: SecretString,
|
||||
pub endpoint: String,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for OssConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.bucket == other.bucket
|
||||
&& self.root == other.root
|
||||
&& self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
|
||||
&& self.access_key_secret.expose_secret() == other.access_key_secret.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for OssConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
bucket: String::default(),
|
||||
root: String::default(),
|
||||
access_key_id: SecretString::from(String::default()),
|
||||
access_key_secret: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct AzblobConfig {
|
||||
pub name: String,
|
||||
pub container: String,
|
||||
pub root: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub account_name: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub account_key: SecretString,
|
||||
pub endpoint: String,
|
||||
pub sas_token: Option<String>,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl PartialEq for AzblobConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.container == other.container
|
||||
&& self.root == other.root
|
||||
&& self.account_name.expose_secret() == other.account_name.expose_secret()
|
||||
&& self.account_key.expose_secret() == other.account_key.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.sas_token == other.sas_token
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
impl Default for AzblobConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
container: String::default(),
|
||||
root: String::default(),
|
||||
account_name: SecretString::from(String::default()),
|
||||
account_key: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
sas_token: Option::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct GcsConfig {
|
||||
pub name: String,
|
||||
pub root: String,
|
||||
pub bucket: String,
|
||||
pub scope: String,
|
||||
#[serde(skip_serializing)]
|
||||
pub credential_path: SecretString,
|
||||
#[serde(skip_serializing)]
|
||||
pub credential: SecretString,
|
||||
pub endpoint: String,
|
||||
#[serde(flatten)]
|
||||
pub cache: ObjectStorageCacheConfig,
|
||||
pub http_client: HttpClientConfig,
|
||||
}
|
||||
|
||||
impl Default for GcsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
name: String::default(),
|
||||
root: String::default(),
|
||||
bucket: String::default(),
|
||||
scope: String::default(),
|
||||
credential_path: SecretString::from(String::default()),
|
||||
credential: SecretString::from(String::default()),
|
||||
endpoint: String::default(),
|
||||
cache: ObjectStorageCacheConfig::default(),
|
||||
http_client: HttpClientConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for GcsConfig {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name
|
||||
&& self.root == other.root
|
||||
&& self.bucket == other.bucket
|
||||
&& self.scope == other.scope
|
||||
&& self.credential_path.expose_secret() == other.credential_path.expose_secret()
|
||||
&& self.credential.expose_secret() == other.credential.expose_secret()
|
||||
&& self.endpoint == other.endpoint
|
||||
&& self.cache == other.cache
|
||||
&& self.http_client == other.http_client
|
||||
}
|
||||
}
|
||||
|
||||
/// The http client options to the storage.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct HttpClientConfig {
|
||||
/// The maximum idle connection per host allowed in the pool.
|
||||
pub(crate) pool_max_idle_per_host: u32,
|
||||
|
||||
/// The timeout for only the connect phase of a http client.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) connect_timeout: Duration,
|
||||
|
||||
/// The total request timeout, applied from when the request starts connecting until the response body has finished.
|
||||
/// Also considered a total deadline.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) timeout: Duration,
|
||||
|
||||
/// The timeout for idle sockets being kept-alive.
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub(crate) pool_idle_timeout: Duration,
|
||||
|
||||
/// Skip SSL certificate validation (insecure)
|
||||
pub skip_ssl_validation: bool,
|
||||
}
|
||||
|
||||
impl Default for HttpClientConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
pool_max_idle_per_host: 1024,
|
||||
connect_timeout: Duration::from_secs(30),
|
||||
timeout: Duration::from_secs(30),
|
||||
pool_idle_timeout: Duration::from_secs(90),
|
||||
skip_ssl_validation: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct ObjectStorageCacheConfig {
|
||||
/// The local file cache directory
|
||||
pub cache_path: Option<String>,
|
||||
/// The cache capacity in bytes
|
||||
pub cache_capacity: Option<ReadableSize>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::ObjectStoreConfig;

    #[test]
    fn test_config_name() {
        // The default store reports "File".
        assert_eq!("File", ObjectStoreConfig::default().config_name());

        // Without an explicit name, an S3 store reports "S3" for both names.
        let unnamed = ObjectStoreConfig::S3(S3Config::default());
        assert_eq!("S3", unnamed.config_name());
        assert_eq!("S3", unnamed.provider_name());

        // An explicit name changes the config name but not the provider name.
        let named = ObjectStoreConfig::S3(S3Config {
            name: "test".to_string(),
            ..Default::default()
        });
        assert_eq!("test", named.config_name());
        assert_eq!("S3", named.provider_name());
    }

    #[test]
    fn test_is_object_storage() {
        // The default store is not an object storage.
        assert!(!ObjectStoreConfig::default().is_object_storage());

        // Every remote provider is.
        let remote_stores = [
            ObjectStoreConfig::S3(S3Config::default()),
            ObjectStoreConfig::Oss(OssConfig::default()),
            ObjectStoreConfig::Gcs(GcsConfig::default()),
            ObjectStoreConfig::Azblob(AzblobConfig::default()),
        ];
        for store in remote_stores {
            assert!(store.is_object_storage());
        }
    }
}
|
||||
72
src/object-store/src/error.rs
Normal file
72
src/object-store/src/error.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_telemetry::common_error::ext::ErrorExt;
|
||||
use common_telemetry::common_error::status_code::StatusCode;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to init backend"))]
|
||||
InitBackend {
|
||||
#[snafu(source)]
|
||||
error: opendal::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build http client"))]
|
||||
BuildHttpClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: reqwest::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to create directory {}", dir))]
|
||||
CreateDir {
|
||||
dir: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to remove directory {}", dir))]
|
||||
RemoveDir {
|
||||
dir: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
},
|
||||
}
|
||||
|
||||
/// Result alias whose error type is this module's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
use Error::*;
|
||||
match self {
|
||||
InitBackend { .. } => StatusCode::StorageUnavailable,
|
||||
BuildHttpClient { .. } => StatusCode::Unexpected,
|
||||
CreateDir { .. } | RemoveDir { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
171
src/object-store/src/factory.rs
Normal file
171
src/object-store/src/factory.rs
Normal file
@@ -0,0 +1,171 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::{fs, path};
|
||||
|
||||
use common_base::secrets::ExposeSecret;
|
||||
use common_telemetry::info;
|
||||
use opendal::services::{Fs, Gcs, Oss, S3};
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::config::{AzblobConfig, FileConfig, GcsConfig, ObjectStoreConfig, OssConfig, S3Config};
|
||||
use crate::error::{self, Result};
|
||||
use crate::services::Azblob;
|
||||
use crate::util::{build_http_client, clean_temp_dir, join_dir, normalize_dir};
|
||||
use crate::{util, ObjectStore, ATOMIC_WRITE_DIR, OLD_ATOMIC_WRITE_DIR};
|
||||
|
||||
pub async fn new_raw_object_store(
|
||||
store: &ObjectStoreConfig,
|
||||
data_home: &str,
|
||||
) -> Result<ObjectStore> {
|
||||
let data_home = normalize_dir(data_home);
|
||||
match store {
|
||||
ObjectStoreConfig::File(file_config) => new_fs_object_store(&data_home, file_config).await,
|
||||
ObjectStoreConfig::S3(s3_config) => new_s3_object_store(s3_config).await,
|
||||
ObjectStoreConfig::Oss(oss_config) => new_oss_object_store(oss_config).await,
|
||||
ObjectStoreConfig::Azblob(azblob_config) => new_azblob_object_store(azblob_config).await,
|
||||
ObjectStoreConfig::Gcs(gcs_config) => new_gcs_object_store(gcs_config).await,
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper function to create a file system object store.
|
||||
pub async fn new_fs_object_store(
|
||||
data_home: &str,
|
||||
_file_config: &FileConfig,
|
||||
) -> Result<ObjectStore> {
|
||||
fs::create_dir_all(path::Path::new(&data_home))
|
||||
.context(error::CreateDirSnafu { dir: data_home })?;
|
||||
info!("The file storage home is: {}", data_home);
|
||||
|
||||
let atomic_write_dir = join_dir(data_home, ATOMIC_WRITE_DIR);
|
||||
clean_temp_dir(&atomic_write_dir)?;
|
||||
|
||||
// Compatible code. Remove this after a major release.
|
||||
let old_atomic_temp_dir = join_dir(data_home, OLD_ATOMIC_WRITE_DIR);
|
||||
clean_temp_dir(&old_atomic_temp_dir)?;
|
||||
|
||||
let builder = Fs::default()
|
||||
.root(data_home)
|
||||
.atomic_write_dir(&atomic_write_dir);
|
||||
|
||||
let object_store = ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish();
|
||||
|
||||
Ok(object_store)
|
||||
}
|
||||
|
||||
pub async fn new_azblob_object_store(azblob_config: &AzblobConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&azblob_config.root);
|
||||
|
||||
info!(
|
||||
"The azure storage container is: {}, root is: {}",
|
||||
azblob_config.container, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&azblob_config.http_client)?;
|
||||
|
||||
let mut builder = Azblob::default()
|
||||
.root(&root)
|
||||
.container(&azblob_config.container)
|
||||
.endpoint(&azblob_config.endpoint)
|
||||
.account_name(azblob_config.account_name.expose_secret())
|
||||
.account_key(azblob_config.account_key.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
if let Some(token) = &azblob_config.sas_token {
|
||||
builder = builder.sas_token(token);
|
||||
};
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
|
||||
pub async fn new_gcs_object_store(gcs_config: &GcsConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&gcs_config.root);
|
||||
info!(
|
||||
"The gcs storage bucket is: {}, root is: {}",
|
||||
gcs_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&gcs_config.http_client);
|
||||
|
||||
let builder = Gcs::default()
|
||||
.root(&root)
|
||||
.bucket(&gcs_config.bucket)
|
||||
.scope(&gcs_config.scope)
|
||||
.credential_path(gcs_config.credential_path.expose_secret())
|
||||
.credential(gcs_config.credential.expose_secret())
|
||||
.endpoint(&gcs_config.endpoint)
|
||||
.http_client(client?);
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
|
||||
pub async fn new_oss_object_store(oss_config: &OssConfig) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&oss_config.root);
|
||||
info!(
|
||||
"The oss storage bucket is: {}, root is: {}",
|
||||
oss_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&oss_config.http_client)?;
|
||||
|
||||
let builder = Oss::default()
|
||||
.root(&root)
|
||||
.bucket(&oss_config.bucket)
|
||||
.endpoint(&oss_config.endpoint)
|
||||
.access_key_id(oss_config.access_key_id.expose_secret())
|
||||
.access_key_secret(oss_config.access_key_secret.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
|
||||
pub async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectStore> {
|
||||
let root = util::normalize_dir(&s3_config.root);
|
||||
|
||||
info!(
|
||||
"The s3 storage bucket is: {}, root is: {}",
|
||||
s3_config.bucket, &root
|
||||
);
|
||||
|
||||
let client = build_http_client(&s3_config.http_client)?;
|
||||
|
||||
let mut builder = S3::default()
|
||||
.root(&root)
|
||||
.bucket(&s3_config.bucket)
|
||||
.access_key_id(s3_config.access_key_id.expose_secret())
|
||||
.secret_access_key(s3_config.secret_access_key.expose_secret())
|
||||
.http_client(client);
|
||||
|
||||
if s3_config.endpoint.is_some() {
|
||||
builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
|
||||
}
|
||||
if s3_config.region.is_some() {
|
||||
builder = builder.region(s3_config.region.as_ref().unwrap());
|
||||
}
|
||||
if s3_config.enable_virtual_host_style {
|
||||
builder = builder.enable_virtual_host_style();
|
||||
}
|
||||
|
||||
Ok(ObjectStore::new(builder)
|
||||
.context(error::InitBackendSnafu)?
|
||||
.finish())
|
||||
}
|
||||
@@ -19,10 +19,18 @@ pub use opendal::{
|
||||
Writer,
|
||||
};
|
||||
|
||||
pub mod config;
|
||||
pub mod error;
|
||||
pub mod factory;
|
||||
pub mod layers;
|
||||
pub mod manager;
|
||||
mod metrics;
|
||||
pub mod test_util;
|
||||
pub mod util;
|
||||
|
||||
/// The default object cache directory name.
|
||||
pub const OBJECT_CACHE_DIR: &str = "object_cache";
|
||||
|
||||
/// Directory, joined onto the data home, that the file-system store uses for atomic writes.
pub const ATOMIC_WRITE_DIR: &str = "tmp/";
|
||||
/// For compatibility. Remove this after a major version release.
|
||||
pub const OLD_ATOMIC_WRITE_DIR: &str = ".tmp/";
|
||||
|
||||
@@ -13,13 +13,16 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::path;
|
||||
|
||||
use common_telemetry::{debug, error, trace};
|
||||
use common_telemetry::{debug, error, info, trace};
|
||||
use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer};
|
||||
use opendal::raw::{AccessorInfo, Operation};
|
||||
use opendal::raw::{AccessorInfo, HttpClient, Operation};
|
||||
use opendal::ErrorKind;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::ObjectStore;
|
||||
use crate::config::HttpClientConfig;
|
||||
use crate::{error, ObjectStore};
|
||||
|
||||
/// Join two paths and normalize the output dir.
|
||||
///
|
||||
@@ -200,6 +203,32 @@ impl LoggingInterceptor for DefaultLoggingInterceptor {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn build_http_client(config: &HttpClientConfig) -> error::Result<HttpClient> {
|
||||
if config.skip_ssl_validation {
|
||||
common_telemetry::warn!("Skipping SSL validation for object storage HTTP client. Please ensure the environment is trusted.");
|
||||
}
|
||||
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.pool_max_idle_per_host(config.pool_max_idle_per_host as usize)
|
||||
.connect_timeout(config.connect_timeout)
|
||||
.pool_idle_timeout(config.pool_idle_timeout)
|
||||
.timeout(config.timeout)
|
||||
.danger_accept_invalid_certs(config.skip_ssl_validation)
|
||||
.build()
|
||||
.context(error::BuildHttpClientSnafu)?;
|
||||
Ok(HttpClient::with(client))
|
||||
}
|
||||
|
||||
pub fn clean_temp_dir(dir: &str) -> error::Result<()> {
|
||||
if path::Path::new(&dir).exists() {
|
||||
info!("Begin to clean temp storage directory: {}", dir);
|
||||
std::fs::remove_dir_all(dir).context(error::RemoveDirSnafu { dir })?;
|
||||
info!("Cleaned temp storage directory: {}", dir);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -860,6 +860,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode object from json"))]
|
||||
DecodeJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -991,6 +999,7 @@ impl ErrorExt for Error {
|
||||
Error::InvalidProcessId { .. } => StatusCode::InvalidArguments,
|
||||
Error::ProcessManagerMissing { .. } => StatusCode::Unexpected,
|
||||
Error::PathNotFound { .. } => StatusCode::InvalidArguments,
|
||||
Error::DecodeJson { .. } => StatusCode::Unexpected,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,10 +22,9 @@ use api::v1::region::{
|
||||
RegionRequestHeader,
|
||||
};
|
||||
use api::v1::{
|
||||
AlterTableExpr, ColumnDataType, ColumnSchema, CreateTableExpr, InsertRequests,
|
||||
RowInsertRequest, RowInsertRequests, SemanticType,
|
||||
AlterTableExpr, ColumnSchema, CreateTableExpr, InsertRequests, RowInsertRequest,
|
||||
RowInsertRequests, SemanticType,
|
||||
};
|
||||
use catalog::CatalogManagerRef;
|
||||
use client::{OutputData, OutputMeta};
|
||||
use common_catalog::consts::{
|
||||
default_engine, trace_services_table_name, PARENT_SPAN_ID_COLUMN, SERVICE_NAME_COLUMN,
|
||||
@@ -35,7 +34,6 @@ use common_grpc_expr::util::ColumnExpr;
|
||||
use common_meta::cache::TableFlownodeSetCacheRef;
|
||||
use common_meta::node_manager::{AffectedRows, NodeManagerRef};
|
||||
use common_meta::peer::Peer;
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use common_telemetry::{error, info, warn};
|
||||
@@ -49,9 +47,7 @@ use snafu::ResultExt;
|
||||
use sql::partition::partition_rule_for_hexstring;
|
||||
use sql::statements::create::Partitions;
|
||||
use sql::statements::insert::Insert;
|
||||
use store_api::metric_engine_consts::{
|
||||
LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
|
||||
};
|
||||
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
|
||||
use store_api::mito_engine_options::{APPEND_MODE_KEY, MERGE_MODE_KEY};
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::metadata::TableInfo;
|
||||
@@ -63,7 +59,7 @@ use table::table_reference::TableReference;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
CatalogSnafu, ColumnOptionsSnafu, CreatePartitionRulesSnafu, FindRegionLeaderSnafu,
|
||||
ColumnOptionsSnafu, CreatePartitionRulesSnafu, FindRegionLeaderSnafu,
|
||||
InvalidInsertRequestSnafu, JoinTaskSnafu, RequestInsertsSnafu, Result, TableNotFoundSnafu,
|
||||
};
|
||||
use crate::expr_helper;
|
||||
@@ -72,10 +68,10 @@ use crate::req_convert::common::preprocess_row_insert_requests;
|
||||
use crate::req_convert::insert::{
|
||||
fill_reqs_with_impure_default, ColumnToRow, RowToRegion, StatementToRegion, TableToRegion,
|
||||
};
|
||||
use crate::statement::StatementExecutor;
|
||||
use crate::schema_helper::SchemaHelper;
|
||||
|
||||
pub struct Inserter {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
pub(crate) schema_helper: SchemaHelper,
|
||||
pub(crate) partition_manager: PartitionRuleManagerRef,
|
||||
pub(crate) node_manager: NodeManagerRef,
|
||||
pub(crate) table_flownode_set_cache: TableFlownodeSetCacheRef,
|
||||
@@ -85,7 +81,7 @@ pub type InserterRef = Arc<Inserter>;
|
||||
|
||||
/// Hint for the table type to create automatically.
|
||||
#[derive(Clone)]
|
||||
enum AutoCreateTableType {
|
||||
pub(crate) enum AutoCreateTableType {
|
||||
/// A logical table with the physical table name.
|
||||
Logical(String),
|
||||
/// A physical table.
|
||||
@@ -127,27 +123,34 @@ pub struct InstantAndNormalInsertRequests {
|
||||
|
||||
impl Inserter {
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
schema_helper: SchemaHelper,
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
node_manager: NodeManagerRef,
|
||||
table_flownode_set_cache: TableFlownodeSetCacheRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
schema_helper,
|
||||
partition_manager,
|
||||
node_manager,
|
||||
table_flownode_set_cache,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn partition_manager(&self) -> &PartitionRuleManagerRef {
|
||||
&self.partition_manager
|
||||
}
|
||||
|
||||
pub fn node_manager(&self) -> &NodeManagerRef {
|
||||
&self.node_manager
|
||||
}
|
||||
|
||||
pub async fn handle_column_inserts(
|
||||
&self,
|
||||
requests: InsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
let row_inserts = ColumnToRow::convert(requests)?;
|
||||
self.handle_row_inserts(row_inserts, ctx, statement_executor, false, false)
|
||||
self.handle_row_inserts(row_inserts, ctx, false, false)
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -156,7 +159,6 @@ impl Inserter {
|
||||
&self,
|
||||
mut requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
accommodate_existing_schema: bool,
|
||||
is_single_value: bool,
|
||||
) -> Result<Output> {
|
||||
@@ -164,7 +166,6 @@ impl Inserter {
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
statement_executor,
|
||||
AutoCreateTableType::Physical,
|
||||
accommodate_existing_schema,
|
||||
is_single_value,
|
||||
@@ -177,12 +178,10 @@ impl Inserter {
|
||||
&self,
|
||||
requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
statement_executor,
|
||||
AutoCreateTableType::Log,
|
||||
false,
|
||||
false,
|
||||
@@ -194,12 +193,10 @@ impl Inserter {
|
||||
&self,
|
||||
requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
statement_executor,
|
||||
AutoCreateTableType::Trace,
|
||||
false,
|
||||
false,
|
||||
@@ -212,14 +209,12 @@ impl Inserter {
|
||||
&self,
|
||||
requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
accommodate_existing_schema: bool,
|
||||
is_single_value: bool,
|
||||
) -> Result<Output> {
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
statement_executor,
|
||||
AutoCreateTableType::LastNonNull,
|
||||
accommodate_existing_schema,
|
||||
is_single_value,
|
||||
@@ -232,7 +227,6 @@ impl Inserter {
|
||||
&self,
|
||||
mut requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
create_type: AutoCreateTableType,
|
||||
accommodate_existing_schema: bool,
|
||||
is_single_value: bool,
|
||||
@@ -254,7 +248,6 @@ impl Inserter {
|
||||
&mut requests,
|
||||
&ctx,
|
||||
create_type,
|
||||
statement_executor,
|
||||
accommodate_existing_schema,
|
||||
is_single_value,
|
||||
)
|
||||
@@ -280,7 +273,6 @@ impl Inserter {
|
||||
&self,
|
||||
mut requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
physical_table: String,
|
||||
) -> Result<Output> {
|
||||
// remove empty requests
|
||||
@@ -293,7 +285,8 @@ impl Inserter {
|
||||
validate_column_count_match(&requests)?;
|
||||
|
||||
// check and create physical table
|
||||
self.create_physical_table_on_demand(&ctx, physical_table.clone(), statement_executor)
|
||||
self.schema_helper
|
||||
.create_metric_physical_table(&ctx, physical_table.clone())
|
||||
.await?;
|
||||
|
||||
// check and create logical tables
|
||||
@@ -305,7 +298,6 @@ impl Inserter {
|
||||
&mut requests,
|
||||
&ctx,
|
||||
AutoCreateTableType::Logical(physical_table.to_string()),
|
||||
statement_executor,
|
||||
true,
|
||||
true,
|
||||
)
|
||||
@@ -350,10 +342,13 @@ impl Inserter {
|
||||
insert: &Insert,
|
||||
ctx: &QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let (inserts, table_info) =
|
||||
StatementToRegion::new(self.catalog_manager.as_ref(), &self.partition_manager, ctx)
|
||||
.convert(insert, ctx)
|
||||
.await?;
|
||||
let (inserts, table_info) = StatementToRegion::new(
|
||||
self.schema_helper.catalog_manager().as_ref(),
|
||||
&self.partition_manager,
|
||||
ctx,
|
||||
)
|
||||
.convert(insert, ctx)
|
||||
.await?;
|
||||
|
||||
let table_infos =
|
||||
HashMap::from_iter([(table_info.table_id(), table_info.clone())].into_iter());
|
||||
@@ -482,7 +477,6 @@ impl Inserter {
|
||||
requests: &mut RowInsertRequests,
|
||||
ctx: &QueryContextRef,
|
||||
auto_create_table_type: AutoCreateTableType,
|
||||
statement_executor: &StatementExecutor,
|
||||
accommodate_existing_schema: bool,
|
||||
is_single_value: bool,
|
||||
) -> Result<CreateAlterTableResult> {
|
||||
@@ -543,7 +537,7 @@ impl Inserter {
|
||||
instant_table_ids.insert(table_info.table_id());
|
||||
}
|
||||
table_infos.insert(table_info.table_id(), table.table_info());
|
||||
if let Some(alter_expr) = self.get_alter_table_expr_on_demand(
|
||||
if let Some(alter_expr) = Self::get_alter_table_expr_on_demand(
|
||||
req,
|
||||
&table,
|
||||
ctx,
|
||||
@@ -565,9 +559,7 @@ impl Inserter {
|
||||
AutoCreateTableType::Logical(_) => {
|
||||
if !create_tables.is_empty() {
|
||||
// Creates logical tables in batch.
|
||||
let tables = self
|
||||
.create_logical_tables(create_tables, ctx, statement_executor)
|
||||
.await?;
|
||||
let tables = self.create_logical_tables(create_tables, ctx).await?;
|
||||
|
||||
for table in tables {
|
||||
let table_info = table.table_info();
|
||||
@@ -579,7 +571,7 @@ impl Inserter {
|
||||
}
|
||||
if !alter_tables.is_empty() {
|
||||
// Alter logical tables in batch.
|
||||
statement_executor
|
||||
self.schema_helper
|
||||
.alter_logical_tables(alter_tables, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
@@ -590,9 +582,7 @@ impl Inserter {
|
||||
// note that auto create table shouldn't be ttl instant table
|
||||
// for it's a very unexpected behavior and should be set by user explicitly
|
||||
for create_table in create_tables {
|
||||
let table = self
|
||||
.create_physical_table(create_table, None, ctx, statement_executor)
|
||||
.await?;
|
||||
let table = self.create_physical_table(create_table, None, ctx).await?;
|
||||
let table_info = table.table_info();
|
||||
if table_info.is_ttl_instant_table() {
|
||||
instant_table_ids.insert(table_info.table_id());
|
||||
@@ -600,8 +590,8 @@ impl Inserter {
|
||||
table_infos.insert(table_info.table_id(), table.table_info());
|
||||
}
|
||||
for alter_expr in alter_tables.into_iter() {
|
||||
statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
self.schema_helper
|
||||
.alter_table_by_expr(alter_expr, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
@@ -619,9 +609,7 @@ impl Inserter {
|
||||
create_table
|
||||
.table_options
|
||||
.insert(APPEND_MODE_KEY.to_string(), "false".to_string());
|
||||
let table = self
|
||||
.create_physical_table(create_table, None, ctx, statement_executor)
|
||||
.await?;
|
||||
let table = self.create_physical_table(create_table, None, ctx).await?;
|
||||
let table_info = table.table_info();
|
||||
if table_info.is_ttl_instant_table() {
|
||||
instant_table_ids.insert(table_info.table_id());
|
||||
@@ -662,12 +650,7 @@ impl Inserter {
|
||||
);
|
||||
|
||||
let table = self
|
||||
.create_physical_table(
|
||||
create_table,
|
||||
Some(partitions),
|
||||
ctx,
|
||||
statement_executor,
|
||||
)
|
||||
.create_physical_table(create_table, Some(partitions), ctx)
|
||||
.await?;
|
||||
let table_info = table.table_info();
|
||||
if table_info.is_ttl_instant_table() {
|
||||
@@ -677,8 +660,8 @@ impl Inserter {
|
||||
}
|
||||
}
|
||||
for alter_expr in alter_tables.into_iter() {
|
||||
statement_executor
|
||||
.alter_table_inner(alter_expr, ctx.clone())
|
||||
self.schema_helper
|
||||
.alter_table_by_expr(alter_expr, ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
@@ -690,79 +673,13 @@ impl Inserter {
|
||||
})
|
||||
}
|
||||
|
||||
async fn create_physical_table_on_demand(
|
||||
&self,
|
||||
ctx: &QueryContextRef,
|
||||
physical_table: String,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<()> {
|
||||
let catalog_name = ctx.current_catalog();
|
||||
let schema_name = ctx.current_schema();
|
||||
|
||||
// check if exist
|
||||
if self
|
||||
.get_table(catalog_name, &schema_name, &physical_table)
|
||||
.await?
|
||||
.is_some()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_reference = TableReference::full(catalog_name, &schema_name, &physical_table);
|
||||
info!("Physical metric table `{table_reference}` does not exist, try creating table");
|
||||
|
||||
// schema with timestamp and field column
|
||||
let default_schema = vec![
|
||||
ColumnSchema {
|
||||
column_name: GREPTIME_TIMESTAMP.to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond as _,
|
||||
semantic_type: SemanticType::Timestamp as _,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: GREPTIME_VALUE.to_string(),
|
||||
datatype: ColumnDataType::Float64 as _,
|
||||
semantic_type: SemanticType::Field as _,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
];
|
||||
let create_table_expr =
|
||||
&mut build_create_table_expr(&table_reference, &default_schema, default_engine())?;
|
||||
|
||||
create_table_expr.engine = METRIC_ENGINE_NAME.to_string();
|
||||
create_table_expr
|
||||
.table_options
|
||||
.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "true".to_string());
|
||||
|
||||
// create physical table
|
||||
let res = statement_executor
|
||||
.create_table_inner(create_table_expr, None, ctx.clone())
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(_) => {
|
||||
info!("Successfully created table {table_reference}",);
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
error!(err; "Failed to create table {table_reference}");
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table: &str,
|
||||
) -> Result<Option<TableRef>> {
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)
|
||||
self.schema_helper.get_table(catalog, schema, table).await
|
||||
}
|
||||
|
||||
fn get_create_table_expr_on_demand(
|
||||
@@ -771,38 +688,9 @@ impl Inserter {
|
||||
create_type: &AutoCreateTableType,
|
||||
ctx: &QueryContextRef,
|
||||
) -> Result<CreateTableExpr> {
|
||||
let mut table_options = Vec::with_capacity(4);
|
||||
for key in VALID_TABLE_OPTION_KEYS {
|
||||
if let Some(value) = ctx.extension(key) {
|
||||
table_options.push((key, value));
|
||||
}
|
||||
}
|
||||
|
||||
let mut engine_name = default_engine();
|
||||
match create_type {
|
||||
AutoCreateTableType::Logical(physical_table) => {
|
||||
engine_name = METRIC_ENGINE_NAME;
|
||||
table_options.push((LOGICAL_TABLE_METADATA_KEY, physical_table));
|
||||
}
|
||||
AutoCreateTableType::Physical => {
|
||||
if let Some(append_mode) = ctx.extension(APPEND_MODE_KEY) {
|
||||
table_options.push((APPEND_MODE_KEY, append_mode));
|
||||
}
|
||||
if let Some(merge_mode) = ctx.extension(MERGE_MODE_KEY) {
|
||||
table_options.push((MERGE_MODE_KEY, merge_mode));
|
||||
}
|
||||
}
|
||||
// Set append_mode to true for log table.
|
||||
// because log tables should keep rows with the same ts and tags.
|
||||
AutoCreateTableType::Log => {
|
||||
table_options.push((APPEND_MODE_KEY, "true"));
|
||||
}
|
||||
AutoCreateTableType::LastNonNull => {
|
||||
table_options.push((MERGE_MODE_KEY, "last_non_null"));
|
||||
}
|
||||
AutoCreateTableType::Trace => {
|
||||
table_options.push((APPEND_MODE_KEY, "true"));
|
||||
}
|
||||
if matches!(create_type, AutoCreateTableType::Logical(_)) {
|
||||
engine_name = METRIC_ENGINE_NAME;
|
||||
}
|
||||
|
||||
let schema = ctx.current_schema();
|
||||
@@ -813,11 +701,9 @@ impl Inserter {
|
||||
build_create_table_expr(&table_ref, request_schema, engine_name)?;
|
||||
|
||||
info!("Table `{table_ref}` does not exist, try creating table");
|
||||
for (k, v) in table_options {
|
||||
create_table_expr
|
||||
.table_options
|
||||
.insert(k.to_string(), v.to_string());
|
||||
}
|
||||
|
||||
// Use the common fill_table_options_for_create function to populate table options
|
||||
fill_table_options_for_create(&mut create_table_expr.table_options, create_type, ctx);
|
||||
|
||||
Ok(create_table_expr)
|
||||
}
|
||||
@@ -830,7 +716,6 @@ impl Inserter {
|
||||
/// When `accommodate_existing_schema` is true and `is_single_value` is true, it also consider fields when modifying the
|
||||
/// input `req`.
|
||||
fn get_alter_table_expr_on_demand(
|
||||
&self,
|
||||
req: &mut RowInsertRequest,
|
||||
table: &TableRef,
|
||||
ctx: &QueryContextRef,
|
||||
@@ -918,7 +803,6 @@ impl Inserter {
|
||||
mut create_table_expr: CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
ctx: &QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<TableRef> {
|
||||
{
|
||||
let table_ref = TableReference::full(
|
||||
@@ -929,8 +813,9 @@ impl Inserter {
|
||||
|
||||
info!("Table `{table_ref}` does not exist, try creating table");
|
||||
}
|
||||
let res = statement_executor
|
||||
.create_table_inner(&mut create_table_expr, partitions, ctx.clone())
|
||||
let res = self
|
||||
.schema_helper
|
||||
.create_table_by_expr(&mut create_table_expr, partitions, ctx.clone())
|
||||
.await;
|
||||
|
||||
let table_ref = TableReference::full(
|
||||
@@ -958,9 +843,9 @@ impl Inserter {
|
||||
&self,
|
||||
create_table_exprs: Vec<CreateTableExpr>,
|
||||
ctx: &QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
let res = statement_executor
|
||||
let res = self
|
||||
.schema_helper
|
||||
.create_logical_tables(&create_table_exprs, ctx.clone())
|
||||
.await;
|
||||
|
||||
@@ -1011,7 +896,49 @@ fn validate_column_count_match(requests: &RowInsertRequests) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn build_create_table_expr(
|
||||
/// Fill table options for a new table by create type.
|
||||
pub(crate) fn fill_table_options_for_create(
|
||||
table_options: &mut std::collections::HashMap<String, String>,
|
||||
create_type: &AutoCreateTableType,
|
||||
ctx: &QueryContextRef,
|
||||
) {
|
||||
for key in VALID_TABLE_OPTION_KEYS {
|
||||
if let Some(value) = ctx.extension(key) {
|
||||
table_options.insert(key.to_string(), value.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
match create_type {
|
||||
AutoCreateTableType::Logical(physical_table) => {
|
||||
table_options.insert(
|
||||
LOGICAL_TABLE_METADATA_KEY.to_string(),
|
||||
physical_table.to_string(),
|
||||
);
|
||||
}
|
||||
AutoCreateTableType::Physical => {
|
||||
if let Some(append_mode) = ctx.extension(APPEND_MODE_KEY) {
|
||||
table_options.insert(APPEND_MODE_KEY.to_string(), append_mode.to_string());
|
||||
}
|
||||
if let Some(merge_mode) = ctx.extension(MERGE_MODE_KEY) {
|
||||
table_options.insert(MERGE_MODE_KEY.to_string(), merge_mode.to_string());
|
||||
}
|
||||
}
|
||||
// Set append_mode to true for log table.
|
||||
// because log tables should keep rows with the same ts and tags.
|
||||
AutoCreateTableType::Log => {
|
||||
table_options.insert(APPEND_MODE_KEY.to_string(), "true".to_string());
|
||||
}
|
||||
AutoCreateTableType::LastNonNull => {
|
||||
table_options.insert(MERGE_MODE_KEY.to_string(), "last_non_null".to_string());
|
||||
}
|
||||
AutoCreateTableType::Trace => {
|
||||
table_options.insert(APPEND_MODE_KEY.to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a [CreateTableExpr] for the given table and schema.
|
||||
pub(crate) fn build_create_table_expr(
|
||||
table: &TableReference,
|
||||
request_schema: &[ColumnSchema],
|
||||
engine: &str,
|
||||
@@ -1144,19 +1071,14 @@ mod tests {
|
||||
|
||||
use api::v1::{ColumnSchema as GrpcColumnSchema, RowInsertRequest, Rows, SemanticType, Value};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_meta::cache::new_table_flownode_set_cache;
|
||||
use common_meta::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
|
||||
use common_meta::test_util::MockDatanodeManager;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use moka::future::Cache;
|
||||
use session::context::QueryContext;
|
||||
use table::dist_table::DummyDataSource;
|
||||
use table::metadata::{TableInfoBuilder, TableMetaBuilder, TableType};
|
||||
use table::TableRef;
|
||||
|
||||
use super::*;
|
||||
use crate::tests::{create_partition_rule_manager, prepare_mocked_backend};
|
||||
|
||||
fn make_table_ref_with_schema(ts_name: &str, field_name: &str) -> TableRef {
|
||||
let schema = datatypes::schema::SchemaBuilder::try_from_columns(vec![
|
||||
@@ -1236,20 +1158,8 @@ mod tests {
|
||||
DEFAULT_SCHEMA_NAME,
|
||||
));
|
||||
|
||||
let kv_backend = prepare_mocked_backend().await;
|
||||
let inserter = Inserter::new(
|
||||
catalog::memory::MemoryCatalogManager::new(),
|
||||
create_partition_rule_manager(kv_backend.clone()).await,
|
||||
Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler)),
|
||||
Arc::new(new_table_flownode_set_cache(
|
||||
String::new(),
|
||||
Cache::new(100),
|
||||
kv_backend.clone(),
|
||||
)),
|
||||
);
|
||||
let alter_expr = inserter
|
||||
.get_alter_table_expr_on_demand(&mut req, &table, &ctx, true, true)
|
||||
.unwrap();
|
||||
let alter_expr =
|
||||
Inserter::get_alter_table_expr_on_demand(&mut req, &table, &ctx, true, true).unwrap();
|
||||
assert!(alter_expr.is_none());
|
||||
|
||||
// The request's schema should have updated names for timestamp and field columns
|
||||
|
||||
@@ -27,6 +27,7 @@ pub mod procedure;
|
||||
pub mod region_req_factory;
|
||||
pub mod req_convert;
|
||||
pub mod request;
|
||||
pub mod schema_helper;
|
||||
pub mod statement;
|
||||
pub mod table;
|
||||
#[cfg(test)]
|
||||
|
||||
799
src/operator/src/schema_helper.rs
Normal file
799
src/operator/src/schema_helper.rs
Normal file
@@ -0,0 +1,799 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Utilities to deal with table schemas.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::alter_table_expr::Kind;
|
||||
use api::v1::region::region_request::Body;
|
||||
use api::v1::region::{ListMetadataRequest, RegionRequestHeader};
|
||||
use api::v1::{AlterTableExpr, ColumnDataType, ColumnSchema, CreateTableExpr, SemanticType};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::consts::{
|
||||
default_engine, is_readonly_schema, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME,
|
||||
};
|
||||
use common_catalog::format_full_table_name;
|
||||
use common_grpc_expr::util::ColumnExpr;
|
||||
use common_meta::cache_invalidator::{CacheInvalidatorRef, Context};
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutorRef};
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::table_route::TableRouteManager;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_meta::node_manager::NodeManagerRef;
|
||||
use common_meta::rpc::ddl::{DdlTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use common_meta::rpc::router::Partition;
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing;
|
||||
use common_telemetry::tracing_context::TracingContext;
|
||||
use futures::future;
|
||||
use partition::manager::PartitionRuleManagerRef;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::statements::create::Partitions;
|
||||
use store_api::metadata::RegionMetadata;
|
||||
use store_api::metric_engine_consts::{
|
||||
LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
|
||||
};
|
||||
use store_api::storage::RegionId;
|
||||
use table::dist_table::DistTable;
|
||||
use table::metadata::{RawTableInfo, TableId, TableInfo};
|
||||
use table::table_name::TableName;
|
||||
use table::table_reference::TableReference;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
CatalogSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, DecodeJsonSnafu,
|
||||
EmptyDdlExprSnafu, ExecuteDdlSnafu, FindRegionLeaderSnafu, InvalidPartitionRuleSnafu,
|
||||
InvalidTableNameSnafu, InvalidateTableCacheSnafu, JoinTaskSnafu, RequestRegionSnafu, Result,
|
||||
SchemaNotFoundSnafu, SchemaReadOnlySnafu, TableAlreadyExistsSnafu, TableMetadataManagerSnafu,
|
||||
TableNotFoundSnafu, UnexpectedSnafu,
|
||||
};
|
||||
use crate::expr_helper;
|
||||
use crate::insert::{build_create_table_expr, fill_table_options_for_create, AutoCreateTableType};
|
||||
use crate::region_req_factory::RegionRequestFactory;
|
||||
use crate::statement::ddl::{create_table_info, parse_partitions, verify_alter, NAME_PATTERN_REG};
|
||||
|
||||
/// Helper to query and manipulate (CREATE/ALTER) table schemas.
|
||||
#[derive(Clone)]
|
||||
pub struct SchemaHelper {
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
procedure_executor: ProcedureExecutorRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
}
|
||||
|
||||
impl SchemaHelper {
|
||||
/// Creates a new [`SchemaHelper`].
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
procedure_executor: ProcedureExecutorRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
table_metadata_manager,
|
||||
procedure_executor,
|
||||
cache_invalidator,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the table by catalog, schema and table name.
|
||||
pub async fn get_table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table: &str,
|
||||
) -> Result<Option<TableRef>> {
|
||||
self.catalog_manager
|
||||
.table(catalog, schema, table, None)
|
||||
.await
|
||||
.context(CatalogSnafu)
|
||||
}
|
||||
|
||||
// TODO(yingwen): Can we create the physical table with all columns from the prometheus metrics?
|
||||
/// Creates a physical table for metric engine.
|
||||
///
|
||||
/// If table already exists, do nothing.
|
||||
pub async fn create_metric_physical_table(
|
||||
&self,
|
||||
ctx: &QueryContextRef,
|
||||
physical_table: String,
|
||||
) -> Result<()> {
|
||||
let catalog_name = ctx.current_catalog();
|
||||
let schema_name = ctx.current_schema();
|
||||
|
||||
// check if exist
|
||||
if self
|
||||
.get_table(catalog_name, &schema_name, &physical_table)
|
||||
.await?
|
||||
.is_some()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let table_reference = TableReference::full(catalog_name, &schema_name, &physical_table);
|
||||
common_telemetry::info!(
|
||||
"Physical metric table `{table_reference}` does not exist, try creating table"
|
||||
);
|
||||
|
||||
// schema with timestamp and field column
|
||||
let default_schema = vec![
|
||||
ColumnSchema {
|
||||
column_name: GREPTIME_TIMESTAMP.to_string(),
|
||||
datatype: ColumnDataType::TimestampMillisecond as _,
|
||||
semantic_type: SemanticType::Timestamp as _,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
ColumnSchema {
|
||||
column_name: GREPTIME_VALUE.to_string(),
|
||||
datatype: ColumnDataType::Float64 as _,
|
||||
semantic_type: SemanticType::Field as _,
|
||||
datatype_extension: None,
|
||||
options: None,
|
||||
},
|
||||
];
|
||||
let create_table_expr =
|
||||
&mut build_create_table_expr(&table_reference, &default_schema, default_engine())?;
|
||||
create_table_expr.engine = METRIC_ENGINE_NAME.to_string();
|
||||
create_table_expr
|
||||
.table_options
|
||||
.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "true".to_string());
|
||||
|
||||
// create physical table.
|
||||
// TODO(yingwen): Simplify this function. But remember to start the timer.
|
||||
let res = self
|
||||
.create_table_by_expr(create_table_expr, None, ctx.clone())
|
||||
.await;
|
||||
match res {
|
||||
Ok(_) => {
|
||||
common_telemetry::info!("Successfully created table {table_reference}",);
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
common_telemetry::error!(err; "Failed to create table {table_reference}");
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a table by [CreateTableExpr].
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_table_by_expr(
|
||||
&self,
|
||||
create_table: &mut CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<TableRef> {
|
||||
ensure!(
|
||||
!is_readonly_schema(&create_table.schema_name),
|
||||
SchemaReadOnlySnafu {
|
||||
name: create_table.schema_name.clone()
|
||||
}
|
||||
);
|
||||
|
||||
if create_table.engine == METRIC_ENGINE_NAME
|
||||
&& create_table
|
||||
.table_options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
// Create logical tables
|
||||
ensure!(
|
||||
partitions.is_none(),
|
||||
InvalidPartitionRuleSnafu {
|
||||
reason: "logical table in metric engine should not have partition rule, it will be inherited from physical table",
|
||||
}
|
||||
);
|
||||
self.create_logical_tables(std::slice::from_ref(create_table), query_ctx)
|
||||
.await?
|
||||
.into_iter()
|
||||
.next()
|
||||
.context(UnexpectedSnafu {
|
||||
violated: "expected to create logical tables",
|
||||
})
|
||||
} else {
|
||||
// Create other normal table
|
||||
self.create_non_logic_table(create_table, partitions, query_ctx)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a non-logical table.
|
||||
/// - If the schema doesn't exist, returns an error
|
||||
/// - If the table already exists:
|
||||
/// - If `create_if_not_exists` is true, returns the existing table
|
||||
/// - If `create_if_not_exists` is false, returns an error
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_non_logic_table(
|
||||
&self,
|
||||
create_table: &mut CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<TableRef> {
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLE.start_timer();
|
||||
|
||||
// Check if schema exists
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
))
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
ensure!(
|
||||
schema.is_some(),
|
||||
SchemaNotFoundSnafu {
|
||||
schema_info: &create_table.schema_name,
|
||||
}
|
||||
);
|
||||
|
||||
// if table exists.
|
||||
if let Some(table) = self
|
||||
.catalog_manager
|
||||
.table(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
Some(&query_ctx),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
return if create_table.create_if_not_exists {
|
||||
Ok(table)
|
||||
} else {
|
||||
TableAlreadyExistsSnafu {
|
||||
table: format_full_table_name(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
};
|
||||
}
|
||||
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(&create_table.table_name),
|
||||
InvalidTableNameSnafu {
|
||||
table_name: &create_table.table_name,
|
||||
}
|
||||
);
|
||||
|
||||
let table_name = TableName::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
);
|
||||
|
||||
let (partitions, partition_cols) = parse_partitions(create_table, partitions, &query_ctx)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols)?;
|
||||
|
||||
let resp = self
|
||||
.create_table_procedure(
|
||||
create_table.clone(),
|
||||
partitions,
|
||||
table_info.clone(),
|
||||
query_ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let table_id = resp.table_ids.into_iter().next().context(UnexpectedSnafu {
|
||||
violated: "expected table_id",
|
||||
})?;
|
||||
common_telemetry::info!(
|
||||
"Successfully created table '{table_name}' with table id {table_id}"
|
||||
);
|
||||
|
||||
table_info.ident.table_id = table_id;
|
||||
|
||||
let table_info: Arc<TableInfo> =
|
||||
Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?);
|
||||
create_table.table_id = Some(api::v1::TableId { id: table_id });
|
||||
|
||||
let table = DistTable::table(table_info);
|
||||
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
/// Creates logical tables.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_logical_tables(
|
||||
&self,
|
||||
create_table_exprs: &[CreateTableExpr],
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLES.start_timer();
|
||||
ensure!(
|
||||
!create_table_exprs.is_empty(),
|
||||
EmptyDdlExprSnafu {
|
||||
name: "create logic tables"
|
||||
}
|
||||
);
|
||||
|
||||
// Check table names
|
||||
for create_table in create_table_exprs {
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(&create_table.table_name),
|
||||
InvalidTableNameSnafu {
|
||||
table_name: &create_table.table_name,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let mut raw_tables_info = create_table_exprs
|
||||
.iter()
|
||||
.map(|create| create_table_info(create, vec![]))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let tables_data = create_table_exprs
|
||||
.iter()
|
||||
.cloned()
|
||||
.zip(raw_tables_info.iter().cloned())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let resp = self
|
||||
.create_logical_tables_procedure(tables_data, query_context)
|
||||
.await?;
|
||||
|
||||
let table_ids = resp.table_ids;
|
||||
ensure!(table_ids.len() == raw_tables_info.len(), CreateLogicalTablesSnafu {
|
||||
reason: format!("The number of tables is inconsistent with the expected number to be created, expected: {}, actual: {}", raw_tables_info.len(), table_ids.len())
|
||||
});
|
||||
common_telemetry::info!("Successfully created logical tables: {:?}", table_ids);
|
||||
|
||||
for (i, table_info) in raw_tables_info.iter_mut().enumerate() {
|
||||
table_info.ident.table_id = table_ids[i];
|
||||
}
|
||||
let tables_info = raw_tables_info
|
||||
.into_iter()
|
||||
.map(|x| x.try_into().context(CreateTableInfoSnafu))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(tables_info
|
||||
.into_iter()
|
||||
.map(|x| DistTable::table(Arc::new(x)))
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Alters a table by [AlterTableExpr].
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn alter_table_by_expr(
|
||||
&self,
|
||||
expr: AlterTableExpr,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
ensure!(
|
||||
!is_readonly_schema(&expr.schema_name),
|
||||
SchemaReadOnlySnafu {
|
||||
name: expr.schema_name.clone()
|
||||
}
|
||||
);
|
||||
|
||||
let catalog_name = if expr.catalog_name.is_empty() {
|
||||
DEFAULT_CATALOG_NAME.to_string()
|
||||
} else {
|
||||
expr.catalog_name.clone()
|
||||
};
|
||||
|
||||
let schema_name = if expr.schema_name.is_empty() {
|
||||
DEFAULT_SCHEMA_NAME.to_string()
|
||||
} else {
|
||||
expr.schema_name.clone()
|
||||
};
|
||||
|
||||
let table_name = expr.table_name.clone();
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
Some(&query_context),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
|
||||
let table_id = table.table_info().ident.table_id;
|
||||
let need_alter = verify_alter(table_id, table.table_info(), expr.clone())?;
|
||||
if !need_alter {
|
||||
return Ok(Output::new_with_affected_rows(0));
|
||||
}
|
||||
common_telemetry::info!(
|
||||
"Table info before alter is {:?}, expr: {:?}",
|
||||
table.table_info(),
|
||||
expr
|
||||
);
|
||||
|
||||
let physical_table_id = self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_id(table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let (req, invalidate_keys) = if physical_table_id == table_id {
|
||||
// This is physical table
|
||||
let req = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_table(expr),
|
||||
};
|
||||
|
||||
let invalidate_keys = vec![
|
||||
CacheIdent::TableId(table_id),
|
||||
CacheIdent::TableName(TableName::new(catalog_name, schema_name, table_name)),
|
||||
];
|
||||
|
||||
(req, invalidate_keys)
|
||||
} else {
|
||||
// This is logical table
|
||||
let req = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_logical_tables(vec![expr]),
|
||||
};
|
||||
|
||||
let mut invalidate_keys = vec![
|
||||
CacheIdent::TableId(physical_table_id),
|
||||
CacheIdent::TableId(table_id),
|
||||
CacheIdent::TableName(TableName::new(catalog_name, schema_name, table_name)),
|
||||
];
|
||||
|
||||
let physical_table = self
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.map(|x| x.into_inner());
|
||||
if let Some(physical_table) = physical_table {
|
||||
let physical_table_name = TableName::new(
|
||||
physical_table.table_info.catalog_name,
|
||||
physical_table.table_info.schema_name,
|
||||
physical_table.table_info.name,
|
||||
);
|
||||
invalidate_keys.push(CacheIdent::TableName(physical_table_name));
|
||||
}
|
||||
|
||||
(req, invalidate_keys)
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), req)
|
||||
.await
|
||||
.context(ExecuteDdlSnafu)?;
|
||||
|
||||
// Invalidates local cache ASAP.
|
||||
self.cache_invalidator
|
||||
.invalidate(&Context::default(), &invalidate_keys)
|
||||
.await
|
||||
.context(InvalidateTableCacheSnafu)?;
|
||||
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
/// Alter logical tables.
|
||||
pub async fn alter_logical_tables(
|
||||
&self,
|
||||
alter_table_exprs: Vec<AlterTableExpr>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let _timer = crate::metrics::DIST_ALTER_TABLES.start_timer();
|
||||
ensure!(
|
||||
!alter_table_exprs.is_empty(),
|
||||
EmptyDdlExprSnafu {
|
||||
name: "alter logical tables"
|
||||
}
|
||||
);
|
||||
|
||||
// group by physical table id
|
||||
let mut groups: HashMap<TableId, Vec<AlterTableExpr>> = HashMap::new();
|
||||
for expr in alter_table_exprs {
|
||||
// Get table_id from catalog_manager
|
||||
let catalog = if expr.catalog_name.is_empty() {
|
||||
query_context.current_catalog()
|
||||
} else {
|
||||
&expr.catalog_name
|
||||
};
|
||||
let schema = if expr.schema_name.is_empty() {
|
||||
query_context.current_schema()
|
||||
} else {
|
||||
expr.schema_name.to_string()
|
||||
};
|
||||
let table_name = &expr.table_name;
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog, &schema, table_name, Some(&query_context))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, &schema, table_name),
|
||||
})?;
|
||||
let table_id = table.table_info().ident.table_id;
|
||||
let physical_table_id = self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_id(table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
groups.entry(physical_table_id).or_default().push(expr);
|
||||
}
|
||||
|
||||
// Submit procedure for each physical table
|
||||
let mut handles = Vec::with_capacity(groups.len());
|
||||
for (_physical_table_id, exprs) in groups {
|
||||
let fut = self.alter_logical_tables_procedure(exprs, query_context.clone());
|
||||
handles.push(fut);
|
||||
}
|
||||
let _results = futures::future::try_join_all(handles).await?;
|
||||
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
/// Returns the catalog manager.
|
||||
pub fn catalog_manager(&self) -> &CatalogManagerRef {
|
||||
&self.catalog_manager
|
||||
}
|
||||
|
||||
/// Returns the table route manager.
|
||||
pub fn table_route_manager(&self) -> &TableRouteManager {
|
||||
self.table_metadata_manager.table_route_manager()
|
||||
}
|
||||
|
||||
/// Submits a procedure to create a non-logical table.
|
||||
async fn create_table_procedure(
|
||||
&self,
|
||||
create_table: CreateTableExpr,
|
||||
partitions: Vec<Partition>,
|
||||
table_info: RawTableInfo,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let partitions = partitions.into_iter().map(Into::into).collect();
|
||||
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_create_table(create_table, partitions, table_info),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
/// Submits a procedure to create logical tables.
|
||||
async fn create_logical_tables_procedure(
|
||||
&self,
|
||||
tables_data: Vec<(CreateTableExpr, RawTableInfo)>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_create_logical_tables(tables_data),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
/// Submits a procedure to alter logical tables.
|
||||
async fn alter_logical_tables_procedure(
|
||||
&self,
|
||||
tables_data: Vec<AlterTableExpr>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_logical_tables(tables_data),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(ExecuteDdlSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
/// Schema of a logical table.
|
||||
pub struct LogicalSchema {
|
||||
/// Name of the logical table.
|
||||
pub name: String,
|
||||
/// Schema of columns in the logical table.
|
||||
pub columns: Vec<ColumnSchema>,
|
||||
}
|
||||
|
||||
/// Logical table schemas.
|
||||
pub struct LogicalSchemas {
|
||||
/// Logical table schemas group by physical table name.
|
||||
pub schemas: HashMap<String, Vec<LogicalSchema>>,
|
||||
}
|
||||
|
||||
/// Creates or alters logical tables to match the provided schemas
|
||||
/// for prometheus metrics.
|
||||
pub async fn ensure_logical_tables_for_metrics(
|
||||
helper: &SchemaHelper,
|
||||
schemas: &LogicalSchemas,
|
||||
query_ctx: &QueryContextRef,
|
||||
) -> Result<()> {
|
||||
let catalog_name = query_ctx.current_catalog();
|
||||
let schema_name = query_ctx.current_schema();
|
||||
|
||||
// 1. For each physical table, creates it if it doesn't exist.
|
||||
for physical_table_name in schemas.schemas.keys() {
|
||||
// Check if the physical table exists and create it if it doesn't
|
||||
let physical_table_opt = helper
|
||||
.get_table(catalog_name, &schema_name, physical_table_name)
|
||||
.await?;
|
||||
|
||||
if physical_table_opt.is_none() {
|
||||
// Physical table doesn't exist, create it
|
||||
helper
|
||||
.create_metric_physical_table(query_ctx, physical_table_name.clone())
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Collects logical tables that do not exist. (CreateTableExpr)
|
||||
let mut tables_to_create: Vec<CreateTableExpr> = Vec::new();
|
||||
|
||||
// 3. Collects alterations (columns to add) for each logical table. (AlterTableExpr)
|
||||
let mut tables_to_alter: Vec<AlterTableExpr> = Vec::new();
|
||||
|
||||
// Process each logical table to determine if it needs to be created or altered
|
||||
for (physical_table_name, logical_schemas) in &schemas.schemas {
|
||||
for logical_schema in logical_schemas {
|
||||
let table_name = &logical_schema.name;
|
||||
|
||||
// Check if the logical table exists
|
||||
let table_opt = helper
|
||||
.get_table(catalog_name, &schema_name, table_name)
|
||||
.await?;
|
||||
|
||||
if let Some(existing_table) = table_opt {
|
||||
// Logical table exists, determine if it needs alteration
|
||||
let existing_schema = existing_table.schema();
|
||||
let column_exprs = ColumnExpr::from_column_schemas(&logical_schema.columns);
|
||||
let add_columns =
|
||||
expr_helper::extract_add_columns_expr(&existing_schema, column_exprs)?;
|
||||
let Some(add_columns) = add_columns else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let alter_expr = AlterTableExpr {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.to_string(),
|
||||
kind: Some(Kind::AddColumns(add_columns)),
|
||||
};
|
||||
tables_to_alter.push(alter_expr);
|
||||
} else {
|
||||
// Logical table doesn't exist, prepare for creation
|
||||
// Build a CreateTableExpr from the table reference and columns
|
||||
let table_ref = TableReference::full(catalog_name, &schema_name, table_name);
|
||||
let mut create_expr = build_create_table_expr(
|
||||
&table_ref,
|
||||
&logical_schema.columns,
|
||||
METRIC_ENGINE_NAME,
|
||||
)?;
|
||||
create_expr.create_if_not_exists = true;
|
||||
let create_type = AutoCreateTableType::Logical(physical_table_name.clone());
|
||||
// Fill table options.
|
||||
fill_table_options_for_create(
|
||||
&mut create_expr.table_options,
|
||||
&create_type,
|
||||
query_ctx,
|
||||
);
|
||||
|
||||
tables_to_create.push(create_expr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Creates logical tables in batch using `create_logical_tables()`.
|
||||
if !tables_to_create.is_empty() {
|
||||
helper
|
||||
.create_logical_tables(&tables_to_create, query_ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
|
||||
// 5. Alters logical tables in batch using `alter_logical_tables()`.
|
||||
if !tables_to_alter.is_empty() {
|
||||
helper
|
||||
.alter_logical_tables(tables_to_alter, query_ctx.clone())
|
||||
.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the list of metadatas for a list of region ids.
|
||||
// TODO(yingwen): Should we return RegionMetadataRef?
|
||||
pub async fn metadatas_for_region_ids(
|
||||
partition_manager: &PartitionRuleManagerRef,
|
||||
node_manager: &NodeManagerRef,
|
||||
region_ids: &[RegionId],
|
||||
ctx: &QueryContextRef,
|
||||
) -> Result<Vec<Option<RegionMetadata>>> {
|
||||
// Groups regions by peers.
|
||||
// This map contains: peer => (ListMetadataRequest, A vec of indices of regions).
|
||||
let mut request_per_region = HashMap::new();
|
||||
for (index, region_id) in region_ids.iter().copied().enumerate() {
|
||||
let peer = partition_manager
|
||||
.find_region_leader(region_id)
|
||||
.await
|
||||
.context(FindRegionLeaderSnafu)?;
|
||||
let request_indices = request_per_region
|
||||
.entry(peer)
|
||||
.or_insert_with(|| (ListMetadataRequest::default(), Vec::new()));
|
||||
request_indices.0.region_ids.push(region_id.as_u64());
|
||||
request_indices.1.push(index);
|
||||
}
|
||||
|
||||
// Sends requests to datanode and waits for responses.
|
||||
let tasks = request_per_region
|
||||
.into_iter()
|
||||
.map(|(peer, (request, indices))| {
|
||||
let node_manager = node_manager.clone();
|
||||
let request_factory = RegionRequestFactory::new(RegionRequestHeader {
|
||||
tracing_context: TracingContext::from_current_span().to_w3c(),
|
||||
dbname: ctx.get_db_string(),
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
common_runtime::spawn_global(async move {
|
||||
let request = request_factory.build_request(Body::ListMetadata(request));
|
||||
let resp = node_manager
|
||||
.datanode(&peer)
|
||||
.await
|
||||
.handle(request)
|
||||
.await
|
||||
.context(RequestRegionSnafu)?;
|
||||
|
||||
let metadatas: Vec<Option<RegionMetadata>> =
|
||||
serde_json::from_slice(&resp.metadata).context(DecodeJsonSnafu)?;
|
||||
Ok((metadatas, indices))
|
||||
})
|
||||
});
|
||||
let results = future::try_join_all(tasks).await.context(JoinTaskSnafu)?;
|
||||
let mut output_metadatas = vec![None; region_ids.len()];
|
||||
for result in results {
|
||||
let (mut metadatas, indices) = result?;
|
||||
ensure!(
|
||||
metadatas.len() == indices.len(),
|
||||
UnexpectedSnafu {
|
||||
violated: format!(
|
||||
"Length mismatch between request and response, expected {} metadatas, got {}",
|
||||
indices.len(),
|
||||
metadatas.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
for index in indices {
|
||||
output_metadatas[index] = metadatas[index].take();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output_metadatas)
|
||||
}
|
||||
@@ -18,7 +18,7 @@ mod copy_query_to;
|
||||
mod copy_table_from;
|
||||
mod copy_table_to;
|
||||
mod cursor;
|
||||
mod ddl;
|
||||
pub(crate) mod ddl;
|
||||
mod describe;
|
||||
mod dml;
|
||||
mod kill;
|
||||
@@ -102,6 +102,14 @@ pub struct StatementExecutor {
|
||||
pub type StatementExecutorRef = Arc<StatementExecutor>;
|
||||
|
||||
impl StatementExecutor {
|
||||
/// Returns a reference to the procedure executor held by this statement executor.
pub fn procedure_executor(&self) -> &ProcedureExecutorRef {
|
||||
&self.procedure_executor
|
||||
}
|
||||
|
||||
/// Returns a reference to the cache invalidator held by this statement executor.
pub fn cache_invalidator(&self) -> &CacheInvalidatorRef {
|
||||
&self.cache_invalidator
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
@@ -244,6 +252,16 @@ impl StatementExecutor {
|
||||
)
|
||||
.await
|
||||
}
|
||||
#[cfg(feature = "enterprise")]
|
||||
Statement::DropTrigger(stmt) => {
|
||||
self.drop_trigger(
|
||||
query_ctx.current_catalog().to_string(),
|
||||
format_raw_object_name(stmt.trigger_name()),
|
||||
stmt.drop_if_exists(),
|
||||
query_ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
Statement::CreateView(stmt) => {
|
||||
let _ = self.create_view(stmt, query_ctx).await?;
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
@@ -370,6 +388,7 @@ impl StatementExecutor {
|
||||
Statement::Use(db) => self.use_database(db, query_ctx).await,
|
||||
Statement::Admin(admin) => self.execute_admin_command(admin, query_ctx).await,
|
||||
Statement::Kill(kill) => self.execute_kill(query_ctx, kill).await,
|
||||
Statement::ShowProcesslist(show) => self.show_processlist(show, query_ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ use api::v1::{
|
||||
};
|
||||
use catalog::CatalogManagerRef;
|
||||
use chrono::Utc;
|
||||
use common_catalog::consts::{is_readonly_schema, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_catalog::consts::is_readonly_schema;
|
||||
use common_catalog::{format_full_flow_name, format_full_table_name};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::Context;
|
||||
@@ -37,11 +37,13 @@ use common_meta::key::schema_name::{SchemaName, SchemaNameKey};
|
||||
use common_meta::key::NAME_PATTERN;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use common_meta::rpc::ddl::trigger::CreateTriggerTask;
|
||||
#[cfg(feature = "enterprise")]
|
||||
use common_meta::rpc::ddl::trigger::DropTriggerTask;
|
||||
use common_meta::rpc::ddl::{
|
||||
CreateFlowTask, DdlTask, DropFlowTask, DropViewTask, SubmitDdlTaskRequest,
|
||||
SubmitDdlTaskResponse,
|
||||
};
|
||||
use common_meta::rpc::router::{Partition, Partition as MetaPartition};
|
||||
use common_meta::rpc::router::Partition as MetaPartition;
|
||||
use common_query::Output;
|
||||
use common_telemetry::{debug, info, tracing, warn};
|
||||
use common_time::Timezone;
|
||||
@@ -72,7 +74,6 @@ use sql::statements::create::{
|
||||
use sql::statements::sql_value_to_value;
|
||||
use sql::statements::statement::Statement;
|
||||
use sqlparser::ast::{Expr, Ident, UnaryOperator, Value as ParserValue};
|
||||
use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME};
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
use table::dist_table::DistTable;
|
||||
use table::metadata::{self, RawTableInfo, RawTableMeta, TableId, TableInfo, TableType};
|
||||
@@ -82,12 +83,11 @@ use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self, AlterExprToRequestSnafu, BuildDfLogicalPlanSnafu, CatalogSnafu, ColumnDataTypeSnafu,
|
||||
ColumnNotFoundSnafu, ConvertSchemaSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu,
|
||||
DeserializePartitionSnafu, EmptyDdlExprSnafu, ExternalSnafu, ExtractTableNamesSnafu,
|
||||
FlowNotFoundSnafu, InvalidPartitionRuleSnafu, InvalidPartitionSnafu, InvalidSqlSnafu,
|
||||
InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result,
|
||||
SchemaInUseSnafu, SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu,
|
||||
TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu,
|
||||
ColumnNotFoundSnafu, ConvertSchemaSnafu, CreateTableInfoSnafu, DeserializePartitionSnafu,
|
||||
ExternalSnafu, ExtractTableNamesSnafu, FlowNotFoundSnafu, InvalidPartitionRuleSnafu,
|
||||
InvalidPartitionSnafu, InvalidSqlSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu,
|
||||
ParseSqlValueSnafu, Result, SchemaInUseSnafu, SchemaNotFoundSnafu, SchemaReadOnlySnafu,
|
||||
SubstraitCodecSnafu, TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu,
|
||||
UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu,
|
||||
};
|
||||
use crate::expr_helper;
|
||||
@@ -95,7 +95,8 @@ use crate::statement::show::create_partitions_stmt;
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
lazy_static! {
|
||||
static ref NAME_PATTERN_REG: Regex = Regex::new(&format!("^{NAME_PATTERN}$")).unwrap();
|
||||
/// Regex to validate table name.
|
||||
pub(crate) static ref NAME_PATTERN_REG: Regex = Regex::new(&format!("^{NAME_PATTERN}$")).unwrap();
|
||||
}
|
||||
|
||||
impl StatementExecutor {
|
||||
@@ -180,192 +181,10 @@ impl StatementExecutor {
|
||||
partitions: Option<Partitions>,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<TableRef> {
|
||||
ensure!(
|
||||
!is_readonly_schema(&create_table.schema_name),
|
||||
SchemaReadOnlySnafu {
|
||||
name: create_table.schema_name.clone()
|
||||
}
|
||||
);
|
||||
|
||||
if create_table.engine == METRIC_ENGINE_NAME
|
||||
&& create_table
|
||||
.table_options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
// Create logical tables
|
||||
ensure!(
|
||||
partitions.is_none(),
|
||||
InvalidPartitionRuleSnafu {
|
||||
reason: "logical table in metric engine should not have partition rule, it will be inherited from physical table",
|
||||
}
|
||||
);
|
||||
self.create_logical_tables(std::slice::from_ref(create_table), query_ctx)
|
||||
.await?
|
||||
.into_iter()
|
||||
.next()
|
||||
.context(error::UnexpectedSnafu {
|
||||
violated: "expected to create logical tables",
|
||||
})
|
||||
} else {
|
||||
// Create other normal table
|
||||
self.create_non_logic_table(create_table, partitions, query_ctx)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_non_logic_table(
|
||||
&self,
|
||||
create_table: &mut CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<TableRef> {
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLE.start_timer();
|
||||
|
||||
// Check if schema exists
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
))
|
||||
self.inserter
|
||||
.schema_helper
|
||||
.create_table_by_expr(create_table, partitions, query_ctx)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
ensure!(
|
||||
schema.is_some(),
|
||||
SchemaNotFoundSnafu {
|
||||
schema_info: &create_table.schema_name,
|
||||
}
|
||||
);
|
||||
|
||||
// if table exists.
|
||||
if let Some(table) = self
|
||||
.catalog_manager
|
||||
.table(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
Some(&query_ctx),
|
||||
)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
{
|
||||
return if create_table.create_if_not_exists {
|
||||
Ok(table)
|
||||
} else {
|
||||
TableAlreadyExistsSnafu {
|
||||
table: format_full_table_name(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
),
|
||||
}
|
||||
.fail()
|
||||
};
|
||||
}
|
||||
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(&create_table.table_name),
|
||||
InvalidTableNameSnafu {
|
||||
table_name: &create_table.table_name,
|
||||
}
|
||||
);
|
||||
|
||||
let table_name = TableName::new(
|
||||
&create_table.catalog_name,
|
||||
&create_table.schema_name,
|
||||
&create_table.table_name,
|
||||
);
|
||||
|
||||
let (partitions, partition_cols) = parse_partitions(create_table, partitions, &query_ctx)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols)?;
|
||||
|
||||
let resp = self
|
||||
.create_table_procedure(
|
||||
create_table.clone(),
|
||||
partitions,
|
||||
table_info.clone(),
|
||||
query_ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let table_id = resp
|
||||
.table_ids
|
||||
.into_iter()
|
||||
.next()
|
||||
.context(error::UnexpectedSnafu {
|
||||
violated: "expected table_id",
|
||||
})?;
|
||||
info!("Successfully created table '{table_name}' with table id {table_id}");
|
||||
|
||||
table_info.ident.table_id = table_id;
|
||||
|
||||
let table_info: Arc<TableInfo> =
|
||||
Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?);
|
||||
create_table.table_id = Some(api::v1::TableId { id: table_id });
|
||||
|
||||
let table = DistTable::table(table_info);
|
||||
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_logical_tables(
|
||||
&self,
|
||||
create_table_exprs: &[CreateTableExpr],
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLES.start_timer();
|
||||
ensure!(
|
||||
!create_table_exprs.is_empty(),
|
||||
EmptyDdlExprSnafu {
|
||||
name: "create logic tables"
|
||||
}
|
||||
);
|
||||
|
||||
// Check table names
|
||||
for create_table in create_table_exprs {
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(&create_table.table_name),
|
||||
InvalidTableNameSnafu {
|
||||
table_name: &create_table.table_name,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
let mut raw_tables_info = create_table_exprs
|
||||
.iter()
|
||||
.map(|create| create_table_info(create, vec![]))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let tables_data = create_table_exprs
|
||||
.iter()
|
||||
.cloned()
|
||||
.zip(raw_tables_info.iter().cloned())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let resp = self
|
||||
.create_logical_tables_procedure(tables_data, query_context)
|
||||
.await?;
|
||||
|
||||
let table_ids = resp.table_ids;
|
||||
ensure!(table_ids.len() == raw_tables_info.len(), CreateLogicalTablesSnafu {
|
||||
reason: format!("The number of tables is inconsistent with the expected number to be created, expected: {}, actual: {}", raw_tables_info.len(), table_ids.len())
|
||||
});
|
||||
info!("Successfully created logical tables: {:?}", table_ids);
|
||||
|
||||
for (i, table_info) in raw_tables_info.iter_mut().enumerate() {
|
||||
table_info.ident.table_id = table_ids[i];
|
||||
}
|
||||
let tables_info = raw_tables_info
|
||||
.into_iter()
|
||||
.map(|x| x.try_into().context(CreateTableInfoSnafu))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(tables_info
|
||||
.into_iter()
|
||||
.map(|x| DistTable::table(Arc::new(x)))
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[cfg(feature = "enterprise")]
|
||||
@@ -848,6 +667,41 @@ impl StatementExecutor {
|
||||
.context(error::ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
/// Drops a trigger by submitting a [`DropTriggerTask`] as a DDL procedure.
///
/// On success the returned output reports zero affected rows.
#[cfg(feature = "enterprise")]
#[tracing::instrument(skip_all)]
pub(super) async fn drop_trigger(
    &self,
    catalog_name: String,
    trigger_name: String,
    drop_if_exists: bool,
    query_context: QueryContextRef,
) -> Result<Output> {
    // The procedure response carries nothing the caller needs, so it is discarded.
    self.drop_trigger_procedure(
        DropTriggerTask {
            catalog_name,
            trigger_name,
            drop_if_exists,
        },
        query_context,
    )
    .await
    .map(|_response| Output::new_with_affected_rows(0))
}
|
||||
|
||||
/// Submits the drop-trigger task to the procedure executor and returns its response.
///
/// Executor failures are wrapped with `ExecuteDdlSnafu`.
#[cfg(feature = "enterprise")]
async fn drop_trigger_procedure(
    &self,
    expr: DropTriggerTask,
    query_context: QueryContextRef,
) -> Result<SubmitDdlTaskResponse> {
    let task = DdlTask::new_drop_trigger(expr);
    let submitted = self
        .procedure_executor
        .submit_ddl_task(
            &ExecutorContext::default(),
            SubmitDdlTaskRequest {
                query_context,
                task,
            },
        )
        .await;

    submitted.context(error::ExecuteDdlSnafu)
}
|
||||
|
||||
/// Drop a view
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub(crate) async fn drop_view(
|
||||
@@ -916,64 +770,6 @@ impl StatementExecutor {
|
||||
.context(error::ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn alter_logical_tables(
|
||||
&self,
|
||||
alter_table_exprs: Vec<AlterTableExpr>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let _timer = crate::metrics::DIST_ALTER_TABLES.start_timer();
|
||||
ensure!(
|
||||
!alter_table_exprs.is_empty(),
|
||||
EmptyDdlExprSnafu {
|
||||
name: "alter logical tables"
|
||||
}
|
||||
);
|
||||
|
||||
// group by physical table id
|
||||
let mut groups: HashMap<TableId, Vec<AlterTableExpr>> = HashMap::new();
|
||||
for expr in alter_table_exprs {
|
||||
// Get table_id from catalog_manager
|
||||
let catalog = if expr.catalog_name.is_empty() {
|
||||
query_context.current_catalog()
|
||||
} else {
|
||||
&expr.catalog_name
|
||||
};
|
||||
let schema = if expr.schema_name.is_empty() {
|
||||
query_context.current_schema()
|
||||
} else {
|
||||
expr.schema_name.to_string()
|
||||
};
|
||||
let table_name = &expr.table_name;
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(catalog, &schema, table_name, Some(&query_context))
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(catalog, &schema, table_name),
|
||||
})?;
|
||||
let table_id = table.table_info().ident.table_id;
|
||||
let physical_table_id = self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_id(table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
groups.entry(physical_table_id).or_default().push(expr);
|
||||
}
|
||||
|
||||
// Submit procedure for each physical table
|
||||
let mut handles = Vec::with_capacity(groups.len());
|
||||
for (_physical_table_id, exprs) in groups {
|
||||
let fut = self.alter_logical_tables_procedure(exprs, query_context.clone());
|
||||
handles.push(fut);
|
||||
}
|
||||
let _results = futures::future::try_join_all(handles).await?;
|
||||
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn drop_table(
|
||||
&self,
|
||||
@@ -1115,60 +911,6 @@ impl StatementExecutor {
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
/// Verifies an alter and returns whether it is necessary to perform the alter.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns true if the alter need to be porformed; otherwise, it returns false.
|
||||
fn verify_alter(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_info: Arc<TableInfo>,
|
||||
expr: AlterTableExpr,
|
||||
) -> Result<bool> {
|
||||
let request: AlterTableRequest = common_grpc_expr::alter_expr_to_request(table_id, expr)
|
||||
.context(AlterExprToRequestSnafu)?;
|
||||
|
||||
let AlterTableRequest {
|
||||
table_name,
|
||||
alter_kind,
|
||||
..
|
||||
} = &request;
|
||||
|
||||
if let AlterKind::RenameTable { new_table_name } = alter_kind {
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(new_table_name),
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!("Invalid table name: {}", new_table_name)
|
||||
}
|
||||
);
|
||||
} else if let AlterKind::AddColumns { columns } = alter_kind {
|
||||
// If all the columns are marked as add_if_not_exists and they already exist in the table,
|
||||
// there is no need to perform the alter.
|
||||
let column_names: HashSet<_> = table_info
|
||||
.meta
|
||||
.schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(|schema| &schema.name)
|
||||
.collect();
|
||||
if columns.iter().all(|column| {
|
||||
column_names.contains(&column.column_schema.name) && column.add_if_not_exists
|
||||
}) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
let _ = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_name, &request.alter_kind)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.context(error::BuildTableMetaSnafu { table_name })?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn alter_table(
|
||||
&self,
|
||||
@@ -1185,116 +927,10 @@ impl StatementExecutor {
|
||||
expr: AlterTableExpr,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
ensure!(
|
||||
!is_readonly_schema(&expr.schema_name),
|
||||
SchemaReadOnlySnafu {
|
||||
name: expr.schema_name.clone()
|
||||
}
|
||||
);
|
||||
|
||||
let catalog_name = if expr.catalog_name.is_empty() {
|
||||
DEFAULT_CATALOG_NAME.to_string()
|
||||
} else {
|
||||
expr.catalog_name.clone()
|
||||
};
|
||||
|
||||
let schema_name = if expr.schema_name.is_empty() {
|
||||
DEFAULT_SCHEMA_NAME.to_string()
|
||||
} else {
|
||||
expr.schema_name.clone()
|
||||
};
|
||||
|
||||
let table_name = expr.table_name.clone();
|
||||
|
||||
let table = self
|
||||
.catalog_manager
|
||||
.table(
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
&table_name,
|
||||
Some(&query_context),
|
||||
)
|
||||
self.inserter
|
||||
.schema_helper
|
||||
.alter_table_by_expr(expr, query_context)
|
||||
.await
|
||||
.context(CatalogSnafu)?
|
||||
.with_context(|| TableNotFoundSnafu {
|
||||
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
|
||||
})?;
|
||||
|
||||
let table_id = table.table_info().ident.table_id;
|
||||
let need_alter = self.verify_alter(table_id, table.table_info(), expr.clone())?;
|
||||
if !need_alter {
|
||||
return Ok(Output::new_with_affected_rows(0));
|
||||
}
|
||||
info!(
|
||||
"Table info before alter is {:?}, expr: {:?}",
|
||||
table.table_info(),
|
||||
expr
|
||||
);
|
||||
|
||||
let physical_table_id = self
|
||||
.table_metadata_manager
|
||||
.table_route_manager()
|
||||
.get_physical_table_id(table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let (req, invalidate_keys) = if physical_table_id == table_id {
|
||||
// This is physical table
|
||||
let req = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_table(expr),
|
||||
};
|
||||
|
||||
let invalidate_keys = vec![
|
||||
CacheIdent::TableId(table_id),
|
||||
CacheIdent::TableName(TableName::new(catalog_name, schema_name, table_name)),
|
||||
];
|
||||
|
||||
(req, invalidate_keys)
|
||||
} else {
|
||||
// This is logical table
|
||||
let req = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_logical_tables(vec![expr]),
|
||||
};
|
||||
|
||||
let mut invalidate_keys = vec![
|
||||
CacheIdent::TableId(physical_table_id),
|
||||
CacheIdent::TableId(table_id),
|
||||
CacheIdent::TableName(TableName::new(catalog_name, schema_name, table_name)),
|
||||
];
|
||||
|
||||
let physical_table = self
|
||||
.table_metadata_manager
|
||||
.table_info_manager()
|
||||
.get(physical_table_id)
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.map(|x| x.into_inner());
|
||||
if let Some(physical_table) = physical_table {
|
||||
let physical_table_name = TableName::new(
|
||||
physical_table.table_info.catalog_name,
|
||||
physical_table.table_info.schema_name,
|
||||
physical_table.table_info.name,
|
||||
);
|
||||
invalidate_keys.push(CacheIdent::TableName(physical_table_name));
|
||||
}
|
||||
|
||||
(req, invalidate_keys)
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), req)
|
||||
.await
|
||||
.context(error::ExecuteDdlSnafu)?;
|
||||
|
||||
// Invalidates local cache ASAP.
|
||||
self.cache_invalidator
|
||||
.invalidate(&Context::default(), &invalidate_keys)
|
||||
.await
|
||||
.context(error::InvalidateTableCacheSnafu)?;
|
||||
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
@@ -1349,58 +985,6 @@ impl StatementExecutor {
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
async fn create_table_procedure(
|
||||
&self,
|
||||
create_table: CreateTableExpr,
|
||||
partitions: Vec<Partition>,
|
||||
table_info: RawTableInfo,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let partitions = partitions.into_iter().map(Into::into).collect();
|
||||
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_create_table(create_table, partitions, table_info),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(error::ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
async fn create_logical_tables_procedure(
|
||||
&self,
|
||||
tables_data: Vec<(CreateTableExpr, RawTableInfo)>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_create_logical_tables(tables_data),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(error::ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
async fn alter_logical_tables_procedure(
|
||||
&self,
|
||||
tables_data: Vec<AlterTableExpr>,
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<SubmitDdlTaskResponse> {
|
||||
let request = SubmitDdlTaskRequest {
|
||||
query_context,
|
||||
task: DdlTask::new_alter_logical_tables(tables_data),
|
||||
};
|
||||
|
||||
self.procedure_executor
|
||||
.submit_ddl_task(&ExecutorContext::default(), request)
|
||||
.await
|
||||
.context(error::ExecuteDdlSnafu)
|
||||
}
|
||||
|
||||
async fn drop_table_procedure(
|
||||
&self,
|
||||
table_name: &TableName,
|
||||
@@ -1548,8 +1132,61 @@ impl StatementExecutor {
|
||||
}
|
||||
}
|
||||
|
||||
/// Verifies an alter and returns whether it is necessary to perform the alter.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns true if the alter need to be porformed; otherwise, it returns false.
|
||||
pub(crate) fn verify_alter(
|
||||
table_id: TableId,
|
||||
table_info: Arc<TableInfo>,
|
||||
expr: AlterTableExpr,
|
||||
) -> Result<bool> {
|
||||
let request: AlterTableRequest =
|
||||
common_grpc_expr::alter_expr_to_request(table_id, expr).context(AlterExprToRequestSnafu)?;
|
||||
|
||||
let AlterTableRequest {
|
||||
table_name,
|
||||
alter_kind,
|
||||
..
|
||||
} = &request;
|
||||
|
||||
if let AlterKind::RenameTable { new_table_name } = alter_kind {
|
||||
ensure!(
|
||||
NAME_PATTERN_REG.is_match(new_table_name),
|
||||
error::UnexpectedSnafu {
|
||||
violated: format!("Invalid table name: {}", new_table_name)
|
||||
}
|
||||
);
|
||||
} else if let AlterKind::AddColumns { columns } = alter_kind {
|
||||
// If all the columns are marked as add_if_not_exists and they already exist in the table,
|
||||
// there is no need to perform the alter.
|
||||
let column_names: HashSet<_> = table_info
|
||||
.meta
|
||||
.schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(|schema| &schema.name)
|
||||
.collect();
|
||||
if columns.iter().all(|column| {
|
||||
column_names.contains(&column.column_schema.name) && column.add_if_not_exists
|
||||
}) {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
|
||||
let _ = table_info
|
||||
.meta
|
||||
.builder_with_alter_kind(table_name, &request.alter_kind)
|
||||
.context(error::TableSnafu)?
|
||||
.build()
|
||||
.context(error::BuildTableMetaSnafu { table_name })?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Parse partition statement [Partitions] into [MetaPartition] and partition columns.
|
||||
fn parse_partitions(
|
||||
pub(crate) fn parse_partitions(
|
||||
create_table: &CreateTableExpr,
|
||||
partitions: Option<Partitions>,
|
||||
query_ctx: &QueryContextRef,
|
||||
@@ -1582,7 +1219,7 @@ fn parse_partitions(
|
||||
))
|
||||
}
|
||||
|
||||
fn create_table_info(
|
||||
pub(crate) fn create_table_info(
|
||||
create_table: &CreateTableExpr,
|
||||
partition_columns: Vec<String>,
|
||||
) -> Result<RawTableInfo> {
|
||||
|
||||
@@ -25,7 +25,7 @@ use sql::ast::Ident;
|
||||
use sql::statements::create::Partitions;
|
||||
use sql::statements::show::{
|
||||
ShowColumns, ShowCreateFlow, ShowCreateView, ShowDatabases, ShowFlows, ShowIndex, ShowKind,
|
||||
ShowRegion, ShowTableStatus, ShowTables, ShowVariables, ShowViews,
|
||||
ShowProcessList, ShowRegion, ShowTableStatus, ShowTables, ShowVariables, ShowViews,
|
||||
};
|
||||
use sql::statements::OptionMap;
|
||||
use table::metadata::TableType;
|
||||
@@ -33,8 +33,9 @@ use table::table_name::TableName;
|
||||
use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self, CatalogSnafu, ExecuteStatementSnafu, ExternalSnafu, FindViewInfoSnafu, InvalidSqlSnafu,
|
||||
Result, TableMetadataManagerSnafu, ViewInfoNotFoundSnafu, ViewNotFoundSnafu,
|
||||
self, CatalogSnafu, ExecLogicalPlanSnafu, ExecuteStatementSnafu, ExternalSnafu,
|
||||
FindViewInfoSnafu, InvalidSqlSnafu, Result, TableMetadataManagerSnafu, ViewInfoNotFoundSnafu,
|
||||
ViewNotFoundSnafu,
|
||||
};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
@@ -314,6 +315,16 @@ impl StatementExecutor {
|
||||
.await
|
||||
.context(error::ExecuteStatementSnafu)
|
||||
}
|
||||
|
||||
pub async fn show_processlist(
|
||||
&self,
|
||||
stmt: ShowProcessList,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
query::sql::show_processlist(stmt, &self.query_engine, &self.catalog_manager, query_ctx)
|
||||
.await
|
||||
.context(ExecLogicalPlanSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_partitions_stmt(partitions: Vec<PartitionInfo>) -> Result<Option<Partitions>> {
|
||||
|
||||
@@ -88,7 +88,6 @@ impl PipelineOperator {
|
||||
catalog.to_string(),
|
||||
Arc::new(PipelineTable::new(
|
||||
self.inserter.clone(),
|
||||
self.statement_executor.clone(),
|
||||
table,
|
||||
self.query_engine.clone(),
|
||||
)),
|
||||
|
||||
@@ -30,7 +30,6 @@ use datatypes::timestamp::TimestampNanosecond;
|
||||
use datatypes::vectors::{StringVector, TimestampNanosecondVector, Vector};
|
||||
use itertools::Itertools;
|
||||
use operator::insert::InserterRef;
|
||||
use operator::statement::StatementExecutorRef;
|
||||
use query::dataframe::DataFrame;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::{QueryContextBuilder, QueryContextRef};
|
||||
@@ -61,7 +60,6 @@ pub(crate) const EMPTY_SCHEMA_NAME: &str = "";
|
||||
/// Every catalog has its own pipeline table.
|
||||
pub struct PipelineTable {
|
||||
inserter: InserterRef,
|
||||
statement_executor: StatementExecutorRef,
|
||||
table: TableRef,
|
||||
query_engine: QueryEngineRef,
|
||||
cache: PipelineCache,
|
||||
@@ -69,15 +67,9 @@ pub struct PipelineTable {
|
||||
|
||||
impl PipelineTable {
|
||||
/// Create a new PipelineTable.
|
||||
pub fn new(
|
||||
inserter: InserterRef,
|
||||
statement_executor: StatementExecutorRef,
|
||||
table: TableRef,
|
||||
query_engine: QueryEngineRef,
|
||||
) -> Self {
|
||||
pub fn new(inserter: InserterRef, table: TableRef, query_engine: QueryEngineRef) -> Self {
|
||||
Self {
|
||||
inserter,
|
||||
statement_executor,
|
||||
table,
|
||||
query_engine,
|
||||
cache: PipelineCache::new(),
|
||||
@@ -232,13 +224,7 @@ impl PipelineTable {
|
||||
|
||||
let output = self
|
||||
.inserter
|
||||
.handle_row_inserts(
|
||||
requests,
|
||||
Self::query_ctx(&table_info),
|
||||
&self.statement_executor,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
.handle_row_inserts(requests, Self::query_ctx(&table_info), false, false)
|
||||
.await
|
||||
.context(InsertPipelineSnafu)?;
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::information_schema::{
|
||||
columns, flows, key_column_usage, region_peers, schemata, tables, CHARACTER_SETS, COLLATIONS,
|
||||
COLUMNS, FLOWS, KEY_COLUMN_USAGE, REGION_PEERS, SCHEMATA, TABLES, VIEWS,
|
||||
columns, flows, key_column_usage, process_list, region_peers, schemata, tables, CHARACTER_SETS,
|
||||
COLLATIONS, COLUMNS, FLOWS, KEY_COLUMN_USAGE, REGION_PEERS, SCHEMATA, TABLES, VIEWS,
|
||||
};
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_catalog::consts::{
|
||||
@@ -57,8 +57,8 @@ use sql::ast::Ident;
|
||||
use sql::parser::ParserContext;
|
||||
use sql::statements::create::{CreateDatabase, CreateFlow, CreateView, Partitions, SqlOrTql};
|
||||
use sql::statements::show::{
|
||||
ShowColumns, ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowRegion, ShowTableStatus,
|
||||
ShowTables, ShowVariables, ShowViews,
|
||||
ShowColumns, ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowProcessList, ShowRegion,
|
||||
ShowTableStatus, ShowTables, ShowVariables, ShowViews,
|
||||
};
|
||||
use sql::statements::statement::Statement;
|
||||
use sql::statements::OptionMap;
|
||||
@@ -1233,6 +1233,50 @@ fn parse_file_table_format(options: &HashMap<String, String>) -> Result<Box<dyn
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn show_processlist(
|
||||
stmt: ShowProcessList,
|
||||
query_engine: &QueryEngineRef,
|
||||
catalog_manager: &CatalogManagerRef,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let projects = if stmt.full {
|
||||
vec![
|
||||
(process_list::ID, "Id"),
|
||||
(process_list::CATALOG, "Catalog"),
|
||||
(process_list::SCHEMAS, "Schema"),
|
||||
(process_list::CLIENT, "Client"),
|
||||
(process_list::FRONTEND, "Frontend"),
|
||||
(process_list::START_TIMESTAMP, "Start Time"),
|
||||
(process_list::ELAPSED_TIME, "Elapsed Time"),
|
||||
(process_list::QUERY, "Query"),
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
(process_list::ID, "Id"),
|
||||
(process_list::CATALOG, "Catalog"),
|
||||
(process_list::QUERY, "Query"),
|
||||
(process_list::ELAPSED_TIME, "Elapsed Time"),
|
||||
]
|
||||
};
|
||||
|
||||
let filters = vec![];
|
||||
let like_field = None;
|
||||
let sort = vec![col("id").sort(true, true)];
|
||||
query_from_information_schema_table(
|
||||
query_engine,
|
||||
catalog_manager,
|
||||
query_ctx.clone(),
|
||||
"process_list",
|
||||
vec![],
|
||||
projects.clone(),
|
||||
filters,
|
||||
like_field,
|
||||
sort,
|
||||
ShowKind::All,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -36,6 +36,7 @@ chrono.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-datasource.workspace = true
|
||||
common-error.workspace = true
|
||||
common-frontend.workspace = true
|
||||
common-grpc.workspace = true
|
||||
@@ -74,11 +75,18 @@ jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
log-query.workspace = true
|
||||
loki-proto.workspace = true
|
||||
metric-engine.workspace = true
|
||||
mime_guess = "2.0"
|
||||
mito-codec.workspace = true
|
||||
mito2.workspace = true
|
||||
notify.workspace = true
|
||||
object-pool = "0.5"
|
||||
object-store.workspace = true
|
||||
once_cell.workspace = true
|
||||
openmetrics-parser = "0.4"
|
||||
operator.workspace = true
|
||||
parquet.workspace = true
|
||||
partition.workspace = true
|
||||
simd-json.workspace = true
|
||||
socket2 = "0.5"
|
||||
# use crates.io version once the following PRs are merged into the nextest release
|
||||
|
||||
@@ -1 +1 @@
|
||||
v0.10.0
|
||||
v0.10.1
|
||||
|
||||
415
src/servers/src/access_layer.rs
Normal file
415
src/servers/src/access_layer.rs
Normal file
@@ -0,0 +1,415 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::{
|
||||
Array, PrimitiveArray, RecordBatch, TimestampMicrosecondArray, TimestampMillisecondArray,
|
||||
TimestampNanosecondArray, TimestampSecondArray,
|
||||
};
|
||||
use arrow::datatypes::Int64Type;
|
||||
use arrow_schema::TimeUnit;
|
||||
use common_datasource::parquet_writer::AsyncWriter;
|
||||
use datafusion::parquet::arrow::AsyncArrowWriter;
|
||||
use mito2::sst::file::{FileId, FileMeta};
|
||||
use mito2::sst::parquet::{DEFAULT_ROW_GROUP_SIZE, PARQUET_METADATA_KEY};
|
||||
use object_store::config::ObjectStoreConfig;
|
||||
use object_store::util::{join_dir, join_path};
|
||||
use object_store::ObjectStore;
|
||||
use parquet::basic::{Compression, Encoding, ZstdLevel};
|
||||
use parquet::file::metadata::KeyValue;
|
||||
use parquet::file::properties::WriterProperties;
|
||||
use snafu::ResultExt;
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::metric_engine_consts::DATA_REGION_SUBDIR;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::batch_builder::physical_schema;
|
||||
use crate::error;
|
||||
|
||||
type AsyncParquetWriter = AsyncArrowWriter<AsyncWriter>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AccessLayerFactory {
|
||||
object_store: ObjectStore,
|
||||
}
|
||||
|
||||
impl AccessLayerFactory {
|
||||
pub async fn new(config: &ObjectStoreConfig) -> error::Result<AccessLayerFactory> {
|
||||
let object_store = object_store::factory::new_raw_object_store(config, "")
|
||||
.await
|
||||
.context(error::ObjectStoreSnafu)?;
|
||||
Ok(Self { object_store })
|
||||
}
|
||||
|
||||
pub(crate) async fn create_sst_writer(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
region_metadata: RegionMetadataRef,
|
||||
) -> error::Result<ParquetWriter> {
|
||||
let region_dir = build_data_region_dir(catalog, schema, region_metadata.region_id);
|
||||
let file_id = FileId::random();
|
||||
let file_path = join_path(®ion_dir, &file_id.as_parquet());
|
||||
let writer = self
|
||||
.object_store
|
||||
.writer(&file_path)
|
||||
.await
|
||||
.context(error::OpendalSnafu)?;
|
||||
|
||||
let schema = physical_schema();
|
||||
|
||||
let key_value_meta = KeyValue::new(
|
||||
PARQUET_METADATA_KEY.to_string(),
|
||||
region_metadata.to_json().unwrap(),
|
||||
);
|
||||
|
||||
let props = WriterProperties::builder()
|
||||
.set_key_value_metadata(Some(vec![key_value_meta]))
|
||||
.set_compression(Compression::ZSTD(ZstdLevel::default()))
|
||||
.set_encoding(Encoding::PLAIN)
|
||||
.set_max_row_group_size(DEFAULT_ROW_GROUP_SIZE)
|
||||
.build();
|
||||
|
||||
let writer = AsyncParquetWriter::try_new(AsyncWriter::new(writer), schema, Some(props))
|
||||
.context(error::ParquetSnafu)?;
|
||||
Ok(ParquetWriter {
|
||||
region_id: region_metadata.region_id,
|
||||
file_id,
|
||||
region_metadata,
|
||||
writer,
|
||||
timestamp_range: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ParquetWriter {
|
||||
region_id: RegionId,
|
||||
file_id: FileId,
|
||||
region_metadata: RegionMetadataRef,
|
||||
writer: AsyncParquetWriter,
|
||||
timestamp_range: Option<(i64, i64)>,
|
||||
}
|
||||
|
||||
impl ParquetWriter {
|
||||
pub(crate) fn file_id(&self) -> FileId {
|
||||
self.file_id
|
||||
}
|
||||
}
|
||||
|
||||
impl ParquetWriter {
|
||||
pub async fn write_record_batch(
|
||||
&mut self,
|
||||
batch: &RecordBatch,
|
||||
timestamp_range: Option<(i64, i64)>,
|
||||
) -> error::Result<()> {
|
||||
if let Err(e) = self.writer.write(&batch).await.context(error::ParquetSnafu) {
|
||||
common_telemetry::error!(e; "Region metadata: {:?}, batch schema: {:?}", self.region_metadata, batch.schema_ref());
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
let (batch_min, batch_max) =
|
||||
get_or_calculate_timestamp_range(timestamp_range, batch, &self.region_metadata)?;
|
||||
|
||||
if let Some((min, max)) = &mut self.timestamp_range {
|
||||
*min = (*min).min(batch_min);
|
||||
*max = (*max).max(batch_max);
|
||||
} else {
|
||||
self.timestamp_range = Some((batch_min, batch_max));
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn finish(&mut self) -> error::Result<FileMeta> {
|
||||
let (min, max) = self.timestamp_range.unwrap();
|
||||
let timestamp_type = self
|
||||
.region_metadata
|
||||
.time_index_column()
|
||||
.column_schema
|
||||
.data_type
|
||||
.as_timestamp()
|
||||
.unwrap();
|
||||
let min_ts = timestamp_type.create_timestamp(min);
|
||||
let max_ts = timestamp_type.create_timestamp(max);
|
||||
let file_meta = self.writer.finish().await.context(error::ParquetSnafu)?;
|
||||
let meta = FileMeta {
|
||||
region_id: self.region_id,
|
||||
file_id: self.file_id,
|
||||
time_range: (min_ts, max_ts),
|
||||
level: 0,
|
||||
file_size: self.writer.bytes_written() as u64,
|
||||
available_indexes: Default::default(),
|
||||
index_file_size: 0,
|
||||
num_rows: file_meta.num_rows as u64,
|
||||
num_row_groups: file_meta.row_groups.len() as u64,
|
||||
sequence: None, //todo(hl): use flushed sequence here.
|
||||
};
|
||||
Ok(meta)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the data region subdir for metric physical tables.
|
||||
fn build_data_region_dir(catalog: &str, schema: &str, physical_region_id: RegionId) -> String {
|
||||
let storage_path = common_meta::ddl::utils::region_storage_path(&catalog, &schema);
|
||||
join_dir(
|
||||
&store_api::path_utils::region_dir(&storage_path, physical_region_id),
|
||||
DATA_REGION_SUBDIR,
|
||||
)
|
||||
}
|
||||
|
||||
fn get_or_calculate_timestamp_range(
|
||||
timestamp_range: Option<(i64, i64)>,
|
||||
rb: &RecordBatch,
|
||||
region_metadata: &RegionMetadataRef,
|
||||
) -> error::Result<(i64, i64)> {
|
||||
if let Some(range) = timestamp_range {
|
||||
return Ok(range);
|
||||
};
|
||||
|
||||
let ts = rb
|
||||
.column_by_name(®ion_metadata.time_index_column().column_schema.name)
|
||||
.expect("column not found");
|
||||
let arrow::datatypes::DataType::Timestamp(unit, _) = ts.data_type() else {
|
||||
unreachable!("expected timestamp types");
|
||||
};
|
||||
let primitives: PrimitiveArray<Int64Type> = match unit {
|
||||
TimeUnit::Second => ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampSecondArray>()
|
||||
.unwrap()
|
||||
.reinterpret_cast(),
|
||||
TimeUnit::Millisecond => ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMillisecondArray>()
|
||||
.unwrap()
|
||||
.reinterpret_cast(),
|
||||
TimeUnit::Microsecond => ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampMicrosecondArray>()
|
||||
.unwrap()
|
||||
.reinterpret_cast(),
|
||||
TimeUnit::Nanosecond => ts
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampNanosecondArray>()
|
||||
.unwrap()
|
||||
.reinterpret_cast(),
|
||||
};
|
||||
|
||||
let min = arrow::compute::min(&primitives).unwrap();
|
||||
let max = arrow::compute::max(&primitives).unwrap();
|
||||
Ok((min, max))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use arrow::array::{Float64Array, StringArray};
|
||||
use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
|
||||
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
|
||||
use common_time::Timestamp;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use object_store::services::MemoryConfig;
|
||||
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
|
||||
|
||||
use super::*;
|
||||
|
||||
/// The data region dir is nested under `data/<catalog>/<schema>/<table id>/<region>/data/`.
#[test]
fn test_build_data_region_dir_basic() {
    let region_id = RegionId::new(1024, 0);
    let dir = build_data_region_dir("greptime", "public", region_id);
    assert_eq!(&dir, "data/greptime/public/1024/1024_0000000000/data/");
}
|
||||
|
||||
fn create_test_region_metadata() -> RegionMetadataRef {
|
||||
let mut builder = RegionMetadataBuilder::new(RegionId::new(1024, 0));
|
||||
builder
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
GREPTIME_TIMESTAMP,
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
semantic_type: SemanticType::Timestamp,
|
||||
column_id: 1,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new(
|
||||
GREPTIME_VALUE,
|
||||
ConcreteDataType::float64_datatype(),
|
||||
true,
|
||||
),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
})
|
||||
.push_column_metadata(ColumnMetadata {
|
||||
column_schema: ColumnSchema::new("tag", ConcreteDataType::string_datatype(), true),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 3,
|
||||
})
|
||||
.primary_key(vec![3]);
|
||||
let metadata = builder.build().unwrap();
|
||||
Arc::new(metadata)
|
||||
}
|
||||
|
||||
fn create_test_record_batch() -> RecordBatch {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new(
|
||||
GREPTIME_TIMESTAMP,
|
||||
DataType::Timestamp(TimeUnit::Millisecond, None),
|
||||
false,
|
||||
),
|
||||
Field::new(GREPTIME_VALUE, DataType::Float64, true),
|
||||
Field::new("tag", DataType::Utf8, true),
|
||||
]));
|
||||
|
||||
let timestamp_array = TimestampMillisecondArray::from(vec![1000, 2000, 3000]);
|
||||
let value_array = Float64Array::from(vec![Some(10.0), None, Some(30.0)]);
|
||||
let tag_array = StringArray::from(vec![Some("a"), Some("b"), Some("c")]);
|
||||
|
||||
RecordBatch::try_new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(timestamp_array),
|
||||
Arc::new(value_array),
|
||||
Arc::new(tag_array),
|
||||
],
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parquet_writer_write_and_finish() {
|
||||
let object_store = ObjectStore::from_config(MemoryConfig::default())
|
||||
.unwrap()
|
||||
.finish();
|
||||
let factory = AccessLayerFactory { object_store };
|
||||
|
||||
let region_metadata = create_test_region_metadata();
|
||||
let mut writer = factory
|
||||
.create_sst_writer("test_catalog", "test_schema", region_metadata.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = create_test_record_batch();
|
||||
|
||||
// Test writing a record batch
|
||||
writer.write_record_batch(&batch, None).await.unwrap();
|
||||
|
||||
// Test finishing the writer
|
||||
let file_meta = writer.finish().await.unwrap();
|
||||
|
||||
assert_eq!(file_meta.region_id, RegionId::new(1024, 0));
|
||||
assert_eq!(file_meta.level, 0);
|
||||
assert_eq!(file_meta.num_rows, 3);
|
||||
assert_eq!(file_meta.num_row_groups, 1);
|
||||
assert!(file_meta.file_size > 0);
|
||||
|
||||
assert_eq!(file_meta.time_range.0, Timestamp::new_millisecond(1000));
|
||||
assert_eq!(file_meta.time_range.1, Timestamp::new_millisecond(3000));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parquet_writer_multiple_batches() {
|
||||
let object_store = ObjectStore::from_config(MemoryConfig::default())
|
||||
.unwrap()
|
||||
.finish();
|
||||
let factory = AccessLayerFactory { object_store };
|
||||
|
||||
let region_metadata = create_test_region_metadata();
|
||||
let mut writer = factory
|
||||
.create_sst_writer("test_catalog", "test_schema", region_metadata.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Write first batch
|
||||
let batch1 = create_test_record_batch();
|
||||
writer.write_record_batch(&batch1, None).await.unwrap();
|
||||
|
||||
// Create second batch with different timestamp range
|
||||
let schema = region_metadata.schema.arrow_schema().clone();
|
||||
let timestamp_array = TimestampMillisecondArray::from(vec![4000, 5000]);
|
||||
let value_array = Float64Array::from(vec![Some(40.0), Some(50.0)]);
|
||||
let tag_array = StringArray::from(vec![Some("d"), Some("e")]);
|
||||
|
||||
let batch2 = RecordBatch::try_new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(timestamp_array),
|
||||
Arc::new(value_array),
|
||||
Arc::new(tag_array),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
writer.write_record_batch(&batch2, None).await.unwrap();
|
||||
|
||||
let file_meta = writer.finish().await.unwrap();
|
||||
|
||||
// Should have combined rows from both batches
|
||||
assert_eq!(file_meta.num_rows, 5);
|
||||
assert_eq!(file_meta.time_range.0, Timestamp::new_millisecond(1000));
|
||||
assert_eq!(file_meta.time_range.1, Timestamp::new_millisecond(5000));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parquet_writer_with_provided_timestamp_range() {
|
||||
let object_store = ObjectStore::from_config(MemoryConfig::default())
|
||||
.unwrap()
|
||||
.finish();
|
||||
let factory = AccessLayerFactory { object_store };
|
||||
|
||||
let region_metadata = create_test_region_metadata();
|
||||
let mut writer = factory
|
||||
.create_sst_writer("test_catalog", "test_schema", region_metadata.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = create_test_record_batch();
|
||||
|
||||
// Provide explicit timestamp range that differs from actual data
|
||||
let provided_range = (500, 6000);
|
||||
writer
|
||||
.write_record_batch(&batch, Some(provided_range))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let file_meta = writer.finish().await.unwrap();
|
||||
|
||||
assert_eq!(file_meta.time_range.0, Timestamp::new_millisecond(500));
|
||||
assert_eq!(file_meta.time_range.1, Timestamp::new_millisecond(6000));
|
||||
}
|
||||
|
||||
/// A provided range is returned as-is, without looking at the batch contents.
#[test]
fn test_get_or_calculate_timestamp_range_with_provided_range() {
    let region_metadata = create_test_region_metadata();
    let batch = create_test_record_batch();

    let provided_range = Some((100, 200));
    let result = get_or_calculate_timestamp_range(provided_range, &batch, &region_metadata);

    assert!(result.is_ok());
    assert_eq!(result.unwrap(), (100, 200));
}
|
||||
|
||||
/// Without a provided range, the range is computed from the batch's time index column.
#[test]
fn test_get_or_calculate_timestamp_range_calculated() {
    let region_metadata = create_test_region_metadata();
    let batch = create_test_record_batch();

    let result = get_or_calculate_timestamp_range(None, &batch, &region_metadata);

    assert!(result.is_ok());
    assert_eq!(result.unwrap(), (1000, 3000));
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user