Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-04 12:22:55 +00:00

Compare commits: v0.10.0-ni ... transform- (55 commits)
Commits:

6247de2d50, a2eb46132f, 3f9bf48161, 9bd2e006b5, 031421ca91,
999f3a40c2, 50d28e0a00, 770a850437, 65e53b5bc4, 9a6c7aa4d6,
4f446b95d8, 9ad4200f55, 53d456651f, f11c5acb0f, 8536a1ec6e,
fce8c968da, 98a6ac973c, 8f79e421c3, e8b326382f, 56781e7fbc,
7d342b3d95, a22667bf3c, 29b9b7db0c, a66909a562, 8137b8ff3d,
7c5cd2922a, a1d0dcf2c3, c391171f99, f44862aaac, 8bf795d88c,
3bbf4e0232, 83da3950da, 957b5effd5, f59e28006a, 3e5bbdf71e,
b8ac19c480, 92b274a856, 6bdac25f0a, a9f3c4b17c, e003eaab36,
6e590da412, ff5fa40b85, d4aa4159d4, 960f6d821b, 9c5d044238,
70c354eed6, 23bf663d58, 817648eac5, 03b29439e2, 712f4ca0ef,
60bacff57e, 6208772ba4, 67184c0498, 1dd908fdf7, 8179b4798e
Cargo.lock (generated, 177 changed lines)
@@ -2070,8 +2070,6 @@ dependencies = [
 "datafusion",
 "datatypes",
 "derive_more",
 "geo",
 "geo-types",
 "geohash",
 "h3o",
 "jsonb",
@@ -2090,7 +2088,6 @@ dependencies = [
 "store-api",
 "table",
 "tokio",
 "wkt",
]

[[package]]
@@ -3709,16 +3706,6 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"

[[package]]
name = "earcutr"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01"
dependencies = [
 "itertools 0.11.0",
 "num-traits",
]

[[package]]
name = "either"
version = "1.13.0"
@@ -4027,12 +4014,6 @@ dependencies = [
 "libc",
]

[[package]]
name = "float_next_after"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"

[[package]]
name = "flow"
version = "0.9.5"
@@ -4457,24 +4438,6 @@ dependencies = [
 "version_check",
]

[[package]]
name = "geo"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81d088357a9cc60cec8253b3578f6834b4a3aa20edb55f5d1c030c36d8143f11"
dependencies = [
 "earcutr",
 "float_next_after",
 "geo-types",
 "geographiclib-rs",
 "i_overlay",
 "log",
 "num-traits",
 "robust",
 "rstar",
 "spade",
]

[[package]]
name = "geo-types"
version = "0.7.13"
@@ -4483,19 +4446,9 @@ checksum = "9ff16065e5720f376fbced200a5ae0f47ace85fd70b7e54269790281353b6d61"
dependencies = [
 "approx 0.5.1",
 "num-traits",
 "rstar",
 "serde",
]

[[package]]
name = "geographiclib-rs"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e5ed84f8089c70234b0a8e0aedb6dc733671612ddc0d37c6066052f9781960"
dependencies = [
 "libm",
]

[[package]]
name = "geohash"
version = "0.13.1"
@@ -4644,15 +4597,6 @@ dependencies = [
 "num-traits",
]

[[package]]
name = "hash32"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606"
dependencies = [
 "byteorder",
]

[[package]]
name = "hashbrown"
version = "0.12.3"
@@ -4748,16 +4692,6 @@ dependencies = [
 "http 1.1.0",
]

[[package]]
name = "heapless"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad"
dependencies = [
 "hash32",
 "stable_deref_trait",
]

[[package]]
name = "heck"
version = "0.4.1"
@@ -5183,50 +5117,6 @@ dependencies = [
 "tracing",
]

[[package]]
name = "i_float"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5fe043aae28ce70bd2f78b2f5f82a3654d63607c82594da4dabb8b6cb81f2b2"
dependencies = [
 "serde",
]

[[package]]
name = "i_key_sort"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd"

[[package]]
name = "i_overlay"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a469f68cb8a7cef375b2b0f581faf5859b4b50600438c00d46b71acc25ebbd0c"
dependencies = [
 "i_float",
 "i_key_sort",
 "i_shape",
 "i_tree",
 "rayon",
]

[[package]]
name = "i_shape"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b44852d57a991c7dedaf76c55bc44f677f547ff899a430d29e13efd6133d7d8"
dependencies = [
 "i_float",
 "serde",
]

[[package]]
name = "i_tree"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139"

[[package]]
name = "iana-time-zone"
version = "0.1.61"
@@ -5633,8 +5523,8 @@ dependencies = [

[[package]]
name = "jsonb"
version = "0.4.3"
source = "git+https://github.com/CookiePieWw/jsonb.git?rev=ed2d4f8575419ed434a4ae09dee18ca900915d9c#ed2d4f8575419ed434a4ae09dee18ca900915d9c"
version = "0.4.1"
source = "git+https://github.com/databendlabs/jsonb.git?rev=46ad50fc71cf75afbf98eec455f7892a6387c1fc#46ad50fc71cf75afbf98eec455f7892a6387c1fc"
dependencies = [
 "byteorder",
 "fast-float",
@@ -5661,19 +5551,6 @@ dependencies = [
 "thiserror",
]

[[package]]
name = "jsonpath-rust"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
dependencies = [
 "pest",
 "pest_derive",
 "regex",
 "serde_json",
 "thiserror",
]

[[package]]
name = "jsonptr"
version = "0.4.7"
@@ -5784,7 +5661,7 @@ dependencies = [
 "hyper-rustls",
 "hyper-timeout 0.5.1",
 "hyper-util",
 "jsonpath-rust 0.5.1",
 "jsonpath-rust",
 "k8s-openapi",
 "kube-core",
 "pem 3.0.4",
@@ -6468,7 +6345,6 @@ dependencies = [
 "common-telemetry",
 "datatypes",
 "futures",
 "futures-util",
 "humantime-serde",
 "meta-srv",
 "rand",
@@ -6680,7 +6556,6 @@ dependencies = [
 "common-error",
 "common-function",
 "common-macro",
 "common-meta",
 "common-procedure-test",
 "common-query",
 "common-recordbatch",
@@ -7721,7 +7596,6 @@ dependencies = [
 "file-engine",
 "futures",
 "futures-util",
 "jsonb",
 "lazy_static",
 "meta-client",
 "meter-core",
@@ -8324,7 +8198,6 @@ dependencies = [
 "greptime-proto",
 "itertools 0.10.5",
 "jsonb",
 "jsonpath-rust 0.7.3",
 "lazy_static",
 "moka",
 "once_cell",
@@ -8523,7 +8396,8 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "pprof"
version = "0.13.0"
source = "git+https://github.com/GreptimeTeam/pprof-rs?rev=1bd1e21#1bd1e210d8626da3d1e5aff976e6feee994f576d"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb"
dependencies = [
 "backtrace",
 "cfg-if",
@@ -9694,12 +9568,6 @@ dependencies = [
 "syn 1.0.109",
]

[[package]]
name = "robust"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30"

[[package]]
name = "ron"
version = "0.7.1"
@@ -9794,17 +9662,6 @@ dependencies = [
 "zstd 0.13.2",
]

[[package]]
name = "rstar"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "133315eb94c7b1e8d0cb097e5a710d850263372fd028fff18969de708afc7008"
dependencies = [
 "heapless",
 "num-traits",
 "smallvec",
]

[[package]]
name = "rstest"
version = "0.21.0"
@@ -11297,18 +11154,6 @@ dependencies = [
 "windows-sys 0.52.0",
]

[[package]]
name = "spade"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93f5ef1f863aca7d1d7dda7ccfc36a0a4279bd6d3c375176e5e0712e25cb4889"
dependencies = [
 "hashbrown 0.14.5",
 "num-traits",
 "robust",
 "smallvec",
]

[[package]]
name = "sparsevec"
version = "0.2.0"
@@ -14302,18 +14147,6 @@ dependencies = [
 "winapi",
]

[[package]]
name = "wkt"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d"
dependencies = [
 "geo-types",
 "log",
 "num-traits",
 "thiserror",
]

[[package]]
name = "wyz"
version = "0.5.1"
Cargo.toml (12 changed lines)
@@ -125,7 +125,7 @@ greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", r
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "ed2d4f8575419ed434a4ae09dee18ca900915d9c", default-features = false }
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
@@ -261,21 +261,21 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
# Apply a fix for pprof for unaligned pointer access
pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }

[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"

[profile.release]
debug = 1
# debug = 1
split-debuginfo = "off"

[profile.nightly]
inherits = "release"
strip = "debuginfo"
split-debuginfo = "off"
# strip = "debuginfo"
lto = "thin"
debug = false
# debug = false
incremental = false

[profile.ci]
README.md (16 changed lines)
@@ -6,7 +6,7 @@
  </picture>
</p>

<h2 align="center">Unified & Cost-Effective Time Series Database for Metrics, Logs, and Events</h2>
<h2 align="center">Unified Time Series Database for Metrics, Logs, and Events</h2>

<div align="center">
<h3 align="center">
@@ -48,21 +48,9 @@
  </a>
</div>

- [Introduction](#introduction)
- [**Features: Why GreptimeDB**](#why-greptimedb)
- [Architecture](https://docs.greptime.com/contributor-guide/overview/#architecture)
- [Try it for free](#try-greptimedb)
- [Getting Started](#getting-started)
- [Project Status](#project-status)
- [Join the community](#community)
- [Contributing](#contributing)
- [Extension](#extension )
- [License](#license)
- [Acknowledgement](#acknowledgement)

## Introduction

**GreptimeDB** is an open-source unified & cost-effective time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
**GreptimeDB** is an open-source unified time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at any scale.

## Why GreptimeDB

@@ -93,8 +93,8 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
@@ -126,9 +126,9 @@
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
@@ -416,8 +416,8 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
@@ -449,9 +449,9 @@
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
@@ -294,14 +294,14 @@ data_home = "/tmp/greptimedb/"
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"

## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
## Cache configuration for object storage such as 'S3' etc.
## The local file cache directory.
## @toml2docs:none-default
cache_path = "/path/local_cache"

## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## The local file cache capacity in bytes.
## @toml2docs:none-default
cache_capacity = "1GiB"
cache_capacity = "256MB"

## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -459,14 +459,14 @@ auto_flush_interval = "1h"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"

## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false

## File system path for write cache, defaults to `{data_home}/write_cache`.
experimental_write_cache_path = ""

## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
experimental_write_cache_size = "1GiB"
## Capacity for write cache.
experimental_write_cache_size = "512MB"

## TTL for write cache.
## @toml2docs:none-default

@@ -332,14 +332,14 @@ data_home = "/tmp/greptimedb/"
## - `Oss`: the data is stored in the Aliyun OSS.
type = "File"

## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
## Cache configuration for object storage such as 'S3' etc.
## The local file cache directory.
## @toml2docs:none-default
cache_path = "/path/local_cache"

## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
## The local file cache capacity in bytes.
## @toml2docs:none-default
cache_capacity = "1GiB"
cache_capacity = "256MB"

## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -497,14 +497,14 @@ auto_flush_interval = "1h"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"

## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false

## File system path for write cache, defaults to `{data_home}/write_cache`.
experimental_write_cache_path = ""

## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
experimental_write_cache_size = "1GiB"
## Capacity for write cache.
experimental_write_cache_size = "512MB"

## TTL for write cache.
## @toml2docs:none-default
@@ -4,13 +4,13 @@

example:
```bash
curl --data "trace,flow=debug" 127.0.0.1:4000/debug/log_level
curl --data "trace;flow=debug" 127.0.0.1:4000/debug/log_level
```
And database will reply with something like:
```bash
Log Level changed from Some("info") to "trace,flow=debug"%
Log Level changed from Some("info") to "trace;flow=debug"%
```

The data is a string in the format of `global_level,module1=level1,module2=level2,...` that follow the same rule of `RUST_LOG`.
The data is a string in the format of `global_level;module1=level1;module2=level2;...` that follow the same rule of `RUST_LOG`.

The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off`(case insensitive).
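As an aside, here is a minimal sketch of how such a `;`-separated directive string could be turned into a `tracing-subscriber` filter. It is an illustration only, not the actual handler behind `/debug/log_level`, and it assumes `tracing-subscriber` with its `env-filter` feature enabled:

```rust
use tracing_subscriber::EnvFilter;

/// Convert the `;`-separated body accepted by the endpoint into the
/// comma-separated directive syntax that `EnvFilter` (and `RUST_LOG`) expect.
fn parse_log_level_body(body: &str) -> Option<EnvFilter> {
    let directives = body.split(';').collect::<Vec<_>>().join(",");
    EnvFilter::try_new(directives).ok()
}

fn main() {
    let filter = parse_log_level_body("trace;flow=debug").expect("invalid log level directives");
    // Prints the normalized filter, e.g. `trace,flow=debug`.
    println!("new filter: {filter}");
}
```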
@@ -178,12 +178,6 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Partition manager not found, it's not expected."))]
    PartitionManagerNotFound {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Failed to find table partitions"))]
    FindPartitions { source: partition::error::Error },

@@ -307,7 +301,6 @@ impl ErrorExt for Error {
            | Error::CastManager { .. }
            | Error::Json { .. }
            | Error::GetInformationExtension { .. }
            | Error::PartitionManagerNotFound { .. }
            | Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,

            Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
@@ -34,14 +34,15 @@ use datatypes::vectors::{
};
use futures::{StreamExt, TryStreamExt};
use partition::manager::PartitionInfo;
use partition::partition::PartitionDef;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::{TableInfo, TableType};

use super::PARTITIONS;
use crate::error::{
    CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
    Result, UpgradeWeakCatalogManagerRefSnafu,
    CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
    UpgradeWeakCatalogManagerRefSnafu,
};
use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates};
@@ -235,8 +236,7 @@ impl InformationSchemaPartitionsBuilder {
        let partition_manager = catalog_manager
            .as_any()
            .downcast_ref::<KvBackendCatalogManager>()
            .map(|catalog_manager| catalog_manager.partition_manager())
            .context(PartitionManagerNotFoundSnafu)?;
            .map(|catalog_manager| catalog_manager.partition_manager());

        let predicates = Predicates::from_scan_request(&request);

@@ -262,10 +262,27 @@ impl InformationSchemaPartitionsBuilder {
        let table_ids: Vec<TableId> =
            table_infos.iter().map(|info| info.ident.table_id).collect();

        let mut table_partitions = partition_manager
            .batch_find_table_partitions(&table_ids)
            .await
            .context(FindPartitionsSnafu)?;
        let mut table_partitions = if let Some(partition_manager) = &partition_manager {
            partition_manager
                .batch_find_table_partitions(&table_ids)
                .await
                .context(FindPartitionsSnafu)?
        } else {
            // Current node must be a standalone instance, contains only one partition by default.
            // TODO(dennis): change it when we support multi-regions for standalone.
            table_ids
                .into_iter()
                .map(|table_id| {
                    (
                        table_id,
                        vec![PartitionInfo {
                            id: RegionId::new(table_id, 0),
                            partition: PartitionDef::new(vec![], vec![]),
                        }],
                    )
                })
                .collect()
        };

        for table_info in table_infos {
            let partitions = table_partitions
@@ -12,16 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashSet;
use std::sync::{Arc, Weak};

use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::{INFORMATION_SCHEMA_TABLES_TABLE_ID, MITO_ENGINE};
use common_catalog::consts::INFORMATION_SCHEMA_TABLES_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::datanode::RegionStat;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::error;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
@@ -34,7 +31,7 @@ use datatypes::vectors::{
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId};
use store_api::storage::{ScanRequest, TableId};
use table::metadata::{TableInfo, TableType};

use super::TABLES;
@@ -42,7 +39,6 @@ use crate::error::{
    CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;

pub const TABLE_CATALOG: &str = "table_catalog";
@@ -238,50 +234,17 @@ impl InformationSchemaTablesBuilder {
            .context(UpgradeWeakCatalogManagerRefSnafu)?;
        let predicates = Predicates::from_scan_request(&request);

        let information_extension = utils::information_extension(&self.catalog_manager)?;

        // TODO(dennis): `region_stats` API is not stable in distributed cluster because of network issue etc.
        // But we don't want the statements such as `show tables` fail,
        // so using `unwrap_or_else` here instead of `?` operator.
        let region_stats = information_extension
            .region_stats()
            .await
            .map_err(|e| {
                error!(e; "Failed to call region_stats");
                e
            })
            .unwrap_or_else(|_| vec![]);

        for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
            let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);

            while let Some(table) = stream.try_next().await? {
                let table_info = table.table_info();

                // TODO(dennis): make it working for metric engine
                let table_region_stats = if table_info.meta.engine == MITO_ENGINE {
                    let region_ids = table_info
                        .meta
                        .region_numbers
                        .iter()
                        .map(|n| RegionId::new(table_info.ident.table_id, *n))
                        .collect::<HashSet<_>>();

                    region_stats
                        .iter()
                        .filter(|stat| region_ids.contains(&stat.id))
                        .collect::<Vec<_>>()
                } else {
                    vec![]
                };

                self.add_table(
                    &predicates,
                    &catalog_name,
                    &schema_name,
                    table_info,
                    table.table_type(),
                    &table_region_stats,
                );
            }
        }
@@ -297,7 +260,6 @@ impl InformationSchemaTablesBuilder {
        schema_name: &str,
        table_info: Arc<TableInfo>,
        table_type: TableType,
        region_stats: &[&RegionStat],
    ) {
        let table_name = table_info.name.as_ref();
        let table_id = table_info.table_id();
@@ -311,9 +273,7 @@ impl InformationSchemaTablesBuilder {

        let row = [
            (TABLE_CATALOG, &Value::from(catalog_name)),
            (TABLE_ID, &Value::from(table_id)),
            (TABLE_SCHEMA, &Value::from(schema_name)),
            (ENGINE, &Value::from(engine)),
            (TABLE_NAME, &Value::from(table_name)),
            (TABLE_TYPE, &Value::from(table_type_text)),
        ];
@@ -327,39 +287,21 @@ impl InformationSchemaTablesBuilder {
        self.table_names.push(Some(table_name));
        self.table_types.push(Some(table_type_text));
        self.table_ids.push(Some(table_id));

        let data_length = region_stats.iter().map(|stat| stat.sst_size).sum();
        let table_rows = region_stats.iter().map(|stat| stat.num_rows).sum();
        let index_length = region_stats.iter().map(|stat| stat.index_size).sum();

        // It's not precise, but it is acceptable for long-term data storage.
        let avg_row_length = if table_rows > 0 {
            let total_data_length = data_length
                + region_stats
                    .iter()
                    .map(|stat| stat.memtable_size)
                    .sum::<u64>();

            total_data_length / table_rows
        } else {
            0
        };

        self.data_length.push(Some(data_length));
        self.index_length.push(Some(index_length));
        self.table_rows.push(Some(table_rows));
        self.avg_row_length.push(Some(avg_row_length));

        // TODO(sunng87): use real data for these fields
        self.data_length.push(Some(0));
        self.max_data_length.push(Some(0));
        self.checksum.push(Some(0));
        self.index_length.push(Some(0));
        self.avg_row_length.push(Some(0));
        self.max_index_length.push(Some(0));
        self.checksum.push(Some(0));
        self.table_rows.push(Some(0));
        self.data_free.push(Some(0));
        self.auto_increment.push(Some(0));
        self.row_format.push(Some("Fixed"));
        self.table_collation.push(Some("utf8_bin"));
        self.update_time.push(None);
        self.check_time.push(None);

        // use mariadb default table version number here
        self.version.push(Some(11));
        self.table_comment.push(table_info.desc.as_deref());
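For reference, a self-contained sketch of the per-table statistics aggregation introduced in the hunks above. The `Stat` struct here is only a stand-in for the real `RegionStat`; the field names and the `avg_row_length` formula follow the diff:

```rust
// Stand-in for common_meta's RegionStat, modeling only the fields used above.
struct Stat {
    sst_size: u64,
    memtable_size: u64,
    index_size: u64,
    num_rows: u64,
}

/// Returns (data_length, index_length, table_rows, avg_row_length).
fn table_stats(region_stats: &[Stat]) -> (u64, u64, u64, u64) {
    let data_length: u64 = region_stats.iter().map(|s| s.sst_size).sum();
    let index_length: u64 = region_stats.iter().map(|s| s.index_size).sum();
    let table_rows: u64 = region_stats.iter().map(|s| s.num_rows).sum();
    // As in the diff: approximate bytes per row over SST plus memtable data.
    let avg_row_length = if table_rows > 0 {
        let total = data_length + region_stats.iter().map(|s| s.memtable_size).sum::<u64>();
        total / table_rows
    } else {
        0
    };
    (data_length, index_length, table_rows, avg_row_length)
}

fn main() {
    let regions = [
        Stat { sst_size: 4_096, memtable_size: 1_024, index_size: 256, num_rows: 100 },
        Stat { sst_size: 8_192, memtable_size: 0, index_size: 512, num_rows: 300 },
    ];
    // Prints (12288, 768, 400, 33): (4096 + 8192 + 1024) / 400 = 33 with integer division.
    println!("{:?}", table_stats(&regions));
}
```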
@@ -9,7 +9,7 @@ workspace = true

[features]
default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
geo = ["geohash", "h3o", "s2"]

[dependencies]
api.workspace = true
@@ -28,8 +28,6 @@ common-version.workspace = true
datafusion.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
jsonb.workspace = true
@@ -46,7 +44,6 @@ sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
wkt = { version = "0.11", optional = true }

[dev-dependencies]
ron = "0.7"
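For readers unfamiliar with how a Cargo feature gate such as the `geo = [...]` feature above is consumed in source, here is a generic, hypothetical sketch (not the exact gating used in `common-function`). Code behind `#[cfg(feature = "geo")]` only compiles when the feature is enabled in a crate that declares it:

```rust
// Hypothetical module gated behind a `geo` Cargo feature.
#[cfg(feature = "geo")]
mod geo_functions {
    pub fn describe() -> &'static str {
        "geo functions compiled in"
    }
}

fn main() {
    #[cfg(feature = "geo")]
    println!("{}", geo_functions::describe());

    #[cfg(not(feature = "geo"))]
    println!("built without the `geo` feature");
}
```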
@@ -17,10 +17,7 @@ pub(crate) mod encoding;
mod geohash;
mod h3;
mod helpers;
mod measure;
mod relation;
mod s2;
mod wkt;

use crate::function_registry::FunctionRegistry;

@@ -51,7 +48,6 @@ impl GeoFunctions {
        registry.register(Arc::new(h3::H3CellToChildrenSize));
        registry.register(Arc::new(h3::H3CellToChildPos));
        registry.register(Arc::new(h3::H3ChildPosToCell));
        registry.register(Arc::new(h3::H3CellContains));

        // h3 grid traversal
        registry.register(Arc::new(h3::H3GridDisk));
@@ -59,27 +55,10 @@ impl GeoFunctions {
        registry.register(Arc::new(h3::H3GridDistance));
        registry.register(Arc::new(h3::H3GridPathCells));

        // h3 measurement
        registry.register(Arc::new(h3::H3CellDistanceSphereKm));
        registry.register(Arc::new(h3::H3CellDistanceEuclideanDegree));

        // s2
        registry.register(Arc::new(s2::S2LatLngToCell));
        registry.register(Arc::new(s2::S2CellLevel));
        registry.register(Arc::new(s2::S2CellToToken));
        registry.register(Arc::new(s2::S2CellParent));

        // spatial data type
        registry.register(Arc::new(wkt::LatLngToPointWkt));

        // spatial relation
        registry.register(Arc::new(relation::STContains));
        registry.register(Arc::new(relation::STWithin));
        registry.register(Arc::new(relation::STIntersects));

        // spatial measure
        registry.register(Arc::new(measure::STDistance));
        registry.register(Arc::new(measure::STDistanceSphere));
        registry.register(Arc::new(measure::STArea));
    }
}
@@ -23,8 +23,8 @@ use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
use datatypes::value::{ListValue, Value};
use datatypes::vectors::{
    BooleanVectorBuilder, Float64VectorBuilder, Int32VectorBuilder, ListVectorBuilder,
    MutableVector, StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
    BooleanVectorBuilder, Int32VectorBuilder, ListVectorBuilder, MutableVector,
    StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
};
use derive_more::Display;
use h3o::{CellIndex, LatLng, Resolution};
@@ -38,7 +38,6 @@ static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
    vec![
        ConcreteDataType::int64_datatype(),
        ConcreteDataType::uint64_datatype(),
        ConcreteDataType::string_datatype(),
    ]
});

@@ -953,181 +952,6 @@ impl Function for H3GridPathCells {
    }
}

/// Tests if cells contains given cells
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellContains;

impl Function for H3CellContains {
    fn name(&self) -> &str {
        "h3_cells_contains"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        let multi_cell_types = vec![
            ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()),
            ConcreteDataType::list_datatype(ConcreteDataType::uint64_datatype()),
            ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
            ConcreteDataType::string_datatype(),
        ];

        let mut signatures = Vec::with_capacity(multi_cell_types.len() * CELL_TYPES.len());
        for multi_cell_type in &multi_cell_types {
            for cell_type in CELL_TYPES.as_slice() {
                signatures.push(TypeSignature::Exact(vec![
                    multi_cell_type.clone(),
                    cell_type.clone(),
                ]));
            }
        }

        Signature::one_of(signatures, Volatility::Stable)
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let cells_vec = &columns[0];
        let cell_this_vec = &columns[1];

        let size = cell_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let mut result = None;
            if let (cells, Some(cell_this)) = (
                cells_from_value(cells_vec.get(i))?,
                cell_from_value(cell_this_vec.get(i))?,
            ) {
                result = Some(false);

                for cell_that in cells.iter() {
                    // get cell resolution, and find cell_this's parent at
                    // this solution, test if cell_that equals the parent
                    let resolution = cell_that.resolution();
                    if let Some(cell_this_parent) = cell_this.parent(resolution) {
                        if cell_this_parent == *cell_that {
                            result = Some(true);
                            break;
                        }
                    }
                }
            }

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Get WGS84 great circle distance of two cell centroid
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceSphereKm;

impl Function for H3CellDistanceSphereKm {
    fn name(&self) -> &str {
        "h3_distance_sphere_km"
    }
    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        signature_of_double_cells()
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let cell_this_vec = &columns[0];
        let cell_that_vec = &columns[1];
        let size = cell_this_vec.len();

        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let result = match (
                cell_from_value(cell_this_vec.get(i))?,
                cell_from_value(cell_that_vec.get(i))?,
            ) {
                (Some(cell_this), Some(cell_that)) => {
                    let centroid_this = LatLng::from(cell_this);
                    let centroid_that = LatLng::from(cell_that);

                    Some(centroid_this.distance_km(centroid_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Get Euclidean distance of two cell centroid
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct H3CellDistanceEuclideanDegree;

impl H3CellDistanceEuclideanDegree {
    fn distance(centroid_this: LatLng, centroid_that: LatLng) -> f64 {
        ((centroid_this.lat() - centroid_that.lat()).powi(2)
            + (centroid_this.lng() - centroid_that.lng()).powi(2))
        .sqrt()
    }
}

impl Function for H3CellDistanceEuclideanDegree {
    fn name(&self) -> &str {
        "h3_distance_degree"
    }
    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        signature_of_double_cells()
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let cell_this_vec = &columns[0];
        let cell_that_vec = &columns[1];
        let size = cell_this_vec.len();

        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let result = match (
                cell_from_value(cell_this_vec.get(i))?,
                cell_from_value(cell_that_vec.get(i))?,
            ) {
                (Some(cell_this), Some(cell_that)) => {
                    let centroid_this = LatLng::from(cell_this);
                    let centroid_that = LatLng::from(cell_that);

                    let dist = Self::distance(centroid_this, centroid_that);
                    Some(dist)
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

fn value_to_resolution(v: Value) -> Result<Resolution> {
    let r = match v {
        Value::Int8(v) => v as u8,
@@ -1249,126 +1073,7 @@ fn cell_from_value(v: Value) -> Result<Option<CellIndex>> {
            })
            .context(error::ExecuteSnafu)?,
        ),
        Value::String(s) => Some(
            CellIndex::from_str(s.as_utf8())
                .map_err(|e| {
                    BoxedError::new(PlainError::new(
                        format!("H3 error: {}", e),
                        StatusCode::EngineExecuteQuery,
                    ))
                })
                .context(error::ExecuteSnafu)?,
        ),
        _ => None,
    };
    Ok(cell)
}

/// extract cell array from all possible types including:
/// - int64 list
/// - uint64 list
/// - string list
/// - comma-separated string
fn cells_from_value(v: Value) -> Result<Vec<CellIndex>> {
    match v {
        Value::List(list) => match list.datatype() {
            ConcreteDataType::Int64(_) => list
                .items()
                .iter()
                .map(|v| {
                    if let Value::Int64(v) = v {
                        CellIndex::try_from(*v as u64)
                            .map_err(|e| {
                                BoxedError::new(PlainError::new(
                                    format!("H3 error: {}", e),
                                    StatusCode::EngineExecuteQuery,
                                ))
                            })
                            .context(error::ExecuteSnafu)
                    } else {
                        Err(BoxedError::new(PlainError::new(
                            "Invalid data type in array".to_string(),
                            StatusCode::EngineExecuteQuery,
                        )))
                        .context(error::ExecuteSnafu)
                    }
                })
                .collect::<Result<Vec<CellIndex>>>(),
            ConcreteDataType::UInt64(_) => list
                .items()
                .iter()
                .map(|v| {
                    if let Value::UInt64(v) = v {
                        CellIndex::try_from(*v)
                            .map_err(|e| {
                                BoxedError::new(PlainError::new(
                                    format!("H3 error: {}", e),
                                    StatusCode::EngineExecuteQuery,
                                ))
                            })
                            .context(error::ExecuteSnafu)
                    } else {
                        Err(BoxedError::new(PlainError::new(
                            "Invalid data type in array".to_string(),
                            StatusCode::EngineExecuteQuery,
                        )))
                        .context(error::ExecuteSnafu)
                    }
                })
                .collect::<Result<Vec<CellIndex>>>(),
            ConcreteDataType::String(_) => list
                .items()
                .iter()
                .map(|v| {
                    if let Value::String(v) = v {
                        CellIndex::from_str(v.as_utf8().trim())
                            .map_err(|e| {
                                BoxedError::new(PlainError::new(
                                    format!("H3 error: {}", e),
                                    StatusCode::EngineExecuteQuery,
                                ))
                            })
                            .context(error::ExecuteSnafu)
                    } else {
                        Err(BoxedError::new(PlainError::new(
                            "Invalid data type in array".to_string(),
                            StatusCode::EngineExecuteQuery,
                        )))
                        .context(error::ExecuteSnafu)
                    }
                })
                .collect::<Result<Vec<CellIndex>>>(),
            _ => Ok(vec![]),
        },
        Value::String(csv) => {
            let str_seq = csv.as_utf8().split(',');
            str_seq
                .map(|v| {
                    CellIndex::from_str(v.trim())
                        .map_err(|e| {
                            BoxedError::new(PlainError::new(
                                format!("H3 error: {}", e),
                                StatusCode::EngineExecuteQuery,
                            ))
                        })
                        .context(error::ExecuteSnafu)
                })
                .collect::<Result<Vec<CellIndex>>>()
        }
        _ => Ok(vec![]),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_h3_euclidean_distance() {
        let point_this = LatLng::new(42.3521, -72.1235).expect("incorrect lat lng");
        let point_that = LatLng::new(42.45, -72.1260).expect("incorrect lat lng");

        let dist = H3CellDistanceEuclideanDegree::distance(point_this, point_that);
        assert_eq!(dist, 0.09793191512474639);
    }
}
@@ -1,195 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{Float64VectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::line_measures::metric_spaces::Euclidean;
use geo::{Area, Distance, Haversine};
use geo_types::Geometry;
use snafu::ResultExt;

use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};

/// Return WGS84(SRID: 4326) euclidean distance between two geometry object, in degree
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistance;

impl Function for STDistance {
    fn name(&self) -> &str {
        "st_distance"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(Euclidean::distance(&geom_this, &geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Return great circle distance between two geometry object, in meters
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistanceSphere;

impl Function for STDistanceSphere {
    fn name(&self) -> &str {
        "st_distance_sphere_m"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    match (geom_this, geom_that) {
                        (Geometry::Point(this), Geometry::Point(that)) => {
                            Some(Haversine::distance(this, that))
                        }
                        _ => {
                            Err(BoxedError::new(PlainError::new(
                                "Great circle distance between non-point objects are not supported for now.".to_string(),
                                StatusCode::Unsupported,
                            ))).context(error::ExecuteSnafu)?
                        }
                    }
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Return area of given geometry object
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STArea;

impl Function for STArea {
    fn name(&self) -> &str {
        "st_area"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 1);

        let wkt_vec = &columns[0];

        let size = wkt_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt = wkt_vec.get(i).as_string();

            let result = if let Some(wkt) = wkt {
                let geom = parse_wkt(&wkt)?;
                Some(geom.unsigned_area())
            } else {
                None
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}
@@ -1,190 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_query::error::Result;
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within;

use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};

/// Test if spatial relationship: contains
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STContains;

impl Function for STContains {
    fn name(&self) -> &str {
        "st_contains"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.contains(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Test if spatial relationship: within
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STWithin;

impl Function for STWithin {
    fn name(&self) -> &str {
        "st_within"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.is_within(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Test if spatial relationship: within
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STIntersects;

impl Function for STIntersects {
    fn name(&self) -> &str {
        "st_intersects"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.intersects(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}
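The three removed functions above differ only in the predicate they call on the parsed geometries: `contains`, `is_within`, and `intersects` from the geo crate. A small self-contained sketch of those predicates, using the same import paths as the removed file (the sample WKT shapes are invented):

```rust
use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within;
use geo_types::Geometry;
use wkt::TryFromWkt;

// Parse helper standing in for parse_wkt above, panicking instead of
// returning a query error.
fn parse(s: &str) -> Geometry<f64> {
    Geometry::try_from_wkt_str(s).expect("valid WKT")
}

fn main() {
    let square = parse("POLYGON((0 0,2 0,2 2,0 2,0 0))");
    let inner_point = parse("POINT(1 1)");
    let crossing_line = parse("LINESTRING(-1 1,3 1)");

    // st_contains(square, point): the square contains the interior point.
    assert!(square.contains(&inner_point));
    // st_within(point, square): the point lies within the square.
    assert!(inner_point.is_within(&square));
    // st_intersects(square, line): the line crosses the square.
    assert!(square.intersects(&crossing_line));
}
```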
@@ -1,100 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
use derive_more::Display;
use geo_types::{Geometry, Point};
use once_cell::sync::Lazy;
use snafu::ResultExt;
use wkt::{ToWkt, TryFromWkt};

use super::helpers::{ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext};

static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
    vec![
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ]
});

/// Return WGS84(SRID: 4326) euclidean distance between two geometry object, in degree
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct LatLngToPointWkt;

impl Function for LatLngToPointWkt {
    fn name(&self) -> &str {
        "wkt_point_from_latlng"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    fn signature(&self) -> Signature {
        let mut signatures = Vec::new();
        for coord_type in COORDINATE_TYPES.as_slice() {
            signatures.push(TypeSignature::Exact(vec![
                // latitude
                coord_type.clone(),
                // longitude
                coord_type.clone(),
            ]));
        }
        Signature::one_of(signatures, Volatility::Stable)
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let lat_vec = &columns[0];
        let lng_vec = &columns[1];

        let size = lat_vec.len();
        let mut results = StringVectorBuilder::with_capacity(size);

        for i in 0..size {
            let lat = lat_vec.get(i).as_f64_lossy();
            let lng = lng_vec.get(i).as_f64_lossy();

            let result = match (lat, lng) {
                (Some(lat), Some(lng)) => Some(Point::new(lng, lat).wkt_string()),
                _ => None,
            };

            results.push(result.as_deref());
        }

        Ok(results.to_vector())
    }
}

pub(super) fn parse_wkt(s: &str) -> Result<Geometry> {
    Geometry::try_from_wkt_str(s)
        .map_err(|e| {
            BoxedError::new(PlainError::new(
                format!("Fail to parse WKT: {}", e),
                StatusCode::EngineExecuteQuery,
            ))
        })
        .context(error::ExecuteSnafu)
}
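Note the argument order in `wkt_point_from_latlng`: the function takes latitude then longitude, but the point is constructed as `Point::new(lng, lat)` because WKT coordinates are x/y (longitude first). A hedged sketch of just that conversion (sample coordinates are made up):

```rust
use geo_types::Point;
use wkt::ToWkt;

// Mirrors the per-row logic above: lat/lng in, WKT POINT text out.
fn point_wkt_from_latlng(lat: f64, lng: f64) -> String {
    // Swap into x = longitude, y = latitude before serializing.
    Point::new(lng, lat).wkt_string()
}

fn main() {
    let wkt = point_wkt_from_latlng(40.0, 116.0);
    // Longitude is serialized before latitude; exact number formatting depends on the wkt crate.
    println!("{wkt}"); // e.g. POINT(116 40)
    assert!(wkt.starts_with("POINT"));
}
```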
@@ -15,7 +15,7 @@
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
@@ -41,24 +41,10 @@ impl Function for JsonPathExistsFunction {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::one_of(
|
||||
Signature::exact(
|
||||
vec![
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -78,26 +64,25 @@ impl Function for JsonPathExistsFunction {
|
||||
let paths = &columns[1];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
match (jsons.data_type(), paths.data_type()) {
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
for i in 0..size {
|
||||
let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
|
||||
{
|
||||
let json = jsons.get_ref(i);
|
||||
let path = paths.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let path = path.as_string();
|
||||
let result = match (json, path) {
|
||||
(Ok(Some(json)), Ok(Some(path))) => {
|
||||
// Get `JsonPath`.
|
||||
let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
|
||||
{
|
||||
Ok(json_path) => json_path,
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json path: {:?}", path),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
jsonb::path_exists(json, json_path).ok()
|
||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||
match json_path {
|
||||
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
|
||||
Err(_) => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
@@ -105,12 +90,6 @@ impl Function for JsonPathExistsFunction {
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Any null args existence causes the result to be NULL.
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
@@ -135,8 +114,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, NullVector, StringVector};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -154,27 +133,9 @@ mod tests {
|
||||
|
||||
assert!(matches!(json_path_exists.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(valid_types),
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types ==
|
||||
vec![
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
],
|
||||
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
|
||||
));
|
||||
|
||||
let json_strings = [
|
||||
@@ -182,15 +143,9 @@ mod tests {
|
||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"[1, 2, 3]"#,
|
||||
r#"null"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"null"#,
|
||||
];
|
||||
let paths = vec![
|
||||
"$.a.b.c", "$.b", "$.c.a", ".d", "$[0]", "$.a", "null", "null",
|
||||
];
|
||||
let expected = [false, true, true, false, true, false, false, false];
|
||||
let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
|
||||
let results = [false, true, true, false];
|
||||
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
@@ -207,44 +162,11 @@ mod tests {
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
// Test for non-nulls.
|
||||
assert_eq!(8, vector.len());
|
||||
for (i, real) in expected.iter().enumerate() {
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, gt) in results.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
assert!(!result.is_null());
|
||||
let val = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(val, *real);
|
||||
let result = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(*gt, result);
|
||||
}
|
||||
|
||||
// Test for path error.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let illegal_path = StringVector::from_vec(vec!["$..a"]);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(illegal_path)];
|
||||
let err = json_path_exists.eval(FunctionContext::default(), &args);
|
||||
assert!(err.is_err());
|
||||
|
||||
// Test for nulls.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let null_json = NullVector::new(1);
|
||||
|
||||
let path = StringVector::from_vec(vec!["$.a"]);
|
||||
let null_path = NullVector::new(1);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(null_json), Arc::new(path)];
|
||||
let result1 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(null_path)];
|
||||
let result2 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result1.len(), 1);
|
||||
assert!(result1.get_ref(0).is_null());
|
||||
assert_eq!(result2.len(), 1);
|
||||
assert!(result2.get_ref(0).is_null());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,11 +14,10 @@
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::column_def::contains_fulltext;
|
||||
use api::v1::{
|
||||
AddColumn, AddColumns, Column, ColumnDataType, ColumnDataTypeExtension, ColumnDef,
|
||||
ColumnOptions, ColumnSchema, CreateTableExpr, JsonTypeExtension, SemanticType,
|
||||
ColumnOptions, ColumnSchema, CreateTableExpr, SemanticType,
|
||||
};
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -26,9 +25,8 @@ use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::error::{
|
||||
self, DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu,
|
||||
InvalidFulltextColumnTypeSnafu, MissingTimestampColumnSnafu, Result,
|
||||
UnknownColumnDataTypeSnafu,
|
||||
DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu, InvalidFulltextColumnTypeSnafu,
|
||||
MissingTimestampColumnSnafu, Result, UnknownColumnDataTypeSnafu,
|
||||
};
|
||||
pub struct ColumnExpr<'a> {
|
||||
pub column_name: &'a str,
|
||||
@@ -74,28 +72,6 @@ impl<'a> From<&'a ColumnSchema> for ColumnExpr<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn infer_column_datatype(
|
||||
datatype: i32,
|
||||
datatype_extension: &Option<ColumnDataTypeExtension>,
|
||||
) -> Result<ColumnDataType> {
|
||||
let column_type =
|
||||
ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
|
||||
|
||||
if matches!(&column_type, ColumnDataType::Binary) {
|
||||
if let Some(ext) = datatype_extension {
|
||||
let type_ext = ext
|
||||
.type_ext
|
||||
.as_ref()
|
||||
.context(error::MissingFieldSnafu { field: "type_ext" })?;
|
||||
if *type_ext == TypeExt::JsonType(JsonTypeExtension::JsonBinary.into()) {
|
||||
return Ok(ColumnDataType::Json);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(column_type)
|
||||
}
|
||||
|
||||
pub fn build_create_table_expr(
|
||||
table_id: Option<TableId>,
|
||||
table_name: &TableReference<'_>,
|
||||
@@ -148,7 +124,8 @@ pub fn build_create_table_expr(
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let column_type = infer_column_datatype(datatype, datatype_extension)?;
|
||||
let column_type =
|
||||
ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
|
||||
|
||||
ensure!(
|
||||
!contains_fulltext(options) || column_type == ColumnDataType::String,
|
||||
|
||||
@@ -91,7 +91,6 @@ pub mod catalog_name;
|
||||
pub mod datanode_table;
|
||||
pub mod flow;
|
||||
pub mod node_address;
|
||||
mod schema_metadata_manager;
|
||||
pub mod schema_name;
|
||||
pub mod table_info;
|
||||
pub mod table_name;
|
||||
@@ -117,7 +116,6 @@ use flow::flow_route::FlowRouteValue;
|
||||
use flow::table_flow::TableFlowValue;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
pub use schema_metadata_manager::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
@@ -1,122 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Schema-level metadata manager.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::TableId;
|
||||
|
||||
use crate::error::TableInfoNotFoundSnafu;
|
||||
use crate::key::schema_name::{SchemaManager, SchemaNameKey};
|
||||
use crate::key::table_info::{TableInfoManager, TableInfoManagerRef};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::{error, SchemaOptions};
|
||||
|
||||
pub type SchemaMetadataManagerRef = Arc<SchemaMetadataManager>;
|
||||
|
||||
pub struct SchemaMetadataManager {
|
||||
table_info_manager: TableInfoManagerRef,
|
||||
schema_manager: SchemaManager,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
impl SchemaMetadataManager {
|
||||
/// Creates a new database meta
|
||||
#[cfg(not(any(test, feature = "testing")))]
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
let table_info_manager = Arc::new(TableInfoManager::new(kv_backend.clone()));
|
||||
let schema_manager = SchemaManager::new(kv_backend);
|
||||
Self {
|
||||
table_info_manager,
|
||||
schema_manager,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new database meta
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
let table_info_manager = Arc::new(TableInfoManager::new(kv_backend.clone()));
|
||||
let schema_manager = SchemaManager::new(kv_backend.clone());
|
||||
Self {
|
||||
table_info_manager,
|
||||
schema_manager,
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets schema options by table id.
|
||||
pub async fn get_schema_options_by_table_id(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> error::Result<Option<SchemaOptions>> {
|
||||
let table_info = self
|
||||
.table_info_manager
|
||||
.get(table_id)
|
||||
.await?
|
||||
.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id: {}", table_id),
|
||||
})?;
|
||||
|
||||
let key = SchemaNameKey::new(
|
||||
&table_info.table_info.catalog_name,
|
||||
&table_info.table_info.schema_name,
|
||||
);
|
||||
self.schema_manager.get(key).await
|
||||
}
|
||||
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub async fn register_region_table_info(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
schema_name: &str,
|
||||
catalog_name: &str,
|
||||
schema_value: Option<crate::key::schema_name::SchemaNameValue>,
|
||||
) {
|
||||
use table::metadata::{RawTableInfo, TableType};
|
||||
let value = crate::key::table_info::TableInfoValue::new(RawTableInfo {
|
||||
ident: Default::default(),
|
||||
name: table_name.to_string(),
|
||||
desc: None,
|
||||
catalog_name: catalog_name.to_string(),
|
||||
schema_name: schema_name.to_string(),
|
||||
meta: Default::default(),
|
||||
table_type: TableType::Base,
|
||||
});
|
||||
let (txn, _) = self
|
||||
.table_info_manager
|
||||
.build_create_txn(table_id, &value)
|
||||
.unwrap();
|
||||
let resp = self.kv_backend.txn(txn).await.unwrap();
|
||||
assert!(resp.succeeded, "Failed to create table metadata");
|
||||
let key = SchemaNameKey {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
};
|
||||
self.schema_manager
|
||||
.create(key, schema_value, false)
|
||||
.await
|
||||
.expect("Failed to create schema metadata");
|
||||
common_telemetry::info!(
|
||||
"Register table: {}, id: {}, schema: {}, catalog: {}",
|
||||
table_name,
|
||||
table_id,
|
||||
schema_name,
|
||||
catalog_name
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -134,7 +134,6 @@ impl TableInfoValue {
|
||||
}
|
||||
|
||||
pub type TableInfoManagerRef = Arc<TableInfoManager>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TableInfoManager {
|
||||
kv_backend: KvBackendRef,
|
||||
|
||||
@@ -54,7 +54,4 @@ pub type DatanodeId = u64;
|
||||
// The id of the flownode.
|
||||
pub type FlownodeId = u64;
|
||||
|
||||
/// Schema options.
|
||||
pub type SchemaOptions = key::schema_name::SchemaNameValue;
|
||||
|
||||
pub use instruction::RegionIdent;
|
||||
|
||||
@@ -192,6 +192,7 @@ pub fn init_global_logging(
|
||||
if opts.log_format == LogFormat::Json {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.json()
|
||||
.with_writer(writer)
|
||||
.with_ansi(atty::is(atty::Stream::Stdout))
|
||||
@@ -200,6 +201,7 @@ pub fn init_global_logging(
|
||||
} else {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(atty::is(atty::Stream::Stdout))
|
||||
.boxed(),
|
||||
@@ -228,13 +230,20 @@ pub fn init_global_logging(
|
||||
if opts.log_format == LogFormat::Json {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.json()
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.boxed(),
|
||||
)
|
||||
} else {
|
||||
Some(Layer::new().with_writer(writer).with_ansi(false).boxed())
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.boxed(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
@@ -260,6 +269,7 @@ pub fn init_global_logging(
|
||||
Some(
|
||||
Layer::new()
|
||||
.json()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(filter::LevelFilter::ERROR)
|
||||
@@ -268,6 +278,7 @@ pub fn init_global_logging(
|
||||
} else {
|
||||
Some(
|
||||
Layer::new()
|
||||
.with_thread_ids(true)
|
||||
.with_writer(writer)
|
||||
.with_ansi(false)
|
||||
.with_filter(filter::LevelFilter::ERROR)
|
||||
|
||||
@@ -427,8 +427,7 @@ mod test {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut region_server = mock_region_server();
|
||||
let mut engine_env = TestEnv::with_prefix("region-alive-keeper");
|
||||
let engine = engine_env.create_engine(MitoConfig::default()).await;
|
||||
let engine = Arc::new(engine);
|
||||
let engine = Arc::new(engine_env.create_engine(MitoConfig::default()).await);
|
||||
region_server.register_engine(engine.clone());
|
||||
|
||||
let alive_keeper = Arc::new(RegionAliveKeeper::new(region_server.clone(), 100));
|
||||
|
||||
@@ -30,7 +30,7 @@ use servers::heartbeat_options::HeartbeatOptions;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::Mode;
|
||||
|
||||
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(1);
|
||||
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256);
|
||||
|
||||
/// Default data home in file storage
|
||||
const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb";
|
||||
|
||||
@@ -23,7 +23,6 @@ use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
|
||||
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::wal_options_allocator::prepare_wal_options;
|
||||
pub use common_procedure::options::ProcedureConfig;
|
||||
@@ -208,10 +207,7 @@ impl DatanodeBuilder {
|
||||
(Box::new(NoopRegionServerEventListener) as _, None)
|
||||
};
|
||||
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(kv_backend.clone()));
|
||||
let region_server = self
|
||||
.new_region_server(schema_metadata_manager, region_event_listener)
|
||||
.await?;
|
||||
let region_server = self.new_region_server(region_event_listener).await?;
|
||||
|
||||
let datanode_table_manager = DatanodeTableManager::new(kv_backend.clone());
|
||||
let table_values = datanode_table_manager
|
||||
@@ -316,7 +312,6 @@ impl DatanodeBuilder {
|
||||
|
||||
async fn new_region_server(
|
||||
&self,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
) -> Result<RegionServer> {
|
||||
let opts: &DatanodeOptions = &self.opts;
|
||||
@@ -345,13 +340,8 @@ impl DatanodeBuilder {
|
||||
);
|
||||
|
||||
let object_store_manager = Self::build_object_store_manager(&opts.storage).await?;
|
||||
let engines = Self::build_store_engines(
|
||||
opts,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
self.plugins.clone(),
|
||||
)
|
||||
.await?;
|
||||
let engines =
|
||||
Self::build_store_engines(opts, object_store_manager, self.plugins.clone()).await?;
|
||||
for engine in engines {
|
||||
region_server.register_engine(engine);
|
||||
}
|
||||
@@ -365,7 +355,6 @@ impl DatanodeBuilder {
|
||||
async fn build_store_engines(
|
||||
opts: &DatanodeOptions,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<Vec<RegionEngineRef>> {
|
||||
let mut engines = vec![];
|
||||
@@ -376,7 +365,6 @@ impl DatanodeBuilder {
|
||||
opts,
|
||||
object_store_manager.clone(),
|
||||
config.clone(),
|
||||
schema_metadata_manager.clone(),
|
||||
plugins.clone(),
|
||||
)
|
||||
.await?;
|
||||
@@ -402,7 +390,6 @@ impl DatanodeBuilder {
|
||||
opts: &DatanodeOptions,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
config: MitoConfig,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<MitoEngine> {
|
||||
let mito_engine = match &opts.wal {
|
||||
@@ -412,7 +399,6 @@ impl DatanodeBuilder {
|
||||
Self::build_raft_engine_log_store(&opts.storage.data_home, raft_engine_config)
|
||||
.await?,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
@@ -443,7 +429,6 @@ impl DatanodeBuilder {
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -1355,7 +1355,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_server_parallelism() {
|
||||
async fn test_region_server_parallism() {
|
||||
let p = RegionServerParallelism::from_opts(2, Duration::from_millis(1)).unwrap();
|
||||
let first_query = p.acquire().await;
|
||||
assert!(first_query.is_ok());
|
||||
|
||||
@@ -462,15 +462,5 @@ mod tests {
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
|
||||
// corrupted jsonb
|
||||
let jsonb = jsonb::parse_value("{\"hello\": \"world\"}".as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
let corrupted_jsonb = jsonb[0..jsonb.len() - 1].to_vec();
|
||||
let error = BinaryVector::from(vec![corrupted_jsonb])
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@ use operator::delete::Deleter;
|
||||
use operator::insert::Inserter;
|
||||
use operator::statement::StatementExecutor;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::{QueryEngine, QueryEngineFactory};
|
||||
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
|
||||
use servers::server::Server;
|
||||
@@ -302,7 +303,7 @@ impl FlownodeBuilder {
|
||||
///
|
||||
/// or recover all existing flow tasks if in standalone mode(nodeid is None)
|
||||
///
|
||||
/// TODO(discord9): persistent flow tasks with internal state
|
||||
/// TODO(discord9): persisent flow tasks with internal state
|
||||
async fn recover_flows(&self, manager: &FlowWorkerManagerRef) -> Result<usize, Error> {
|
||||
let nodeid = self.opts.node_id;
|
||||
let to_be_recovered: Vec<_> = if let Some(nodeid) = nodeid {
|
||||
@@ -475,6 +476,7 @@ impl FrontendInvoker {
|
||||
layered_cache_registry.clone(),
|
||||
inserter.clone(),
|
||||
table_route_cache,
|
||||
StatementStatistics::default(),
|
||||
));
|
||||
|
||||
let invoker = FrontendInvoker::new(inserter, deleter, statement_executor);
|
||||
|
||||
@@ -51,7 +51,6 @@ use query::metrics::OnDone;
|
||||
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
|
||||
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
use servers::error as server_error;
|
||||
@@ -123,7 +122,6 @@ pub struct Instance {
|
||||
deleter: DeleterRef,
|
||||
export_metrics_task: Option<ExportMetricsTask>,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
stats: StatementStatistics,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
@@ -230,10 +228,6 @@ impl Instance {
|
||||
let query_interceptor = self.plugins.get::<SqlQueryInterceptorRef<Error>>();
|
||||
let query_interceptor = query_interceptor.as_ref();
|
||||
|
||||
let _slow_query_timer = self
|
||||
.stats
|
||||
.start_slow_query_timer(QueryStatement::Sql(stmt.clone()));
|
||||
|
||||
let output = match stmt {
|
||||
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
|
||||
let stmt = QueryStatement::Sql(stmt);
|
||||
@@ -418,6 +412,7 @@ impl PrometheusHandler for Instance {
|
||||
let interceptor = self
|
||||
.plugins
|
||||
.get::<PromQueryInterceptorRef<server_error::Error>>();
|
||||
interceptor.pre_execute(query, query_ctx.clone())?;
|
||||
|
||||
self.plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
@@ -431,20 +426,9 @@ impl PrometheusHandler for Instance {
|
||||
}
|
||||
})?;
|
||||
|
||||
let _slow_query_timer = self.stats.start_slow_query_timer(stmt.clone());
|
||||
|
||||
let plan = self
|
||||
.statement_executor
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
interceptor.pre_execute(query, Some(&plan), query_ctx.clone())?;
|
||||
|
||||
let output = self
|
||||
.statement_executor
|
||||
.exec_plan(plan, query_ctx.clone())
|
||||
.execute_stmt(stmt, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
@@ -185,6 +185,7 @@ impl FrontendBuilder {
|
||||
local_cache_invalidator,
|
||||
inserter.clone(),
|
||||
table_route_cache,
|
||||
self.stats,
|
||||
));
|
||||
|
||||
let pipeline_operator = Arc::new(PipelineOperator::new(
|
||||
@@ -210,7 +211,6 @@ impl FrontendBuilder {
|
||||
deleter,
|
||||
export_metrics_task: None,
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend)),
|
||||
stats: self.stats,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use api::v1::{DeleteRequests, DropFlowExpr, InsertRequests, RowDeleteRequests, R
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing::{self};
|
||||
use common_telemetry::tracing;
|
||||
use query::parser::PromQuery;
|
||||
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
|
||||
@@ -24,6 +24,7 @@ use pipeline::PipelineWay;
|
||||
use servers::error::{self, AuthSnafu, Result as ServerResult};
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
use servers::otlp::plugin::TraceParserRef;
|
||||
use servers::query_handler::OpenTelemetryProtocolHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
@@ -63,7 +64,6 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
async fn traces(
|
||||
&self,
|
||||
request: ExportTraceServiceRequest,
|
||||
table_name: String,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
self.plugins
|
||||
@@ -77,7 +77,13 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_execute(ctx.clone())?;
|
||||
|
||||
let spans = otlp::trace::parse(request);
|
||||
let (table_name, spans) = match self.plugins.get::<TraceParserRef>() {
|
||||
Some(parser) => (parser.table_name(), parser.parse(request)),
|
||||
None => (
|
||||
otlp::trace::TRACE_TABLE_NAME.to_string(),
|
||||
otlp::trace::parse(request),
|
||||
),
|
||||
};
|
||||
|
||||
let (requests, rows) = otlp::trace::to_grpc_insert_requests(table_name, spans)?;
|
||||
|
||||
|
||||
@@ -62,6 +62,8 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
break;
|
||||
}
|
||||
|
||||
common_telemetry::info!("Predicate apply, apply name start, name: {}", name);
|
||||
|
||||
let Some(meta) = metadata.metas.get(name) else {
|
||||
match context.index_not_found_strategy {
|
||||
IndexNotFoundStrategy::ReturnEmpty => {
|
||||
@@ -85,6 +87,8 @@ impl IndexApplier for PredicatesIndexApplier {
|
||||
let bm = mapper.map_values(&values).await?;
|
||||
|
||||
bitmap &= bm;
|
||||
|
||||
common_telemetry::info!("Predicate apply, apply name end, name: {}", name);
|
||||
}
|
||||
|
||||
output.matched_segment_ids = bitmap;
|
||||
|
||||
@@ -17,6 +17,7 @@ use std::any::Any;
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use common_runtime::error::Error as RuntimeError;
use common_runtime::JoinError;
use serde_json::error::Error as JsonError;
use snafu::{Location, Snafu};
use store_api::storage::RegionId;
@@ -306,6 +307,14 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Join error"))]
    Join {
        #[snafu(implicit)]
        location: Location,
        #[snafu(source)]
        error: JoinError,
    },
}

impl ErrorExt for Error {

@@ -31,8 +31,8 @@ use store_api::storage::RegionId;

use crate::error::{
    AddEntryLogBatchSnafu, DiscontinuousLogIndexSnafu, Error, FetchEntrySnafu,
    IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, OverrideCompactedEntrySnafu,
    RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
    IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, JoinSnafu,
    OverrideCompactedEntrySnafu, RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
};
use crate::metrics;
use crate::raft_engine::backend::SYSTEM_NAMESPACE;
@@ -250,6 +250,12 @@ impl LogStore for RaftEngineLogStore {
            .engine
            .write(&mut batch, sync)
            .context(RaftEngineSnafu)?;
        let engine = self.engine.clone();
        let _ = common_runtime::spawn_blocking_global(move || {
            engine.write(&mut batch, sync).context(RaftEngineSnafu)
        })
        .await
        .context(JoinSnafu)?;

        Ok(AppendBatchResponse { last_entry_ids })
    }
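The `append_batch` change above wraps the synchronous `raft_engine` write in `common_runtime::spawn_blocking_global` so the async executor is not stalled by disk I/O, and maps a failed join into the new `Join` error variant. A hedged sketch of the same pattern with plain tokio standing in for the GreptimeDB runtime helper (the function names and the fake write are assumptions):

```rust
use tokio::task;

// Stand-in for the blocking raft-engine write.
fn blocking_write(batch: Vec<u8>, sync: bool) -> Result<usize, String> {
    let _ = sync; // pretend this may fsync
    Ok(batch.len())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let batch = vec![0u8; 128];

    // Run the blocking call on the blocking thread pool, then await the JoinHandle.
    // A panicked or cancelled task surfaces as a join error, like the Join variant above.
    let written = task::spawn_blocking(move || blocking_write(batch, true))
        .await
        .map_err(|e| format!("join error: {e}"))??;

    assert_eq!(written, 128);
    Ok(())
}
```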
@@ -15,8 +15,6 @@ common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures.workspace = true
|
||||
futures-util.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
rand.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -21,8 +21,6 @@ mod cluster;
|
||||
mod store;
|
||||
mod util;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::{ProcedureDetailResponse, Role};
|
||||
use cluster::Client as ClusterClient;
|
||||
use common_error::ext::BoxedError;
|
||||
@@ -32,8 +30,7 @@ use common_meta::cluster::{
|
||||
};
|
||||
use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue, RegionStat};
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::error::{self as meta_error, ExternalSnafu, Result as MetaResult};
|
||||
use common_meta::range_stream::PaginationStream;
|
||||
use common_meta::error::{self as meta_error, Result as MetaResult};
|
||||
use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use common_meta::rpc::procedure::{
|
||||
MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
|
||||
@@ -43,10 +40,8 @@ use common_meta::rpc::store::{
|
||||
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
|
||||
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_meta::rpc::KeyValue;
|
||||
use common_meta::ClusterId;
|
||||
use common_telemetry::info;
|
||||
use futures::TryStreamExt;
|
||||
use heartbeat::Client as HeartbeatClient;
|
||||
use procedure::Client as ProcedureClient;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -319,15 +314,16 @@ impl ClusterInfo for MetaClient {
|
||||
}
|
||||
|
||||
async fn list_region_stats(&self) -> Result<Vec<RegionStat>> {
|
||||
let cluster_kv_backend = Arc::new(self.cluster_client()?);
|
||||
let cluster_client = self.cluster_client()?;
|
||||
let range_prefix = DatanodeStatKey::key_prefix_with_cluster_id(self.id.0);
|
||||
let req = RangeRequest::new().with_prefix(range_prefix);
|
||||
let stream = PaginationStream::new(cluster_kv_backend, req, 256, Arc::new(decode_stats))
|
||||
.into_stream();
|
||||
let mut datanode_stats = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.context(ConvertMetaResponseSnafu)?;
|
||||
let mut datanode_stats = cluster_client
|
||||
.range(req)
|
||||
.await?
|
||||
.kvs
|
||||
.into_iter()
|
||||
.map(|kv| DatanodeStatValue::try_from(kv.value).context(ConvertMetaRequestSnafu))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let region_stats = datanode_stats
|
||||
.iter_mut()
|
||||
.flat_map(|datanode_stat| {
|
||||
@@ -340,12 +336,6 @@ impl ClusterInfo for MetaClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_stats(kv: KeyValue) -> MetaResult<DatanodeStatValue> {
|
||||
DatanodeStatValue::try_from(kv.value)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
impl MetaClient {
|
||||
pub fn new(id: Id) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -12,22 +12,14 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::future::Future;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::greptime_proto::v1;
|
||||
use api::v1::meta::cluster_client::ClusterClient;
|
||||
use api::v1::meta::{MetasrvNodeInfo, MetasrvPeersRequest, ResponseHeader, Role};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::error::{Error as MetaError, ExternalSnafu, Result as MetaResult};
|
||||
use common_meta::kv_backend::{KvBackend, TxnService};
|
||||
use common_meta::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse,
|
||||
RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_meta::rpc::store::{BatchGetRequest, BatchGetResponse, RangeRequest, RangeResponse};
|
||||
use common_telemetry::{info, warn};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::sync::RwLock;
|
||||
@@ -87,51 +79,6 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
impl TxnService for Client {
|
||||
type Error = MetaError;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for Client {
|
||||
fn name(&self) -> &str {
|
||||
"ClusterClientKvBackend"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> MetaResult<RangeResponse> {
|
||||
self.range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn put(&self, _: PutRequest) -> MetaResult<PutResponse> {
|
||||
unimplemented!("`put` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _: BatchPutRequest) -> MetaResult<BatchPutResponse> {
|
||||
unimplemented!("`batch_put` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_get(&self, req: BatchGetRequest) -> MetaResult<BatchGetResponse> {
|
||||
self.batch_get(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn delete_range(&self, _: DeleteRangeRequest) -> MetaResult<DeleteRangeResponse> {
|
||||
unimplemented!("`delete_range` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, _: BatchDeleteRequest) -> MetaResult<BatchDeleteResponse> {
|
||||
unimplemented!("`batch_delete` is not supported in cluster client kv backend")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Inner {
|
||||
id: Id,
|
||||
|
||||
@@ -478,11 +478,6 @@ pub struct HeartbeatHandlerGroupBuilder {
|
||||
/// The handler to handle region lease.
|
||||
region_lease_handler: Option<RegionLeaseHandler>,
|
||||
|
||||
/// The factor that determines how often statistics should be flushed,
|
||||
/// based on the number of received heartbeats. When the number of heartbeats
|
||||
/// reaches this factor, a flush operation is triggered.
|
||||
flush_stats_factor: Option<usize>,
|
||||
|
||||
/// The plugins.
|
||||
plugins: Option<Plugins>,
|
||||
|
||||
@@ -498,7 +493,6 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
Self {
|
||||
region_failure_handler: None,
|
||||
region_lease_handler: None,
|
||||
flush_stats_factor: None,
|
||||
plugins: None,
|
||||
pushers,
|
||||
handlers: vec![],
|
||||
@@ -516,12 +510,6 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the flush stats factor.
|
||||
pub fn with_flush_stats_factor(mut self, flush_stats_factor: Option<usize>) -> Self {
|
||||
self.flush_stats_factor = flush_stats_factor;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the [`Plugins`].
|
||||
pub fn with_plugins(mut self, plugins: Option<Plugins>) -> Self {
|
||||
self.plugins = plugins;
|
||||
@@ -562,7 +550,7 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
if let Some(publish_heartbeat_handler) = publish_heartbeat_handler {
|
||||
self.add_handler_last(publish_heartbeat_handler);
|
||||
}
|
||||
self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor));
|
||||
self.add_handler_last(CollectStatsHandler::default());
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
@@ -29,6 +29,8 @@ use crate::error::{self, Result};
|
||||
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
|
||||
use crate::metasrv::Context;
|
||||
|
||||
const MAX_CACHED_STATS_PER_KEY: usize = 10;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct EpochStats {
|
||||
stats: Vec<Stat>,
|
||||
@@ -67,26 +69,9 @@ impl EpochStats {
|
||||
}
|
||||
}
|
||||
|
||||
const DEFAULT_FLUSH_STATS_FACTOR: usize = 3;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CollectStatsHandler {
|
||||
stats_cache: DashMap<DatanodeStatKey, EpochStats>,
|
||||
flush_stats_factor: usize,
|
||||
}
|
||||
|
||||
impl Default for CollectStatsHandler {
|
||||
fn default() -> Self {
|
||||
Self::new(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl CollectStatsHandler {
|
||||
pub fn new(flush_stats_factor: Option<usize>) -> Self {
|
||||
Self {
|
||||
flush_stats_factor: flush_stats_factor.unwrap_or(DEFAULT_FLUSH_STATS_FACTOR),
|
||||
stats_cache: DashMap::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -145,7 +130,7 @@ impl HeartbeatHandler for CollectStatsHandler {
|
||||
rewrite_node_address(ctx, last).await;
|
||||
}
|
||||
|
||||
if !refresh && epoch_stats.len() < self.flush_stats_factor {
|
||||
if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY {
|
||||
return Ok(HandleControl::Continue);
|
||||
}
|
||||
|
||||
@@ -276,7 +261,8 @@ mod tests {
|
||||
let res = ctx.in_memory.get(&key).await.unwrap();
|
||||
let kv = res.unwrap();
|
||||
let val: DatanodeStatValue = kv.value.try_into().unwrap();
|
||||
assert_eq!(handler.flush_stats_factor, val.stats.len());
|
||||
// refresh every 10 stats
|
||||
assert_eq!(10, val.stats.len());
|
||||
}
|
||||
|
||||
async fn handle_request_many_times(
|
||||
|
||||
@@ -130,10 +130,6 @@ pub struct MetasrvOptions {
|
||||
/// limit the number of operations in a txn because an infinitely large txn could
|
||||
/// potentially block other operations.
|
||||
pub max_txn_ops: usize,
|
||||
/// The factor that determines how often statistics should be flushed,
|
||||
/// based on the number of received heartbeats. When the number of heartbeats
|
||||
/// reaches this factor, a flush operation is triggered.
|
||||
pub flush_stats_factor: usize,
|
||||
/// The tracing options.
|
||||
pub tracing: TracingOptions,
|
||||
/// The datastore for kv metadata.
|
||||
@@ -169,7 +165,6 @@ impl Default for MetasrvOptions {
|
||||
export_metrics: ExportMetricsOption::default(),
|
||||
store_key_prefix: String::new(),
|
||||
max_txn_ops: 128,
|
||||
flush_stats_factor: 3,
|
||||
tracing: TracingOptions::default(),
|
||||
backend: BackendImpl::EtcdStore,
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ impl MetasrvBuilder {
|
||||
))
|
||||
});
|
||||
let flow_metadata_allocator = {
|
||||
// for now flownode just use round-robin selector
|
||||
// for now flownode just use round robin selector
|
||||
let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode);
|
||||
let flow_selector_ctx = selector_ctx.clone();
|
||||
let peer_allocator = Arc::new(FlowPeerAllocator::new(
|
||||
@@ -347,7 +347,6 @@ impl MetasrvBuilder {
|
||||
.with_plugins(plugins.clone())
|
||||
.with_region_failure_handler(region_failover_handler)
|
||||
.with_region_lease_handler(Some(region_lease_handler))
|
||||
.with_flush_stats_factor(Some(options.flush_stats_factor))
|
||||
.add_default_handlers()
|
||||
}
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ use crate::metasrv::SelectTarget;
use crate::selector::SelectorOptions;

/// According to the `opts`, choose peers from the `weight_array` through `weighted_choose`.
pub fn choose_items<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
pub fn choose_peers<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
where
    W: WeightedChoose<Peer>,
{
@@ -36,36 +36,20 @@ where
        }
    );

    if min_required_items == 1 {
        // fast path
        return Ok(vec![weighted_choose.choose_one()?]);
    }

    let available_count = weighted_choose.len();

    if opts.allow_duplication {
        // Calculate how many complete rounds of `available_count` items to select,
        // plus any additional items needed after complete rounds.
        let complete_batches = min_required_items / available_count;
        let leftover_items = min_required_items % available_count;
        if complete_batches == 0 {
            return weighted_choose.choose_multiple(leftover_items);
        }

        let mut result = Vec::with_capacity(min_required_items);
        for _ in 0..complete_batches {
            result.extend(weighted_choose.choose_multiple(available_count)?);
        }
        result.extend(weighted_choose.choose_multiple(leftover_items)?);

        Ok(result)
        (0..min_required_items)
            .map(|_| weighted_choose.choose_one())
            .collect::<Result<_>>()
    } else {
        // Ensure the available items are sufficient when duplication is not allowed.
        let weight_array_len = weighted_choose.len();

        // When opts.allow_duplication is false, we need to check that the length of the weighted array is greater than
        // or equal to min_required_items, otherwise it may cause an infinite loop.
        ensure!(
            available_count >= min_required_items,
            weight_array_len >= min_required_items,
            error::NoEnoughAvailableNodeSnafu {
                required: min_required_items,
                available: available_count,
                available: weight_array_len,
                select_target: SelectTarget::Datanode
            }
        );
@@ -80,7 +64,7 @@ mod tests {

    use common_meta::peer::Peer;

    use crate::selector::common::choose_items;
    use crate::selector::common::choose_peers;
    use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
    use crate::selector::SelectorOptions;

@@ -131,7 +115,7 @@ mod tests {
        };

        let selected_peers: HashSet<_> =
            choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone()))
            choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()))
                .unwrap()
                .into_iter()
                .collect();
@@ -145,7 +129,7 @@ mod tests {
        };

        let selected_result =
            choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone()));
            choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()));
        assert!(selected_result.is_err());

        for i in 1..=50 {
@@ -155,7 +139,7 @@ mod tests {
        };

        let selected_peers =
            choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone())).unwrap();
            choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone())).unwrap();

        assert_eq!(i, selected_peers.len());
    }
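When duplication is allowed, the rewritten `choose_items` draws peers in batches instead of one `choose_one` call per item: `min_required_items / available_count` complete passes over the candidates plus one partial pass for the remainder. The arithmetic is easy to verify in isolation (a toy sketch, not the metasrv code):

```rust
// Split `required` picks over `available` distinct candidates into complete
// rounds plus a leftover batch, as the allow_duplication branch above does.
fn batches(required: usize, available: usize) -> (usize, usize) {
    (required / available, required % available)
}

fn main() {
    // 7 peers wanted from 3 candidates: 2 full rounds of 3, then 1 extra pick.
    let (complete, leftover) = batches(7, 3);
    assert_eq!((complete, leftover), (2, 1));
    assert_eq!(complete * 3 + leftover, 7);

    // Fewer wanted than available: no complete round, just one weighted sample of 2.
    assert_eq!(batches(2, 3), (0, 2));
}
```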
@@ -17,7 +17,7 @@ use common_meta::peer::Peer;
|
||||
use crate::error::Result;
|
||||
use crate::lease;
|
||||
use crate::metasrv::SelectorContext;
|
||||
use crate::selector::common::choose_items;
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
|
||||
use crate::selector::{Namespace, Selector, SelectorOptions};
|
||||
|
||||
@@ -53,7 +53,7 @@ impl Selector for LeaseBasedSelector {
|
||||
|
||||
// 3. choose peers by weight_array.
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_items(&opts, &mut weighted_choose)?;
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
|
||||
Ok(selected)
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::error::{self, Result};
|
||||
use crate::key::{DatanodeLeaseKey, LeaseValue};
|
||||
use crate::lease;
|
||||
use crate::metasrv::SelectorContext;
|
||||
use crate::selector::common::choose_items;
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute};
|
||||
use crate::selector::weighted_choose::RandomWeightedChoose;
|
||||
use crate::selector::{Namespace, Selector, SelectorOptions};
|
||||
@@ -94,7 +94,7 @@ where
|
||||
|
||||
// 5. choose peers by weight_array.
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_items(&opts, &mut weighted_choose)?;
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
|
||||
debug!(
|
||||
"LoadBasedSelector select peers: {:?}, namespace: {}, opts: {:?}.",
|
||||
|
||||
@@ -14,8 +14,6 @@

//! Implementation of retrieving logical region's region metadata.

use std::collections::HashMap;

use store_api::metadata::ColumnMetadata;
use store_api::storage::RegionId;

@@ -48,36 +46,23 @@ impl MetricEngineInner {
            .read_lock_logical_region(logical_region_id)
            .await;
        // Load logical and physical columns, and intersect them to get logical column metadata.
        let logical_column_metadata = self
        let mut logical_column_metadata = self
            .metadata_region
            .logical_columns(physical_region_id, logical_region_id)
            .await?
            .into_iter()
            .map(|(_, column_metadata)| column_metadata)
            .collect::<Vec<_>>();

        // Update cache
        let mut mutable_state = self.state.write().unwrap();
        // Merge with existing cached columns.
        let existing_columns = mutable_state
            .logical_columns()
            .get(&logical_region_id)
            .cloned()
            .unwrap_or_default()
            .into_iter();
        let mut dedup_columns = logical_column_metadata
            .into_iter()
            .chain(existing_columns)
            .map(|c| (c.column_id, c))
            .collect::<HashMap<_, _>>()
            .values()
            .cloned()
            .collect::<Vec<_>>();
        // Sort columns on column name to ensure the order
        dedup_columns.sort_unstable_by(|c1, c2| c1.column_schema.name.cmp(&c2.column_schema.name));
        mutable_state.set_logical_columns(logical_region_id, dedup_columns.clone());
        logical_column_metadata
            .sort_unstable_by(|c1, c2| c1.column_schema.name.cmp(&c2.column_schema.name));
        // Update cache
        self.state
            .write()
            .unwrap()
            .add_logical_columns(logical_region_id, logical_column_metadata.clone());

        Ok(dedup_columns)
        Ok(logical_column_metadata)
    }

    /// Load logical column names of a logical region.

@@ -85,13 +85,19 @@ impl MetricEngineState {
            .insert(logical_region_id, physical_region_id);
    }

    /// Replace the logical columns of the logical region with given columns.
    pub fn set_logical_columns(
    /// Add and reorder logical columns.
    ///
    /// Caller should make sure:
    /// 1. there is no duplicate columns
    /// 2. the column order is the same with the order in the metadata, which is
    /// alphabetically ordered on column name.
    pub fn add_logical_columns(
        &mut self,
        logical_region_id: RegionId,
        columns: Vec<ColumnMetadata>,
        new_columns: impl IntoIterator<Item = ColumnMetadata>,
    ) {
        self.logical_columns.insert(logical_region_id, columns);
        let columns = self.logical_columns.entry(logical_region_id).or_default();
        columns.extend(new_columns);
    }

    pub fn get_physical_region_id(&self, logical_region_id: RegionId) -> Option<RegionId> {
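In the retrieval path above, the freshly loaded logical columns are merged with whatever is already cached for the region: deduplicated by `column_id` through a `HashMap`, then sorted by column name so repeated loads return a stable order. A self-contained sketch of that merge over a simplified column type (field names and sample values are invented):

```rust
use std::collections::HashMap;

#[derive(Clone, Debug)]
struct Column {
    id: u32,
    name: String,
}

// Deduplicate by id (later entries in the chain win) and sort by name,
// mirroring the dedup_columns construction above.
fn merge_columns(fresh: Vec<Column>, cached: Vec<Column>) -> Vec<Column> {
    let mut dedup: Vec<Column> = fresh
        .into_iter()
        .chain(cached)
        .map(|c| (c.id, c))
        .collect::<HashMap<_, _>>()
        .into_values()
        .collect();
    dedup.sort_unstable_by(|a, b| a.name.cmp(&b.name));
    dedup
}

fn main() {
    let fresh = vec![Column { id: 2, name: "value".to_string() }];
    let cached = vec![
        Column { id: 1, name: "ts".to_string() },
        Column { id: 2, name: "value".to_string() },
    ];
    let names: Vec<String> = merge_columns(fresh, cached)
        .into_iter()
        .map(|c| c.name)
        .collect();
    assert_eq!(names, ["ts", "value"]);
}
```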
@@ -24,7 +24,6 @@ common-datasource.workspace = true
common-decimal.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
@@ -75,7 +74,6 @@ uuid.workspace = true

[dev-dependencies]
common-function.workspace = true
common-meta = { workspace = true, features = ["testing"] }
common-procedure-test.workspace = true
common-test-util.workspace = true
criterion = "0.4"
src/mito2/src/cache/index.rs
@@ -87,7 +87,16 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
&mut self,
|
||||
dest: &mut Vec<u8>,
|
||||
) -> index::inverted_index::error::Result<usize> {
|
||||
self.inner.read_all(dest).await
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader read_all start, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
let res = self.inner.read_all(dest).await;
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader read_all end, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn seek_read(
|
||||
@@ -95,7 +104,20 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<Vec<u8>> {
|
||||
self.inner.seek_read(offset, size).await
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader seek_read start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self.inner.seek_read(offset, size).await;
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader seek_read end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn metadata(&mut self) -> index::inverted_index::error::Result<Arc<InvertedIndexMetas>> {
|
||||
@@ -103,8 +125,16 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
CACHE_HIT.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(cached)
|
||||
} else {
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader get metadata start, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
let meta = self.inner.metadata().await?;
|
||||
self.cache.put_index_metadata(self.file_id, meta.clone());
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader get metadata end, file_id: {}",
|
||||
self.file_id,
|
||||
);
|
||||
CACHE_MISS.with_label_values(&[INDEX_METADATA_TYPE]).inc();
|
||||
Ok(meta)
|
||||
}
|
||||
@@ -115,9 +145,23 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<FstMap> {
|
||||
self.get_or_load(offset, size)
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader fst start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self
|
||||
.get_or_load(offset, size)
|
||||
.await
|
||||
.and_then(|r| FstMap::new(r).context(DecodeFstSnafu))
|
||||
.and_then(|r| FstMap::new(r).context(DecodeFstSnafu));
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader fst end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
|
||||
async fn bitmap(
|
||||
@@ -125,7 +169,20 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
|
||||
offset: u64,
|
||||
size: u32,
|
||||
) -> index::inverted_index::error::Result<BitVec> {
|
||||
self.get_or_load(offset, size).await.map(BitVec::from_vec)
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader bitmap start, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
let res = self.get_or_load(offset, size).await.map(BitVec::from_vec);
|
||||
common_telemetry::debug!(
|
||||
"Inverted index reader bitmap end, file_id: {}, offset: {}, size: {}",
|
||||
self.file_id,
|
||||
offset,
|
||||
size,
|
||||
);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,8 +28,7 @@ use std::time::{Duration, Instant};
|
||||
|
||||
use api::v1::region::compact_request;
|
||||
use common_base::Plugins;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_telemetry::{debug, error, info};
|
||||
use common_time::range::TimestampRange;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
@@ -38,7 +37,7 @@ use datafusion_expr::Expr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use store_api::storage::RegionId;
|
||||
use table::predicate::Predicate;
|
||||
use tokio::sync::mpsc::{self, Sender};
|
||||
|
||||
@@ -49,8 +48,8 @@ use crate::compaction::picker::{new_picker, CompactionTask};
|
||||
use crate::compaction::task::CompactionTaskImpl;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::{
|
||||
CompactRegionSnafu, Error, GetSchemaMetadataSnafu, RegionClosedSnafu, RegionDroppedSnafu,
|
||||
RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
|
||||
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu,
|
||||
RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
|
||||
};
|
||||
use crate::metrics::COMPACTION_STAGE_ELAPSED;
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
@@ -83,7 +82,6 @@ pub struct CompactionRequest {
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
pub(crate) manifest_ctx: ManifestContextRef,
|
||||
pub(crate) listener: WorkerListener,
|
||||
pub(crate) schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl CompactionRequest {
|
||||
@@ -143,7 +141,6 @@ impl CompactionScheduler {
|
||||
access_layer: &AccessLayerRef,
|
||||
waiter: OptionOutputTx,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<()> {
|
||||
if let Some(status) = self.region_status.get_mut(®ion_id) {
|
||||
// Region is compacting. Add the waiter to pending list.
|
||||
@@ -161,7 +158,6 @@ impl CompactionScheduler {
|
||||
self.cache_manager.clone(),
|
||||
manifest_ctx,
|
||||
self.listener.clone(),
|
||||
schema_metadata_manager,
|
||||
);
|
||||
self.region_status.insert(region_id, status);
|
||||
let result = self
|
||||
@@ -177,7 +173,6 @@ impl CompactionScheduler {
|
||||
&mut self,
|
||||
region_id: RegionId,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) {
|
||||
let Some(status) = self.region_status.get_mut(®ion_id) else {
|
||||
return;
|
||||
@@ -191,7 +186,6 @@ impl CompactionScheduler {
|
||||
self.cache_manager.clone(),
|
||||
manifest_ctx,
|
||||
self.listener.clone(),
|
||||
schema_metadata_manager,
|
||||
);
|
||||
// Try to schedule next compaction task for this region.
|
||||
if let Err(e) = self
|
||||
@@ -262,23 +256,10 @@ impl CompactionScheduler {
|
||||
cache_manager,
|
||||
manifest_ctx,
|
||||
listener,
|
||||
schema_metadata_manager,
|
||||
} = request;
|
||||
|
||||
let ttl = find_ttl(
|
||||
region_id.table_id(),
|
||||
current_version.options.ttl,
|
||||
&schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!(e; "Failed to get ttl for region: {}", region_id);
|
||||
None
|
||||
});
|
||||
|
||||
debug!(
|
||||
"Pick compaction strategy {:?} for region: {}, ttl: {:?}",
|
||||
picker, region_id, ttl
|
||||
"Pick compaction strategy {:?} for region: {}",
|
||||
picker, region_id
|
||||
);
|
||||
|
||||
let compaction_region = CompactionRegion {
|
||||
@@ -292,7 +273,6 @@ impl CompactionScheduler {
|
||||
access_layer: access_layer.clone(),
|
||||
manifest_ctx: manifest_ctx.clone(),
|
||||
file_purger: None,
|
||||
ttl,
|
||||
};
|
||||
|
||||
let picker_output = {
|
||||
@@ -434,24 +414,6 @@ impl PendingCompaction {
        }
    }

/// Finds TTL of table by first examine table options then database options.
async fn find_ttl(
    table_id: TableId,
    table_ttl: Option<Duration>,
    schema_metadata_manager: &SchemaMetadataManagerRef,
) -> Result<Option<Duration>> {
    if let Some(table_ttl) = table_ttl {
        return Ok(Some(table_ttl));
    }

    let ttl = schema_metadata_manager
        .get_schema_options_by_table_id(table_id)
        .await
        .context(GetSchemaMetadataSnafu)?
        .and_then(|options| options.ttl);
    Ok(ttl)
}
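The find_ttl helper shown above resolves a table's TTL by taking the table-level option when present and only falling back to the schema (database) level options otherwise. A tiny sketch of that precedence rule, with a synchronous stand-in for the async metadata lookup (the names below are illustrative, not the real API):

use std::time::Duration;

// Stand-in for the schema-level options lookup; in the real code this is an
// async call through the schema metadata manager.
fn schema_level_ttl(_table_id: u32) -> Option<Duration> {
    Some(Duration::from_secs(3600))
}

/// The table option wins; otherwise fall back to the database/schema option.
fn resolve_ttl(table_id: u32, table_ttl: Option<Duration>) -> Option<Duration> {
    table_ttl.or_else(|| schema_level_ttl(table_id))
}

fn main() {
    // Table sets its own TTL: the schema default is ignored.
    assert_eq!(
        resolve_ttl(1, Some(Duration::from_secs(60))),
        Some(Duration::from_secs(60))
    );
    // No table TTL: the schema-level value is used.
    assert_eq!(resolve_ttl(1, None), Some(Duration::from_secs(3600)));
}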
||||
|
||||
/// Status of running and pending region compaction tasks.
|
||||
struct CompactionStatus {
|
||||
/// Id of the region.
|
||||
@@ -509,7 +471,6 @@ impl CompactionStatus {
|
||||
cache_manager: CacheManagerRef,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
listener: WorkerListener,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> CompactionRequest {
|
||||
let current_version = self.version_control.current().version;
|
||||
let start_time = Instant::now();
|
||||
@@ -523,7 +484,6 @@ impl CompactionStatus {
|
||||
cache_manager,
|
||||
manifest_ctx: manifest_ctx.clone(),
|
||||
listener,
|
||||
schema_metadata_manager,
|
||||
};
|
||||
|
||||
if let Some(pending) = self.pending_compaction.take() {
|
||||
@@ -679,9 +639,6 @@ fn get_expired_ssts(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_meta::key::SchemaMetadataManager;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use super::*;
|
||||
@@ -694,19 +651,7 @@ mod tests {
|
||||
let (tx, _rx) = mpsc::channel(4);
|
||||
let mut scheduler = env.mock_compaction_scheduler(tx);
|
||||
let mut builder = VersionControlBuilder::new();
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef));
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
builder.region_id().table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// Nothing to compact.
|
||||
let version_control = Arc::new(builder.build());
|
||||
let (output_tx, output_rx) = oneshot::channel();
|
||||
@@ -722,7 +667,6 @@ mod tests {
|
||||
&env.access_layer,
|
||||
waiter,
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -742,7 +686,6 @@ mod tests {
|
||||
&env.access_layer,
|
||||
waiter,
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -760,19 +703,6 @@ mod tests {
|
||||
let mut builder = VersionControlBuilder::new();
|
||||
let purger = builder.file_purger();
|
||||
let region_id = builder.region_id();
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef));
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
builder.region_id().table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// 5 files to compact.
|
||||
let end = 1000 * 1000;
|
||||
@@ -796,7 +726,6 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -826,7 +755,6 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -841,7 +769,7 @@ mod tests {
|
||||
|
||||
// On compaction finished and schedule next compaction.
|
||||
scheduler
|
||||
.on_compaction_finished(region_id, &manifest_ctx, schema_metadata_manager.clone())
|
||||
.on_compaction_finished(region_id, &manifest_ctx)
|
||||
.await;
|
||||
assert_eq!(1, scheduler.region_status.len());
|
||||
assert_eq!(2, job_scheduler.num_jobs());
|
||||
@@ -861,7 +789,6 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -16,8 +16,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::region::compact_request;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_telemetry::{info, warn};
|
||||
use common_telemetry::info;
|
||||
use object_store::manager::ObjectStoreManagerRef;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
@@ -28,7 +27,7 @@ use store_api::storage::RegionId;
|
||||
use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest};
|
||||
use crate::cache::{CacheManager, CacheManagerRef};
|
||||
use crate::compaction::picker::{new_picker, PickerOutput};
|
||||
use crate::compaction::{find_ttl, CompactionSstReaderBuilder};
|
||||
use crate::compaction::CompactionSstReaderBuilder;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Result};
|
||||
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
|
||||
@@ -63,7 +62,6 @@ pub struct CompactionRegion {
|
||||
pub(crate) manifest_ctx: Arc<ManifestContext>,
|
||||
pub(crate) current_version: VersionRef,
|
||||
pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
|
||||
pub(crate) ttl: Option<Duration>,
|
||||
}
|
||||
|
||||
/// OpenCompactionRegionRequest represents the request to open a compaction region.
|
||||
@@ -80,7 +78,6 @@ pub async fn open_compaction_region(
|
||||
req: &OpenCompactionRegionRequest,
|
||||
mito_config: &MitoConfig,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<CompactionRegion> {
|
||||
let object_store = {
|
||||
let name = &req.region_options.storage;
|
||||
@@ -172,16 +169,6 @@ pub async fn open_compaction_region(
|
||||
Arc::new(version)
|
||||
};
|
||||
|
||||
let ttl = find_ttl(
|
||||
req.region_id.table_id(),
|
||||
current_version.options.ttl,
|
||||
&schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
|
||||
None
|
||||
});
|
||||
Ok(CompactionRegion {
|
||||
region_id: req.region_id,
|
||||
region_options: req.region_options.clone(),
|
||||
@@ -193,7 +180,6 @@ pub async fn open_compaction_region(
|
||||
manifest_ctx,
|
||||
current_version,
|
||||
file_purger: Some(file_purger),
|
||||
ttl,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -273,6 +259,7 @@ impl Compactor for DefaultCompactor {
|
||||
|
||||
let write_opts = WriteOptions {
|
||||
write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
|
||||
compression_method: compaction_region.engine_config.compression_method,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use common_telemetry::{debug, info};
|
||||
use common_telemetry::{info, trace};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::timestamp_millis::BucketAligned;
|
||||
use common_time::Timestamp;
|
||||
@@ -114,7 +114,7 @@ impl TwcsPicker {
|
||||
// Files in window exceeds file num limit
|
||||
vec![enforce_file_num(&files.files, max_files)]
|
||||
} else {
|
||||
debug!("Skip building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, ", active_window, *window, max_runs, found_runs);
|
||||
trace!("Skip building compaction output, active window: {:?}, current window: {}, max runs: {}, found runs: {}, ", active_window, *window, max_runs, found_runs);
|
||||
continue;
|
||||
};
|
||||
|
||||
@@ -212,9 +212,8 @@ impl Picker for TwcsPicker {
|
||||
fn pick(&self, compaction_region: &CompactionRegion) -> Option<PickerOutput> {
|
||||
let region_id = compaction_region.region_id;
|
||||
let levels = compaction_region.current_version.ssts.levels();
|
||||
|
||||
let expired_ssts =
|
||||
get_expired_ssts(levels, compaction_region.ttl, Timestamp::current_millis());
|
||||
let ttl = compaction_region.current_version.options.ttl;
|
||||
let expired_ssts = get_expired_ssts(levels, ttl, Timestamp::current_millis());
|
||||
if !expired_ssts.is_empty() {
|
||||
info!("Expired SSTs in region {}: {:?}", region_id, expired_ssts);
|
||||
// here we mark expired SSTs as compacting to avoid them being picked.
|
||||
@@ -447,21 +446,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign_compacting_to_windows() {
|
||||
let files = [
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
];
|
||||
files[0].set_compacting(true);
|
||||
files[2].set_compacting(true);
|
||||
let windows = assign_to_windows(files.iter(), 3);
|
||||
assert_eq!(3, windows.get(&0).unwrap().files.len());
|
||||
}
|
||||
|
||||
/// (Window value, overlapping, files' time ranges in window)
|
||||
type ExpectedWindowSpec = (i64, bool, Vec<(i64, i64)>);
|
||||
|
||||
|
||||
@@ -352,28 +352,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign_compacting_files_to_windows() {
|
||||
let picker = WindowedCompactionPicker::new(Some(HOUR / 1000));
|
||||
let files = vec![
|
||||
(FileId::random(), 0, 2 * HOUR - 1, 0),
|
||||
(FileId::random(), HOUR, HOUR * 3 - 1, 0),
|
||||
];
|
||||
let version = build_version(&files, Some(Duration::from_millis(3 * HOUR as u64)));
|
||||
version.ssts.levels()[0]
|
||||
.files()
|
||||
.for_each(|f| f.set_compacting(true));
|
||||
let (outputs, expired_ssts, window_seconds) = picker.pick_inner(
|
||||
RegionId::new(0, 0),
|
||||
&version,
|
||||
Timestamp::new_millisecond(HOUR * 3),
|
||||
);
|
||||
|
||||
assert!(expired_ssts.is_empty());
|
||||
assert_eq!(HOUR / 1000, window_seconds);
|
||||
assert!(outputs.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_file_time_bucket_span() {
|
||||
assert_eq!(
|
||||
|
||||
@@ -128,6 +128,20 @@ pub struct MitoConfig {
    /// To align with the old behavior, the default value is 0 (no restrictions).
    #[serde(with = "humantime_serde")]
    pub min_compaction_interval: Duration,

    /// Skip wal
    pub skip_wal: bool,
    /// SST compression method.
    pub compression_method: CompressionMethod,
}

#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum CompressionMethod {
    #[default]
    Zstd,
    Lz4,
    None,
}

impl Default for MitoConfig {
@@ -150,7 +164,7 @@ impl Default for MitoConfig {
            selector_result_cache_size: ReadableSize::mb(512),
            enable_experimental_write_cache: false,
            experimental_write_cache_path: String::new(),
            experimental_write_cache_size: ReadableSize::gb(1),
            experimental_write_cache_size: ReadableSize::mb(512),
            experimental_write_cache_ttl: None,
            sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
            scan_parallelism: divide_num_cpus(4),
@@ -161,6 +175,8 @@ impl Default for MitoConfig {
            fulltext_index: FulltextIndexConfig::default(),
            memtable: MemtableConfig::default(),
            min_compaction_interval: Duration::from_secs(0),
            skip_wal: false,
            compression_method: CompressionMethod::Zstd,
        };

        // Adjust buffer and cache size according to system memory if we can.
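Because the CompressionMethod enum added here derives Serialize/Deserialize with rename_all = "snake_case" and marks Zstd as #[default], configuration accepts the lowercase spellings zstd, lz4, and none, and falls back to zstd when the field is unset. A minimal round-trip sketch of that behaviour (it assumes the serde and serde_json crates; the engine config itself is read from TOML, but the snake_case mapping works the same way):

use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
enum CompressionMethod {
    #[default]
    Zstd,
    Lz4,
    None,
}

fn main() {
    // Lowercase snake_case spellings come from the rename_all attribute.
    let m: CompressionMethod = serde_json::from_str("\"lz4\"").unwrap();
    assert_eq!(m, CompressionMethod::Lz4);
    assert_eq!(
        serde_json::to_string(&CompressionMethod::Zstd).unwrap(),
        "\"zstd\""
    );
    // Missing config values fall back to the declared default.
    assert_eq!(CompressionMethod::default(), CompressionMethod::Zstd);
}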
@@ -66,7 +66,6 @@ use api::region::RegionResponse;
|
||||
use async_trait::async_trait;
|
||||
use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::tracing;
|
||||
use common_wal::options::{WalOptions, WAL_OPTIONS_KEY};
|
||||
@@ -90,7 +89,7 @@ use crate::error::{
|
||||
};
|
||||
use crate::manifest::action::RegionEdit;
|
||||
use crate::metrics::HANDLE_REQUEST_ELAPSED;
|
||||
use crate::read::scan_region::{ScanParallelism, ScanRegion, Scanner};
|
||||
use crate::read::scan_region::{ScanParallism, ScanRegion, Scanner};
|
||||
use crate::request::{RegionEditRequest, WorkerRequest};
|
||||
use crate::wal::entry_distributor::{
|
||||
build_wal_entry_distributor_and_receivers, DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
|
||||
@@ -113,21 +112,13 @@ impl MitoEngine {
|
||||
mut config: MitoConfig,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<MitoEngine> {
|
||||
config.sanitize(data_home)?;
|
||||
|
||||
Ok(MitoEngine {
|
||||
inner: Arc::new(
|
||||
EngineInner::new(
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await?,
|
||||
EngineInner::new(config, log_store, object_store_manager, plugins).await?,
|
||||
),
|
||||
})
|
||||
}
|
||||
@@ -287,20 +278,13 @@ impl EngineInner {
|
||||
config: MitoConfig,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<EngineInner> {
|
||||
let config = Arc::new(config);
|
||||
let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));
|
||||
Ok(EngineInner {
|
||||
workers: WorkerGroup::start(
|
||||
config.clone(),
|
||||
log_store,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await?,
|
||||
workers: WorkerGroup::start(config.clone(), log_store, object_store_manager, plugins)
|
||||
.await?,
|
||||
config,
|
||||
wal_raw_entry_reader,
|
||||
})
|
||||
@@ -433,7 +417,7 @@ impl EngineInner {
|
||||
let version = region.version();
|
||||
// Get cache.
|
||||
let cache_manager = self.workers.cache_manager();
|
||||
let scan_parallelism = ScanParallelism {
|
||||
let scan_parallelism = ScanParallism {
|
||||
parallelism: self.config.scan_parallelism,
|
||||
channel_size: self.config.parallel_scan_channel_size,
|
||||
};
|
||||
@@ -599,7 +583,6 @@ impl RegionEngine for MitoEngine {
|
||||
|
||||
// Tests methods.
|
||||
#[cfg(any(test, feature = "test"))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
impl MitoEngine {
|
||||
/// Returns a new [MitoEngine] for tests.
|
||||
pub async fn new_for_test<S: LogStore>(
|
||||
@@ -610,7 +593,6 @@ impl MitoEngine {
|
||||
write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
|
||||
listener: Option<crate::engine::listener::EventListenerRef>,
|
||||
time_provider: crate::time_provider::TimeProviderRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<MitoEngine> {
|
||||
config.sanitize(data_home)?;
|
||||
|
||||
@@ -624,7 +606,6 @@ impl MitoEngine {
|
||||
object_store_manager,
|
||||
write_buffer_manager,
|
||||
listener,
|
||||
schema_metadata_manager,
|
||||
time_provider,
|
||||
)
|
||||
.await?,
|
||||
|
||||
@@ -78,16 +78,6 @@ async fn test_alter_region() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -177,19 +167,10 @@ fn build_rows_for_tags(
|
||||
async fn test_put_after_alter() {
|
||||
let mut env = TestEnv::new();
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut column_schemas = rows_schema(&request);
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -285,16 +266,6 @@ async fn test_alter_region_retry() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
@@ -349,16 +320,6 @@ async fn test_alter_on_flushing() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
|
||||
@@ -98,16 +98,6 @@ async fn test_append_mode_compaction() {
|
||||
.await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
|
||||
@@ -112,16 +112,6 @@ async fn test_compaction_region() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "1")
|
||||
@@ -181,18 +171,8 @@ async fn test_compaction_region_with_overlapping() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut env = TestEnv::new();
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
@@ -237,17 +217,6 @@ async fn test_compaction_region_with_overlapping_delete_all() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
@@ -312,16 +281,6 @@ async fn test_readonly_during_compaction() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "1")
|
||||
|
||||
@@ -16,7 +16,6 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::Rows;
|
||||
use common_meta::key::SchemaMetadataManager;
|
||||
use object_store::util::join_path;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{RegionDropRequest, RegionRequest};
|
||||
@@ -41,17 +40,6 @@ async fn test_engine_drop_region() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// It's okay to drop a region doesn't exist.
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
|
||||
@@ -99,12 +87,7 @@ async fn test_engine_drop_region() {
|
||||
#[tokio::test]
|
||||
async fn test_engine_drop_region_for_custom_store() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
async fn setup(
|
||||
engine: &MitoEngine,
|
||||
schema_metadata_manager: &SchemaMetadataManager,
|
||||
region_id: RegionId,
|
||||
storage_name: &str,
|
||||
) {
|
||||
async fn setup(engine: &MitoEngine, region_id: RegionId, storage_name: &str) {
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("storage", storage_name)
|
||||
.region_dir(storage_name)
|
||||
@@ -114,18 +97,6 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table_id = format!("test_table_{}", region_id.table_id());
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
&table_id,
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let rows = Rows {
|
||||
schema: column_schema.clone(),
|
||||
rows: build_rows_for_key("a", 0, 2, 0),
|
||||
@@ -143,19 +114,12 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
&["Gcs"],
|
||||
)
|
||||
.await;
|
||||
let schema_metadata_manager = env.get_schema_metadata_manager();
|
||||
let object_store_manager = env.get_object_store_manager().unwrap();
|
||||
|
||||
let global_region_id = RegionId::new(1, 1);
|
||||
setup(
|
||||
&engine,
|
||||
&schema_metadata_manager,
|
||||
global_region_id,
|
||||
"default",
|
||||
)
|
||||
.await;
|
||||
setup(&engine, global_region_id, "default").await;
|
||||
let custom_region_id = RegionId::new(2, 1);
|
||||
setup(&engine, &schema_metadata_manager, custom_region_id, "Gcs").await;
|
||||
setup(&engine, custom_region_id, "Gcs").await;
|
||||
|
||||
let global_region = engine.get_region(global_region_id).unwrap();
|
||||
let global_region_dir = global_region.access_layer.region_dir().to_string();
|
||||
|
||||
@@ -64,16 +64,6 @@ async fn test_edit_region_schedule_compaction() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
engine
|
||||
.handle_request(
|
||||
region_id,
|
||||
|
||||
@@ -32,16 +32,6 @@ async fn test_scan_without_filtering_deleted() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "10")
|
||||
|
||||
@@ -45,16 +45,6 @@ async fn test_manual_flush() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -102,16 +92,6 @@ async fn test_flush_engine() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -171,15 +151,6 @@ async fn test_write_stall() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -244,15 +215,6 @@ async fn test_flush_empty() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
engine
|
||||
@@ -287,17 +249,8 @@ async fn test_flush_reopen_region(factory: Option<LogStoreFactory>) {
|
||||
|
||||
let mut env = TestEnv::new().with_log_store_factory(factory.clone());
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let topic = prepare_test_for_kafka_log_store(&factory).await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.kafka_topic(topic.clone())
|
||||
@@ -407,17 +360,8 @@ async fn test_auto_flush_engine() {
|
||||
time_provider.clone(),
|
||||
)
|
||||
.await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -477,16 +421,6 @@ async fn test_flush_workers() {
|
||||
|
||||
let region_id0 = RegionId::new(1, 0);
|
||||
let region_id1 = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id0.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().region_dir("r0").build();
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
|
||||
@@ -98,16 +98,6 @@ async fn test_merge_mode_compaction() {
|
||||
.await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.field_num(2)
|
||||
.insert_option("compaction.type", "twcs")
|
||||
|
||||
@@ -245,16 +245,6 @@ async fn test_open_region_skip_wal_replay() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
|
||||
@@ -433,16 +423,6 @@ async fn test_open_compaction_region() {
|
||||
let engine = env.create_engine(mito_config.clone()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let schema_metadata_manager = env.get_schema_metadata_manager();
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -464,14 +444,10 @@ async fn test_open_compaction_region() {
|
||||
region_options: RegionOptions::default(),
|
||||
};
|
||||
|
||||
let compaction_region = open_compaction_region(
|
||||
&req,
|
||||
&mito_config,
|
||||
object_store_manager.clone(),
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
let compaction_region =
|
||||
open_compaction_region(&req, &mito_config, object_store_manager.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(region_id, compaction_region.region_id);
|
||||
}
|
||||
|
||||
@@ -76,16 +76,6 @@ async fn test_parallel_scan() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
|
||||
|
||||
@@ -151,17 +151,6 @@ async fn test_prune_memtable() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
|
||||
@@ -29,15 +29,6 @@ async fn test_last_row(append_mode: bool) {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("append_mode", &append_mode.to_string())
|
||||
.build();
|
||||
|
||||
@@ -151,17 +151,6 @@ async fn test_engine_truncate_after_flush() {
|
||||
|
||||
// Create the region.
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
|
||||
@@ -871,9 +871,11 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Failed to get schema metadata"))]
    GetSchemaMetadata {
        source: common_meta::error::Error,
    #[snafu(display("Timeout: {}", msg))]
    Timeout {
        msg: String,
        #[snafu(source)]
        error: tokio::time::error::Elapsed,
        #[snafu(implicit)]
        location: Location,
    },
@@ -1009,7 +1011,7 @@ impl ErrorExt for Error {
            | ApplyFulltextIndex { source, .. } => source.status_code(),
            DecodeStats { .. } | StatsNotPresent { .. } => StatusCode::Internal,
            RegionBusy { .. } => StatusCode::RegionBusy,
            GetSchemaMetadata { source, .. } => source.status_code(),
            Timeout { .. } => StatusCode::Cancelled,
        }
    }
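The Timeout variant in this hunk wraps tokio::time::error::Elapsed and maps to StatusCode::Cancelled. A minimal sketch of how such a snafu variant is typically attached to a timed-out future, using a simplified standalone error type rather than the engine's real Error enum (assumes the snafu crate and tokio with the macros and time features):

use snafu::{ResultExt, Snafu};
use std::time::Duration;

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Timeout: {}", msg))]
    Timeout {
        msg: String,
        #[snafu(source)]
        error: tokio::time::error::Elapsed,
    },
}

#[tokio::main]
async fn main() {
    // A future that outlives the 10ms budget, so timeout() yields Err(Elapsed),
    // which .context(...) converts into the Timeout variant.
    let slow = tokio::time::sleep(Duration::from_millis(50));
    let res: Result<(), Error> = tokio::time::timeout(Duration::from_millis(10), slow)
        .await
        .context(TimeoutSnafu { msg: "build file ranges" });
    assert!(res.is_err());
}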
|
||||
@@ -321,6 +321,7 @@ impl RegionFlushTask {
|
||||
|
||||
let mut write_opts = WriteOptions {
|
||||
write_buffer_size: self.engine_config.sst_write_buffer_size,
|
||||
compression_method: self.engine_config.compression_method,
|
||||
..Default::default()
|
||||
};
|
||||
if let Some(row_group_size) = self.row_group_size {
|
||||
|
||||
@@ -26,6 +26,8 @@ pub const FLUSH_REASON: &str = "reason";
pub const FILE_TYPE_LABEL: &str = "file_type";
/// Region worker id label.
pub const WORKER_LABEL: &str = "worker";
/// Partition label.
pub const PARTITION_LABEL: &str = "partition";

lazy_static! {
    /// Global write buffer size in bytes.
@@ -134,6 +136,14 @@ lazy_static! {
    )
    .unwrap();
    pub static ref READ_STAGE_FETCH_PAGES: Histogram = READ_STAGE_ELAPSED.with_label_values(&["fetch_pages"]);
    pub static ref READ_STAGE_BUILD_PAGE_READER: Histogram = READ_STAGE_ELAPSED.with_label_values(&["build_page_reader"]);
    /// In progress scan for each partition.
    pub static ref SCAN_PARTITION: IntGaugeVec = register_int_gauge_vec!(
        "greptime_mito_scan_partition",
        "mito partitions scanning",
        &[TYPE_LABEL, PARTITION_LABEL]
    )
    .unwrap();
    /// Counter of rows read from different source.
    pub static ref READ_ROWS_TOTAL: IntCounterVec =
        register_int_counter_vec!("greptime_mito_read_rows_total", "mito read rows total", &[TYPE_LABEL]).unwrap();
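The SCAN_PARTITION gauge added here is driven by the scan code through a per-partition guard: the gauge is incremented when a partition scan starts and decremented when the partition's metrics struct is dropped. A small self-contained sketch of that inc-on-create, dec-on-drop pattern (assumes the prometheus and lazy_static crates; the metric and type names below are examples, not the engine's real ones):

use lazy_static::lazy_static;
use prometheus::{register_int_gauge_vec, IntGauge, IntGaugeVec};

lazy_static! {
    // Same shape as the SCAN_PARTITION metric above: one gauge per
    // (scanner type, partition) label pair.
    static ref SCAN_PARTITION: IntGaugeVec = register_int_gauge_vec!(
        "example_scan_partition",
        "partitions currently scanning",
        &["type", "partition"]
    )
    .unwrap();
}

/// RAII guard mirroring the per-partition metrics struct: inc() when a
/// partition scan starts, dec() when the guard is dropped.
struct ScanGuard {
    gauge: IntGauge,
}

impl ScanGuard {
    fn new(scanner_type: &str, partition: usize) -> Self {
        let gauge = SCAN_PARTITION.with_label_values(&[scanner_type, &partition.to_string()]);
        gauge.inc();
        Self { gauge }
    }
}

impl Drop for ScanGuard {
    fn drop(&mut self) {
        self.gauge.dec();
    }
}

fn main() {
    {
        let _guard = ScanGuard::new("unordered_scan", "0".parse::<usize>().unwrap());
        assert_eq!(SCAN_PARTITION.with_label_values(&["unordered_scan", "0"]).get(), 1);
    }
    // Guard dropped: the gauge goes back to zero.
    assert_eq!(SCAN_PARTITION.with_label_values(&["unordered_scan", "0"]).get(), 0);
}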
@@ -493,7 +493,7 @@ impl Batch {
|
||||
}
|
||||
|
||||
/// Checks the batch is monotonic by timestamps.
|
||||
#[cfg(debug_assertions)]
|
||||
// #[cfg(debug_assertions)]
|
||||
pub(crate) fn check_monotonic(&self) -> Result<(), String> {
|
||||
use std::cmp::Ordering;
|
||||
if self.timestamps_native().is_none() {
|
||||
@@ -501,12 +501,12 @@ impl Batch {
|
||||
}
|
||||
|
||||
let timestamps = self.timestamps_native().unwrap();
|
||||
let sequences = self.sequences.as_arrow().values();
|
||||
// let sequences = self.sequences.as_arrow().values();
|
||||
for (i, window) in timestamps.windows(2).enumerate() {
|
||||
let current = window[0];
|
||||
let next = window[1];
|
||||
let current_sequence = sequences[i];
|
||||
let next_sequence = sequences[i + 1];
|
||||
// let current_sequence = sequences[i];
|
||||
// let next_sequence = sequences[i + 1];
|
||||
match current.cmp(&next) {
|
||||
Ordering::Less => {
|
||||
// The current timestamp is less than the next timestamp.
|
||||
@@ -514,12 +514,12 @@ impl Batch {
|
||||
}
|
||||
Ordering::Equal => {
|
||||
// The current timestamp is equal to the next timestamp.
|
||||
if current_sequence < next_sequence {
|
||||
return Err(format!(
|
||||
"sequence are not monotonic: ts {} == {} but current sequence {} < {}, index: {}",
|
||||
current, next, current_sequence, next_sequence, i
|
||||
));
|
||||
}
|
||||
// if current_sequence < next_sequence {
|
||||
// return Err(format!(
|
||||
// "sequence are not monotonic: ts {} == {} but current sequence {} < {}, index: {}",
|
||||
// current, next, current_sequence, next_sequence, i
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
Ordering::Greater => {
|
||||
// The current timestamp is greater than the next timestamp.
|
||||
@@ -535,7 +535,7 @@ impl Batch {
|
||||
}
|
||||
|
||||
/// Returns Ok if the given batch is behind the current batch.
|
||||
#[cfg(debug_assertions)]
|
||||
// #[cfg(debug_assertions)]
|
||||
pub(crate) fn check_next_batch(&self, other: &Batch) -> Result<(), String> {
|
||||
// Checks the primary key
|
||||
if self.primary_key() < other.primary_key() {
|
||||
@@ -560,19 +560,20 @@ impl Batch {
|
||||
));
|
||||
}
|
||||
// Checks the sequence.
|
||||
if self.last_sequence() >= other.first_sequence() {
|
||||
return Ok(());
|
||||
}
|
||||
Err(format!(
|
||||
"sequences are not monotonic: {:?} < {:?}",
|
||||
self.last_sequence(),
|
||||
other.first_sequence()
|
||||
))
|
||||
Ok(())
|
||||
// if self.last_sequence() >= other.first_sequence() {
|
||||
// return Ok(());
|
||||
// }
|
||||
// Err(format!(
|
||||
// "sequences are not monotonic: {:?} < {:?}",
|
||||
// self.last_sequence(),
|
||||
// other.first_sequence()
|
||||
// ))
|
||||
}
|
||||
}
|
||||
|
||||
/// A struct to check the batch is monotonic.
|
||||
#[cfg(debug_assertions)]
|
||||
// #[cfg(debug_assertions)]
|
||||
#[derive(Default)]
|
||||
pub(crate) struct BatchChecker {
|
||||
last_batch: Option<Batch>,
|
||||
@@ -580,7 +581,7 @@ pub(crate) struct BatchChecker {
|
||||
end: Option<Timestamp>,
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
// #[cfg(debug_assertions)]
|
||||
impl BatchChecker {
|
||||
/// Attaches the given start timestamp to the checker.
|
||||
pub(crate) fn with_start(mut self, start: Option<Timestamp>) -> Self {
|
||||
|
||||
@@ -169,7 +169,7 @@ pub(crate) struct ScanRegion {
|
||||
/// Cache.
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Parallelism to scan.
|
||||
parallelism: ScanParallelism,
|
||||
parallelism: ScanParallism,
|
||||
/// Whether to ignore inverted index.
|
||||
ignore_inverted_index: bool,
|
||||
/// Whether to ignore fulltext index.
|
||||
@@ -191,7 +191,7 @@ impl ScanRegion {
|
||||
access_layer,
|
||||
request,
|
||||
cache_manager,
|
||||
parallelism: ScanParallelism::default(),
|
||||
parallelism: ScanParallism::default(),
|
||||
ignore_inverted_index: false,
|
||||
ignore_fulltext_index: false,
|
||||
start_time: None,
|
||||
@@ -200,7 +200,7 @@ impl ScanRegion {
|
||||
|
||||
/// Sets parallelism.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallelism) -> Self {
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallism) -> Self {
|
||||
self.parallelism = parallelism;
|
||||
self
|
||||
}
|
||||
@@ -447,7 +447,7 @@ impl ScanRegion {
|
||||
|
||||
/// Config for parallel scan.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct ScanParallelism {
|
||||
pub(crate) struct ScanParallism {
|
||||
/// Number of tasks expect to spawn to read data.
|
||||
pub(crate) parallelism: usize,
|
||||
/// Channel size to send batches. Only takes effect when the parallelism > 1.
|
||||
@@ -484,7 +484,7 @@ pub(crate) struct ScanInput {
|
||||
/// Ignores file not found error.
|
||||
ignore_file_not_found: bool,
|
||||
/// Parallelism to scan data.
|
||||
pub(crate) parallelism: ScanParallelism,
|
||||
pub(crate) parallelism: ScanParallism,
|
||||
/// Index appliers.
|
||||
inverted_index_applier: Option<InvertedIndexApplierRef>,
|
||||
fulltext_index_applier: Option<FulltextIndexApplierRef>,
|
||||
@@ -513,7 +513,7 @@ impl ScanInput {
|
||||
files: Vec::new(),
|
||||
cache_manager: None,
|
||||
ignore_file_not_found: false,
|
||||
parallelism: ScanParallelism::default(),
|
||||
parallelism: ScanParallism::default(),
|
||||
inverted_index_applier: None,
|
||||
fulltext_index_applier: None,
|
||||
query_start: None,
|
||||
@@ -568,7 +568,7 @@ impl ScanInput {
|
||||
|
||||
/// Sets scan parallelism.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallelism) -> Self {
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallism) -> Self {
|
||||
self.parallelism = parallelism;
|
||||
self
|
||||
}
|
||||
@@ -662,10 +662,17 @@ impl ScanInput {
|
||||
/// Prunes a file to scan and returns the builder to build readers.
|
||||
async fn prune_file(
|
||||
&self,
|
||||
row_group_index: RowGroupIndex,
|
||||
file_index: usize,
|
||||
reader_metrics: &mut ReaderMetrics,
|
||||
) -> Result<FileRangeBuilder> {
|
||||
let file = &self.files[file_index];
|
||||
common_telemetry::info!(
|
||||
"ScanInput prune file start, region_id: {}, file: {}, row_group_index: {:?}",
|
||||
file.region_id(),
|
||||
file.file_id(),
|
||||
row_group_index,
|
||||
);
|
||||
let res = self
|
||||
.access_layer
|
||||
.read_sst(file.clone())
|
||||
@@ -701,6 +708,13 @@ impl ScanInput {
|
||||
)?;
|
||||
file_range_ctx.set_compat_batch(Some(compat));
|
||||
}
|
||||
common_telemetry::info!(
|
||||
"ScanInput prune file end, region_id: {}, file: {}, row_groups_num: {}, row_group_index: {:?}",
|
||||
file.region_id(),
|
||||
file.file_id(),
|
||||
row_groups.len(),
|
||||
row_group_index,
|
||||
);
|
||||
Ok(FileRangeBuilder {
|
||||
context: Some(Arc::new(file_range_ctx)),
|
||||
row_groups,
|
||||
@@ -821,11 +835,12 @@ impl StreamContext {
|
||||
pub(crate) async fn build_file_ranges(
|
||||
&self,
|
||||
index: RowGroupIndex,
|
||||
read_type: &'static str,
|
||||
reader_metrics: &mut ReaderMetrics,
|
||||
) -> Result<SmallVec<[FileRange; 2]>> {
|
||||
let mut ranges = SmallVec::new();
|
||||
self.range_builders
|
||||
.build_file_ranges(&self.input, index, &mut ranges, reader_metrics)
|
||||
.build_file_ranges(&self.input, index, read_type, &mut ranges, reader_metrics)
|
||||
.await?;
|
||||
Ok(ranges)
|
||||
}
|
||||
@@ -896,19 +911,52 @@ impl RangeBuilderList {
|
||||
&self,
|
||||
input: &ScanInput,
|
||||
index: RowGroupIndex,
|
||||
read_type: &'static str,
|
||||
ranges: &mut SmallVec<[FileRange; 2]>,
|
||||
reader_metrics: &mut ReaderMetrics,
|
||||
) -> Result<()> {
|
||||
let file_index = index.index - self.mem_builders.len();
|
||||
if read_type == "unordered_scan_files" {
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] RangeBuilderList build ranges start, region_id: {}, row_group_index: {:?}",
|
||||
input.mapper.metadata().region_id,
|
||||
index,
|
||||
);
|
||||
}
|
||||
let mut builder_opt = self.file_builders[file_index].lock().await;
|
||||
match &mut *builder_opt {
|
||||
Some(builder) => builder.build_ranges(index.row_group_index, ranges),
|
||||
Some(builder) => {
|
||||
if read_type == "unordered_scan_files" {
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] RangeBuilderList build ranges get lock, build ranges, region_id: {}, row_group_index: {:?}",
|
||||
input.mapper.metadata().region_id,
|
||||
index,
|
||||
);
|
||||
}
|
||||
builder.build_ranges(index.row_group_index, ranges)
|
||||
}
|
||||
None => {
|
||||
let builder = input.prune_file(file_index, reader_metrics).await?;
|
||||
if read_type == "unordered_scan_files" {
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] RangeBuilderList build ranges get lock, build builder, region_id: {}, row_group_index: {:?}",
|
||||
input.mapper.metadata().region_id,
|
||||
index,
|
||||
);
|
||||
}
|
||||
let builder = input.prune_file(index, file_index, reader_metrics).await?;
|
||||
builder.build_ranges(index.row_group_index, ranges);
|
||||
*builder_opt = Some(builder);
|
||||
}
|
||||
}
|
||||
|
||||
if read_type == "unordered_scan_files" {
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] RangeBuilderList build ranges end, region_id: {}, row_group_index: {:?}, ranges: {}",
|
||||
input.mapper.metadata().region_id,
|
||||
index,
|
||||
ranges.len(),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -20,15 +20,21 @@ use std::time::{Duration, Instant};
use async_stream::try_stream;
use common_telemetry::debug;
use futures::Stream;
use prometheus::IntGauge;
use snafu::ResultExt;
use store_api::storage::RegionId;
use tokio::task::yield_now;

use crate::error::Result;
use crate::error::{Result, TimeoutSnafu};
use crate::metrics::SCAN_PARTITION;
use crate::read::range::RowGroupIndex;
use crate::read::scan_region::StreamContext;
use crate::read::{Batch, ScannerMetrics, Source};
use crate::sst::file::FileTimeRange;
use crate::sst::parquet::reader::ReaderMetrics;

const BUILD_RANGES_TIMEOUT: Duration = Duration::from_secs(60 * 5);

struct PartitionMetricsInner {
    region_id: RegionId,
    /// Index of the partition to scan.
@@ -41,6 +47,7 @@ struct PartitionMetricsInner {
    first_poll: Duration,
    metrics: ScannerMetrics,
    reader_metrics: ReaderMetrics,
    scan_partition_gauge: IntGauge,
}
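The scan functions in this file's later hunks (scan_mem_ranges and scan_file_ranges) are written with async_stream's try_stream!, which turns yield plus the ? operator into a Stream of Result items that ends on the first error. A minimal, self-contained sketch of that pattern (assumes the async-stream, futures, and tokio crates; the function names are made up for illustration):

use async_stream::try_stream;
use futures::{pin_mut, Stream, StreamExt};

// Returns Err for i == 3 so the stream below terminates with an Err item.
fn fail_at_three(i: u32) -> Result<(), String> {
    if i == 3 {
        Err(format!("failed at {i}"))
    } else {
        Ok(())
    }
}

// Emits Ok(0), Ok(1), Ok(2) and then an Err item, the same shape the scan
// functions use to surface errors in the middle of a stream of batches.
fn numbers(limit: u32) -> impl Stream<Item = Result<u32, String>> {
    try_stream! {
        for i in 0..limit {
            fail_at_three(i)?;
            yield i;
        }
    }
}

#[tokio::main]
async fn main() {
    let stream = numbers(5);
    pin_mut!(stream);
    while let Some(item) = stream.next().await {
        println!("{item:?}");
    }
}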
||||
|
||||
impl PartitionMetricsInner {
|
||||
@@ -56,6 +63,7 @@ impl Drop for PartitionMetricsInner {
|
||||
fn drop(&mut self) {
|
||||
self.on_finish();
|
||||
self.metrics.observe_metrics();
|
||||
self.scan_partition_gauge.dec();
|
||||
|
||||
debug!(
|
||||
"{} finished, region_id: {}, partition: {}, first_poll: {:?}, metrics: {:?}, reader_metrics: {:?}",
|
||||
@@ -76,6 +84,10 @@ impl PartitionMetrics {
|
||||
query_start: Instant,
|
||||
metrics: ScannerMetrics,
|
||||
) -> Self {
|
||||
let partition_str = partition.to_string();
|
||||
let scan_partition_gauge =
|
||||
SCAN_PARTITION.with_label_values(&[scanner_type, &partition_str]);
|
||||
scan_partition_gauge.inc();
|
||||
let inner = PartitionMetricsInner {
|
||||
region_id,
|
||||
partition,
|
||||
@@ -84,10 +96,15 @@ impl PartitionMetrics {
|
||||
first_poll: Duration::default(),
|
||||
metrics,
|
||||
reader_metrics: ReaderMetrics::default(),
|
||||
scan_partition_gauge,
|
||||
};
|
||||
Self(Arc::new(Mutex::new(inner)))
|
||||
}
|
||||
|
||||
pub(crate) fn partition(&self) -> usize {
|
||||
self.0.lock().unwrap().partition
|
||||
}
|
||||
|
||||
pub(crate) fn on_first_poll(&self) {
|
||||
        let mut inner = self.0.lock().unwrap();
        inner.first_poll = inner.query_start.elapsed();

@@ -126,6 +143,7 @@ impl PartitionMetrics {

/// Scans memtable ranges at `index`.
pub(crate) fn scan_mem_ranges(
    partition: usize,
    stream_ctx: Arc<StreamContext>,
    part_metrics: PartitionMetrics,
    index: RowGroupIndex,
@@ -137,7 +155,17 @@ pub(crate) fn scan_mem_ranges(
        for range in ranges {
            let build_reader_start = Instant::now();
            let iter = range.build_iter(time_range)?;
            part_metrics.inc_build_reader_cost(build_reader_start.elapsed());
            let build_cost = build_reader_start.elapsed();
            part_metrics.inc_build_reader_cost(build_cost);
            common_telemetry::debug!(
                "Thread: {:?}, Scan mem range, region_id: {}, partition: {}, time_range: {:?}, index: {:?}, build_cost: {:?}",
                std::thread::current().id(),
                stream_ctx.input.mapper.metadata().region_id,
                partition,
                time_range,
                index,
                build_cost
            );

            let mut source = Source::Iter(iter);
            while let Some(batch) = source.next_batch().await? {
@@ -149,6 +177,7 @@ pub(crate) fn scan_mem_ranges(

/// Scans file ranges at `index`.
pub(crate) fn scan_file_ranges(
    partition: usize,
    stream_ctx: Arc<StreamContext>,
    part_metrics: PartitionMetrics,
    index: RowGroupIndex,
@@ -156,14 +185,70 @@ pub(crate) fn scan_file_ranges(
) -> impl Stream<Item = Result<Batch>> {
    try_stream! {
        let mut reader_metrics = ReaderMetrics::default();
        let ranges = stream_ctx
            .build_file_ranges(index, &mut reader_metrics)
            .await?;
        if read_type == "unordered_scan_files" {
            common_telemetry::debug!(
                "[DEBUG_SCAN] Thread: {:?}, Scan file ranges build ranges start, region_id: {}, partition: {}, index: {:?}",
                std::thread::current().id(),
                stream_ctx.input.mapper.metadata().region_id,
                partition,
                index,
            );
        }
        let ranges = tokio::time::timeout(
            BUILD_RANGES_TIMEOUT,
            stream_ctx.build_file_ranges(index, read_type, &mut reader_metrics),
        )
        .await
        .with_context(|_| TimeoutSnafu {
            msg: format!(
                "build file ranges for {}, partition: {}",
                stream_ctx.input.mapper.metadata().region_id,
                partition,
            ),
        })
        .inspect_err(|e| {
            common_telemetry::error!(
                e; "Thread: {:?}, Scan file ranges build ranges timeout, region_id: {}, partition: {}, index: {:?}",
                std::thread::current().id(),
                stream_ctx.input.mapper.metadata().region_id,
                partition,
                index,
            );
        })??;
        // let ranges = stream_ctx
        //     .build_file_ranges(index, read_type, &mut reader_metrics)
        //     .await?;
        part_metrics.inc_num_file_ranges(ranges.len());

        // Notify other partitions.
        yield_now().await;

        if read_type == "unordered_scan_files" {
            common_telemetry::debug!(
                "[DEBUG_SCAN] Thread: {:?}, Scan file ranges build ranges end, region_id: {}, partition: {}, index: {:?}, ranges: {}",
                std::thread::current().id(),
                stream_ctx.input.mapper.metadata().region_id,
                partition,
                index,
                ranges.len(),
            );
        }
        for range in ranges {
            let build_reader_start = Instant::now();
            let reader = range.reader(None).await?;
            part_metrics.inc_build_reader_cost(build_reader_start.elapsed());
            let build_cost = build_reader_start.elapsed();
            part_metrics.inc_build_reader_cost(build_cost);
            if read_type == "unordered_scan_files" {
                common_telemetry::debug!(
                    "[DEBUG_SCAN] Thread: {:?}, Scan file range, region_id: {}, partition: {}, file_id: {}, index: {:?}, build_cost: {:?}",
                    std::thread::current().id(),
                    stream_ctx.input.mapper.metadata().region_id,
                    partition,
                    range.file_handle().file_id(),
                    index,
                    build_cost
                );
            }
            let compat_batch = range.compat_batch();
            let mut source = Source::PruneReader(reader);
            while let Some(mut batch) = source.next_batch().await? {
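The hunk above replaces a plain `.await` on `build_file_ranges` with a timeout guard plus error context. A minimal standalone sketch of that pattern follows, using `tokio::time::timeout` with snafu; the error type, the timeout value, and the message are illustrative, not the crate's real `TimeoutSnafu` or `BUILD_RANGES_TIMEOUT`.

// --- illustrative sketch, not part of the diff ---
use std::time::Duration;

use snafu::{ResultExt, Snafu};
use tokio::time::error::Elapsed;

// Assumed value; the real constant lives in the mito2 crate.
const BUILD_RANGES_TIMEOUT: Duration = Duration::from_secs(10);

#[derive(Debug, Snafu)]
#[snafu(display("operation timed out: {}", msg))]
struct TimeoutError {
    msg: String,
    source: Elapsed,
}

// Wraps a slow async build step with a timeout and attaches a message to the error.
async fn build_with_timeout() -> Result<Vec<u32>, TimeoutError> {
    let ranges = tokio::time::timeout(BUILD_RANGES_TIMEOUT, async {
        // Stand-in for `stream_ctx.build_file_ranges(...)`.
        vec![1, 2, 3]
    })
    .await
    .context(TimeoutSnafu {
        msg: format!("build file ranges for region {}, partition {}", 42, 0),
    })?;
    Ok(ranges)
}

#[tokio::main]
async fn main() {
    println!("{:?}", build_with_timeout().await);
}
// --- end sketch ---

In the real code the timed-out future itself returns a `Result`, hence the double `??` after the context call.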
@@ -361,6 +361,7 @@ fn build_sources(
    for index in &range_meta.row_group_indices {
        let stream = if stream_ctx.is_mem_range_index(*index) {
            let stream = scan_mem_ranges(
                part_metrics.partition(),
                stream_ctx.clone(),
                part_metrics.clone(),
                *index,
@@ -373,8 +374,13 @@ fn build_sources(
            } else {
                "seq_scan_files"
            };
            let stream =
                scan_file_ranges(stream_ctx.clone(), part_metrics.clone(), *index, read_type);
            let stream = scan_file_ranges(
                part_metrics.partition(),
                stream_ctx.clone(),
                part_metrics.clone(),
                *index,
                read_type,
            );
            Box::pin(stream) as _
        };
        sources.push(Source::Stream(stream));
@@ -81,6 +81,7 @@ impl UnorderedScan {
|
||||
|
||||
/// Scans a [PartitionRange] by its `identifier` and returns a stream.
|
||||
fn scan_partition_range(
|
||||
partition: usize,
|
||||
stream_ctx: Arc<StreamContext>,
|
||||
part_range_id: usize,
|
||||
part_metrics: PartitionMetrics,
|
||||
@@ -90,12 +91,12 @@ impl UnorderedScan {
|
||||
let range_meta = &stream_ctx.ranges[part_range_id];
|
||||
for index in &range_meta.row_group_indices {
|
||||
if stream_ctx.is_mem_range_index(*index) {
|
||||
let stream = scan_mem_ranges(stream_ctx.clone(), part_metrics.clone(), *index, range_meta.time_range);
|
||||
let stream = scan_mem_ranges(partition, stream_ctx.clone(), part_metrics.clone(), *index, range_meta.time_range);
|
||||
for await batch in stream {
|
||||
yield batch;
|
||||
}
|
||||
} else {
|
||||
let stream = scan_file_ranges(stream_ctx.clone(), part_metrics.clone(), *index, "unordered_scan_files");
|
||||
let stream = scan_file_ranges(partition, stream_ctx.clone(), part_metrics.clone(), *index, "unordered_scan_files");
|
||||
for await batch in stream {
|
||||
yield batch;
|
||||
}
|
||||
@@ -132,24 +133,45 @@ impl UnorderedScan {
|
||||
let part_ranges = self.properties.partitions[partition].clone();
|
||||
let distinguish_range = self.properties.distinguish_partition_range();
|
||||
|
||||
common_telemetry::info!(
|
||||
"[DEBUG_SCAN] Thread: {:?}, Unordered scan start, region_id: {}, partition: {}, num_ranges: {}, part_ranges: {:?}",
|
||||
std::thread::current().id(),
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
part_ranges.len(),
|
||||
part_ranges,
|
||||
);
|
||||
|
||||
let stream = try_stream! {
|
||||
part_metrics.on_first_poll();
|
||||
|
||||
let cache = stream_ctx.input.cache_manager.as_deref();
|
||||
let ranges_len = part_ranges.len();
|
||||
// Scans each part.
|
||||
for part_range in part_ranges {
|
||||
let mut metrics = ScannerMetrics::default();
|
||||
let mut fetch_start = Instant::now();
|
||||
#[cfg(debug_assertions)]
|
||||
for (part_idx, part_range) in part_ranges.into_iter().enumerate() {
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] Thread: {:?}, Unordered scan range start {}/{}, region_id: {}, partition: {}, part_range: {:?}, range_meta: {:?}",
|
||||
std::thread::current().id(),
|
||||
part_idx,
|
||||
ranges_len,
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
part_range,
|
||||
stream_ctx.ranges[part_range.identifier]
|
||||
);
|
||||
// #[cfg(debug_assertions)]
|
||||
let mut checker = crate::read::BatchChecker::default()
|
||||
.with_start(Some(part_range.start))
|
||||
.with_end(Some(part_range.end));
|
||||
|
||||
let stream = Self::scan_partition_range(
|
||||
partition,
|
||||
stream_ctx.clone(),
|
||||
part_range.identifier,
|
||||
part_metrics.clone(),
|
||||
);
|
||||
let mut metrics = ScannerMetrics::default();
|
||||
let mut fetch_start = Instant::now();
|
||||
for await batch in stream {
|
||||
let batch = batch.map_err(BoxedError::new).context(ExternalSnafu)?;
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
@@ -161,7 +183,7 @@ impl UnorderedScan {
|
||||
continue;
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
// #[cfg(debug_assertions)]
|
||||
checker.ensure_part_range_batch(
|
||||
"UnorderedScan",
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
@@ -188,7 +210,20 @@ impl UnorderedScan {
|
||||
metrics.yield_cost += yield_start.elapsed();
|
||||
}
|
||||
|
||||
metrics.scan_cost += fetch_start.elapsed();
|
||||
let scan_cost = fetch_start.elapsed();
|
||||
metrics.scan_cost += scan_cost;
|
||||
common_telemetry::debug!(
|
||||
"[DEBUG_SCAN] Thread: {:?}, Unordered scan range end {}/{}, region_id: {}, partition: {}, part_range: {:?}, scan_cost: {:?}, yieid_cost: {:?}, num_rows: {}",
|
||||
std::thread::current().id(),
|
||||
part_idx,
|
||||
ranges_len,
|
||||
stream_ctx.input.mapper.metadata().region_id,
|
||||
partition,
|
||||
part_range,
|
||||
metrics.scan_cost,
|
||||
metrics.yield_cost,
|
||||
metrics.num_rows,
|
||||
);
|
||||
part_metrics.merge_metrics(&metrics);
|
||||
}
|
||||
|
||||
|
||||
@@ -106,6 +106,10 @@ impl InvertedIndexApplier {
            if let Err(err) = other {
                warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
            }
            common_telemetry::debug!(
                "Inverted applier get from remote blob reader, file_id: {}",
                file_id,
            );
            self.remote_blob_reader(file_id).await?
        }
    };
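The applier above prefers the cached index blob and falls back to the remote one, warning only on unexpected cache errors. A generic sketch of that cache-or-fallback shape is below; the names and error variants are illustrative, not the real `InvertedIndexApplier` API.

// --- illustrative sketch, not part of the diff ---
#[derive(Debug)]
enum BlobError {
    NotFound,
    Io(String),
}

fn read_cached_blob(file_id: u64) -> Result<Vec<u8>, BlobError> {
    // Pretend the cache misses for this file.
    let _ = file_id;
    Err(BlobError::NotFound)
}

fn read_remote_blob(_file_id: u64) -> Result<Vec<u8>, BlobError> {
    Ok(vec![0u8; 8])
}

/// Prefer the local cache; on any unexpected error, warn and fall back to remote.
fn read_blob(file_id: u64) -> Result<Vec<u8>, BlobError> {
    match read_cached_blob(file_id) {
        Ok(blob) => Ok(blob),
        Err(BlobError::NotFound) => read_remote_blob(file_id),
        Err(other) => {
            eprintln!("warn: unexpected cache error {other:?}, falling back to remote");
            read_remote_blob(file_id)
        }
    }
}

fn main() {
    println!("{} bytes", read_blob(7).unwrap().len());
}
// --- end sketch ---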
@@ -19,6 +19,7 @@ use std::sync::Arc;
use common_base::readable_size::ReadableSize;
use parquet::file::metadata::ParquetMetaData;

use crate::config::CompressionMethod;
use crate::sst::file::FileTimeRange;
use crate::sst::index::IndexOutput;
use crate::sst::DEFAULT_WRITE_BUFFER_SIZE;
@@ -49,6 +50,8 @@ pub struct WriteOptions {
    pub write_buffer_size: ReadableSize,
    /// Row group size.
    pub row_group_size: usize,
    /// Compression method.
    pub compression_method: CompressionMethod,
}

impl Default for WriteOptions {
@@ -56,6 +59,7 @@ impl Default for WriteOptions {
        WriteOptions {
            write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
            row_group_size: DEFAULT_ROW_GROUP_SIZE,
            compression_method: CompressionMethod::default(),
        }
    }
}
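The new `compression_method` field refers to `CompressionMethod` from `crate::config`, whose definition is not shown in this diff. A plausible minimal shape, inferred only from the match arms in the writer hunk further down (Zstd, Lz4, None) and assuming serde derive is available, could look like this.

// --- illustrative sketch, not part of the diff ---
use serde::{Deserialize, Serialize};

/// SST compression method selectable from configuration.
/// Sketch only; the variant set is inferred from the writer hunk below.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompressionMethod {
    /// Zstandard, matching the previously hard-coded default.
    #[default]
    Zstd,
    /// LZ4 raw encoding.
    Lz4,
    /// No compression.
    None,
}

fn main() {
    assert_eq!(CompressionMethod::default(), CompressionMethod::Zstd);
    println!("default compression: {:?}", CompressionMethod::default());
}
// --- end sketch ---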
@@ -193,6 +193,12 @@ impl ParquetReaderBuilder {
|
||||
let file_size = self.file_handle.meta_ref().file_size;
|
||||
// Loads parquet metadata of the file.
|
||||
let parquet_meta = self.read_parquet_metadata(&file_path, file_size).await?;
|
||||
common_telemetry::debug!(
|
||||
"Parquet read metadata done, region_id: {}, file_id: {}, elapsed: {:?}",
|
||||
self.file_handle.region_id(),
|
||||
self.file_handle.file_id(),
|
||||
start.elapsed(),
|
||||
);
|
||||
// Decodes region metadata.
|
||||
let key_value_meta = parquet_meta.file_metadata().key_value_metadata();
|
||||
// Gets the metadata stored in the SST.
|
||||
@@ -476,6 +482,12 @@ impl ParquetReaderBuilder {
|
||||
return false;
|
||||
}
|
||||
|
||||
common_telemetry::debug!(
|
||||
"Parquet prune by inverted index start, region_id: {}, file_id: {}",
|
||||
self.file_handle.region_id(),
|
||||
self.file_handle.file_id(),
|
||||
);
|
||||
|
||||
let apply_output = match index_applier.apply(self.file_handle.file_id()).await {
|
||||
Ok(output) => output,
|
||||
Err(err) => {
|
||||
@@ -497,6 +509,12 @@ impl ParquetReaderBuilder {
|
||||
}
|
||||
};
|
||||
|
||||
common_telemetry::debug!(
|
||||
"Parquet prune by inverted index stop, region_id: {}, file_id: {}",
|
||||
self.file_handle.region_id(),
|
||||
self.file_handle.file_id(),
|
||||
);
|
||||
|
||||
let segment_row_count = apply_output.segment_row_count;
|
||||
let grouped_in_row_groups = apply_output
|
||||
.matched_segment_ids
|
||||
|
||||
@@ -33,7 +33,7 @@ use tokio::task::yield_now;

use crate::cache::file_cache::{FileType, IndexKey};
use crate::cache::{CacheManagerRef, PageKey, PageValue};
use crate::metrics::{READ_STAGE_ELAPSED, READ_STAGE_FETCH_PAGES};
use crate::metrics::{READ_STAGE_BUILD_PAGE_READER, READ_STAGE_ELAPSED, READ_STAGE_FETCH_PAGES};
use crate::sst::file::FileId;
use crate::sst::parquet::helper::fetch_byte_ranges;
use crate::sst::parquet::page_reader::RowGroupCachedReader;
@@ -308,6 +308,7 @@ impl<'a> InMemoryRowGroup<'a> {

    /// Creates a page reader to read column at `i`.
    fn column_page_reader(&self, i: usize) -> Result<Box<dyn PageReader>> {
        let _timer = READ_STAGE_BUILD_PAGE_READER.start_timer();
        if let Some(cached_pages) = &self.column_uncompressed_pages[i] {
            debug_assert!(!cached_pages.row_group.is_empty());
            // Hits the row group level page cache.
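The `_timer` guard above follows the usual prometheus histogram pattern: `start_timer()` returns a guard that records the elapsed time when it is dropped at the end of the scope. A standalone sketch with a hypothetical metric name (not the crate's actual `READ_STAGE_BUILD_PAGE_READER` definition):

// --- illustrative sketch, not part of the diff ---
use lazy_static::lazy_static;
use prometheus::{register_histogram, Histogram};

lazy_static! {
    // Hypothetical stand-in for READ_STAGE_BUILD_PAGE_READER.
    static ref BUILD_PAGE_READER_ELAPSED: Histogram = register_histogram!(
        "example_build_page_reader_elapsed",
        "Time spent building a page reader."
    )
    .unwrap();
}

fn column_page_reader_stub() {
    // The guard observes the elapsed seconds into the histogram when dropped.
    let _timer = BUILD_PAGE_READER_ELAPSED.start_timer();
    // ... build the reader here ...
}

fn main() {
    column_page_reader_stub();
    println!("samples: {}", BUILD_PAGE_READER_ELAPSED.get_sample_count());
}
// --- end sketch ---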
@@ -34,6 +34,7 @@ use store_api::storage::consts::SEQUENCE_COLUMN_NAME;
use tokio::io::AsyncWrite;
use tokio_util::compat::{Compat, FuturesAsyncWriteCompatExt};

use crate::config::CompressionMethod;
use crate::error::{InvalidMetadataSnafu, OpenDalSnafu, Result, WriteParquetSnafu};
use crate::read::{Batch, Source};
use crate::sst::index::Indexer;
@@ -217,9 +218,14 @@ where
        let key_value_meta = KeyValue::new(PARQUET_METADATA_KEY.to_string(), json);

        // TODO(yingwen): Find and set proper column encoding for internal columns: op type and tsid.
        let compression = match opts.compression_method {
            CompressionMethod::Zstd => Compression::ZSTD(ZstdLevel::default()),
            CompressionMethod::Lz4 => Compression::LZ4_RAW,
            CompressionMethod::None => Compression::UNCOMPRESSED,
        };
        let props_builder = WriterProperties::builder()
            .set_key_value_metadata(Some(vec![key_value_meta]))
            .set_compression(Compression::ZSTD(ZstdLevel::default()))
            .set_compression(compression)
            .set_encoding(Encoding::PLAIN)
            .set_max_row_group_size(opts.row_group_size);

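For reference, the `Compression`, `ZstdLevel`, and `Encoding` values used above come from the `parquet` crate. A self-contained sketch of the same mapping outside the writer; the `CompressionMethod` enum here is the assumed config type from the earlier hunk, not the crate's real definition.

// --- illustrative sketch, not part of the diff ---
use parquet::basic::{Compression, Encoding, ZstdLevel};
use parquet::file::properties::WriterProperties;

#[derive(Debug, Clone, Copy)]
enum CompressionMethod {
    Zstd,
    Lz4,
    None,
}

/// Builds parquet writer properties from a configured compression method.
fn writer_properties(method: CompressionMethod, row_group_size: usize) -> WriterProperties {
    let compression = match method {
        CompressionMethod::Zstd => Compression::ZSTD(ZstdLevel::default()),
        CompressionMethod::Lz4 => Compression::LZ4_RAW,
        CompressionMethod::None => Compression::UNCOMPRESSED,
    };
    WriterProperties::builder()
        .set_compression(compression)
        .set_encoding(Encoding::PLAIN)
        .set_max_row_group_size(row_group_size)
        .build()
}

fn main() {
    let _props = writer_properties(CompressionMethod::Lz4, 102_400);
    println!("writer properties configured");
}
// --- end sketch ---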
@@ -35,9 +35,6 @@ use api::v1::{OpType, Row, Rows, SemanticType};
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::Plugins;
|
||||
use common_datasource::compression::CompressionType;
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_telemetry::warn;
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
|
||||
@@ -198,7 +195,6 @@ pub struct TestEnv {
|
||||
log_store: Option<LogStoreImpl>,
|
||||
log_store_factory: LogStoreFactory,
|
||||
object_store_manager: Option<ObjectStoreManagerRef>,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl Default for TestEnv {
|
||||
@@ -215,10 +211,6 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,10 +221,6 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,10 +231,6 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,7 +269,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -295,7 +278,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -313,7 +295,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -323,7 +304,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -355,7 +335,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -367,7 +346,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -410,7 +388,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -422,7 +399,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -454,7 +430,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
time_provider.clone(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -466,7 +441,6 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
time_provider.clone(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -476,13 +450,13 @@ impl TestEnv {
|
||||
/// Reopen the engine.
|
||||
pub async fn reopen_engine(&mut self, engine: MitoEngine, config: MitoConfig) -> MitoEngine {
|
||||
engine.stop().await.unwrap();
|
||||
|
||||
match self.log_store.as_ref().unwrap().clone() {
|
||||
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new(
|
||||
&self.data_home().display().to_string(),
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -492,7 +466,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -508,7 +481,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -518,7 +490,6 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -544,7 +515,6 @@ impl TestEnv {
|
||||
Arc::new(config),
|
||||
log_store,
|
||||
Arc::new(object_store_manager),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -553,7 +523,6 @@ impl TestEnv {
|
||||
Arc::new(config),
|
||||
log_store,
|
||||
Arc::new(object_store_manager),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -661,10 +630,6 @@ impl TestEnv {
|
||||
|
||||
Arc::new(write_cache)
|
||||
}
|
||||
|
||||
pub fn get_schema_metadata_manager(&self) -> SchemaMetadataManagerRef {
|
||||
self.schema_metadata_manager.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder to mock a [RegionCreateRequest].
|
||||
|
||||
@@ -34,8 +34,8 @@ use crate::error::Result;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer};
|
||||
use crate::memtable::{
|
||||
BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange,
|
||||
MemtableRef, MemtableStats,
|
||||
BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, MemtableId,
|
||||
MemtableRange, MemtableRef, MemtableStats,
|
||||
};
|
||||
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
|
||||
|
||||
|
||||
@@ -31,7 +31,6 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::Plugins;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_runtime::JoinHandle;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use futures::future::try_join_all;
|
||||
@@ -133,7 +132,6 @@ impl WorkerGroup {
|
||||
config: Arc<MitoConfig>,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<WorkerGroup> {
|
||||
let (flush_sender, flush_receiver) = watch::channel(());
|
||||
@@ -193,7 +191,6 @@ impl WorkerGroup {
|
||||
flush_sender: flush_sender.clone(),
|
||||
flush_receiver: flush_receiver.clone(),
|
||||
plugins: plugins.clone(),
|
||||
schema_metadata_manager: schema_metadata_manager.clone(),
|
||||
}
|
||||
.start()
|
||||
})
|
||||
@@ -276,7 +273,6 @@ impl WorkerGroup {
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
listener: Option<crate::engine::listener::EventListenerRef>,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
time_provider: TimeProviderRef,
|
||||
) -> Result<WorkerGroup> {
|
||||
let (flush_sender, flush_receiver) = watch::channel(());
|
||||
@@ -333,7 +329,6 @@ impl WorkerGroup {
|
||||
flush_sender: flush_sender.clone(),
|
||||
flush_receiver: flush_receiver.clone(),
|
||||
plugins: Plugins::new(),
|
||||
schema_metadata_manager: schema_metadata_manager.clone(),
|
||||
}
|
||||
.start()
|
||||
})
|
||||
@@ -410,7 +405,6 @@ struct WorkerStarter<S> {
|
||||
/// Watch channel receiver to wait for background flush job.
|
||||
flush_receiver: watch::Receiver<()>,
|
||||
plugins: Plugins,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl<S: LogStore> WorkerStarter<S> {
|
||||
@@ -461,7 +455,6 @@ impl<S: LogStore> WorkerStarter<S> {
|
||||
stalled_count: WRITE_STALL_TOTAL.with_label_values(&[&id_string]),
|
||||
region_count: REGION_COUNT.with_label_values(&[&id_string]),
|
||||
region_edit_queues: RegionEditQueues::default(),
|
||||
schema_metadata_manager: self.schema_metadata_manager,
|
||||
};
|
||||
let handle = common_runtime::spawn_global(async move {
|
||||
worker_thread.run().await;
|
||||
@@ -652,8 +645,6 @@ struct RegionWorkerLoop<S> {
|
||||
region_count: IntGauge,
|
||||
/// Queues for region edit requests.
|
||||
region_edit_queues: RegionEditQueues,
|
||||
/// Database level metadata manager.
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
|
||||
@@ -44,7 +44,6 @@ impl<S> RegionWorkerLoop<S> {
|
||||
®ion.access_layer,
|
||||
sender,
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -81,11 +80,7 @@ impl<S> RegionWorkerLoop<S> {
|
||||
|
||||
// Schedule next compaction if necessary.
|
||||
self.compaction_scheduler
|
||||
.on_compaction_finished(
|
||||
region_id,
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.on_compaction_finished(region_id, ®ion.manifest_ctx)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -112,7 +107,6 @@ impl<S> RegionWorkerLoop<S> {
|
||||
®ion.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -79,21 +79,23 @@ impl<S: LogStore> RegionWorkerLoop<S> {
                region_ctx.set_error(e);
            }
        }
        match wal_writer.write_to_wal().await.map_err(Arc::new) {
            Ok(response) => {
                for (region_id, region_ctx) in region_ctxs.iter_mut() {
                    // Safety: the log store implementation ensures that either the `write_to_wal` fails and no
                    // response is returned or the last entry ids for each region do exist.
                    let last_entry_id = response.last_entry_ids.get(region_id).unwrap();
                    region_ctx.set_next_entry_id(last_entry_id + 1);
        if !self.config.skip_wal {
            match wal_writer.write_to_wal().await.map_err(Arc::new) {
                Ok(response) => {
                    for (region_id, region_ctx) in region_ctxs.iter_mut() {
                        // Safety: the log store implementation ensures that either the `write_to_wal` fails and no
                        // response is returned or the last entry ids for each region do exist.
                        let last_entry_id = response.last_entry_ids.get(region_id).unwrap();
                        region_ctx.set_next_entry_id(last_entry_id + 1);
                    }
                }
            }
            Err(e) => {
                // Failed to write wal.
                for mut region_ctx in region_ctxs.into_values() {
                    region_ctx.set_error(e.clone());
                Err(e) => {
                    // Failed to write wal.
                    for mut region_ctx in region_ctxs.into_values() {
                        region_ctx.set_error(e.clone());
                    }
                    return;
                }
                return;
            }
        }
    }
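The hunk above gates the entire WAL write behind `if !self.config.skip_wal { ... }`. A stripped-down sketch of the same gate with a hypothetical config type; this is not the real `MitoConfig` field list, only the shape of the change.

// --- illustrative sketch, not part of the diff ---
#[derive(Debug, Default, Clone)]
struct EngineConfig {
    /// Hypothetical mirror of `skip_wal`: when true, writes bypass the
    /// write-ahead log entirely and durability across restarts is lost.
    skip_wal: bool,
}

#[derive(Debug)]
enum WalError {
    Io(String),
}

fn write_to_wal(entries: &[String]) -> Result<u64, WalError> {
    // Stand-in for the real log store call; returns the last entry id.
    if entries.is_empty() {
        return Err(WalError::Io("empty batch".to_string()));
    }
    Ok(entries.len() as u64)
}

fn handle_write(config: &EngineConfig, entries: &[String]) -> Result<Option<u64>, WalError> {
    if config.skip_wal {
        // Skip durability entirely; callers proceed as if the WAL succeeded.
        return Ok(None);
    }
    let last_entry_id = write_to_wal(entries)?;
    Ok(Some(last_entry_id + 1)) // next entry id, as in `set_next_entry_id`
}

fn main() {
    let cfg = EngineConfig { skip_wal: true };
    println!("{:?}", handle_write(&cfg, &["put".to_string()]));
}
// --- end sketch ---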
@@ -36,7 +36,6 @@ datatypes.workspace = true
|
||||
file-engine.workspace = true
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
meter-core.workspace = true
|
||||
|
||||
@@ -35,7 +35,6 @@ use crate::error::{
|
||||
MissingTimeIndexColumnSnafu, RequestDeletesSnafu, Result, TableNotFoundSnafu,
|
||||
};
|
||||
use crate::region_req_factory::RegionRequestFactory;
|
||||
use crate::req_convert::common::preprocess_row_delete_requests;
|
||||
use crate::req_convert::delete::{ColumnToRow, RowToRegion, TableToRegion};
|
||||
|
||||
pub struct Deleter {
|
||||
@@ -73,7 +72,6 @@ impl Deleter {
|
||||
mut requests: RowDeleteRequests,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
preprocess_row_delete_requests(&mut requests.deletes)?;
|
||||
// remove empty requests
|
||||
requests.deletes.retain(|req| {
|
||||
req.rows
|
||||
|
||||
@@ -770,13 +770,6 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Invalid json text: {}", json))]
    InvalidJsonFormat {
        #[snafu(implicit)]
        location: Location,
        json: String,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -815,8 +808,7 @@ impl ErrorExt for Error {
            | Error::BuildAdminFunctionArgs { .. }
            | Error::FunctionArityMismatch { .. }
            | Error::InvalidPartition { .. }
            | Error::PhysicalExpr { .. }
            | Error::InvalidJsonFormat { .. } => StatusCode::InvalidArguments,
            | Error::PhysicalExpr { .. } => StatusCode::InvalidArguments,

            Error::TableAlreadyExists { .. } | Error::ViewAlreadyExists { .. } => {
                StatusCode::TableAlreadyExists
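The removed `InvalidJsonFormat` variant followed the crate's usual snafu pattern: a display message, an implicit source location, and a `StatusCode` mapping in the `ErrorExt` impl. A reduced sketch of that pattern, using a toy status code enum instead of the real `common_error` types:

// --- illustrative sketch, not part of the diff ---
use snafu::{Location, Snafu};

/// Toy stand-in for common_error::status_code::StatusCode.
#[derive(Debug, PartialEq, Eq)]
enum StatusCode {
    InvalidArguments,
}

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Invalid json text: {}", json))]
    InvalidJsonFormat {
        #[snafu(implicit)]
        location: Location,
        json: String,
    },
}

impl Error {
    /// Mirrors the ErrorExt::status_code mapping shown in the hunk above.
    fn status_code(&self) -> StatusCode {
        match self {
            Error::InvalidJsonFormat { .. } => StatusCode::InvalidArguments,
        }
    }
}

fn main() {
    let err = InvalidJsonFormatSnafu { json: "{oops".to_string() }.build();
    assert_eq!(err.status_code(), StatusCode::InvalidArguments);
    println!("{err}");
}
// --- end sketch ---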
@@ -54,7 +54,6 @@ use crate::error::{
|
||||
};
|
||||
use crate::expr_factory::CreateExprFactory;
|
||||
use crate::region_req_factory::RegionRequestFactory;
|
||||
use crate::req_convert::common::preprocess_row_insert_requests;
|
||||
use crate::req_convert::insert::{ColumnToRow, RowToRegion, StatementToRegion, TableToRegion};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
@@ -120,11 +119,10 @@ impl Inserter {
|
||||
/// Handles row inserts request and creates a physical table on demand.
|
||||
pub async fn handle_row_inserts(
|
||||
&self,
|
||||
mut requests: RowInsertRequests,
|
||||
requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
preprocess_row_insert_requests(&mut requests.inserts)?;
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
@@ -760,8 +758,10 @@ impl Inserter {
|
||||
ctx: &QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
let catalog_name = ctx.current_catalog();
|
||||
let schema_name = ctx.current_schema();
|
||||
let res = statement_executor
|
||||
.create_logical_tables(&create_table_exprs, ctx.clone())
|
||||
.create_logical_tables(catalog_name, &schema_name, &create_table_exprs, ctx.clone())
|
||||
.await;
|
||||
|
||||
match res {
|
||||
|
||||
@@ -17,13 +17,9 @@ pub(crate) mod partitioner;
use std::collections::HashMap;

use api::helper::ColumnDataTypeWrapper;
use api::v1::column_data_type_extension::TypeExt;
use api::v1::column_def::options_from_column_schema;
use api::v1::value::ValueData;
use api::v1::{
    Column, ColumnDataType, ColumnDataTypeExtension, ColumnSchema, JsonTypeExtension, Row,
    RowDeleteRequest, RowInsertRequest, Rows, SemanticType, Value,
};
use api::v1::{Column, ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value};
use common_base::BitVec;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;
@@ -31,77 +27,10 @@ use snafu::ResultExt;
use table::metadata::TableInfo;

use crate::error::{
    ColumnDataTypeSnafu, ColumnNotFoundSnafu, InvalidInsertRequestSnafu, InvalidJsonFormatSnafu,
    MissingTimeIndexColumnSnafu, Result, UnexpectedSnafu,
    ColumnDataTypeSnafu, ColumnNotFoundSnafu, InvalidInsertRequestSnafu,
    MissingTimeIndexColumnSnafu, Result,
};

/// Encodes a string value as JSONB binary data if the value is of `StringValue` type.
fn encode_string_to_jsonb_binary(value_data: ValueData) -> Result<ValueData> {
    if let ValueData::StringValue(json) = &value_data {
        let binary = jsonb::parse_value(json.as_bytes())
            .map_err(|_| InvalidJsonFormatSnafu { json }.build())
            .map(|jsonb| jsonb.to_vec())?;
        Ok(ValueData::BinaryValue(binary))
    } else {
        UnexpectedSnafu {
            violated: "Expected to value data to be a string.",
        }
        .fail()
    }
}

/// Prepares row insertion requests by converting any JSON values to binary JSONB format.
pub fn preprocess_row_insert_requests(requests: &mut Vec<RowInsertRequest>) -> Result<()> {
    for request in requests {
        prepare_rows(&mut request.rows)?;
    }

    Ok(())
}

/// Prepares row deletion requests by converting any JSON values to binary JSONB format.
pub fn preprocess_row_delete_requests(requests: &mut Vec<RowDeleteRequest>) -> Result<()> {
    for request in requests {
        prepare_rows(&mut request.rows)?;
    }

    Ok(())
}

fn prepare_rows(rows: &mut Option<Rows>) -> Result<()> {
    if let Some(rows) = rows {
        let indexes = rows
            .schema
            .iter()
            .enumerate()
            .filter_map(|(idx, schema)| {
                if schema.datatype() == ColumnDataType::Json {
                    Some(idx)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();
        for idx in &indexes {
            let column = &mut rows.schema[*idx];
            column.datatype_extension = Some(ColumnDataTypeExtension {
                type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
            });
            column.datatype = ColumnDataType::Binary.into();
        }

        for idx in &indexes {
            for row in &mut rows.rows {
                if let Some(value_data) = row.values[*idx].value_data.take() {
                    row.values[*idx].value_data = Some(encode_string_to_jsonb_binary(value_data)?);
                }
            }
        }
    }

    Ok(())
}

pub fn columns_to_rows(columns: Vec<Column>, row_count: u32) -> Result<Rows> {
    let row_count = row_count as usize;
    let column_count = columns.len();
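The removed helper turns a JSON string into JSONB bytes with the `jsonb` crate before the request reaches the region server. A minimal round-trip showing what `jsonb::parse_value(..).to_vec()` produces, using only the calls that appear in the removed code:

// --- illustrative sketch, not part of the diff ---
fn main() {
    let json = r#"{"hello": "world", "n": 1}"#;

    // Parse the text into a jsonb value, then serialize it into the binary JSONB
    // format, mirroring encode_string_to_jsonb_binary in the removed helper above.
    let value = jsonb::parse_value(json.as_bytes()).expect("valid json");
    let binary: Vec<u8> = value.to_vec();

    println!("{} input bytes -> {} JSONB bytes", json.len(), binary.len());

    // Invalid text surfaces as a parse error, which the helper mapped to InvalidJsonFormat.
    assert!(jsonb::parse_value(b"{not json").is_err());
}
// --- end sketch ---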
@@ -45,6 +45,7 @@ use common_time::Timestamp;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use query::parser::QueryStatement;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::{Channel, QueryContextRef};
|
||||
use session::table_name::table_idents_to_full_name;
|
||||
@@ -80,11 +81,13 @@ pub struct StatementExecutor {
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
inserter: InserterRef,
|
||||
stats: StatementStatistics,
|
||||
}
|
||||
|
||||
pub type StatementExecutorRef = Arc<StatementExecutor>;
|
||||
|
||||
impl StatementExecutor {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
query_engine: QueryEngineRef,
|
||||
@@ -93,6 +96,7 @@ impl StatementExecutor {
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
inserter: InserterRef,
|
||||
table_route_cache: TableRouteCacheRef,
|
||||
stats: StatementStatistics,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
@@ -104,22 +108,23 @@ impl StatementExecutor {
|
||||
partition_manager: Arc::new(PartitionRuleManager::new(kv_backend, table_route_cache)),
|
||||
cache_invalidator,
|
||||
inserter,
|
||||
stats,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn execute_stmt(
|
||||
&self,
|
||||
stmt: QueryStatement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let _slow_query_timer = self.stats.start_slow_query_timer(stmt.clone());
|
||||
match stmt {
|
||||
QueryStatement::Sql(stmt) => self.execute_sql(stmt, query_ctx).await,
|
||||
QueryStatement::Promql(_) => self.plan_exec(stmt, query_ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn execute_sql(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
match stmt {
|
||||
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
|
||||
@@ -356,7 +361,6 @@ impl StatementExecutor {
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn plan(
|
||||
&self,
|
||||
stmt: &QueryStatement,
|
||||
@@ -370,7 +374,6 @@ impl StatementExecutor {
|
||||
}
|
||||
|
||||
/// Execute [`LogicalPlan`] directly.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn exec_plan(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
self.query_engine
|
||||
.execute(plan, query_ctx)
|
||||
|
||||
@@ -26,7 +26,7 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::Context;
|
||||
use common_meta::ddl::ExecutorContext;
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::schema_name::{SchemaNameKey, SchemaNameValue};
|
||||
use common_meta::key::NAME_PATTERN;
|
||||
use common_meta::rpc::ddl::{
|
||||
CreateFlowTask, DdlTask, DropFlowTask, DropViewTask, SubmitDdlTaskRequest,
|
||||
@@ -116,21 +116,9 @@ impl StatementExecutor {
|
||||
.await
|
||||
.context(error::FindTablePartitionRuleSnafu { table_name: table })?;
|
||||
|
||||
// CREATE TABLE LIKE also inherits database level options.
|
||||
let schema_options = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey {
|
||||
catalog: &catalog,
|
||||
schema: &schema,
|
||||
})
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let quote_style = ctx.quote_style();
|
||||
let mut create_stmt =
|
||||
create_table_stmt(&table_ref.table_info(), schema_options, quote_style)
|
||||
.context(error::ParseQuerySnafu)?;
|
||||
let mut create_stmt = create_table_stmt(&table_ref.table_info(), quote_style)
|
||||
.context(error::ParseQuerySnafu)?;
|
||||
create_stmt.name = stmt.table_name;
|
||||
create_stmt.if_not_exists = false;
|
||||
|
||||
@@ -177,8 +165,15 @@ impl StatementExecutor {
|
||||
.table_options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
let catalog_name = &create_table.catalog_name;
|
||||
let schema_name = &create_table.schema_name;
|
||||
return self
|
||||
.create_logical_tables(&[create_table.clone()], query_ctx)
|
||||
.create_logical_tables(
|
||||
catalog_name,
|
||||
schema_name,
|
||||
&[create_table.clone()],
|
||||
query_ctx,
|
||||
)
|
||||
.await?
|
||||
.into_iter()
|
||||
.next()
|
||||
@@ -188,7 +183,6 @@ impl StatementExecutor {
|
||||
}
|
||||
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLE.start_timer();
|
||||
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
@@ -199,12 +193,12 @@ impl StatementExecutor {
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
ensure!(
|
||||
schema.is_some(),
|
||||
SchemaNotFoundSnafu {
|
||||
let Some(schema_opts) = schema else {
|
||||
return SchemaNotFoundSnafu {
|
||||
schema_info: &create_table.schema_name,
|
||||
}
|
||||
);
|
||||
.fail();
|
||||
};
|
||||
|
||||
// if table exists.
|
||||
if let Some(table) = self
|
||||
@@ -246,7 +240,7 @@ impl StatementExecutor {
|
||||
);
|
||||
|
||||
let (partitions, partition_cols) = parse_partitions(create_table, partitions, &query_ctx)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols, schema_opts)?;
|
||||
|
||||
let resp = self
|
||||
.create_table_procedure(
|
||||
@@ -279,6 +273,8 @@ impl StatementExecutor {
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_logical_tables(
|
||||
&self,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
create_table_exprs: &[CreateTableExpr],
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
@@ -300,9 +296,19 @@ impl StatementExecutor {
|
||||
);
|
||||
}
|
||||
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey::new(catalog_name, schema_name))
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.context(SchemaNotFoundSnafu {
|
||||
schema_info: schema_name,
|
||||
})?;
|
||||
|
||||
let mut raw_tables_info = create_table_exprs
|
||||
.iter()
|
||||
.map(|create| create_table_info(create, vec![]))
|
||||
.map(|create| create_table_info(create, vec![], schema.clone()))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let tables_data = create_table_exprs
|
||||
.iter()
|
||||
@@ -1255,6 +1261,7 @@ fn parse_partitions(
|
||||
fn create_table_info(
|
||||
create_table: &CreateTableExpr,
|
||||
partition_columns: Vec<String>,
|
||||
schema_opts: SchemaNameValue,
|
||||
) -> Result<RawTableInfo> {
|
||||
let mut column_schemas = Vec::with_capacity(create_table.column_defs.len());
|
||||
let mut column_name_to_index_map = HashMap::new();
|
||||
@@ -1303,6 +1310,7 @@ fn create_table_info(
|
||||
|
||||
let table_options = TableOptions::try_from_iter(&create_table.table_options)
|
||||
.context(UnrecognizedTableOptionSnafu)?;
|
||||
let table_options = merge_options(table_options, schema_opts);
|
||||
|
||||
let meta = RawTableMeta {
|
||||
schema: raw_schema,
|
||||
@@ -1487,6 +1495,12 @@ fn convert_value(
        .context(ParseSqlValueSnafu)
}

/// Merge table level table options with schema level table options.
fn merge_options(mut table_opts: TableOptions, schema_opts: SchemaNameValue) -> TableOptions {
    table_opts.ttl = table_opts.ttl.or(schema_opts.ttl);
    table_opts
}

#[cfg(test)]
mod test {
    use session::context::{QueryContext, QueryContextBuilder};

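merge_options gives schema (database) level options lower precedence than table level ones: only unset table fields inherit the schema value. A tiny sketch of that precedence rule with simplified option types (not the real TableOptions or SchemaNameValue):

// --- illustrative sketch, not part of the diff ---
use std::time::Duration;

/// Simplified stand-ins for TableOptions and SchemaNameValue.
#[derive(Debug, Default, Clone)]
struct TableOpts {
    ttl: Option<Duration>,
}

#[derive(Debug, Default, Clone)]
struct SchemaOpts {
    ttl: Option<Duration>,
}

/// Table level wins; schema level only fills the gap, as in merge_options above.
fn merge_options(mut table: TableOpts, schema: SchemaOpts) -> TableOpts {
    table.ttl = table.ttl.or(schema.ttl);
    table
}

fn main() {
    let schema = SchemaOpts { ttl: Some(Duration::from_secs(3600)) };

    // A table without a TTL inherits the database level TTL.
    let merged = merge_options(TableOpts::default(), schema.clone());
    assert_eq!(merged.ttl, Some(Duration::from_secs(3600)));

    // An explicit table TTL is kept.
    let merged = merge_options(TableOpts { ttl: Some(Duration::from_secs(60)) }, schema);
    assert_eq!(merged.ttl, Some(Duration::from_secs(60)));
    println!("ok");
}
// --- end sketch ---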
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing;
|
||||
use partition::manager::PartitionInfo;
|
||||
@@ -34,7 +33,7 @@ use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self, CatalogSnafu, ExecuteStatementSnafu, ExternalSnafu, FindViewInfoSnafu, InvalidSqlSnafu,
|
||||
Result, TableMetadataManagerSnafu, ViewInfoNotFoundSnafu, ViewNotFoundSnafu,
|
||||
Result, ViewInfoNotFoundSnafu, ViewNotFoundSnafu,
|
||||
};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
@@ -119,16 +118,6 @@ impl StatementExecutor {
|
||||
.fail();
|
||||
}
|
||||
|
||||
let schema_options = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey {
|
||||
catalog: &table_name.catalog_name,
|
||||
schema: &table_name.schema_name,
|
||||
})
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let partitions = self
|
||||
.partition_manager
|
||||
.find_table_partitions(table.table_info().table_id())
|
||||
@@ -139,8 +128,7 @@ impl StatementExecutor {
|
||||
|
||||
let partitions = create_partitions_stmt(partitions)?;
|
||||
|
||||
query::sql::show_create_table(table, schema_options, partitions, query_ctx)
|
||||
.context(ExecuteStatementSnafu)
|
||||
query::sql::show_create_table(table, partitions, query_ctx).context(ExecuteStatementSnafu)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
|
||||
@@ -274,7 +274,7 @@ impl<'a> RuleChecker<'a> {
|
||||
fn check_axis(&self) -> Result<()> {
|
||||
for (col_index, axis) in self.axis.iter().enumerate() {
|
||||
for (val, split_point) in axis {
|
||||
if split_point.less_than_counter != 0 || !split_point.is_equal {
|
||||
if !split_point.is_equal {
|
||||
UnclosedValueSnafu {
|
||||
value: format!("{val:?}"),
|
||||
column: self.rule.partition_columns[col_index].clone(),
|
||||
@@ -410,6 +410,7 @@ mod tests {
|
||||
/// b <= h b >= s
|
||||
/// ```
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn empty_expr_case_1() {
|
||||
// PARTITION ON COLUMNS (b) (
|
||||
// b <= 'h',
|
||||
@@ -451,6 +452,7 @@ mod tests {
|
||||
/// 10 20
|
||||
/// ```
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn empty_expr_case_2() {
|
||||
// PARTITION ON COLUMNS (b) (
|
||||
// a >= 100 AND b <= 10 OR a > 100 AND a <= 200 AND b <= 10 OR a >= 200 AND b > 10 AND b <= 20 OR a > 200 AND b <= 20
|
||||
@@ -580,6 +582,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn duplicate_expr_case_1() {
|
||||
// PARTITION ON COLUMNS (a) (
|
||||
// a <= 20,
|
||||
|
||||
@@ -41,7 +41,6 @@ futures.workspace = true
|
||||
greptime-proto.workspace = true
|
||||
itertools.workspace = true
|
||||
jsonb.workspace = true
|
||||
jsonpath-rust = "0.7.3"
|
||||
lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["sync"] }
|
||||
once_cell.workspace = true
|
||||
|
||||
@@ -570,18 +570,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Parse json path error"))]
|
||||
JsonPathParse {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: jsonpath_rust::JsonPathParserError,
|
||||
},
|
||||
#[snafu(display("Json path result index not number"))]
|
||||
JsonPathParseResultIndex {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
@@ -19,7 +19,6 @@ pub mod dissect;
|
||||
pub mod epoch;
|
||||
pub mod gsub;
|
||||
pub mod join;
|
||||
pub mod json_path;
|
||||
pub mod letter;
|
||||
pub mod regex;
|
||||
pub mod timestamp;
|
||||
@@ -35,7 +34,6 @@ use epoch::{EpochProcessor, EpochProcessorBuilder};
|
||||
use gsub::{GsubProcessor, GsubProcessorBuilder};
|
||||
use itertools::Itertools;
|
||||
use join::{JoinProcessor, JoinProcessorBuilder};
|
||||
use json_path::{JsonPathProcessor, JsonPathProcessorBuilder};
|
||||
use letter::{LetterProcessor, LetterProcessorBuilder};
|
||||
use regex::{RegexProcessor, RegexProcessorBuilder};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -58,8 +56,6 @@ const PATTERN_NAME: &str = "pattern";
|
||||
const PATTERNS_NAME: &str = "patterns";
|
||||
const SEPARATOR_NAME: &str = "separator";
|
||||
const TARGET_FIELDS_NAME: &str = "target_fields";
|
||||
const JSON_PATH_NAME: &str = "json_path";
|
||||
const JSON_PATH_RESULT_INDEX_NAME: &str = "result_index";
|
||||
|
||||
// const IF_NAME: &str = "if";
|
||||
// const IGNORE_FAILURE_NAME: &str = "ignore_failure";
|
||||
@@ -98,7 +94,6 @@ pub enum ProcessorKind {
|
||||
UrlEncoding(UrlEncodingProcessor),
|
||||
Epoch(EpochProcessor),
|
||||
Date(DateProcessor),
|
||||
JsonPath(JsonPathProcessor),
|
||||
}
|
||||
|
||||
/// ProcessorBuilder trait defines the interface for all processor builders
|
||||
@@ -127,7 +122,6 @@ pub enum ProcessorBuilders {
|
||||
UrlEncoding(UrlEncodingProcessorBuilder),
|
||||
Epoch(EpochProcessorBuilder),
|
||||
Date(DateProcessorBuilder),
|
||||
JsonPath(JsonPathProcessorBuilder),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -272,9 +266,6 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorBuilders> {
|
||||
urlencoding::PROCESSOR_URL_ENCODING => {
|
||||
ProcessorBuilders::UrlEncoding(UrlEncodingProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
json_path::PROCESSOR_JSON_PATH => {
|
||||
ProcessorBuilders::JsonPath(json_path::JsonPathProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
|
||||
};
|
||||
|
||||
|
||||
@@ -1,231 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use ahash::HashSet;
|
||||
use jsonpath_rust::JsonPath;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use super::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
|
||||
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
|
||||
};
|
||||
use crate::etl::error::{Error, Result};
|
||||
use crate::etl::field::{Fields, OneInputOneOutputField};
|
||||
use crate::etl::processor::ProcessorKind;
|
||||
use crate::etl_error::{
|
||||
JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessorBuilder {
|
||||
fields: Fields,
|
||||
json_path: JsonPath<Value>,
|
||||
ignore_missing: bool,
|
||||
result_idex: Option<usize>,
|
||||
}
|
||||
|
||||
impl JsonPathProcessorBuilder {
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<JsonPathProcessor> {
|
||||
let mut real_fields = vec![];
|
||||
for field in self.fields.into_iter() {
|
||||
let input = OneInputOneOutputField::build(
|
||||
JSON_PATH_NAME,
|
||||
intermediate_keys,
|
||||
field.input_field(),
|
||||
field.target_or_input_field(),
|
||||
)?;
|
||||
real_fields.push(input);
|
||||
}
|
||||
|
||||
Ok(JsonPathProcessor {
|
||||
fields: real_fields,
|
||||
json_path: self.json_path,
|
||||
ignore_missing: self.ignore_missing,
|
||||
result_idex: self.result_idex,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ProcessorBuilder for JsonPathProcessorBuilder {
|
||||
fn output_keys(&self) -> HashSet<&str> {
|
||||
self.fields
|
||||
.iter()
|
||||
.map(|f| f.target_or_input_field())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn input_keys(&self) -> HashSet<&str> {
|
||||
self.fields.iter().map(|f| f.input_field()).collect()
|
||||
}
|
||||
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind> {
|
||||
self.build(intermediate_keys).map(ProcessorKind::JsonPath)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessorBuilder {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &yaml_rust::yaml::Hash) -> std::result::Result<Self, Self::Error> {
|
||||
let mut fields = Fields::default();
|
||||
let mut ignore_missing = false;
|
||||
let mut json_path = None;
|
||||
let mut result_idex = None;
|
||||
|
||||
for (k, v) in value.iter() {
|
||||
let key = k
|
||||
.as_str()
|
||||
.with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
|
||||
match key {
|
||||
FIELD_NAME => {
|
||||
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
|
||||
}
|
||||
FIELDS_NAME => {
|
||||
fields = yaml_new_fields(v, FIELDS_NAME)?;
|
||||
}
|
||||
|
||||
IGNORE_MISSING_NAME => {
|
||||
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
|
||||
}
|
||||
JSON_PATH_RESULT_INDEX_NAME => {
|
||||
result_idex = Some(v.as_i64().context(JsonPathParseResultIndexSnafu)? as usize);
|
||||
}
|
||||
|
||||
JSON_PATH_NAME => {
|
||||
let json_path_str = yaml_string(v, JSON_PATH_NAME)?;
|
||||
json_path = Some(
|
||||
JsonPath::try_from(json_path_str.as_str()).context(JsonPathParseSnafu)?,
|
||||
);
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(json_path) = json_path {
|
||||
let processor = JsonPathProcessorBuilder {
|
||||
fields,
|
||||
json_path,
|
||||
ignore_missing,
|
||||
result_idex,
|
||||
};
|
||||
|
||||
Ok(processor)
|
||||
} else {
|
||||
ProcessorMissingFieldSnafu {
|
||||
processor: PROCESSOR_JSON_PATH,
|
||||
field: JSON_PATH_NAME,
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessor {
|
||||
fields: Vec<OneInputOneOutputField>,
|
||||
json_path: JsonPath<Value>,
|
||||
ignore_missing: bool,
|
||||
result_idex: Option<usize>,
|
||||
}
|
||||
|
||||
impl Default for JsonPathProcessor {
|
||||
fn default() -> Self {
|
||||
JsonPathProcessor {
|
||||
fields: vec![],
|
||||
json_path: JsonPath::try_from("$").unwrap(),
|
||||
ignore_missing: false,
|
||||
result_idex: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl JsonPathProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
let processed = self.json_path.find(val);
|
||||
match processed {
|
||||
Value::Array(arr) => {
|
||||
if let Some(index) = self.result_idex {
|
||||
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
|
||||
} else {
|
||||
Ok(Value::Array(arr))
|
||||
}
|
||||
}
|
||||
v => Ok(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Processor for JsonPathProcessor {
|
||||
fn kind(&self) -> &str {
|
||||
PROCESSOR_JSON_PATH
|
||||
}
|
||||
|
||||
fn ignore_missing(&self) -> bool {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_index();
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
|
||||
let output_index = field.output_index();
|
||||
val[output_index] = processed;
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
field: field.input_name(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::Map;
|
||||
|
||||
#[test]
|
||||
fn test_json_path() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let json_path = JsonPath::try_from("$.hello").unwrap();
|
||||
let processor = JsonPathProcessor {
|
||||
json_path,
|
||||
result_idex: Some(0),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.unwrap();
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
}
|
||||
}
|
||||