Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-05 21:02:58 +00:00

Compare commits: transform-... ... v0.10.0-ni (25 commits)
| SHA1 |
|---|
| 0e0c4faf0d |
| 1a02fc31c2 |
| 8efbafa538 |
| fcd0ceea94 |
| 22f31f5929 |
| 5d20acca44 |
| e3733344fe |
| 305767e226 |
| 22a662f6bc |
| 1431393fc8 |
| dfe8cf25f9 |
| cccd25ddbb |
| ac387bd2af |
| 2e9737c01d |
| a8b426aebe |
| f3509fa312 |
| 3dcd6b8e51 |
| f221ee30fd |
| fb822987a9 |
| 4ab6dc2825 |
| 191755fc42 |
| 1676d02149 |
| edc49623de |
| 9405d1c578 |
| 7a4276c24a |
Cargo.lock (generated) — 177 changed lines
```diff
@@ -2070,6 +2070,8 @@ dependencies = [
 "datafusion",
 "datatypes",
 "derive_more",
+"geo",
+"geo-types",
 "geohash",
 "h3o",
 "jsonb",
@@ -2088,6 +2090,7 @@ dependencies = [
 "store-api",
 "table",
 "tokio",
+"wkt",
 ]

 [[package]]
@@ -3706,6 +3709,16 @@ version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125"

+[[package]]
+name = "earcutr"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01"
+dependencies = [
+ "itertools 0.11.0",
+ "num-traits",
+]
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -4014,6 +4027,12 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "float_next_after"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
+
 [[package]]
 name = "flow"
 version = "0.9.5"
@@ -4438,6 +4457,24 @@ dependencies = [
 "version_check",
 ]

+[[package]]
+name = "geo"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81d088357a9cc60cec8253b3578f6834b4a3aa20edb55f5d1c030c36d8143f11"
+dependencies = [
+ "earcutr",
+ "float_next_after",
+ "geo-types",
+ "geographiclib-rs",
+ "i_overlay",
+ "log",
+ "num-traits",
+ "robust",
+ "rstar",
+ "spade",
+]
+
 [[package]]
 name = "geo-types"
 version = "0.7.13"
@@ -4446,9 +4483,19 @@ checksum = "9ff16065e5720f376fbced200a5ae0f47ace85fd70b7e54269790281353b6d61"
 dependencies = [
 "approx 0.5.1",
 "num-traits",
+"rstar",
 "serde",
 ]

+[[package]]
+name = "geographiclib-rs"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e5ed84f8089c70234b0a8e0aedb6dc733671612ddc0d37c6066052f9781960"
+dependencies = [
+ "libm",
+]
+
 [[package]]
 name = "geohash"
 version = "0.13.1"
@@ -4597,6 +4644,15 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "hash32"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606"
+dependencies = [
+ "byteorder",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
@@ -4692,6 +4748,16 @@ dependencies = [
 "http 1.1.0",
 ]

+[[package]]
+name = "heapless"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad"
+dependencies = [
+ "hash32",
+ "stable_deref_trait",
+]
+
 [[package]]
 name = "heck"
 version = "0.4.1"
@@ -5117,6 +5183,50 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "i_float"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5fe043aae28ce70bd2f78b2f5f82a3654d63607c82594da4dabb8b6cb81f2b2"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "i_key_sort"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "347c253b4748a1a28baf94c9ce133b6b166f08573157e05afe718812bc599fcd"
+
+[[package]]
+name = "i_overlay"
+version = "1.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a469f68cb8a7cef375b2b0f581faf5859b4b50600438c00d46b71acc25ebbd0c"
+dependencies = [
+ "i_float",
+ "i_key_sort",
+ "i_shape",
+ "i_tree",
+ "rayon",
+]
+
+[[package]]
+name = "i_shape"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b44852d57a991c7dedaf76c55bc44f677f547ff899a430d29e13efd6133d7d8"
+dependencies = [
+ "i_float",
+ "serde",
+]
+
+[[package]]
+name = "i_tree"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "155181bc97d770181cf9477da51218a19ee92a8e5be642e796661aee2b601139"
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.61"
@@ -5523,8 +5633,8 @@ dependencies = [

 [[package]]
 name = "jsonb"
-version = "0.4.1"
-source = "git+https://github.com/databendlabs/jsonb.git?rev=46ad50fc71cf75afbf98eec455f7892a6387c1fc#46ad50fc71cf75afbf98eec455f7892a6387c1fc"
+version = "0.4.3"
+source = "git+https://github.com/CookiePieWw/jsonb.git?rev=ed2d4f8575419ed434a4ae09dee18ca900915d9c#ed2d4f8575419ed434a4ae09dee18ca900915d9c"
 dependencies = [
 "byteorder",
 "fast-float",
@@ -5551,6 +5661,19 @@ dependencies = [
 "thiserror",
 ]

+[[package]]
+name = "jsonpath-rust"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
+dependencies = [
+ "pest",
+ "pest_derive",
+ "regex",
+ "serde_json",
+ "thiserror",
+]
+
 [[package]]
 name = "jsonptr"
 version = "0.4.7"
@@ -5661,7 +5784,7 @@ dependencies = [
 "hyper-rustls",
 "hyper-timeout 0.5.1",
 "hyper-util",
-"jsonpath-rust",
+"jsonpath-rust 0.5.1",
 "k8s-openapi",
 "kube-core",
 "pem 3.0.4",
@@ -6345,6 +6468,7 @@ dependencies = [
 "common-telemetry",
 "datatypes",
 "futures",
 "futures-util",
 "humantime-serde",
 "meta-srv",
 "rand",
@@ -6556,6 +6680,7 @@ dependencies = [
 "common-error",
 "common-function",
 "common-macro",
 "common-meta",
 "common-procedure-test",
 "common-query",
 "common-recordbatch",
@@ -7596,6 +7721,7 @@ dependencies = [
 "file-engine",
 "futures",
 "futures-util",
 "jsonb",
 "lazy_static",
 "meta-client",
 "meter-core",
@@ -8198,6 +8324,7 @@ dependencies = [
 "greptime-proto",
 "itertools 0.10.5",
 "jsonb",
+"jsonpath-rust 0.7.3",
 "lazy_static",
 "moka",
 "once_cell",
@@ -8396,8 +8523,7 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
 [[package]]
 name = "pprof"
 version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb"
+source = "git+https://github.com/GreptimeTeam/pprof-rs?rev=1bd1e21#1bd1e210d8626da3d1e5aff976e6feee994f576d"
 dependencies = [
 "backtrace",
 "cfg-if",
@@ -9568,6 +9694,12 @@ dependencies = [
 "syn 1.0.109",
 ]

+[[package]]
+name = "robust"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30"
+
 [[package]]
 name = "ron"
 version = "0.7.1"
@@ -9662,6 +9794,17 @@ dependencies = [
 "zstd 0.13.2",
 ]

+[[package]]
+name = "rstar"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "133315eb94c7b1e8d0cb097e5a710d850263372fd028fff18969de708afc7008"
+dependencies = [
+ "heapless",
+ "num-traits",
+ "smallvec",
+]
+
 [[package]]
 name = "rstest"
 version = "0.21.0"
@@ -11154,6 +11297,18 @@ dependencies = [
 "windows-sys 0.52.0",
 ]

+[[package]]
+name = "spade"
+version = "2.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93f5ef1f863aca7d1d7dda7ccfc36a0a4279bd6d3c375176e5e0712e25cb4889"
+dependencies = [
+ "hashbrown 0.14.5",
+ "num-traits",
+ "robust",
+ "smallvec",
+]
+
 [[package]]
 name = "sparsevec"
 version = "0.2.0"
@@ -14147,6 +14302,18 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "wkt"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54f7f1ff4ea4c18936d6cd26a6fd24f0003af37e951a8e0e8b9e9a2d0bd0a46d"
+dependencies = [
+ "geo-types",
+ "log",
+ "num-traits",
+ "thiserror",
+]
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
```
```diff
@@ -125,7 +125,7 @@ greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", r
 humantime = "2.1"
 humantime-serde = "1.1"
 itertools = "0.10"
-jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
+jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "ed2d4f8575419ed434a4ae09dee18ca900915d9c", default-features = false }
 lazy_static = "1.4"
 meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
 mockall = "0.11.4"
@@ -261,6 +261,8 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
 # This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
 # see https://github.com/aws/aws-lc-rs/pull/526
 # aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
+# Apply a fix for pprof for unaligned pointer access
+pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }

 [workspace.dependencies.meter-macros]
 git = "https://github.com/GreptimeTeam/greptime-meter.git"
```
README.md — 16 changed lines
```diff
@@ -6,7 +6,7 @@
   </picture>
 </p>

-<h2 align="center">Unified Time Series Database for Metrics, Logs, and Events</h2>
+<h2 align="center">Unified & Cost-Effective Time Series Database for Metrics, Logs, and Events</h2>

 <div align="center">
 <h3 align="center">
@@ -48,9 +48,21 @@
   </a>
 </div>

+- [Introduction](#introduction)
+- [**Features: Why GreptimeDB**](#why-greptimedb)
+- [Architecture](https://docs.greptime.com/contributor-guide/overview/#architecture)
+- [Try it for free](#try-greptimedb)
+- [Getting Started](#getting-started)
+- [Project Status](#project-status)
+- [Join the community](#community)
+- [Contributing](#contributing)
+- [Extension](#extension )
+- [License](#license)
+- [Acknowledgement](#acknowledgement)
+
 ## Introduction

-**GreptimeDB** is an open-source unified time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at any scale.
+**GreptimeDB** is an open-source unified & cost-effective time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.

 ## Why GreptimeDB
```
```diff
@@ -93,8 +93,8 @@
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
-| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
-| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
+| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.<br/>The local file cache directory. |
+| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
 | `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
 | `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
 | `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
@@ -126,9 +126,9 @@
 | `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
 | `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
 | `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
-| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
+| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
 | `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
-| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
+| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
 | `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
@@ -416,8 +416,8 @@
 | `storage` | -- | -- | The data storage options. |
 | `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
 | `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
-| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
-| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
+| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.<br/>The local file cache directory. |
+| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
 | `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
 | `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
 | `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
@@ -449,9 +449,9 @@
 | `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
 | `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
 | `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
-| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
+| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance. |
 | `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
-| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
+| `region_engine.mito.experimental_write_cache_size` | String | `1GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
 | `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
 | `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
 | `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
```
```diff
@@ -294,14 +294,14 @@ data_home = "/tmp/greptimedb/"
 ## - `Oss`: the data is stored in the Aliyun OSS.
 type = "File"

-## Cache configuration for object storage such as 'S3' etc.
+## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
 ## The local file cache directory.
 ## @toml2docs:none-default
 cache_path = "/path/local_cache"

-## The local file cache capacity in bytes.
+## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
 ## @toml2docs:none-default
-cache_capacity = "256MB"
+cache_capacity = "1GiB"

 ## The S3 bucket name.
 ## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -459,14 +459,14 @@ auto_flush_interval = "1h"
 ## @toml2docs:none-default="Auto"
 #+ selector_result_cache_size = "512MB"

-## Whether to enable the experimental write cache.
+## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
 enable_experimental_write_cache = false

 ## File system path for write cache, defaults to `{data_home}/write_cache`.
 experimental_write_cache_path = ""

-## Capacity for write cache.
-experimental_write_cache_size = "512MB"
+## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
+experimental_write_cache_size = "1GiB"

 ## TTL for write cache.
 ## @toml2docs:none-default
@@ -332,14 +332,14 @@ data_home = "/tmp/greptimedb/"
 ## - `Oss`: the data is stored in the Aliyun OSS.
 type = "File"

-## Cache configuration for object storage such as 'S3' etc.
+## Cache configuration for object storage such as 'S3' etc. It is recommended to configure it when using object storage for better performance.
 ## The local file cache directory.
 ## @toml2docs:none-default
 cache_path = "/path/local_cache"

-## The local file cache capacity in bytes.
+## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
 ## @toml2docs:none-default
-cache_capacity = "256MB"
+cache_capacity = "1GiB"

 ## The S3 bucket name.
 ## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
@@ -497,14 +497,14 @@ auto_flush_interval = "1h"
 ## @toml2docs:none-default="Auto"
 #+ selector_result_cache_size = "512MB"

-## Whether to enable the experimental write cache.
+## Whether to enable the experimental write cache. It is recommended to enable it when using object storage for better performance.
 enable_experimental_write_cache = false

 ## File system path for write cache, defaults to `{data_home}/write_cache`.
 experimental_write_cache_path = ""

-## Capacity for write cache.
-experimental_write_cache_size = "512MB"
+## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
+experimental_write_cache_size = "1GiB"

 ## TTL for write cache.
 ## @toml2docs:none-default
```
````diff
@@ -4,13 +4,13 @@

 example:
 ```bash
-curl --data "trace;flow=debug" 127.0.0.1:4000/debug/log_level
+curl --data "trace,flow=debug" 127.0.0.1:4000/debug/log_level
 ```
 And database will reply with something like:
 ```bash
-Log Level changed from Some("info") to "trace;flow=debug"%
+Log Level changed from Some("info") to "trace,flow=debug"%
 ```

-The data is a string in the format of `global_level;module1=level1;module2=level2;...` that follow the same rule of `RUST_LOG`.
+The data is a string in the format of `global_level,module1=level1,module2=level2,...` that follow the same rule of `RUST_LOG`.

 The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off`(case insensitive).
````
```diff
@@ -178,6 +178,12 @@ pub enum Error {
         location: Location,
     },

+    #[snafu(display("Partition manager not found, it's not expected."))]
+    PartitionManagerNotFound {
+        #[snafu(implicit)]
+        location: Location,
+    },
+
     #[snafu(display("Failed to find table partitions"))]
     FindPartitions { source: partition::error::Error },

@@ -301,6 +307,7 @@ impl ErrorExt for Error {
             | Error::CastManager { .. }
             | Error::Json { .. }
             | Error::GetInformationExtension { .. }
+            | Error::PartitionManagerNotFound { .. }
             | Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,

             Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
```
```diff
@@ -34,15 +34,14 @@ use datatypes::vectors::{
 };
 use futures::{StreamExt, TryStreamExt};
 use partition::manager::PartitionInfo;
-use partition::partition::PartitionDef;
 use snafu::{OptionExt, ResultExt};
-use store_api::storage::{RegionId, ScanRequest, TableId};
+use store_api::storage::{ScanRequest, TableId};
 use table::metadata::{TableInfo, TableType};

 use super::PARTITIONS;
 use crate::error::{
-    CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
-    UpgradeWeakCatalogManagerRefSnafu,
+    CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
+    Result, UpgradeWeakCatalogManagerRefSnafu,
 };
 use crate::kvbackend::KvBackendCatalogManager;
 use crate::system_schema::information_schema::{InformationTable, Predicates};
@@ -236,7 +235,8 @@ impl InformationSchemaPartitionsBuilder {
         let partition_manager = catalog_manager
             .as_any()
             .downcast_ref::<KvBackendCatalogManager>()
-            .map(|catalog_manager| catalog_manager.partition_manager());
+            .map(|catalog_manager| catalog_manager.partition_manager())
+            .context(PartitionManagerNotFoundSnafu)?;

         let predicates = Predicates::from_scan_request(&request);

@@ -262,27 +262,10 @@ impl InformationSchemaPartitionsBuilder {
         let table_ids: Vec<TableId> =
             table_infos.iter().map(|info| info.ident.table_id).collect();

-        let mut table_partitions = if let Some(partition_manager) = &partition_manager {
-            partition_manager
-                .batch_find_table_partitions(&table_ids)
-                .await
-                .context(FindPartitionsSnafu)?
-        } else {
-            // Current node must be a standalone instance, contains only one partition by default.
-            // TODO(dennis): change it when we support multi-regions for standalone.
-            table_ids
-                .into_iter()
-                .map(|table_id| {
-                    (
-                        table_id,
-                        vec![PartitionInfo {
-                            id: RegionId::new(table_id, 0),
-                            partition: PartitionDef::new(vec![], vec![]),
-                        }],
-                    )
-                })
-                .collect()
-        };
+        let mut table_partitions = partition_manager
+            .batch_find_table_partitions(&table_ids)
+            .await
+            .context(FindPartitionsSnafu)?;

         for table_info in table_infos {
             let partitions = table_partitions
```
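The standalone fallback (defaulting every table to a single region) is gone: when the catalog manager is not a `KvBackendCatalogManager`, the builder now fails with the new `PartitionManagerNotFound` error via snafu's `OptionExt::context`. A minimal, self-contained sketch of that Option-to-Result pattern, using a stand-in error type rather than the project's real `Error` enum:

```rust
use snafu::{OptionExt, Snafu};

// Hypothetical stand-in error; it exists only to illustrate the conversion
// used above, not to mirror the real catalog error enum.
#[derive(Debug, Snafu)]
#[snafu(display("Partition manager not found, it's not expected."))]
struct PartitionManagerNotFound;

fn require_partition_manager(pm: Option<String>) -> Result<String, PartitionManagerNotFound> {
    // `context` maps `None` to the named error instead of silently falling back.
    pm.context(PartitionManagerNotFoundSnafu)
}
```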
```diff
@@ -12,13 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::collections::HashSet;
 use std::sync::{Arc, Weak};

 use arrow_schema::SchemaRef as ArrowSchemaRef;
-use common_catalog::consts::INFORMATION_SCHEMA_TABLES_TABLE_ID;
+use common_catalog::consts::{INFORMATION_SCHEMA_TABLES_TABLE_ID, MITO_ENGINE};
 use common_error::ext::BoxedError;
+use common_meta::datanode::RegionStat;
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
+use common_telemetry::error;
 use datafusion::execution::TaskContext;
 use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
 use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
@@ -31,7 +34,7 @@ use datatypes::vectors::{
 };
 use futures::TryStreamExt;
 use snafu::{OptionExt, ResultExt};
-use store_api::storage::{ScanRequest, TableId};
+use store_api::storage::{RegionId, ScanRequest, TableId};
 use table::metadata::{TableInfo, TableType};

 use super::TABLES;
@@ -39,6 +42,7 @@ use crate::error::{
     CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
 };
 use crate::system_schema::information_schema::{InformationTable, Predicates};
+use crate::system_schema::utils;
 use crate::CatalogManager;

 pub const TABLE_CATALOG: &str = "table_catalog";
@@ -234,17 +238,50 @@ impl InformationSchemaTablesBuilder {
             .context(UpgradeWeakCatalogManagerRefSnafu)?;
         let predicates = Predicates::from_scan_request(&request);

+        let information_extension = utils::information_extension(&self.catalog_manager)?;
+
+        // TODO(dennis): `region_stats` API is not stable in distributed cluster because of network issue etc.
+        // But we don't want the statements such as `show tables` fail,
+        // so using `unwrap_or_else` here instead of `?` operator.
+        let region_stats = information_extension
+            .region_stats()
+            .await
+            .map_err(|e| {
+                error!(e; "Failed to call region_stats");
+                e
+            })
+            .unwrap_or_else(|_| vec![]);
+
         for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
             let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);

             while let Some(table) = stream.try_next().await? {
                 let table_info = table.table_info();

+                // TODO(dennis): make it working for metric engine
+                let table_region_stats = if table_info.meta.engine == MITO_ENGINE {
+                    let region_ids = table_info
+                        .meta
+                        .region_numbers
+                        .iter()
+                        .map(|n| RegionId::new(table_info.ident.table_id, *n))
+                        .collect::<HashSet<_>>();
+
+                    region_stats
+                        .iter()
+                        .filter(|stat| region_ids.contains(&stat.id))
+                        .collect::<Vec<_>>()
+                } else {
+                    vec![]
+                };
+
                 self.add_table(
                     &predicates,
                     &catalog_name,
                     &schema_name,
                     table_info,
                     table.table_type(),
+                    &table_region_stats,
                 );
             }
         }
@@ -260,6 +297,7 @@ impl InformationSchemaTablesBuilder {
         schema_name: &str,
         table_info: Arc<TableInfo>,
         table_type: TableType,
+        region_stats: &[&RegionStat],
     ) {
         let table_name = table_info.name.as_ref();
         let table_id = table_info.table_id();
@@ -273,7 +311,9 @@ impl InformationSchemaTablesBuilder {

         let row = [
             (TABLE_CATALOG, &Value::from(catalog_name)),
+            (TABLE_ID, &Value::from(table_id)),
             (TABLE_SCHEMA, &Value::from(schema_name)),
+            (ENGINE, &Value::from(engine)),
             (TABLE_NAME, &Value::from(table_name)),
             (TABLE_TYPE, &Value::from(table_type_text)),
         ];
@@ -287,21 +327,39 @@ impl InformationSchemaTablesBuilder {
         self.table_names.push(Some(table_name));
         self.table_types.push(Some(table_type_text));
         self.table_ids.push(Some(table_id));

+        let data_length = region_stats.iter().map(|stat| stat.sst_size).sum();
+        let table_rows = region_stats.iter().map(|stat| stat.num_rows).sum();
+        let index_length = region_stats.iter().map(|stat| stat.index_size).sum();
+
+        // It's not precise, but it is acceptable for long-term data storage.
+        let avg_row_length = if table_rows > 0 {
+            let total_data_length = data_length
+                + region_stats
+                    .iter()
+                    .map(|stat| stat.memtable_size)
+                    .sum::<u64>();
+
+            total_data_length / table_rows
+        } else {
+            0
+        };
+
+        self.data_length.push(Some(data_length));
+        self.index_length.push(Some(index_length));
+        self.table_rows.push(Some(table_rows));
+        self.avg_row_length.push(Some(avg_row_length));

         // TODO(sunng87): use real data for these fields
-        self.data_length.push(Some(0));
         self.max_data_length.push(Some(0));
-        self.index_length.push(Some(0));
-        self.avg_row_length.push(Some(0));
-        self.max_index_length.push(Some(0));
         self.checksum.push(Some(0));
-        self.table_rows.push(Some(0));
+        self.max_index_length.push(Some(0));
         self.data_free.push(Some(0));
         self.auto_increment.push(Some(0));
         self.row_format.push(Some("Fixed"));
         self.table_collation.push(Some("utf8_bin"));
         self.update_time.push(None);
         self.check_time.push(None);

         // use mariadb default table version number here
         self.version.push(Some(11));
         self.table_comment.push(table_info.desc.as_deref());
```
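`information_schema.tables` now fills its size and row-count columns from the collected mito region statistics instead of hard-coded zeros. Summing over a table's regions r, the estimate computed above is:

$$
\mathrm{avg\_row\_length} =
\begin{cases}
\dfrac{\sum_r \mathrm{sst\_size}_r + \sum_r \mathrm{memtable\_size}_r}{\sum_r \mathrm{num\_rows}_r}, & \text{if } \sum_r \mathrm{num\_rows}_r > 0,\\[6pt]
0, & \text{otherwise,}
\end{cases}
$$

with `data_length` being the sum of `sst_size` and `index_length` the sum of `index_size`; the division is `u64` integer division, so the result is floored.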
```diff
@@ -9,7 +9,7 @@ workspace = true

 [features]
 default = ["geo"]
-geo = ["geohash", "h3o", "s2"]
+geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]

 [dependencies]
 api.workspace = true
@@ -28,6 +28,8 @@ common-version.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
 derive_more = { version = "1", default-features = false, features = ["display"] }
+geo = { version = "0.29", optional = true }
+geo-types = { version = "0.7", optional = true }
 geohash = { version = "0.13", optional = true }
 h3o = { version = "0.6", optional = true }
 jsonb.workspace = true
@@ -44,6 +46,7 @@ sql.workspace = true
 statrs = "0.16"
 store-api.workspace = true
 table.workspace = true
+wkt = { version = "0.11", optional = true }

 [dev-dependencies]
 ron = "0.7"
```
```diff
@@ -17,7 +17,10 @@ pub(crate) mod encoding;
 mod geohash;
 mod h3;
 mod helpers;
+mod measure;
+mod relation;
 mod s2;
+mod wkt;

 use crate::function_registry::FunctionRegistry;

@@ -48,6 +51,7 @@ impl GeoFunctions {
         registry.register(Arc::new(h3::H3CellToChildrenSize));
         registry.register(Arc::new(h3::H3CellToChildPos));
         registry.register(Arc::new(h3::H3ChildPosToCell));
+        registry.register(Arc::new(h3::H3CellContains));

         // h3 grid traversal
         registry.register(Arc::new(h3::H3GridDisk));
@@ -55,10 +59,27 @@ impl GeoFunctions {
         registry.register(Arc::new(h3::H3GridDistance));
         registry.register(Arc::new(h3::H3GridPathCells));

+        // h3 measurement
+        registry.register(Arc::new(h3::H3CellDistanceSphereKm));
+        registry.register(Arc::new(h3::H3CellDistanceEuclideanDegree));
+
         // s2
         registry.register(Arc::new(s2::S2LatLngToCell));
         registry.register(Arc::new(s2::S2CellLevel));
         registry.register(Arc::new(s2::S2CellToToken));
+        registry.register(Arc::new(s2::S2CellParent));

+        // spatial data type
+        registry.register(Arc::new(wkt::LatLngToPointWkt));
+
+        // spatial relation
+        registry.register(Arc::new(relation::STContains));
+        registry.register(Arc::new(relation::STWithin));
+        registry.register(Arc::new(relation::STIntersects));
+
+        // spatial measure
+        registry.register(Arc::new(measure::STDistance));
+        registry.register(Arc::new(measure::STDistanceSphere));
+        registry.register(Arc::new(measure::STArea));
     }
 }
```
```diff
@@ -23,8 +23,8 @@ use datatypes::prelude::ConcreteDataType;
 use datatypes::scalars::{Scalar, ScalarVectorBuilder};
 use datatypes::value::{ListValue, Value};
 use datatypes::vectors::{
-    BooleanVectorBuilder, Int32VectorBuilder, ListVectorBuilder, MutableVector,
-    StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
+    BooleanVectorBuilder, Float64VectorBuilder, Int32VectorBuilder, ListVectorBuilder,
+    MutableVector, StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
 };
 use derive_more::Display;
 use h3o::{CellIndex, LatLng, Resolution};
@@ -38,6 +38,7 @@ static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
     vec![
         ConcreteDataType::int64_datatype(),
         ConcreteDataType::uint64_datatype(),
+        ConcreteDataType::string_datatype(),
     ]
 });

@@ -952,6 +953,181 @@ impl Function for H3GridPathCells {
     }
 }

+/// Tests if cells contains given cells
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct H3CellContains;
+
+impl Function for H3CellContains {
+    fn name(&self) -> &str {
+        "h3_cells_contains"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::boolean_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        let multi_cell_types = vec![
+            ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()),
+            ConcreteDataType::list_datatype(ConcreteDataType::uint64_datatype()),
+            ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
+            ConcreteDataType::string_datatype(),
+        ];
+
+        let mut signatures = Vec::with_capacity(multi_cell_types.len() * CELL_TYPES.len());
+        for multi_cell_type in &multi_cell_types {
+            for cell_type in CELL_TYPES.as_slice() {
+                signatures.push(TypeSignature::Exact(vec![
+                    multi_cell_type.clone(),
+                    cell_type.clone(),
+                ]));
+            }
+        }
+
+        Signature::one_of(signatures, Volatility::Stable)
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 2);
+
+        let cells_vec = &columns[0];
+        let cell_this_vec = &columns[1];
+
+        let size = cell_this_vec.len();
+        let mut results = BooleanVectorBuilder::with_capacity(size);
+
+        for i in 0..size {
+            let mut result = None;
+            if let (cells, Some(cell_this)) = (
+                cells_from_value(cells_vec.get(i))?,
+                cell_from_value(cell_this_vec.get(i))?,
+            ) {
+                result = Some(false);
+
+                for cell_that in cells.iter() {
+                    // get cell resolution, and find cell_this's parent at
+                    // this solution, test if cell_that equals the parent
+                    let resolution = cell_that.resolution();
+                    if let Some(cell_this_parent) = cell_this.parent(resolution) {
+                        if cell_this_parent == *cell_that {
+                            result = Some(true);
+                            break;
+                        }
+                    }
+                }
+            }
+
+            results.push(result);
+        }
+
+        Ok(results.to_vector())
+    }
+}
```
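The loop above encodes a simple rule: the candidate cell is "contained" when one of the listed cells is its ancestor. A self-contained sketch of that check with the `h3o` crate (the helper name `covers` is illustrative and not part of the codebase):

```rust
use h3o::{CellIndex, LatLng, Resolution};

/// `outer` covers `inner` when `inner`'s ancestor at `outer`'s resolution
/// is `outer` itself.
fn covers(outer: CellIndex, inner: CellIndex) -> bool {
    inner.parent(outer.resolution()) == Some(outer)
}

fn main() {
    // Build a coarse cell and one of its fine-grained descendants.
    let ll = LatLng::new(42.3521, -72.1235).expect("valid lat/lng");
    let outer = ll.to_cell(Resolution::Five);
    let inner = outer.center_child(Resolution::Nine).expect("finer resolution");
    assert!(covers(outer, inner));
}
```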
```diff
+/// Get WGS84 great circle distance of two cell centroid
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct H3CellDistanceSphereKm;
+
+impl Function for H3CellDistanceSphereKm {
+    fn name(&self) -> &str {
+        "h3_distance_sphere_km"
+    }
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::float64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        signature_of_double_cells()
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 2);
+
+        let cell_this_vec = &columns[0];
+        let cell_that_vec = &columns[1];
+        let size = cell_this_vec.len();
+
+        let mut results = Float64VectorBuilder::with_capacity(size);
+
+        for i in 0..size {
+            let result = match (
+                cell_from_value(cell_this_vec.get(i))?,
+                cell_from_value(cell_that_vec.get(i))?,
+            ) {
+                (Some(cell_this), Some(cell_that)) => {
+                    let centroid_this = LatLng::from(cell_this);
+                    let centroid_that = LatLng::from(cell_that);
+
+                    Some(centroid_this.distance_km(centroid_that))
+                }
+                _ => None,
+            };
+
+            results.push(result);
+        }
+
+        Ok(results.to_vector())
+    }
+}
+
+/// Get Euclidean distance of two cell centroid
+#[derive(Clone, Debug, Default, Display)]
+#[display("{}", self.name())]
+pub struct H3CellDistanceEuclideanDegree;
+
+impl H3CellDistanceEuclideanDegree {
+    fn distance(centroid_this: LatLng, centroid_that: LatLng) -> f64 {
+        ((centroid_this.lat() - centroid_that.lat()).powi(2)
+            + (centroid_this.lng() - centroid_that.lng()).powi(2))
+        .sqrt()
+    }
+}
+
+impl Function for H3CellDistanceEuclideanDegree {
+    fn name(&self) -> &str {
+        "h3_distance_degree"
+    }
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::float64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        signature_of_double_cells()
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure_columns_n!(columns, 2);
+
+        let cell_this_vec = &columns[0];
+        let cell_that_vec = &columns[1];
+        let size = cell_this_vec.len();
+
+        let mut results = Float64VectorBuilder::with_capacity(size);
+
+        for i in 0..size {
+            let result = match (
+                cell_from_value(cell_this_vec.get(i))?,
+                cell_from_value(cell_that_vec.get(i))?,
+            ) {
+                (Some(cell_this), Some(cell_that)) => {
+                    let centroid_this = LatLng::from(cell_this);
+                    let centroid_that = LatLng::from(cell_that);
+
+                    let dist = Self::distance(centroid_this, centroid_that);
+                    Some(dist)
+                }
+                _ => None,
+            };
+
+            results.push(result);
+        }
+
+        Ok(results.to_vector())
+    }
+}
+
 fn value_to_resolution(v: Value) -> Result<Resolution> {
     let r = match v {
         Value::Int8(v) => v as u8,
@@ -1073,7 +1249,126 @@ fn cell_from_value(v: Value) -> Result<Option<CellIndex>> {
                 })
                 .context(error::ExecuteSnafu)?,
         ),
+        Value::String(s) => Some(
+            CellIndex::from_str(s.as_utf8())
+                .map_err(|e| {
+                    BoxedError::new(PlainError::new(
+                        format!("H3 error: {}", e),
+                        StatusCode::EngineExecuteQuery,
+                    ))
+                })
+                .context(error::ExecuteSnafu)?,
+        ),
         _ => None,
     };
     Ok(cell)
 }

+/// extract cell array from all possible types including:
+/// - int64 list
+/// - uint64 list
+/// - string list
+/// - comma-separated string
+fn cells_from_value(v: Value) -> Result<Vec<CellIndex>> {
+    match v {
+        Value::List(list) => match list.datatype() {
+            ConcreteDataType::Int64(_) => list
+                .items()
+                .iter()
+                .map(|v| {
+                    if let Value::Int64(v) = v {
+                        CellIndex::try_from(*v as u64)
+                            .map_err(|e| {
+                                BoxedError::new(PlainError::new(
+                                    format!("H3 error: {}", e),
+                                    StatusCode::EngineExecuteQuery,
+                                ))
+                            })
+                            .context(error::ExecuteSnafu)
+                    } else {
+                        Err(BoxedError::new(PlainError::new(
+                            "Invalid data type in array".to_string(),
+                            StatusCode::EngineExecuteQuery,
+                        )))
+                        .context(error::ExecuteSnafu)
+                    }
+                })
+                .collect::<Result<Vec<CellIndex>>>(),
+            ConcreteDataType::UInt64(_) => list
+                .items()
+                .iter()
+                .map(|v| {
+                    if let Value::UInt64(v) = v {
+                        CellIndex::try_from(*v)
+                            .map_err(|e| {
+                                BoxedError::new(PlainError::new(
+                                    format!("H3 error: {}", e),
+                                    StatusCode::EngineExecuteQuery,
+                                ))
+                            })
+                            .context(error::ExecuteSnafu)
+                    } else {
+                        Err(BoxedError::new(PlainError::new(
+                            "Invalid data type in array".to_string(),
+                            StatusCode::EngineExecuteQuery,
+                        )))
+                        .context(error::ExecuteSnafu)
+                    }
+                })
+                .collect::<Result<Vec<CellIndex>>>(),
+            ConcreteDataType::String(_) => list
+                .items()
+                .iter()
+                .map(|v| {
+                    if let Value::String(v) = v {
+                        CellIndex::from_str(v.as_utf8().trim())
+                            .map_err(|e| {
+                                BoxedError::new(PlainError::new(
+                                    format!("H3 error: {}", e),
+                                    StatusCode::EngineExecuteQuery,
+                                ))
+                            })
+                            .context(error::ExecuteSnafu)
+                    } else {
+                        Err(BoxedError::new(PlainError::new(
+                            "Invalid data type in array".to_string(),
+                            StatusCode::EngineExecuteQuery,
+                        )))
+                        .context(error::ExecuteSnafu)
+                    }
+                })
+                .collect::<Result<Vec<CellIndex>>>(),
+            _ => Ok(vec![]),
+        },
+        Value::String(csv) => {
+            let str_seq = csv.as_utf8().split(',');
+            str_seq
+                .map(|v| {
+                    CellIndex::from_str(v.trim())
+                        .map_err(|e| {
+                            BoxedError::new(PlainError::new(
+                                format!("H3 error: {}", e),
+                                StatusCode::EngineExecuteQuery,
+                            ))
+                        })
+                        .context(error::ExecuteSnafu)
+                })
+                .collect::<Result<Vec<CellIndex>>>()
+        }
+        _ => Ok(vec![]),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_h3_euclidean_distance() {
+        let point_this = LatLng::new(42.3521, -72.1235).expect("incorrect lat lng");
+        let point_that = LatLng::new(42.45, -72.1260).expect("incorrect lat lng");
+
+        let dist = H3CellDistanceEuclideanDegree::distance(point_this, point_that);
+        assert_eq!(dist, 0.09793191512474639);
+    }
+}
```
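For reference, `h3_distance_degree` is the plain planar distance between the two cell centroids, measured in degrees:

$$ d = \sqrt{(\varphi_1 - \varphi_2)^2 + (\lambda_1 - \lambda_2)^2} $$

Plugging in the unit test's coordinates gives $\sqrt{(42.45 - 42.3521)^2 + (72.1260 - 72.1235)^2} = \sqrt{0.0979^2 + 0.0025^2} \approx 0.0979319$, which matches the asserted value.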
src/common/function/src/scalars/geo/measure.rs — new file, 195 lines
```rust
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{Float64VectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::line_measures::metric_spaces::Euclidean;
use geo::{Area, Distance, Haversine};
use geo_types::Geometry;
use snafu::ResultExt;

use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};

/// Return WGS84(SRID: 4326) euclidean distance between two geometry object, in degree
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistance;

impl Function for STDistance {
    fn name(&self) -> &str {
        "st_distance"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(Euclidean::distance(&geom_this, &geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Return great circle distance between two geometry object, in meters
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STDistanceSphere;

impl Function for STDistanceSphere {
    fn name(&self) -> &str {
        "st_distance_sphere_m"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    match (geom_this, geom_that) {
                        (Geometry::Point(this), Geometry::Point(that)) => {
                            Some(Haversine::distance(this, that))
                        }
                        _ => {
                            Err(BoxedError::new(PlainError::new(
                                "Great circle distance between non-point objects are not supported for now.".to_string(),
                                StatusCode::Unsupported,
                            ))).context(error::ExecuteSnafu)?
                        }
                    }
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}
```
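`st_distance_sphere_m` delegates to the geo crate's `Haversine` metric for pairs of points; the underlying great-circle formula, with coordinates in radians and R the mean Earth radius used by the crate, is the standard haversine:

$$ d = 2R \arcsin\!\left(\sqrt{\sin^2\tfrac{\varphi_2-\varphi_1}{2} + \cos\varphi_1\cos\varphi_2\,\sin^2\tfrac{\lambda_2-\lambda_1}{2}}\right) $$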
```rust
/// Return area of given geometry object
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STArea;

impl Function for STArea {
    fn name(&self) -> &str {
        "st_area"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::float64_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 1);

        let wkt_vec = &columns[0];

        let size = wkt_vec.len();
        let mut results = Float64VectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt = wkt_vec.get(i).as_string();

            let result = if let Some(wkt) = wkt {
                let geom = parse_wkt(&wkt)?;
                Some(geom.unsigned_area())
            } else {
                None
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}
```
src/common/function/src/scalars/geo/relation.rs — new file, 190 lines
@@ -0,0 +1,190 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_query::error::Result;
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector, VectorRef};
use derive_more::Display;
use geo::algorithm::contains::Contains;
use geo::algorithm::intersects::Intersects;
use geo::algorithm::within::Within;

use super::helpers::{ensure_columns_len, ensure_columns_n};
use super::wkt::parse_wkt;
use crate::function::{Function, FunctionContext};

/// Test if spatial relationship: contains
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STContains;

impl Function for STContains {
    fn name(&self) -> &str {
        "st_contains"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.contains(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Test if spatial relationship: within
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STWithin;

impl Function for STWithin {
    fn name(&self) -> &str {
        "st_within"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.is_within(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}

/// Test if spatial relationship: intersects
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct STIntersects;

impl Function for STIntersects {
    fn name(&self) -> &str {
        "st_intersects"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::boolean_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::new(
            TypeSignature::Exact(vec![
                ConcreteDataType::string_datatype(),
                ConcreteDataType::string_datatype(),
            ]),
            Volatility::Stable,
        )
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let wkt_this_vec = &columns[0];
        let wkt_that_vec = &columns[1];

        let size = wkt_this_vec.len();
        let mut results = BooleanVectorBuilder::with_capacity(size);

        for i in 0..size {
            let wkt_this = wkt_this_vec.get(i).as_string();
            let wkt_that = wkt_that_vec.get(i).as_string();

            let result = match (wkt_this, wkt_that) {
                (Some(wkt_this), Some(wkt_that)) => {
                    let geom_this = parse_wkt(&wkt_this)?;
                    let geom_that = parse_wkt(&wkt_that)?;

                    Some(geom_this.intersects(&geom_that))
                }
                _ => None,
            };

            results.push(result);
        }

        Ok(results.to_vector())
    }
}
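
// NOTE (editor's sketch, not part of the upstream change): a minimal illustration of how the
// relation functions above are expected to evaluate. Both arguments are WKT strings and a NULL
// in either argument yields a NULL result. The helper types (`StringVector`,
// `FunctionContext::default()`, `as_boolean()`) are assumed to behave as in the crate's other
// tests shown later in this diff.
#[cfg(test)]
mod relation_sketch {
    use std::sync::Arc;

    use datatypes::vectors::StringVector;

    use super::*;

    #[test]
    fn polygon_contains_point() {
        let polygons = StringVector::from_vec(vec!["POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))"]);
        let points = StringVector::from_vec(vec!["POINT (5 5)"]);
        let args: Vec<VectorRef> = vec![Arc::new(polygons), Arc::new(points)];

        let result = STContains.eval(FunctionContext::default(), &args).unwrap();
        // The polygon contains the point, so the single row should evaluate to `true`.
        assert!(result.get_ref(0).as_boolean().unwrap().unwrap());
    }
}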

100
src/common/function/src/scalars/geo/wkt.rs
Normal file
@@ -0,0 +1,100 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
use derive_more::Display;
use geo_types::{Geometry, Point};
use once_cell::sync::Lazy;
use snafu::ResultExt;
use wkt::{ToWkt, TryFromWkt};

use super::helpers::{ensure_columns_len, ensure_columns_n};
use crate::function::{Function, FunctionContext};

static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
    vec![
        ConcreteDataType::float32_datatype(),
        ConcreteDataType::float64_datatype(),
    ]
});

/// Build a WGS84 (SRID: 4326) point from a latitude/longitude pair and return it as a WKT string
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct LatLngToPointWkt;

impl Function for LatLngToPointWkt {
    fn name(&self) -> &str {
        "wkt_point_from_latlng"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    fn signature(&self) -> Signature {
        let mut signatures = Vec::new();
        for coord_type in COORDINATE_TYPES.as_slice() {
            signatures.push(TypeSignature::Exact(vec![
                // latitude
                coord_type.clone(),
                // longitude
                coord_type.clone(),
            ]));
        }
        Signature::one_of(signatures, Volatility::Stable)
    }

    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure_columns_n!(columns, 2);

        let lat_vec = &columns[0];
        let lng_vec = &columns[1];

        let size = lat_vec.len();
        let mut results = StringVectorBuilder::with_capacity(size);

        for i in 0..size {
            let lat = lat_vec.get(i).as_f64_lossy();
            let lng = lng_vec.get(i).as_f64_lossy();

            let result = match (lat, lng) {
                (Some(lat), Some(lng)) => Some(Point::new(lng, lat).wkt_string()),
                _ => None,
            };

            results.push(result.as_deref());
        }

        Ok(results.to_vector())
    }
}

pub(super) fn parse_wkt(s: &str) -> Result<Geometry> {
    Geometry::try_from_wkt_str(s)
        .map_err(|e| {
            BoxedError::new(PlainError::new(
                format!("Fail to parse WKT: {}", e),
                StatusCode::EngineExecuteQuery,
            ))
        })
        .context(error::ExecuteSnafu)
}
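
// NOTE (editor's sketch, not part of the upstream change): a quick round trip between
// `wkt_point_from_latlng` and `parse_wkt`. The eval loop above builds `Point::new(lng, lat)`,
// i.e. the (latitude, longitude) arguments are swapped into x/y order before encoding as WKT.
#[cfg(test)]
mod wkt_sketch {
    use geo_types::Geometry;
    use wkt::ToWkt;

    use super::*;

    #[test]
    fn latlng_point_round_trip() {
        // lat = 30, lng = 120 => POINT with x = 120, y = 30
        let wkt = Point::new(120.0_f64, 30.0_f64).wkt_string();
        assert!(wkt.starts_with("POINT"));

        // `parse_wkt` accepts the string it produced and yields a point geometry.
        let geom = parse_wkt(&wkt).unwrap();
        assert!(matches!(geom, Geometry::Point(_)));
    }
}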

@@ -15,7 +15,7 @@
use std::fmt::{self, Display};

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
@@ -41,10 +41,24 @@ impl Function for JsonPathExistsFunction {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
Signature::one_of(
|
||||
vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -64,25 +78,26 @@ impl Function for JsonPathExistsFunction {
|
||||
let paths = &columns[1];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
match (jsons.data_type(), paths.data_type()) {
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
let path = paths.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let path = path.as_string();
|
||||
let result = match (json, path) {
|
||||
let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
|
||||
{
|
||||
(Ok(Some(json)), Ok(Some(path))) => {
|
||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||
match json_path {
|
||||
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
|
||||
Err(_) => None,
|
||||
}
|
||||
// Get `JsonPath`.
|
||||
let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
|
||||
{
|
||||
Ok(json_path) => json_path,
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json path: {:?}", path),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
jsonb::path_exists(json, json_path).ok()
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
@@ -90,6 +105,12 @@ impl Function for JsonPathExistsFunction {
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Any null args existence causes the result to be NULL.
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
@@ -114,8 +135,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, StringVector};
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, NullVector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -133,9 +154,27 @@ mod tests {
|
||||
|
||||
assert!(matches!(json_path_exists.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
type_signature: TypeSignature::OneOf(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
|
||||
} if valid_types ==
|
||||
vec![
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
],
|
||||
));
|
||||
|
||||
let json_strings = [
|
||||
@@ -143,9 +182,15 @@ mod tests {
|
||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"[1, 2, 3]"#,
|
||||
r#"null"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"null"#,
|
||||
];
|
||||
let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
|
||||
let results = [false, true, true, false];
|
||||
let paths = vec![
|
||||
"$.a.b.c", "$.b", "$.c.a", ".d", "$[0]", "$.a", "null", "null",
|
||||
];
|
||||
let expected = [false, true, true, false, true, false, false, false];
|
||||
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
@@ -162,11 +207,44 @@ mod tests {
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, gt) in results.iter().enumerate() {
|
||||
// Test for non-nulls.
|
||||
assert_eq!(8, vector.len());
|
||||
for (i, real) in expected.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
let result = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(*gt, result);
|
||||
assert!(!result.is_null());
|
||||
let val = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(val, *real);
|
||||
}
|
||||
|
||||
// Test for path error.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let illegal_path = StringVector::from_vec(vec!["$..a"]);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(illegal_path)];
|
||||
let err = json_path_exists.eval(FunctionContext::default(), &args);
|
||||
assert!(err.is_err());
|
||||
|
||||
// Test for nulls.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let null_json = NullVector::new(1);
|
||||
|
||||
let path = StringVector::from_vec(vec!["$.a"]);
|
||||
let null_path = NullVector::new(1);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(null_json), Arc::new(path)];
|
||||
let result1 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(null_path)];
|
||||
let result2 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result1.len(), 1);
|
||||
assert!(result1.get_ref(0).is_null());
|
||||
assert_eq!(result2.len(), 1);
|
||||
assert!(result2.get_ref(0).is_null());
|
||||
}
|
||||
}
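
// NOTE (editor's sketch, not part of the upstream change): the net effect of the signature and
// eval changes above is that `json_path_exists` now accepts NULL for either argument and
// returns NULL for that row instead of failing at plan time, while an illegal path is still a
// hard error, as the added tests check. Roughly:
//
//     json_path_exists(json, '$.a')   -- true / false
//     json_path_exists(NULL, '$.a')   -- NULL
//     json_path_exists(json, NULL)    -- NULL
//     json_path_exists(json, '$..a')  -- error: Illegal json path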
|
||||
|
||||
@@ -14,10 +14,11 @@
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::column_def::contains_fulltext;
|
||||
use api::v1::{
|
||||
AddColumn, AddColumns, Column, ColumnDataType, ColumnDataTypeExtension, ColumnDef,
|
||||
ColumnOptions, ColumnSchema, CreateTableExpr, SemanticType,
|
||||
ColumnOptions, ColumnSchema, CreateTableExpr, JsonTypeExtension, SemanticType,
|
||||
};
|
||||
use datatypes::schema::Schema;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -25,8 +26,9 @@ use table::metadata::TableId;
|
||||
use table::table_reference::TableReference;
|
||||
|
||||
use crate::error::{
|
||||
DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu, InvalidFulltextColumnTypeSnafu,
|
||||
MissingTimestampColumnSnafu, Result, UnknownColumnDataTypeSnafu,
|
||||
self, DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu,
|
||||
InvalidFulltextColumnTypeSnafu, MissingTimestampColumnSnafu, Result,
|
||||
UnknownColumnDataTypeSnafu,
|
||||
};
|
||||
pub struct ColumnExpr<'a> {
|
||||
pub column_name: &'a str,
|
||||
@@ -72,6 +74,28 @@ impl<'a> From<&'a ColumnSchema> for ColumnExpr<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn infer_column_datatype(
|
||||
datatype: i32,
|
||||
datatype_extension: &Option<ColumnDataTypeExtension>,
|
||||
) -> Result<ColumnDataType> {
|
||||
let column_type =
|
||||
ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
|
||||
|
||||
if matches!(&column_type, ColumnDataType::Binary) {
|
||||
if let Some(ext) = datatype_extension {
|
||||
let type_ext = ext
|
||||
.type_ext
|
||||
.as_ref()
|
||||
.context(error::MissingFieldSnafu { field: "type_ext" })?;
|
||||
if *type_ext == TypeExt::JsonType(JsonTypeExtension::JsonBinary.into()) {
|
||||
return Ok(ColumnDataType::Json);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(column_type)
|
||||
}
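
// NOTE (editor's sketch, not part of the upstream change): expected behaviour of the helper
// above, assuming the protobuf types referenced in this hunk. A Binary wire type is promoted to
// Json only when the JSON type extension is attached; otherwise the decoded datatype is kept.
//
//     let json_ext = ColumnDataTypeExtension {
//         type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
//     };
//     assert_eq!(
//         infer_column_datatype(ColumnDataType::Binary as i32, &Some(json_ext))?,
//         ColumnDataType::Json
//     );
//     assert_eq!(
//         infer_column_datatype(ColumnDataType::Binary as i32, &None)?,
//         ColumnDataType::Binary
//     );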
|
||||
|
||||
pub fn build_create_table_expr(
|
||||
table_id: Option<TableId>,
|
||||
table_name: &TableReference<'_>,
|
||||
@@ -124,8 +148,7 @@ pub fn build_create_table_expr(
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let column_type =
|
||||
ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?;
|
||||
let column_type = infer_column_datatype(datatype, datatype_extension)?;
|
||||
|
||||
ensure!(
|
||||
!contains_fulltext(options) || column_type == ColumnDataType::String,
|
||||
|
||||
@@ -91,6 +91,7 @@ pub mod catalog_name;
|
||||
pub mod datanode_table;
|
||||
pub mod flow;
|
||||
pub mod node_address;
|
||||
mod schema_metadata_manager;
|
||||
pub mod schema_name;
|
||||
pub mod table_info;
|
||||
pub mod table_name;
|
||||
@@ -116,6 +117,7 @@ use flow::flow_route::FlowRouteValue;
|
||||
use flow::table_flow::TableFlowValue;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
pub use schema_metadata_manager::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
122
src/common/meta/src/key/schema_metadata_manager.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Schema-level metadata manager.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use snafu::OptionExt;
|
||||
use store_api::storage::TableId;
|
||||
|
||||
use crate::error::TableInfoNotFoundSnafu;
|
||||
use crate::key::schema_name::{SchemaManager, SchemaNameKey};
|
||||
use crate::key::table_info::{TableInfoManager, TableInfoManagerRef};
|
||||
use crate::kv_backend::KvBackendRef;
|
||||
use crate::{error, SchemaOptions};
|
||||
|
||||
pub type SchemaMetadataManagerRef = Arc<SchemaMetadataManager>;
|
||||
|
||||
pub struct SchemaMetadataManager {
|
||||
table_info_manager: TableInfoManagerRef,
|
||||
schema_manager: SchemaManager,
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
kv_backend: KvBackendRef,
|
||||
}
|
||||
|
||||
impl SchemaMetadataManager {
|
||||
/// Creates a new database meta
|
||||
#[cfg(not(any(test, feature = "testing")))]
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
let table_info_manager = Arc::new(TableInfoManager::new(kv_backend.clone()));
|
||||
let schema_manager = SchemaManager::new(kv_backend);
|
||||
Self {
|
||||
table_info_manager,
|
||||
schema_manager,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new database meta
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub fn new(kv_backend: KvBackendRef) -> Self {
|
||||
let table_info_manager = Arc::new(TableInfoManager::new(kv_backend.clone()));
|
||||
let schema_manager = SchemaManager::new(kv_backend.clone());
|
||||
Self {
|
||||
table_info_manager,
|
||||
schema_manager,
|
||||
kv_backend,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets schema options by table id.
|
||||
pub async fn get_schema_options_by_table_id(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
) -> error::Result<Option<SchemaOptions>> {
|
||||
let table_info = self
|
||||
.table_info_manager
|
||||
.get(table_id)
|
||||
.await?
|
||||
.with_context(|| TableInfoNotFoundSnafu {
|
||||
table: format!("table id: {}", table_id),
|
||||
})?;
|
||||
|
||||
let key = SchemaNameKey::new(
|
||||
&table_info.table_info.catalog_name,
|
||||
&table_info.table_info.schema_name,
|
||||
);
|
||||
self.schema_manager.get(key).await
|
||||
}
|
||||
|
||||
#[cfg(any(test, feature = "testing"))]
|
||||
pub async fn register_region_table_info(
|
||||
&self,
|
||||
table_id: TableId,
|
||||
table_name: &str,
|
||||
schema_name: &str,
|
||||
catalog_name: &str,
|
||||
schema_value: Option<crate::key::schema_name::SchemaNameValue>,
|
||||
) {
|
||||
use table::metadata::{RawTableInfo, TableType};
|
||||
let value = crate::key::table_info::TableInfoValue::new(RawTableInfo {
|
||||
ident: Default::default(),
|
||||
name: table_name.to_string(),
|
||||
desc: None,
|
||||
catalog_name: catalog_name.to_string(),
|
||||
schema_name: schema_name.to_string(),
|
||||
meta: Default::default(),
|
||||
table_type: TableType::Base,
|
||||
});
|
||||
let (txn, _) = self
|
||||
.table_info_manager
|
||||
.build_create_txn(table_id, &value)
|
||||
.unwrap();
|
||||
let resp = self.kv_backend.txn(txn).await.unwrap();
|
||||
assert!(resp.succeeded, "Failed to create table metadata");
|
||||
let key = SchemaNameKey {
|
||||
catalog: catalog_name,
|
||||
schema: schema_name,
|
||||
};
|
||||
self.schema_manager
|
||||
.create(key, schema_value, false)
|
||||
.await
|
||||
.expect("Failed to create schema metadata");
|
||||
common_telemetry::info!(
|
||||
"Register table: {}, id: {}, schema: {}, catalog: {}",
|
||||
table_name,
|
||||
table_id,
|
||||
schema_name,
|
||||
catalog_name
|
||||
);
|
||||
}
|
||||
}
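
// NOTE (editor's sketch, not part of the upstream change): typical read path for the manager
// above. A region engine resolves schema-level options (e.g. a database-wide TTL) from the
// table id of the region it is working on:
//
//     let manager = SchemaMetadataManager::new(kv_backend.clone());
//     if let Some(opts) = manager.get_schema_options_by_table_id(table_id).await? {
//         // `opts` is a `SchemaNameValue` (aliased as `SchemaOptions`); `opts.ttl` carries
//         // the database-level default picked up by compaction later in this diff.
//     }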
|
||||
@@ -134,6 +134,7 @@ impl TableInfoValue {
|
||||
}
|
||||
|
||||
pub type TableInfoManagerRef = Arc<TableInfoManager>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TableInfoManager {
|
||||
kv_backend: KvBackendRef,
|
||||
|
||||
@@ -54,4 +54,7 @@ pub type DatanodeId = u64;
|
||||
// The id of the flownode.
|
||||
pub type FlownodeId = u64;
|
||||
|
||||
/// Schema options.
|
||||
pub type SchemaOptions = key::schema_name::SchemaNameValue;
|
||||
|
||||
pub use instruction::RegionIdent;
|
||||
|
||||
@@ -427,7 +427,8 @@ mod test {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut region_server = mock_region_server();
|
||||
let mut engine_env = TestEnv::with_prefix("region-alive-keeper");
|
||||
let engine = Arc::new(engine_env.create_engine(MitoConfig::default()).await);
|
||||
let engine = engine_env.create_engine(MitoConfig::default()).await;
|
||||
let engine = Arc::new(engine);
|
||||
region_server.register_engine(engine.clone());
|
||||
|
||||
let alive_keeper = Arc::new(RegionAliveKeeper::new(region_server.clone(), 100));
|
||||
|
||||
@@ -30,7 +30,7 @@ use servers::heartbeat_options::HeartbeatOptions;
use servers::http::HttpOptions;
use servers::Mode;

pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256);
pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::gb(1);

/// Default data home in file storage
const DEFAULT_DATA_HOME: &str = "/tmp/greptimedb";

@@ -23,6 +23,7 @@ use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_greptimedb_telemetry::GreptimeDBTelemetryTask;
|
||||
use common_meta::key::datanode_table::{DatanodeTableManager, DatanodeTableValue};
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_meta::wal_options_allocator::prepare_wal_options;
|
||||
pub use common_procedure::options::ProcedureConfig;
|
||||
@@ -207,7 +208,10 @@ impl DatanodeBuilder {
|
||||
(Box::new(NoopRegionServerEventListener) as _, None)
|
||||
};
|
||||
|
||||
let region_server = self.new_region_server(region_event_listener).await?;
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(kv_backend.clone()));
|
||||
let region_server = self
|
||||
.new_region_server(schema_metadata_manager, region_event_listener)
|
||||
.await?;
|
||||
|
||||
let datanode_table_manager = DatanodeTableManager::new(kv_backend.clone());
|
||||
let table_values = datanode_table_manager
|
||||
@@ -312,6 +316,7 @@ impl DatanodeBuilder {
|
||||
|
||||
async fn new_region_server(
|
||||
&self,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
event_listener: RegionServerEventListenerRef,
|
||||
) -> Result<RegionServer> {
|
||||
let opts: &DatanodeOptions = &self.opts;
|
||||
@@ -340,8 +345,13 @@ impl DatanodeBuilder {
|
||||
);
|
||||
|
||||
let object_store_manager = Self::build_object_store_manager(&opts.storage).await?;
|
||||
let engines =
|
||||
Self::build_store_engines(opts, object_store_manager, self.plugins.clone()).await?;
|
||||
let engines = Self::build_store_engines(
|
||||
opts,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
self.plugins.clone(),
|
||||
)
|
||||
.await?;
|
||||
for engine in engines {
|
||||
region_server.register_engine(engine);
|
||||
}
|
||||
@@ -355,6 +365,7 @@ impl DatanodeBuilder {
|
||||
async fn build_store_engines(
|
||||
opts: &DatanodeOptions,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<Vec<RegionEngineRef>> {
|
||||
let mut engines = vec![];
|
||||
@@ -365,6 +376,7 @@ impl DatanodeBuilder {
|
||||
opts,
|
||||
object_store_manager.clone(),
|
||||
config.clone(),
|
||||
schema_metadata_manager.clone(),
|
||||
plugins.clone(),
|
||||
)
|
||||
.await?;
|
||||
@@ -390,6 +402,7 @@ impl DatanodeBuilder {
|
||||
opts: &DatanodeOptions,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
config: MitoConfig,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<MitoEngine> {
|
||||
let mito_engine = match &opts.wal {
|
||||
@@ -399,6 +412,7 @@ impl DatanodeBuilder {
|
||||
Self::build_raft_engine_log_store(&opts.storage.data_home, raft_engine_config)
|
||||
.await?,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
@@ -429,6 +443,7 @@ impl DatanodeBuilder {
|
||||
config,
|
||||
Self::build_kafka_log_store(kafka_config, global_index_collector).await?,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -1355,7 +1355,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_region_server_parallism() {
|
||||
async fn test_region_server_parallelism() {
|
||||
let p = RegionServerParallelism::from_opts(2, Duration::from_millis(1)).unwrap();
|
||||
let first_query = p.acquire().await;
|
||||
assert!(first_query.is_ok());
|
||||
|
||||
@@ -462,5 +462,15 @@ mod tests {
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
|
||||
// corrupted jsonb
|
||||
let jsonb = jsonb::parse_value("{\"hello\": \"world\"}".as_bytes())
|
||||
.unwrap()
|
||||
.to_vec();
|
||||
let corrupted_jsonb = jsonb[0..jsonb.len() - 1].to_vec();
|
||||
let error = BinaryVector::from(vec![corrupted_jsonb])
|
||||
.convert_binary_to_json()
|
||||
.unwrap_err();
|
||||
assert_matches!(error, error::Error::InvalidJson { .. });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,7 +37,6 @@ use operator::delete::Deleter;
|
||||
use operator::insert::Inserter;
|
||||
use operator::statement::StatementExecutor;
|
||||
use partition::manager::PartitionRuleManager;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::{QueryEngine, QueryEngineFactory};
|
||||
use servers::error::{AlreadyStartedSnafu, StartGrpcSnafu, TcpBindSnafu, TcpIncomingSnafu};
|
||||
use servers::server::Server;
|
||||
@@ -303,7 +302,7 @@ impl FlownodeBuilder {
|
||||
///
|
||||
/// or recover all existing flow tasks if in standalone mode(nodeid is None)
|
||||
///
|
||||
/// TODO(discord9): persisent flow tasks with internal state
|
||||
/// TODO(discord9): persistent flow tasks with internal state
|
||||
async fn recover_flows(&self, manager: &FlowWorkerManagerRef) -> Result<usize, Error> {
|
||||
let nodeid = self.opts.node_id;
|
||||
let to_be_recovered: Vec<_> = if let Some(nodeid) = nodeid {
|
||||
@@ -476,7 +475,6 @@ impl FrontendInvoker {
|
||||
layered_cache_registry.clone(),
|
||||
inserter.clone(),
|
||||
table_route_cache,
|
||||
StatementStatistics::default(),
|
||||
));
|
||||
|
||||
let invoker = FrontendInvoker::new(inserter, deleter, statement_executor);
|
||||
|
||||
@@ -51,6 +51,7 @@ use query::metrics::OnDone;
|
||||
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
|
||||
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
|
||||
use query::query_engine::DescribeResult;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use raft_engine::{Config, ReadableSize, RecoveryMode};
|
||||
use servers::error as server_error;
|
||||
@@ -122,6 +123,7 @@ pub struct Instance {
|
||||
deleter: DeleterRef,
|
||||
export_metrics_task: Option<ExportMetricsTask>,
|
||||
table_metadata_manager: TableMetadataManagerRef,
|
||||
stats: StatementStatistics,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
@@ -228,6 +230,10 @@ impl Instance {
|
||||
let query_interceptor = self.plugins.get::<SqlQueryInterceptorRef<Error>>();
|
||||
let query_interceptor = query_interceptor.as_ref();
|
||||
|
||||
let _slow_query_timer = self
|
||||
.stats
|
||||
.start_slow_query_timer(QueryStatement::Sql(stmt.clone()));
|
||||
|
||||
let output = match stmt {
|
||||
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
|
||||
let stmt = QueryStatement::Sql(stmt);
|
||||
@@ -412,7 +418,6 @@ impl PrometheusHandler for Instance {
|
||||
let interceptor = self
|
||||
.plugins
|
||||
.get::<PromQueryInterceptorRef<server_error::Error>>();
|
||||
interceptor.pre_execute(query, query_ctx.clone())?;
|
||||
|
||||
self.plugins
|
||||
.get::<PermissionCheckerRef>()
|
||||
@@ -426,9 +431,20 @@ impl PrometheusHandler for Instance {
|
||||
}
|
||||
})?;
|
||||
|
||||
let _slow_query_timer = self.stats.start_slow_query_timer(stmt.clone());
|
||||
|
||||
let plan = self
|
||||
.statement_executor
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
interceptor.pre_execute(query, Some(&plan), query_ctx.clone())?;
|
||||
|
||||
let output = self
|
||||
.statement_executor
|
||||
.execute_stmt(stmt, query_ctx.clone())
|
||||
.exec_plan(plan, query_ctx.clone())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExecuteQuerySnafu)?;
|
||||
|
||||
@@ -185,7 +185,6 @@ impl FrontendBuilder {
|
||||
local_cache_invalidator,
|
||||
inserter.clone(),
|
||||
table_route_cache,
|
||||
self.stats,
|
||||
));
|
||||
|
||||
let pipeline_operator = Arc::new(PipelineOperator::new(
|
||||
@@ -211,6 +210,7 @@ impl FrontendBuilder {
|
||||
deleter,
|
||||
export_metrics_task: None,
|
||||
table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend)),
|
||||
stats: self.stats,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ use api::v1::{DeleteRequests, DropFlowExpr, InsertRequests, RowDeleteRequests, R
|
||||
use async_trait::async_trait;
|
||||
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing;
|
||||
use common_telemetry::tracing::{self};
|
||||
use query::parser::PromQuery;
|
||||
use servers::interceptor::{GrpcQueryInterceptor, GrpcQueryInterceptorRef};
|
||||
use servers::query_handler::grpc::GrpcQueryHandler;
|
||||
|
||||
@@ -24,7 +24,6 @@ use pipeline::PipelineWay;
|
||||
use servers::error::{self, AuthSnafu, Result as ServerResult};
|
||||
use servers::interceptor::{OpenTelemetryProtocolInterceptor, OpenTelemetryProtocolInterceptorRef};
|
||||
use servers::otlp;
|
||||
use servers::otlp::plugin::TraceParserRef;
|
||||
use servers::query_handler::OpenTelemetryProtocolHandler;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::ResultExt;
|
||||
@@ -64,6 +63,7 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
async fn traces(
|
||||
&self,
|
||||
request: ExportTraceServiceRequest,
|
||||
table_name: String,
|
||||
ctx: QueryContextRef,
|
||||
) -> ServerResult<Output> {
|
||||
self.plugins
|
||||
@@ -77,13 +77,7 @@ impl OpenTelemetryProtocolHandler for Instance {
|
||||
.get::<OpenTelemetryProtocolInterceptorRef<servers::error::Error>>();
|
||||
interceptor_ref.pre_execute(ctx.clone())?;
|
||||
|
||||
let (table_name, spans) = match self.plugins.get::<TraceParserRef>() {
|
||||
Some(parser) => (parser.table_name(), parser.parse(request)),
|
||||
None => (
|
||||
otlp::trace::TRACE_TABLE_NAME.to_string(),
|
||||
otlp::trace::parse(request),
|
||||
),
|
||||
};
|
||||
let spans = otlp::trace::parse(request);
|
||||
|
||||
let (requests, rows) = otlp::trace::to_grpc_insert_requests(table_name, spans)?;
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::any::Any;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_macro::stack_trace_debug;
|
||||
use common_runtime::error::Error as RuntimeError;
|
||||
use common_runtime::JoinError;
|
||||
use serde_json::error::Error as JsonError;
|
||||
use snafu::{Location, Snafu};
|
||||
use store_api::storage::RegionId;
|
||||
@@ -307,14 +306,6 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Join error"))]
|
||||
Join {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: JoinError,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
|
||||
@@ -31,8 +31,8 @@ use store_api::storage::RegionId;
|
||||
|
||||
use crate::error::{
|
||||
AddEntryLogBatchSnafu, DiscontinuousLogIndexSnafu, Error, FetchEntrySnafu,
|
||||
IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, JoinSnafu,
|
||||
OverrideCompactedEntrySnafu, RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
IllegalNamespaceSnafu, IllegalStateSnafu, InvalidProviderSnafu, OverrideCompactedEntrySnafu,
|
||||
RaftEngineSnafu, Result, StartGcTaskSnafu, StopGcTaskSnafu,
|
||||
};
|
||||
use crate::metrics;
|
||||
use crate::raft_engine::backend::SYSTEM_NAMESPACE;
|
||||
@@ -250,12 +250,6 @@ impl LogStore for RaftEngineLogStore {
|
||||
.engine
|
||||
.write(&mut batch, sync)
|
||||
.context(RaftEngineSnafu)?;
|
||||
let engine = self.engine.clone();
|
||||
let _ = common_runtime::spawn_blocking_global(move || {
|
||||
engine.write(&mut batch, sync).context(RaftEngineSnafu)
|
||||
})
|
||||
.await
|
||||
.context(JoinSnafu)?;
|
||||
|
||||
Ok(AppendBatchResponse { last_entry_ids })
|
||||
}
|
||||
|
||||
@@ -15,6 +15,8 @@ common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-telemetry.workspace = true
|
||||
futures.workspace = true
|
||||
futures-util.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
rand.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -21,6 +21,8 @@ mod cluster;
|
||||
mod store;
|
||||
mod util;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::meta::{ProcedureDetailResponse, Role};
|
||||
use cluster::Client as ClusterClient;
|
||||
use common_error::ext::BoxedError;
|
||||
@@ -30,7 +32,8 @@ use common_meta::cluster::{
|
||||
};
|
||||
use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue, RegionStat};
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::error::{self as meta_error, Result as MetaResult};
|
||||
use common_meta::error::{self as meta_error, ExternalSnafu, Result as MetaResult};
|
||||
use common_meta::range_stream::PaginationStream;
|
||||
use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
|
||||
use common_meta::rpc::procedure::{
|
||||
MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
|
||||
@@ -40,8 +43,10 @@ use common_meta::rpc::store::{
|
||||
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
|
||||
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_meta::rpc::KeyValue;
|
||||
use common_meta::ClusterId;
|
||||
use common_telemetry::info;
|
||||
use futures::TryStreamExt;
|
||||
use heartbeat::Client as HeartbeatClient;
|
||||
use procedure::Client as ProcedureClient;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -314,16 +319,15 @@ impl ClusterInfo for MetaClient {
|
||||
}
|
||||
|
||||
async fn list_region_stats(&self) -> Result<Vec<RegionStat>> {
|
||||
let cluster_client = self.cluster_client()?;
|
||||
let cluster_kv_backend = Arc::new(self.cluster_client()?);
|
||||
let range_prefix = DatanodeStatKey::key_prefix_with_cluster_id(self.id.0);
|
||||
let req = RangeRequest::new().with_prefix(range_prefix);
|
||||
let mut datanode_stats = cluster_client
|
||||
.range(req)
|
||||
.await?
|
||||
.kvs
|
||||
.into_iter()
|
||||
.map(|kv| DatanodeStatValue::try_from(kv.value).context(ConvertMetaRequestSnafu))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let stream = PaginationStream::new(cluster_kv_backend, req, 256, Arc::new(decode_stats))
|
||||
.into_stream();
|
||||
let mut datanode_stats = stream
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.context(ConvertMetaResponseSnafu)?;
|
||||
let region_stats = datanode_stats
|
||||
.iter_mut()
|
||||
.flat_map(|datanode_stat| {
|
||||
@@ -336,6 +340,12 @@ impl ClusterInfo for MetaClient {
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_stats(kv: KeyValue) -> MetaResult<DatanodeStatValue> {
|
||||
DatanodeStatValue::try_from(kv.value)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
impl MetaClient {
|
||||
pub fn new(id: Id) -> Self {
|
||||
Self {
|
||||
|
||||
@@ -12,14 +12,22 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::future::Future;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::greptime_proto::v1;
|
||||
use api::v1::meta::cluster_client::ClusterClient;
|
||||
use api::v1::meta::{MetasrvNodeInfo, MetasrvPeersRequest, ResponseHeader, Role};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use common_meta::rpc::store::{BatchGetRequest, BatchGetResponse, RangeRequest, RangeResponse};
|
||||
use common_meta::error::{Error as MetaError, ExternalSnafu, Result as MetaResult};
|
||||
use common_meta::kv_backend::{KvBackend, TxnService};
|
||||
use common_meta::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse,
|
||||
RangeRequest, RangeResponse,
|
||||
};
|
||||
use common_telemetry::{info, warn};
|
||||
use snafu::{ensure, ResultExt};
|
||||
use tokio::sync::RwLock;
|
||||
@@ -79,6 +87,51 @@ impl Client {
|
||||
}
|
||||
}
|
||||
|
||||
impl TxnService for Client {
|
||||
type Error = MetaError;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for Client {
|
||||
fn name(&self) -> &str {
|
||||
"ClusterClientKvBackend"
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
async fn range(&self, req: RangeRequest) -> MetaResult<RangeResponse> {
|
||||
self.range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn put(&self, _: PutRequest) -> MetaResult<PutResponse> {
|
||||
unimplemented!("`put` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_put(&self, _: BatchPutRequest) -> MetaResult<BatchPutResponse> {
|
||||
unimplemented!("`batch_put` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_get(&self, req: BatchGetRequest) -> MetaResult<BatchGetResponse> {
|
||||
self.batch_get(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)
|
||||
}
|
||||
|
||||
async fn delete_range(&self, _: DeleteRangeRequest) -> MetaResult<DeleteRangeResponse> {
|
||||
unimplemented!("`delete_range` is not supported in cluster client kv backend")
|
||||
}
|
||||
|
||||
async fn batch_delete(&self, _: BatchDeleteRequest) -> MetaResult<BatchDeleteResponse> {
|
||||
unimplemented!("`batch_delete` is not supported in cluster client kv backend")
|
||||
}
|
||||
}
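
// NOTE (editor's sketch, not part of the upstream change): this `KvBackend` impl is deliberately
// read-only (all write paths are `unimplemented!`); it exists so the cluster client can be fed to
// range-based readers such as `PaginationStream`, as `list_region_stats` does above:
//
//     let backend = Arc::new(meta_client.cluster_client()?);
//     let req = RangeRequest::new().with_prefix(range_prefix);
//     let stats = PaginationStream::new(backend, req, 256, Arc::new(decode_stats))
//         .into_stream()
//         .try_collect::<Vec<_>>()
//         .await?;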
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Inner {
|
||||
id: Id,
|
||||
|
||||
@@ -478,6 +478,11 @@ pub struct HeartbeatHandlerGroupBuilder {
|
||||
/// The handler to handle region lease.
|
||||
region_lease_handler: Option<RegionLeaseHandler>,
|
||||
|
||||
/// The factor that determines how often statistics should be flushed,
|
||||
/// based on the number of received heartbeats. When the number of heartbeats
|
||||
/// reaches this factor, a flush operation is triggered.
|
||||
flush_stats_factor: Option<usize>,
|
||||
|
||||
/// The plugins.
|
||||
plugins: Option<Plugins>,
|
||||
|
||||
@@ -493,6 +498,7 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
Self {
|
||||
region_failure_handler: None,
|
||||
region_lease_handler: None,
|
||||
flush_stats_factor: None,
|
||||
plugins: None,
|
||||
pushers,
|
||||
handlers: vec![],
|
||||
@@ -510,6 +516,12 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the flush stats factor.
|
||||
pub fn with_flush_stats_factor(mut self, flush_stats_factor: Option<usize>) -> Self {
|
||||
self.flush_stats_factor = flush_stats_factor;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the [`Plugins`].
|
||||
pub fn with_plugins(mut self, plugins: Option<Plugins>) -> Self {
|
||||
self.plugins = plugins;
|
||||
@@ -550,7 +562,7 @@ impl HeartbeatHandlerGroupBuilder {
|
||||
if let Some(publish_heartbeat_handler) = publish_heartbeat_handler {
|
||||
self.add_handler_last(publish_heartbeat_handler);
|
||||
}
|
||||
self.add_handler_last(CollectStatsHandler::default());
|
||||
self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor));
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
@@ -29,8 +29,6 @@ use crate::error::{self, Result};
|
||||
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
|
||||
use crate::metasrv::Context;
|
||||
|
||||
const MAX_CACHED_STATS_PER_KEY: usize = 10;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct EpochStats {
|
||||
stats: Vec<Stat>,
|
||||
@@ -69,9 +67,26 @@ impl EpochStats {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
const DEFAULT_FLUSH_STATS_FACTOR: usize = 3;
|
||||
|
||||
pub struct CollectStatsHandler {
|
||||
stats_cache: DashMap<DatanodeStatKey, EpochStats>,
|
||||
flush_stats_factor: usize,
|
||||
}
|
||||
|
||||
impl Default for CollectStatsHandler {
|
||||
fn default() -> Self {
|
||||
Self::new(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl CollectStatsHandler {
|
||||
pub fn new(flush_stats_factor: Option<usize>) -> Self {
|
||||
Self {
|
||||
flush_stats_factor: flush_stats_factor.unwrap_or(DEFAULT_FLUSH_STATS_FACTOR),
|
||||
stats_cache: DashMap::default(),
|
||||
}
|
||||
}
|
||||
}
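
// NOTE (editor's sketch, not part of the upstream change): the handler now flushes once the
// per-datanode stats cache reaches `flush_stats_factor` entries instead of the previous
// hard-coded `MAX_CACHED_STATS_PER_KEY` of 10. With the default factor of 3, roughly every
// third heartbeat triggers a write:
//
//     let default_handler = CollectStatsHandler::new(None);  // flush every 3 heartbeats
//     let eager_handler = CollectStatsHandler::new(Some(1)); // flush on every heartbeat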
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -130,7 +145,7 @@ impl HeartbeatHandler for CollectStatsHandler {
|
||||
rewrite_node_address(ctx, last).await;
|
||||
}
|
||||
|
||||
if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY {
|
||||
if !refresh && epoch_stats.len() < self.flush_stats_factor {
|
||||
return Ok(HandleControl::Continue);
|
||||
}
|
||||
|
||||
@@ -261,8 +276,7 @@ mod tests {
|
||||
let res = ctx.in_memory.get(&key).await.unwrap();
|
||||
let kv = res.unwrap();
|
||||
let val: DatanodeStatValue = kv.value.try_into().unwrap();
|
||||
// refresh every 10 stats
|
||||
assert_eq!(10, val.stats.len());
|
||||
assert_eq!(handler.flush_stats_factor, val.stats.len());
|
||||
}
|
||||
|
||||
async fn handle_request_many_times(
|
||||
|
||||
@@ -130,6 +130,10 @@ pub struct MetasrvOptions {
|
||||
/// limit the number of operations in a txn because an infinitely large txn could
|
||||
/// potentially block other operations.
|
||||
pub max_txn_ops: usize,
|
||||
/// The factor that determines how often statistics should be flushed,
|
||||
/// based on the number of received heartbeats. When the number of heartbeats
|
||||
/// reaches this factor, a flush operation is triggered.
|
||||
pub flush_stats_factor: usize,
|
||||
/// The tracing options.
|
||||
pub tracing: TracingOptions,
|
||||
/// The datastore for kv metadata.
|
||||
@@ -165,6 +169,7 @@ impl Default for MetasrvOptions {
|
||||
export_metrics: ExportMetricsOption::default(),
|
||||
store_key_prefix: String::new(),
|
||||
max_txn_ops: 128,
|
||||
flush_stats_factor: 3,
|
||||
tracing: TracingOptions::default(),
|
||||
backend: BackendImpl::EtcdStore,
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ impl MetasrvBuilder {
|
||||
))
|
||||
});
|
||||
let flow_metadata_allocator = {
|
||||
// for now flownode just use round robin selector
|
||||
// for now flownode just use round-robin selector
|
||||
let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode);
|
||||
let flow_selector_ctx = selector_ctx.clone();
|
||||
let peer_allocator = Arc::new(FlowPeerAllocator::new(
|
||||
@@ -347,6 +347,7 @@ impl MetasrvBuilder {
|
||||
.with_plugins(plugins.clone())
|
||||
.with_region_failure_handler(region_failover_handler)
|
||||
.with_region_lease_handler(Some(region_lease_handler))
|
||||
.with_flush_stats_factor(Some(options.flush_stats_factor))
|
||||
.add_default_handlers()
|
||||
}
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ use crate::metasrv::SelectTarget;
|
||||
use crate::selector::SelectorOptions;
|
||||
|
||||
/// According to the `opts`, choose peers from the `weight_array` through `weighted_choose`.
|
||||
pub fn choose_peers<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
|
||||
pub fn choose_items<W>(opts: &SelectorOptions, weighted_choose: &mut W) -> Result<Vec<Peer>>
|
||||
where
|
||||
W: WeightedChoose<Peer>,
|
||||
{
|
||||
@@ -36,20 +36,36 @@ where
|
||||
}
|
||||
);
|
||||
|
||||
if opts.allow_duplication {
|
||||
(0..min_required_items)
|
||||
.map(|_| weighted_choose.choose_one())
|
||||
.collect::<Result<_>>()
|
||||
} else {
|
||||
let weight_array_len = weighted_choose.len();
|
||||
if min_required_items == 1 {
|
||||
// fast path
|
||||
return Ok(vec![weighted_choose.choose_one()?]);
|
||||
}
|
||||
|
||||
// When opts.allow_duplication is false, we need to check that the length of the weighted array is greater than
|
||||
// or equal to min_required_items, otherwise it may cause an infinite loop.
|
||||
let available_count = weighted_choose.len();
|
||||
|
||||
if opts.allow_duplication {
|
||||
// Calculate how many complete rounds of `available_count` items to select,
|
||||
// plus any additional items needed after complete rounds.
|
||||
let complete_batches = min_required_items / available_count;
|
||||
let leftover_items = min_required_items % available_count;
|
||||
if complete_batches == 0 {
|
||||
return weighted_choose.choose_multiple(leftover_items);
|
||||
}
|
||||
|
||||
let mut result = Vec::with_capacity(min_required_items);
|
||||
for _ in 0..complete_batches {
|
||||
result.extend(weighted_choose.choose_multiple(available_count)?);
|
||||
}
|
||||
result.extend(weighted_choose.choose_multiple(leftover_items)?);
|
||||
|
||||
Ok(result)
|
||||
} else {
|
||||
// Ensure the available items are sufficient when duplication is not allowed.
|
||||
ensure!(
|
||||
weight_array_len >= min_required_items,
|
||||
available_count >= min_required_items,
|
||||
error::NoEnoughAvailableNodeSnafu {
|
||||
required: min_required_items,
|
||||
available: weight_array_len,
|
||||
available: available_count,
|
||||
select_target: SelectTarget::Datanode
|
||||
}
|
||||
);
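
// NOTE (editor's sketch, not part of the upstream change): with duplication allowed, the branch
// above picks whole rounds of the available items before topping up the remainder, e.g. with
// 3 available peers and 8 required items:
//
//     let available_count = 3;
//     let min_required_items = 8;
//     let complete_batches = min_required_items / available_count; // 2 full rounds
//     let leftover_items = min_required_items % available_count;   // 2 extra picks
//     // => choose_multiple(3) twice, then choose_multiple(2): 8 peers in total.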
|
||||
@@ -64,7 +80,7 @@ mod tests {
|
||||
|
||||
use common_meta::peer::Peer;
|
||||
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::common::choose_items;
|
||||
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
|
||||
use crate::selector::SelectorOptions;
|
||||
|
||||
@@ -115,7 +131,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let selected_peers: HashSet<_> =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()))
|
||||
choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone()))
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect();
|
||||
@@ -129,7 +145,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let selected_result =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone()));
|
||||
choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone()));
|
||||
assert!(selected_result.is_err());
|
||||
|
||||
for i in 1..=50 {
|
||||
@@ -139,7 +155,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let selected_peers =
|
||||
choose_peers(&opts, &mut RandomWeightedChoose::new(weight_array.clone())).unwrap();
|
||||
choose_items(&opts, &mut RandomWeightedChoose::new(weight_array.clone())).unwrap();
|
||||
|
||||
assert_eq!(i, selected_peers.len());
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ use common_meta::peer::Peer;
|
||||
use crate::error::Result;
|
||||
use crate::lease;
|
||||
use crate::metasrv::SelectorContext;
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::common::choose_items;
|
||||
use crate::selector::weighted_choose::{RandomWeightedChoose, WeightedItem};
|
||||
use crate::selector::{Namespace, Selector, SelectorOptions};
|
||||
|
||||
@@ -53,7 +53,7 @@ impl Selector for LeaseBasedSelector {
|
||||
|
||||
// 3. choose peers by weight_array.
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
let selected = choose_items(&opts, &mut weighted_choose)?;
|
||||
|
||||
Ok(selected)
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ use crate::error::{self, Result};
|
||||
use crate::key::{DatanodeLeaseKey, LeaseValue};
|
||||
use crate::lease;
|
||||
use crate::metasrv::SelectorContext;
|
||||
use crate::selector::common::choose_peers;
|
||||
use crate::selector::common::choose_items;
|
||||
use crate::selector::weight_compute::{RegionNumsBasedWeightCompute, WeightCompute};
|
||||
use crate::selector::weighted_choose::RandomWeightedChoose;
|
||||
use crate::selector::{Namespace, Selector, SelectorOptions};
|
||||
@@ -94,7 +94,7 @@ where
|
||||
|
||||
// 5. choose peers by weight_array.
|
||||
let mut weighted_choose = RandomWeightedChoose::new(weight_array);
|
||||
let selected = choose_peers(&opts, &mut weighted_choose)?;
|
||||
let selected = choose_items(&opts, &mut weighted_choose)?;
|
||||
|
||||
debug!(
|
||||
"LoadBasedSelector select peers: {:?}, namespace: {}, opts: {:?}.",
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
|
||||
//! Implementation of retrieving logical region's region metadata.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
@@ -46,23 +48,36 @@ impl MetricEngineInner {
|
||||
.read_lock_logical_region(logical_region_id)
|
||||
.await;
|
||||
// Load logical and physical columns, and intersect them to get logical column metadata.
|
||||
let mut logical_column_metadata = self
|
||||
let logical_column_metadata = self
|
||||
.metadata_region
|
||||
.logical_columns(physical_region_id, logical_region_id)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(_, column_metadata)| column_metadata)
|
||||
.collect::<Vec<_>>();
|
||||
// Sort columns on column name to ensure the order
|
||||
logical_column_metadata
|
||||
.sort_unstable_by(|c1, c2| c1.column_schema.name.cmp(&c2.column_schema.name));
|
||||
// Update cache
|
||||
self.state
|
||||
.write()
|
||||
.unwrap()
|
||||
.add_logical_columns(logical_region_id, logical_column_metadata.clone());
|
||||
|
||||
Ok(logical_column_metadata)
|
||||
// Update cache
|
||||
let mut mutable_state = self.state.write().unwrap();
|
||||
// Merge with existing cached columns.
|
||||
let existing_columns = mutable_state
|
||||
.logical_columns()
|
||||
.get(&logical_region_id)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter();
|
||||
let mut dedup_columns = logical_column_metadata
|
||||
.into_iter()
|
||||
.chain(existing_columns)
|
||||
.map(|c| (c.column_id, c))
|
||||
.collect::<HashMap<_, _>>()
|
||||
.values()
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
// Sort columns on column name to ensure the order
|
||||
dedup_columns.sort_unstable_by(|c1, c2| c1.column_schema.name.cmp(&c2.column_schema.name));
|
||||
mutable_state.set_logical_columns(logical_region_id, dedup_columns.clone());
|
||||
|
||||
Ok(dedup_columns)
|
||||
}
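
// NOTE (editor's sketch, not part of the upstream change): the cache update above now merges the
// freshly loaded columns with whatever was already cached, deduplicating by `column_id` and then
// re-sorting by column name, instead of blindly appending:
//
//     // freshly loaded: [a (id 1), c (id 3)]
//     // already cached: [a (id 1), b (id 2)]
//     // stored result:  [a, b, c]   -- one entry per column id, alphabetical by name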
|
||||
|
||||
/// Load logical column names of a logical region.
|
||||
|
||||
@@ -85,19 +85,13 @@ impl MetricEngineState {
|
||||
.insert(logical_region_id, physical_region_id);
|
||||
}
|
||||
|
||||
/// Add and reorder logical columns.
|
||||
///
|
||||
/// Caller should make sure:
|
||||
/// 1. there is no duplicate columns
|
||||
/// 2. the column order is the same with the order in the metadata, which is
|
||||
/// alphabetically ordered on column name.
|
||||
pub fn add_logical_columns(
|
||||
/// Replace the logical columns of the logical region with given columns.
|
||||
pub fn set_logical_columns(
|
||||
&mut self,
|
||||
logical_region_id: RegionId,
|
||||
new_columns: impl IntoIterator<Item = ColumnMetadata>,
|
||||
columns: Vec<ColumnMetadata>,
|
||||
) {
|
||||
let columns = self.logical_columns.entry(logical_region_id).or_default();
|
||||
columns.extend(new_columns);
|
||||
self.logical_columns.insert(logical_region_id, columns);
|
||||
}
|
||||
|
||||
pub fn get_physical_region_id(&self, logical_region_id: RegionId) -> Option<RegionId> {
|
||||
|
||||
@@ -24,6 +24,7 @@ common-datasource.workspace = true
|
||||
common-decimal.workspace = true
|
||||
common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
@@ -74,6 +75,7 @@ uuid.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
common-function.workspace = true
|
||||
common-meta = { workspace = true, features = ["testing"] }
|
||||
common-procedure-test.workspace = true
|
||||
common-test-util.workspace = true
|
||||
criterion = "0.4"
|
||||
|
||||
@@ -28,7 +28,8 @@ use std::time::{Duration, Instant};
|
||||
|
||||
use api::v1::region::compact_request;
|
||||
use common_base::Plugins;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_telemetry::{debug, error, info, warn};
|
||||
use common_time::range::TimestampRange;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
@@ -37,7 +38,7 @@ use datafusion_expr::Expr;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::RegionId;
|
||||
use store_api::storage::{RegionId, TableId};
|
||||
use table::predicate::Predicate;
|
||||
use tokio::sync::mpsc::{self, Sender};
|
||||
|
||||
@@ -48,8 +49,8 @@ use crate::compaction::picker::{new_picker, CompactionTask};
|
||||
use crate::compaction::task::CompactionTaskImpl;
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::{
|
||||
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu,
|
||||
RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
|
||||
CompactRegionSnafu, Error, GetSchemaMetadataSnafu, RegionClosedSnafu, RegionDroppedSnafu,
|
||||
RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
|
||||
};
|
||||
use crate::metrics::COMPACTION_STAGE_ELAPSED;
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
@@ -82,6 +83,7 @@ pub struct CompactionRequest {
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
pub(crate) manifest_ctx: ManifestContextRef,
|
||||
pub(crate) listener: WorkerListener,
|
||||
pub(crate) schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl CompactionRequest {
|
||||
@@ -141,6 +143,7 @@ impl CompactionScheduler {
|
||||
access_layer: &AccessLayerRef,
|
||||
waiter: OptionOutputTx,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<()> {
|
||||
if let Some(status) = self.region_status.get_mut(®ion_id) {
|
||||
// Region is compacting. Add the waiter to pending list.
|
||||
@@ -158,6 +161,7 @@ impl CompactionScheduler {
|
||||
self.cache_manager.clone(),
|
||||
manifest_ctx,
|
||||
self.listener.clone(),
|
||||
schema_metadata_manager,
|
||||
);
|
||||
self.region_status.insert(region_id, status);
|
||||
let result = self
|
||||
@@ -173,6 +177,7 @@ impl CompactionScheduler {
|
||||
&mut self,
|
||||
region_id: RegionId,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) {
|
||||
let Some(status) = self.region_status.get_mut(®ion_id) else {
|
||||
return;
|
||||
@@ -186,6 +191,7 @@ impl CompactionScheduler {
|
||||
self.cache_manager.clone(),
|
||||
manifest_ctx,
|
||||
self.listener.clone(),
|
||||
schema_metadata_manager,
|
||||
);
|
||||
// Try to schedule next compaction task for this region.
|
||||
if let Err(e) = self
|
||||
@@ -256,10 +262,23 @@ impl CompactionScheduler {
            cache_manager,
            manifest_ctx,
            listener,
            schema_metadata_manager,
        } = request;

        let ttl = find_ttl(
            region_id.table_id(),
            current_version.options.ttl,
            &schema_metadata_manager,
        )
        .await
        .unwrap_or_else(|e| {
            warn!(e; "Failed to get ttl for region: {}", region_id);
            None
        });

        debug!(
            "Pick compaction strategy {:?} for region: {}",
            picker, region_id
            "Pick compaction strategy {:?} for region: {}, ttl: {:?}",
            picker, region_id, ttl
        );

        let compaction_region = CompactionRegion {
@@ -273,6 +292,7 @@ impl CompactionScheduler {
            access_layer: access_layer.clone(),
            manifest_ctx: manifest_ctx.clone(),
            file_purger: None,
            ttl,
        };

        let picker_output = {
@@ -414,6 +434,24 @@ impl PendingCompaction {
    }
}

/// Finds the TTL of a table by first examining table options, then database options.
async fn find_ttl(
    table_id: TableId,
    table_ttl: Option<Duration>,
    schema_metadata_manager: &SchemaMetadataManagerRef,
) -> Result<Option<Duration>> {
    if let Some(table_ttl) = table_ttl {
        return Ok(Some(table_ttl));
    }

    let ttl = schema_metadata_manager
        .get_schema_options_by_table_id(table_id)
        .await
        .context(GetSchemaMetadataSnafu)?
        .and_then(|options| options.ttl);
    Ok(ttl)
}
|
||||
|
||||
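// Illustrative sketch only (not part of the change above): restates the TTL
// precedence that `find_ttl` applies, using nothing beyond std. The helper
// name `effective_ttl` is hypothetical and exists only for this example.
use std::time::Duration;

fn effective_ttl(table_ttl: Option<Duration>, schema_ttl: Option<Duration>) -> Option<Duration> {
    // A table-level TTL, when set, always overrides the database (schema) level TTL.
    table_ttl.or(schema_ttl)
}

fn main() {
    let table = Some(Duration::from_secs(60));
    let schema = Some(Duration::from_secs(3600));
    assert_eq!(effective_ttl(table, schema), table);
    assert_eq!(effective_ttl(None, schema), schema);
    assert_eq!(effective_ttl(None, None), None);
}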
/// Status of running and pending region compaction tasks.
|
||||
struct CompactionStatus {
|
||||
/// Id of the region.
|
||||
@@ -471,6 +509,7 @@ impl CompactionStatus {
|
||||
cache_manager: CacheManagerRef,
|
||||
manifest_ctx: &ManifestContextRef,
|
||||
listener: WorkerListener,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> CompactionRequest {
|
||||
let current_version = self.version_control.current().version;
|
||||
let start_time = Instant::now();
|
||||
@@ -484,6 +523,7 @@ impl CompactionStatus {
|
||||
cache_manager,
|
||||
manifest_ctx: manifest_ctx.clone(),
|
||||
listener,
|
||||
schema_metadata_manager,
|
||||
};
|
||||
|
||||
if let Some(pending) = self.pending_compaction.take() {
|
||||
@@ -639,6 +679,9 @@ fn get_expired_ssts(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_meta::key::SchemaMetadataManager;
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use super::*;
|
||||
@@ -651,7 +694,19 @@ mod tests {
|
||||
let (tx, _rx) = mpsc::channel(4);
|
||||
let mut scheduler = env.mock_compaction_scheduler(tx);
|
||||
let mut builder = VersionControlBuilder::new();
|
||||
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef));
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
builder.region_id().table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
// Nothing to compact.
|
||||
let version_control = Arc::new(builder.build());
|
||||
let (output_tx, output_rx) = oneshot::channel();
|
||||
@@ -667,6 +722,7 @@ mod tests {
|
||||
&env.access_layer,
|
||||
waiter,
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -686,6 +742,7 @@ mod tests {
|
||||
&env.access_layer,
|
||||
waiter,
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -703,6 +760,19 @@ mod tests {
|
||||
let mut builder = VersionControlBuilder::new();
|
||||
let purger = builder.file_purger();
|
||||
let region_id = builder.region_id();
|
||||
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef));
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
builder.region_id().table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// 5 files to compact.
|
||||
let end = 1000 * 1000;
|
||||
@@ -726,6 +796,7 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -755,6 +826,7 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -769,7 +841,7 @@ mod tests {
|
||||
|
||||
// On compaction finished and schedule next compaction.
|
||||
scheduler
|
||||
.on_compaction_finished(region_id, &manifest_ctx)
|
||||
.on_compaction_finished(region_id, &manifest_ctx, schema_metadata_manager.clone())
|
||||
.await;
|
||||
assert_eq!(1, scheduler.region_status.len());
|
||||
assert_eq!(2, job_scheduler.num_jobs());
|
||||
@@ -789,6 +861,7 @@ mod tests {
|
||||
&env.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
&manifest_ctx,
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -16,7 +16,8 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::region::compact_request;
|
||||
use common_telemetry::info;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_telemetry::{info, warn};
|
||||
use object_store::manager::ObjectStoreManagerRef;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
@@ -27,7 +28,7 @@ use store_api::storage::RegionId;
|
||||
use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest};
|
||||
use crate::cache::{CacheManager, CacheManagerRef};
|
||||
use crate::compaction::picker::{new_picker, PickerOutput};
|
||||
use crate::compaction::CompactionSstReaderBuilder;
|
||||
use crate::compaction::{find_ttl, CompactionSstReaderBuilder};
|
||||
use crate::config::MitoConfig;
|
||||
use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Result};
|
||||
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
|
||||
@@ -62,6 +63,7 @@ pub struct CompactionRegion {
|
||||
pub(crate) manifest_ctx: Arc<ManifestContext>,
|
||||
pub(crate) current_version: VersionRef,
|
||||
pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
|
||||
pub(crate) ttl: Option<Duration>,
|
||||
}
|
||||
|
||||
/// OpenCompactionRegionRequest represents the request to open a compaction region.
|
||||
@@ -78,6 +80,7 @@ pub async fn open_compaction_region(
|
||||
req: &OpenCompactionRegionRequest,
|
||||
mito_config: &MitoConfig,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<CompactionRegion> {
|
||||
let object_store = {
|
||||
let name = &req.region_options.storage;
|
||||
@@ -169,6 +172,16 @@ pub async fn open_compaction_region(
|
||||
Arc::new(version)
|
||||
};
|
||||
|
||||
let ttl = find_ttl(
|
||||
req.region_id.table_id(),
|
||||
current_version.options.ttl,
|
||||
&schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_else(|e| {
|
||||
warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
|
||||
None
|
||||
});
|
||||
Ok(CompactionRegion {
|
||||
region_id: req.region_id,
|
||||
region_options: req.region_options.clone(),
|
||||
@@ -180,6 +193,7 @@ pub async fn open_compaction_region(
|
||||
manifest_ctx,
|
||||
current_version,
|
||||
file_purger: Some(file_purger),
|
||||
ttl,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -212,8 +212,9 @@ impl Picker for TwcsPicker {
    fn pick(&self, compaction_region: &CompactionRegion) -> Option<PickerOutput> {
        let region_id = compaction_region.region_id;
        let levels = compaction_region.current_version.ssts.levels();
        let ttl = compaction_region.current_version.options.ttl;
        let expired_ssts = get_expired_ssts(levels, ttl, Timestamp::current_millis());

        let expired_ssts =
            get_expired_ssts(levels, compaction_region.ttl, Timestamp::current_millis());
        if !expired_ssts.is_empty() {
            info!("Expired SSTs in region {}: {:?}", region_id, expired_ssts);
            // here we mark expired SSTs as compacting to avoid them being picked.
|
||||
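// Illustrative sketch only (not part of the change above): the kind of check
// `get_expired_ssts` relies on, assuming an SST is expired when its max
// timestamp falls before `now - ttl`. `max_timestamp_ms` is a hypothetical
// stand-in for the file handle's time range.
use std::time::Duration;

fn is_expired(max_timestamp_ms: i64, ttl: Option<Duration>, now_ms: i64) -> bool {
    match ttl {
        // No TTL configured at either the table or the database level: nothing expires.
        None => false,
        Some(ttl) => max_timestamp_ms < now_ms - ttl.as_millis() as i64,
    }
}

fn main() {
    let ttl = Some(Duration::from_secs(3600));
    assert!(is_expired(0, ttl, 10 * 3_600_000));
    assert!(!is_expired(9 * 3_600_000 + 1, ttl, 10 * 3_600_000));
    assert!(!is_expired(0, None, 10 * 3_600_000));
}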
@@ -297,6 +298,9 @@ fn assign_to_windows<'a>(
|
||||
let mut windows: HashMap<i64, Window> = HashMap::new();
|
||||
// Iterates over all files and assigns each to a time window according to its max timestamp
|
||||
for f in files {
|
||||
if f.compacting() {
|
||||
continue;
|
||||
}
|
||||
let (_, end) = f.time_range();
|
||||
let time_window = end
|
||||
.convert_to(TimeUnit::Second)
|
||||
@@ -443,6 +447,21 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign_compacting_to_windows() {
|
||||
let files = [
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
new_file_handle(FileId::random(), 0, 999, 0),
|
||||
];
|
||||
files[0].set_compacting(true);
|
||||
files[2].set_compacting(true);
|
||||
let windows = assign_to_windows(files.iter(), 3);
|
||||
assert_eq!(3, windows.get(&0).unwrap().files.len());
|
||||
}
|
||||
|
||||
/// (Window value, overlapping, files' time ranges in window)
|
||||
type ExpectedWindowSpec = (i64, bool, Vec<(i64, i64)>);
|
||||
|
||||
|
||||
@@ -352,6 +352,28 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_assign_compacting_files_to_windows() {
|
||||
let picker = WindowedCompactionPicker::new(Some(HOUR / 1000));
|
||||
let files = vec![
|
||||
(FileId::random(), 0, 2 * HOUR - 1, 0),
|
||||
(FileId::random(), HOUR, HOUR * 3 - 1, 0),
|
||||
];
|
||||
let version = build_version(&files, Some(Duration::from_millis(3 * HOUR as u64)));
|
||||
version.ssts.levels()[0]
|
||||
.files()
|
||||
.for_each(|f| f.set_compacting(true));
|
||||
let (outputs, expired_ssts, window_seconds) = picker.pick_inner(
|
||||
RegionId::new(0, 0),
|
||||
&version,
|
||||
Timestamp::new_millisecond(HOUR * 3),
|
||||
);
|
||||
|
||||
assert!(expired_ssts.is_empty());
|
||||
assert_eq!(HOUR / 1000, window_seconds);
|
||||
assert!(outputs.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_file_time_bucket_span() {
|
||||
assert_eq!(
|
||||
|
||||
@@ -150,7 +150,7 @@ impl Default for MitoConfig {
            selector_result_cache_size: ReadableSize::mb(512),
            enable_experimental_write_cache: false,
            experimental_write_cache_path: String::new(),
            experimental_write_cache_size: ReadableSize::mb(512),
            experimental_write_cache_size: ReadableSize::gb(1),
            experimental_write_cache_ttl: None,
            sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
            scan_parallelism: divide_num_cpus(4),
|
||||
|
||||
@@ -66,6 +66,7 @@ use api::region::RegionResponse;
|
||||
use async_trait::async_trait;
|
||||
use common_base::Plugins;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::tracing;
|
||||
use common_wal::options::{WalOptions, WAL_OPTIONS_KEY};
|
||||
@@ -89,7 +90,7 @@ use crate::error::{
|
||||
};
|
||||
use crate::manifest::action::RegionEdit;
|
||||
use crate::metrics::HANDLE_REQUEST_ELAPSED;
|
||||
use crate::read::scan_region::{ScanParallism, ScanRegion, Scanner};
|
||||
use crate::read::scan_region::{ScanParallelism, ScanRegion, Scanner};
|
||||
use crate::request::{RegionEditRequest, WorkerRequest};
|
||||
use crate::wal::entry_distributor::{
|
||||
build_wal_entry_distributor_and_receivers, DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
|
||||
@@ -112,13 +113,21 @@ impl MitoEngine {
|
||||
mut config: MitoConfig,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<MitoEngine> {
|
||||
config.sanitize(data_home)?;
|
||||
|
||||
Ok(MitoEngine {
|
||||
inner: Arc::new(
|
||||
EngineInner::new(config, log_store, object_store_manager, plugins).await?,
|
||||
EngineInner::new(
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await?,
|
||||
),
|
||||
})
|
||||
}
|
||||
@@ -278,13 +287,20 @@ impl EngineInner {
|
||||
config: MitoConfig,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<EngineInner> {
|
||||
let config = Arc::new(config);
|
||||
let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));
|
||||
Ok(EngineInner {
|
||||
workers: WorkerGroup::start(config.clone(), log_store, object_store_manager, plugins)
|
||||
.await?,
|
||||
workers: WorkerGroup::start(
|
||||
config.clone(),
|
||||
log_store,
|
||||
object_store_manager,
|
||||
schema_metadata_manager,
|
||||
plugins,
|
||||
)
|
||||
.await?,
|
||||
config,
|
||||
wal_raw_entry_reader,
|
||||
})
|
||||
@@ -417,7 +433,7 @@ impl EngineInner {
|
||||
let version = region.version();
|
||||
// Get cache.
|
||||
let cache_manager = self.workers.cache_manager();
|
||||
let scan_parallelism = ScanParallism {
|
||||
let scan_parallelism = ScanParallelism {
|
||||
parallelism: self.config.scan_parallelism,
|
||||
channel_size: self.config.parallel_scan_channel_size,
|
||||
};
|
||||
@@ -583,6 +599,7 @@ impl RegionEngine for MitoEngine {
|
||||
|
||||
// Tests methods.
|
||||
#[cfg(any(test, feature = "test"))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
impl MitoEngine {
|
||||
/// Returns a new [MitoEngine] for tests.
|
||||
pub async fn new_for_test<S: LogStore>(
|
||||
@@ -593,6 +610,7 @@ impl MitoEngine {
|
||||
write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
|
||||
listener: Option<crate::engine::listener::EventListenerRef>,
|
||||
time_provider: crate::time_provider::TimeProviderRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
) -> Result<MitoEngine> {
|
||||
config.sanitize(data_home)?;
|
||||
|
||||
@@ -606,6 +624,7 @@ impl MitoEngine {
|
||||
object_store_manager,
|
||||
write_buffer_manager,
|
||||
listener,
|
||||
schema_metadata_manager,
|
||||
time_provider,
|
||||
)
|
||||
.await?,
|
||||
|
||||
@@ -78,6 +78,16 @@ async fn test_alter_region() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -167,10 +177,19 @@ fn build_rows_for_tags(
|
||||
async fn test_put_after_alter() {
|
||||
let mut env = TestEnv::new();
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut column_schemas = rows_schema(&request);
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -266,6 +285,16 @@ async fn test_alter_region_retry() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
@@ -320,6 +349,16 @@ async fn test_alter_on_flushing() {
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
|
||||
@@ -98,6 +98,16 @@ async fn test_append_mode_compaction() {
|
||||
.await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
|
||||
@@ -112,6 +112,16 @@ async fn test_compaction_region() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "1")
|
||||
@@ -171,8 +181,18 @@ async fn test_compaction_region_with_overlapping() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let mut env = TestEnv::new();
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
@@ -217,6 +237,17 @@ async fn test_compaction_region_with_overlapping_delete_all() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "2")
|
||||
@@ -281,6 +312,16 @@ async fn test_readonly_during_compaction() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "1")
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::Rows;
|
||||
use common_meta::key::SchemaMetadataManager;
|
||||
use object_store::util::join_path;
|
||||
use store_api::region_engine::RegionEngine;
|
||||
use store_api::region_request::{RegionDropRequest, RegionRequest};
|
||||
@@ -40,6 +41,17 @@ async fn test_engine_drop_region() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
// It's okay to drop a region doesn't exist.
|
||||
engine
|
||||
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
|
||||
@@ -87,7 +99,12 @@ async fn test_engine_drop_region() {
|
||||
#[tokio::test]
|
||||
async fn test_engine_drop_region_for_custom_store() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
async fn setup(engine: &MitoEngine, region_id: RegionId, storage_name: &str) {
|
||||
async fn setup(
|
||||
engine: &MitoEngine,
|
||||
schema_metadata_manager: &SchemaMetadataManager,
|
||||
region_id: RegionId,
|
||||
storage_name: &str,
|
||||
) {
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("storage", storage_name)
|
||||
.region_dir(storage_name)
|
||||
@@ -97,6 +114,18 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
.handle_request(region_id, RegionRequest::Create(request))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table_id = format!("test_table_{}", region_id.table_id());
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
&table_id,
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let rows = Rows {
|
||||
schema: column_schema.clone(),
|
||||
rows: build_rows_for_key("a", 0, 2, 0),
|
||||
@@ -114,12 +143,19 @@ async fn test_engine_drop_region_for_custom_store() {
|
||||
&["Gcs"],
|
||||
)
|
||||
.await;
|
||||
let schema_metadata_manager = env.get_schema_metadata_manager();
|
||||
let object_store_manager = env.get_object_store_manager().unwrap();
|
||||
|
||||
let global_region_id = RegionId::new(1, 1);
|
||||
setup(&engine, global_region_id, "default").await;
|
||||
setup(
|
||||
&engine,
|
||||
&schema_metadata_manager,
|
||||
global_region_id,
|
||||
"default",
|
||||
)
|
||||
.await;
|
||||
let custom_region_id = RegionId::new(2, 1);
|
||||
setup(&engine, custom_region_id, "Gcs").await;
|
||||
setup(&engine, &schema_metadata_manager, custom_region_id, "Gcs").await;
|
||||
|
||||
let global_region = engine.get_region(global_region_id).unwrap();
|
||||
let global_region_dir = global_region.access_layer.region_dir().to_string();
|
||||
|
||||
@@ -64,6 +64,16 @@ async fn test_edit_region_schedule_compaction() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
engine
|
||||
.handle_request(
|
||||
region_id,
|
||||
|
||||
@@ -32,6 +32,16 @@ async fn test_scan_without_filtering_deleted() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("compaction.type", "twcs")
|
||||
.insert_option("compaction.twcs.max_active_window_runs", "10")
|
||||
|
||||
@@ -45,6 +45,16 @@ async fn test_manual_flush() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -92,6 +102,16 @@ async fn test_flush_engine() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -151,6 +171,15 @@ async fn test_write_stall() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -215,6 +244,15 @@ async fn test_flush_empty() {
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
engine
|
||||
@@ -249,8 +287,17 @@ async fn test_flush_reopen_region(factory: Option<LogStoreFactory>) {
|
||||
|
||||
let mut env = TestEnv::new().with_log_store_factory(factory.clone());
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let topic = prepare_test_for_kafka_log_store(&factory).await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.kafka_topic(topic.clone())
|
||||
@@ -360,8 +407,17 @@ async fn test_auto_flush_engine() {
|
||||
time_provider.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
@@ -421,6 +477,16 @@ async fn test_flush_workers() {
|
||||
|
||||
let region_id0 = RegionId::new(1, 0);
|
||||
let region_id1 = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id0.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().region_dir("r0").build();
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
|
||||
@@ -98,6 +98,16 @@ async fn test_merge_mode_compaction() {
|
||||
.await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new()
|
||||
.field_num(2)
|
||||
.insert_option("compaction.type", "twcs")
|
||||
|
||||
@@ -245,6 +245,16 @@ async fn test_open_region_skip_wal_replay() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
|
||||
@@ -423,6 +433,16 @@ async fn test_open_compaction_region() {
|
||||
let engine = env.create_engine(mito_config.clone()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
let schema_metadata_manager = env.get_schema_metadata_manager();
|
||||
schema_metadata_manager
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
engine
|
||||
@@ -444,10 +464,14 @@ async fn test_open_compaction_region() {
|
||||
region_options: RegionOptions::default(),
|
||||
};
|
||||
|
||||
let compaction_region =
|
||||
open_compaction_region(&req, &mito_config, object_store_manager.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
let compaction_region = open_compaction_region(
|
||||
&req,
|
||||
&mito_config,
|
||||
object_store_manager.clone(),
|
||||
schema_metadata_manager,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(region_id, compaction_region.region_id);
|
||||
}
|
||||
|
||||
@@ -76,6 +76,16 @@ async fn test_parallel_scan() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let region_dir = request.region_dir.clone();
|
||||
|
||||
|
||||
@@ -151,6 +151,17 @@ async fn test_prune_memtable() {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
|
||||
let column_schemas = rows_schema(&request);
|
||||
|
||||
@@ -29,6 +29,15 @@ async fn test_last_row(append_mode: bool) {
|
||||
let engine = env.create_engine(MitoConfig::default()).await;
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
let request = CreateRequestBuilder::new()
|
||||
.insert_option("append_mode", &append_mode.to_string())
|
||||
.build();
|
||||
|
||||
@@ -151,6 +151,17 @@ async fn test_engine_truncate_after_flush() {
|
||||
|
||||
// Create the region.
|
||||
let region_id = RegionId::new(1, 1);
|
||||
|
||||
env.get_schema_metadata_manager()
|
||||
.register_region_table_info(
|
||||
region_id.table_id(),
|
||||
"test_table",
|
||||
"test_catalog",
|
||||
"test_schema",
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = CreateRequestBuilder::new().build();
|
||||
let column_schemas = rows_schema(&request);
|
||||
engine
|
||||
|
||||
@@ -870,6 +870,13 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Failed to get schema metadata"))]
    GetSchemaMetadata {
        source: common_meta::error::Error,
        #[snafu(implicit)]
        location: Location,
    },
}

pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -1002,6 +1009,7 @@ impl ErrorExt for Error {
            | ApplyFulltextIndex { source, .. } => source.status_code(),
            DecodeStats { .. } | StatsNotPresent { .. } => StatusCode::Internal,
            RegionBusy { .. } => StatusCode::RegionBusy,
            GetSchemaMetadata { source, .. } => source.status_code(),
        }
    }
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ pub(crate) struct ScanRegion {
|
||||
/// Cache.
|
||||
cache_manager: Option<CacheManagerRef>,
|
||||
/// Parallelism to scan.
|
||||
parallelism: ScanParallism,
|
||||
parallelism: ScanParallelism,
|
||||
/// Whether to ignore inverted index.
|
||||
ignore_inverted_index: bool,
|
||||
/// Whether to ignore fulltext index.
|
||||
@@ -191,7 +191,7 @@ impl ScanRegion {
|
||||
access_layer,
|
||||
request,
|
||||
cache_manager,
|
||||
parallelism: ScanParallism::default(),
|
||||
parallelism: ScanParallelism::default(),
|
||||
ignore_inverted_index: false,
|
||||
ignore_fulltext_index: false,
|
||||
start_time: None,
|
||||
@@ -200,7 +200,7 @@ impl ScanRegion {
|
||||
|
||||
/// Sets parallelism.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallism) -> Self {
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallelism) -> Self {
|
||||
self.parallelism = parallelism;
|
||||
self
|
||||
}
|
||||
@@ -447,7 +447,7 @@ impl ScanRegion {
|
||||
|
||||
/// Config for parallel scan.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(crate) struct ScanParallism {
|
||||
pub(crate) struct ScanParallelism {
|
||||
/// Number of tasks expected to be spawned to read data.
|
||||
pub(crate) parallelism: usize,
|
||||
/// Channel size to send batches. Only takes effect when the parallelism > 1.
|
||||
@@ -484,7 +484,7 @@ pub(crate) struct ScanInput {
|
||||
/// Ignores file not found error.
|
||||
ignore_file_not_found: bool,
|
||||
/// Parallelism to scan data.
|
||||
pub(crate) parallelism: ScanParallism,
|
||||
pub(crate) parallelism: ScanParallelism,
|
||||
/// Index appliers.
|
||||
inverted_index_applier: Option<InvertedIndexApplierRef>,
|
||||
fulltext_index_applier: Option<FulltextIndexApplierRef>,
|
||||
@@ -513,7 +513,7 @@ impl ScanInput {
|
||||
files: Vec::new(),
|
||||
cache_manager: None,
|
||||
ignore_file_not_found: false,
|
||||
parallelism: ScanParallism::default(),
|
||||
parallelism: ScanParallelism::default(),
|
||||
inverted_index_applier: None,
|
||||
fulltext_index_applier: None,
|
||||
query_start: None,
|
||||
@@ -568,7 +568,7 @@ impl ScanInput {
|
||||
|
||||
/// Sets scan parallelism.
|
||||
#[must_use]
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallism) -> Self {
|
||||
pub(crate) fn with_parallelism(mut self, parallelism: ScanParallelism) -> Self {
|
||||
self.parallelism = parallelism;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -35,6 +35,9 @@ use api::v1::{OpType, Row, Rows, SemanticType};
|
||||
use common_base::readable_size::ReadableSize;
|
||||
use common_base::Plugins;
|
||||
use common_datasource::compression::CompressionType;
|
||||
use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
|
||||
use common_meta::kv_backend::memory::MemoryKvBackend;
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_telemetry::warn;
|
||||
use common_test_util::temp_dir::{create_temp_dir, TempDir};
|
||||
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
|
||||
@@ -195,6 +198,7 @@ pub struct TestEnv {
|
||||
log_store: Option<LogStoreImpl>,
|
||||
log_store_factory: LogStoreFactory,
|
||||
object_store_manager: Option<ObjectStoreManagerRef>,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl Default for TestEnv {
|
||||
@@ -211,6 +215,10 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,6 +229,10 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +243,10 @@ impl TestEnv {
|
||||
log_store: None,
|
||||
log_store_factory: LogStoreFactory::RaftEngine(RaftEngineLogStoreFactory),
|
||||
object_store_manager: None,
|
||||
schema_metadata_manager: Arc::new(SchemaMetadataManager::new(Arc::new(
|
||||
MemoryKvBackend::new(),
|
||||
)
|
||||
as KvBackendRef)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,6 +285,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -278,6 +295,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -295,6 +313,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -304,6 +323,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
object_store_manager,
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -335,6 +355,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -346,6 +367,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -388,6 +410,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -399,6 +422,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
Arc::new(StdTimeProvider),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -430,6 +454,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
time_provider.clone(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -441,6 +466,7 @@ impl TestEnv {
|
||||
manager,
|
||||
listener,
|
||||
time_provider.clone(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
@@ -450,13 +476,13 @@ impl TestEnv {
|
||||
/// Reopen the engine.
|
||||
pub async fn reopen_engine(&mut self, engine: MitoEngine, config: MitoConfig) -> MitoEngine {
|
||||
engine.stop().await.unwrap();
|
||||
|
||||
match self.log_store.as_ref().unwrap().clone() {
|
||||
LogStoreImpl::RaftEngine(log_store) => MitoEngine::new(
|
||||
&self.data_home().display().to_string(),
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -466,6 +492,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -481,6 +508,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -490,6 +518,7 @@ impl TestEnv {
|
||||
config,
|
||||
log_store,
|
||||
self.object_store_manager.clone().unwrap(),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -515,6 +544,7 @@ impl TestEnv {
|
||||
Arc::new(config),
|
||||
log_store,
|
||||
Arc::new(object_store_manager),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -523,6 +553,7 @@ impl TestEnv {
|
||||
Arc::new(config),
|
||||
log_store,
|
||||
Arc::new(object_store_manager),
|
||||
self.schema_metadata_manager.clone(),
|
||||
Plugins::new(),
|
||||
)
|
||||
.await
|
||||
@@ -630,6 +661,10 @@ impl TestEnv {
|
||||
|
||||
Arc::new(write_cache)
|
||||
}
|
||||
|
||||
pub fn get_schema_metadata_manager(&self) -> SchemaMetadataManagerRef {
|
||||
self.schema_metadata_manager.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder to mock a [RegionCreateRequest].
|
||||
|
||||
@@ -34,8 +34,8 @@ use crate::error::Result;
|
||||
use crate::memtable::key_values::KeyValue;
|
||||
use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer};
|
||||
use crate::memtable::{
|
||||
BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, MemtableId,
|
||||
MemtableRange, MemtableRef, MemtableStats,
|
||||
BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange,
|
||||
MemtableRef, MemtableStats,
|
||||
};
|
||||
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_base::Plugins;
|
||||
use common_meta::key::SchemaMetadataManagerRef;
|
||||
use common_runtime::JoinHandle;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use futures::future::try_join_all;
|
||||
@@ -132,6 +133,7 @@ impl WorkerGroup {
|
||||
config: Arc<MitoConfig>,
|
||||
log_store: Arc<S>,
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
plugins: Plugins,
|
||||
) -> Result<WorkerGroup> {
|
||||
let (flush_sender, flush_receiver) = watch::channel(());
|
||||
@@ -191,6 +193,7 @@ impl WorkerGroup {
|
||||
flush_sender: flush_sender.clone(),
|
||||
flush_receiver: flush_receiver.clone(),
|
||||
plugins: plugins.clone(),
|
||||
schema_metadata_manager: schema_metadata_manager.clone(),
|
||||
}
|
||||
.start()
|
||||
})
|
||||
@@ -273,6 +276,7 @@ impl WorkerGroup {
|
||||
object_store_manager: ObjectStoreManagerRef,
|
||||
write_buffer_manager: Option<WriteBufferManagerRef>,
|
||||
listener: Option<crate::engine::listener::EventListenerRef>,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
time_provider: TimeProviderRef,
|
||||
) -> Result<WorkerGroup> {
|
||||
let (flush_sender, flush_receiver) = watch::channel(());
|
||||
@@ -329,6 +333,7 @@ impl WorkerGroup {
|
||||
flush_sender: flush_sender.clone(),
|
||||
flush_receiver: flush_receiver.clone(),
|
||||
plugins: Plugins::new(),
|
||||
schema_metadata_manager: schema_metadata_manager.clone(),
|
||||
}
|
||||
.start()
|
||||
})
|
||||
@@ -405,6 +410,7 @@ struct WorkerStarter<S> {
|
||||
/// Watch channel receiver to wait for background flush job.
|
||||
flush_receiver: watch::Receiver<()>,
|
||||
plugins: Plugins,
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl<S: LogStore> WorkerStarter<S> {
|
||||
@@ -455,6 +461,7 @@ impl<S: LogStore> WorkerStarter<S> {
|
||||
stalled_count: WRITE_STALL_TOTAL.with_label_values(&[&id_string]),
|
||||
region_count: REGION_COUNT.with_label_values(&[&id_string]),
|
||||
region_edit_queues: RegionEditQueues::default(),
|
||||
schema_metadata_manager: self.schema_metadata_manager,
|
||||
};
|
||||
let handle = common_runtime::spawn_global(async move {
|
||||
worker_thread.run().await;
|
||||
@@ -645,6 +652,8 @@ struct RegionWorkerLoop<S> {
|
||||
region_count: IntGauge,
|
||||
/// Queues for region edit requests.
|
||||
region_edit_queues: RegionEditQueues,
|
||||
/// Database level metadata manager.
|
||||
schema_metadata_manager: SchemaMetadataManagerRef,
|
||||
}
|
||||
|
||||
impl<S: LogStore> RegionWorkerLoop<S> {
|
||||
|
||||
@@ -44,6 +44,7 @@ impl<S> RegionWorkerLoop<S> {
|
||||
®ion.access_layer,
|
||||
sender,
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -80,7 +81,11 @@ impl<S> RegionWorkerLoop<S> {
|
||||
|
||||
// Schedule next compaction if necessary.
|
||||
self.compaction_scheduler
|
||||
.on_compaction_finished(region_id, ®ion.manifest_ctx)
|
||||
.on_compaction_finished(
|
||||
region_id,
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -107,6 +112,7 @@ impl<S> RegionWorkerLoop<S> {
|
||||
®ion.access_layer,
|
||||
OptionOutputTx::none(),
|
||||
®ion.manifest_ctx,
|
||||
self.schema_metadata_manager.clone(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
||||
@@ -36,6 +36,7 @@ datatypes.workspace = true
|
||||
file-engine.workspace = true
|
||||
futures = "0.3"
|
||||
futures-util.workspace = true
|
||||
jsonb.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
meter-core.workspace = true
|
||||
|
||||
@@ -35,6 +35,7 @@ use crate::error::{
|
||||
MissingTimeIndexColumnSnafu, RequestDeletesSnafu, Result, TableNotFoundSnafu,
|
||||
};
|
||||
use crate::region_req_factory::RegionRequestFactory;
|
||||
use crate::req_convert::common::preprocess_row_delete_requests;
|
||||
use crate::req_convert::delete::{ColumnToRow, RowToRegion, TableToRegion};
|
||||
|
||||
pub struct Deleter {
|
||||
@@ -72,6 +73,7 @@ impl Deleter {
|
||||
mut requests: RowDeleteRequests,
|
||||
ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
preprocess_row_delete_requests(&mut requests.deletes)?;
|
||||
// remove empty requests
|
||||
requests.deletes.retain(|req| {
|
||||
req.rows
|
||||
|
||||
@@ -770,6 +770,13 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid json text: {}", json))]
|
||||
InvalidJsonFormat {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
json: String,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -808,7 +815,8 @@ impl ErrorExt for Error {
|
||||
| Error::BuildAdminFunctionArgs { .. }
|
||||
| Error::FunctionArityMismatch { .. }
|
||||
| Error::InvalidPartition { .. }
|
||||
| Error::PhysicalExpr { .. } => StatusCode::InvalidArguments,
|
||||
| Error::PhysicalExpr { .. }
|
||||
| Error::InvalidJsonFormat { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Error::TableAlreadyExists { .. } | Error::ViewAlreadyExists { .. } => {
|
||||
StatusCode::TableAlreadyExists
|
||||
|
||||
@@ -54,6 +54,7 @@ use crate::error::{
|
||||
};
|
||||
use crate::expr_factory::CreateExprFactory;
|
||||
use crate::region_req_factory::RegionRequestFactory;
|
||||
use crate::req_convert::common::preprocess_row_insert_requests;
|
||||
use crate::req_convert::insert::{ColumnToRow, RowToRegion, StatementToRegion, TableToRegion};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
@@ -119,10 +120,11 @@ impl Inserter {
|
||||
/// Handles row inserts request and creates a physical table on demand.
|
||||
pub async fn handle_row_inserts(
|
||||
&self,
|
||||
requests: RowInsertRequests,
|
||||
mut requests: RowInsertRequests,
|
||||
ctx: QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Output> {
|
||||
preprocess_row_insert_requests(&mut requests.inserts)?;
|
||||
self.handle_row_inserts_with_create_type(
|
||||
requests,
|
||||
ctx,
|
||||
@@ -758,10 +760,8 @@ impl Inserter {
|
||||
ctx: &QueryContextRef,
|
||||
statement_executor: &StatementExecutor,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
let catalog_name = ctx.current_catalog();
|
||||
let schema_name = ctx.current_schema();
|
||||
let res = statement_executor
|
||||
.create_logical_tables(catalog_name, &schema_name, &create_table_exprs, ctx.clone())
|
||||
.create_logical_tables(&create_table_exprs, ctx.clone())
|
||||
.await;
|
||||
|
||||
match res {
|
||||
|
||||
@@ -17,9 +17,13 @@ pub(crate) mod partitioner;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::column_data_type_extension::TypeExt;
|
||||
use api::v1::column_def::options_from_column_schema;
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{Column, ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value};
|
||||
use api::v1::{
|
||||
Column, ColumnDataType, ColumnDataTypeExtension, ColumnSchema, JsonTypeExtension, Row,
|
||||
RowDeleteRequest, RowInsertRequest, Rows, SemanticType, Value,
|
||||
};
|
||||
use common_base::BitVec;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::prelude::*;
|
||||
@@ -27,10 +31,77 @@ use snafu::ResultExt;
|
||||
use table::metadata::TableInfo;
|
||||
|
||||
use crate::error::{
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, InvalidInsertRequestSnafu,
|
||||
MissingTimeIndexColumnSnafu, Result,
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, InvalidInsertRequestSnafu, InvalidJsonFormatSnafu,
|
||||
MissingTimeIndexColumnSnafu, Result, UnexpectedSnafu,
|
||||
};
|
||||
|
||||
/// Encodes a string value as JSONB binary data if the value is of `StringValue` type.
fn encode_string_to_jsonb_binary(value_data: ValueData) -> Result<ValueData> {
    if let ValueData::StringValue(json) = &value_data {
        let binary = jsonb::parse_value(json.as_bytes())
            .map_err(|_| InvalidJsonFormatSnafu { json }.build())
            .map(|jsonb| jsonb.to_vec())?;
        Ok(ValueData::BinaryValue(binary))
    } else {
        UnexpectedSnafu {
            violated: "Expected to value data to be a string.",
        }
        .fail()
    }
}

/// Prepares row insertion requests by converting any JSON values to binary JSONB format.
pub fn preprocess_row_insert_requests(requests: &mut Vec<RowInsertRequest>) -> Result<()> {
    for request in requests {
        prepare_rows(&mut request.rows)?;
    }

    Ok(())
}

/// Prepares row deletion requests by converting any JSON values to binary JSONB format.
pub fn preprocess_row_delete_requests(requests: &mut Vec<RowDeleteRequest>) -> Result<()> {
    for request in requests {
        prepare_rows(&mut request.rows)?;
    }

    Ok(())
}

fn prepare_rows(rows: &mut Option<Rows>) -> Result<()> {
    if let Some(rows) = rows {
        let indexes = rows
            .schema
            .iter()
            .enumerate()
            .filter_map(|(idx, schema)| {
                if schema.datatype() == ColumnDataType::Json {
                    Some(idx)
                } else {
                    None
                }
            })
            .collect::<Vec<_>>();
        for idx in &indexes {
            let column = &mut rows.schema[*idx];
            column.datatype_extension = Some(ColumnDataTypeExtension {
                type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
            });
            column.datatype = ColumnDataType::Binary.into();
        }

        for idx in &indexes {
            for row in &mut rows.rows {
                if let Some(value_data) = row.values[*idx].value_data.take() {
                    row.values[*idx].value_data = Some(encode_string_to_jsonb_binary(value_data)?);
                }
            }
        }
    }

    Ok(())
}
|
||||
|
||||
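// Illustrative sketch only (not part of the change above): the string-to-JSONB
// step in isolation, using the same `jsonb` crate calls that appear in the code
// (`parse_value` + `to_vec`). Invalid JSON yields `None` here instead of the
// typed `InvalidJsonFormat` error used in the real code.
fn to_jsonb_bytes(json: &str) -> Option<Vec<u8>> {
    jsonb::parse_value(json.as_bytes()).ok().map(|v| v.to_vec())
}

fn main() {
    assert!(to_jsonb_bytes(r#"{"host": "h1", "cpu": 0.5}"#).is_some());
    assert!(to_jsonb_bytes("not json").is_none());
}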
pub fn columns_to_rows(columns: Vec<Column>, row_count: u32) -> Result<Rows> {
|
||||
let row_count = row_count as usize;
|
||||
let column_count = columns.len();
|
||||
|
||||
@@ -45,7 +45,6 @@ use common_time::Timestamp;
|
||||
use datafusion_expr::LogicalPlan;
|
||||
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
|
||||
use query::parser::QueryStatement;
|
||||
use query::stats::StatementStatistics;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::{Channel, QueryContextRef};
|
||||
use session::table_name::table_idents_to_full_name;
|
||||
@@ -81,13 +80,11 @@ pub struct StatementExecutor {
|
||||
partition_manager: PartitionRuleManagerRef,
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
inserter: InserterRef,
|
||||
stats: StatementStatistics,
|
||||
}
|
||||
|
||||
pub type StatementExecutorRef = Arc<StatementExecutor>;
|
||||
|
||||
impl StatementExecutor {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
catalog_manager: CatalogManagerRef,
|
||||
query_engine: QueryEngineRef,
|
||||
@@ -96,7 +93,6 @@ impl StatementExecutor {
|
||||
cache_invalidator: CacheInvalidatorRef,
|
||||
inserter: InserterRef,
|
||||
table_route_cache: TableRouteCacheRef,
|
||||
stats: StatementStatistics,
|
||||
) -> Self {
|
||||
Self {
|
||||
catalog_manager,
|
||||
@@ -108,23 +104,22 @@ impl StatementExecutor {
|
||||
partition_manager: Arc::new(PartitionRuleManager::new(kv_backend, table_route_cache)),
|
||||
cache_invalidator,
|
||||
inserter,
|
||||
stats,
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
#[cfg(feature = "testing")]
|
||||
pub async fn execute_stmt(
|
||||
&self,
|
||||
stmt: QueryStatement,
|
||||
query_ctx: QueryContextRef,
|
||||
) -> Result<Output> {
|
||||
let _slow_query_timer = self.stats.start_slow_query_timer(stmt.clone());
|
||||
match stmt {
|
||||
QueryStatement::Sql(stmt) => self.execute_sql(stmt, query_ctx).await,
|
||||
QueryStatement::Promql(_) => self.plan_exec(stmt, query_ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn execute_sql(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
match stmt {
|
||||
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
|
||||
@@ -361,6 +356,7 @@ impl StatementExecutor {
|
||||
Ok(Output::new_with_affected_rows(0))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn plan(
|
||||
&self,
|
||||
stmt: &QueryStatement,
|
||||
@@ -374,6 +370,7 @@ impl StatementExecutor {
|
||||
}
|
||||
|
||||
/// Execute [`LogicalPlan`] directly.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn exec_plan(&self, plan: LogicalPlan, query_ctx: QueryContextRef) -> Result<Output> {
|
||||
self.query_engine
|
||||
.execute(plan, query_ctx)
|
||||
|
||||
@@ -26,7 +26,7 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::Context;
|
||||
use common_meta::ddl::ExecutorContext;
|
||||
use common_meta::instruction::CacheIdent;
|
||||
use common_meta::key::schema_name::{SchemaNameKey, SchemaNameValue};
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_meta::key::NAME_PATTERN;
|
||||
use common_meta::rpc::ddl::{
|
||||
CreateFlowTask, DdlTask, DropFlowTask, DropViewTask, SubmitDdlTaskRequest,
|
||||
@@ -116,9 +116,21 @@ impl StatementExecutor {
|
||||
.await
|
||||
.context(error::FindTablePartitionRuleSnafu { table_name: table })?;
|
||||
|
||||
// CREATE TABLE LIKE also inherits database level options.
|
||||
let schema_options = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey {
|
||||
catalog: &catalog,
|
||||
schema: &schema,
|
||||
})
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let quote_style = ctx.quote_style();
|
||||
let mut create_stmt = create_table_stmt(&table_ref.table_info(), quote_style)
|
||||
.context(error::ParseQuerySnafu)?;
|
||||
let mut create_stmt =
|
||||
create_table_stmt(&table_ref.table_info(), schema_options, quote_style)
|
||||
.context(error::ParseQuerySnafu)?;
|
||||
create_stmt.name = stmt.table_name;
|
||||
create_stmt.if_not_exists = false;
|
||||
|
||||
@@ -165,15 +177,8 @@ impl StatementExecutor {
|
||||
.table_options
|
||||
.contains_key(LOGICAL_TABLE_METADATA_KEY)
|
||||
{
|
||||
let catalog_name = &create_table.catalog_name;
|
||||
let schema_name = &create_table.schema_name;
|
||||
return self
|
||||
.create_logical_tables(
|
||||
catalog_name,
|
||||
schema_name,
|
||||
&[create_table.clone()],
|
||||
query_ctx,
|
||||
)
|
||||
.create_logical_tables(&[create_table.clone()], query_ctx)
|
||||
.await?
|
||||
.into_iter()
|
||||
.next()
|
||||
@@ -183,6 +188,7 @@ impl StatementExecutor {
|
||||
}
|
||||
|
||||
let _timer = crate::metrics::DIST_CREATE_TABLE.start_timer();
|
||||
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
@@ -193,12 +199,12 @@ impl StatementExecutor {
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let Some(schema_opts) = schema else {
|
||||
return SchemaNotFoundSnafu {
|
||||
ensure!(
|
||||
schema.is_some(),
|
||||
SchemaNotFoundSnafu {
|
||||
schema_info: &create_table.schema_name,
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
);
|
||||
|
||||
// if table exists.
|
||||
if let Some(table) = self
|
||||
@@ -240,7 +246,7 @@ impl StatementExecutor {
|
||||
);
|
||||
|
||||
let (partitions, partition_cols) = parse_partitions(create_table, partitions, &query_ctx)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols, schema_opts)?;
|
||||
let mut table_info = create_table_info(create_table, partition_cols)?;
|
||||
|
||||
let resp = self
|
||||
.create_table_procedure(
|
||||
@@ -273,8 +279,6 @@ impl StatementExecutor {
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn create_logical_tables(
|
||||
&self,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
create_table_exprs: &[CreateTableExpr],
|
||||
query_context: QueryContextRef,
|
||||
) -> Result<Vec<TableRef>> {
|
||||
@@ -296,19 +300,9 @@ impl StatementExecutor {
|
||||
);
|
||||
}
|
||||
|
||||
let schema = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey::new(catalog_name, schema_name))
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
.context(SchemaNotFoundSnafu {
|
||||
schema_info: schema_name,
|
||||
})?;
|
||||
|
||||
let mut raw_tables_info = create_table_exprs
|
||||
.iter()
|
||||
.map(|create| create_table_info(create, vec![], schema.clone()))
|
||||
.map(|create| create_table_info(create, vec![]))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let tables_data = create_table_exprs
|
||||
.iter()
|
||||
@@ -1261,7 +1255,6 @@ fn parse_partitions(
fn create_table_info(
    create_table: &CreateTableExpr,
    partition_columns: Vec<String>,
    schema_opts: SchemaNameValue,
) -> Result<RawTableInfo> {
    let mut column_schemas = Vec::with_capacity(create_table.column_defs.len());
    let mut column_name_to_index_map = HashMap::new();
@@ -1310,7 +1303,6 @@ fn create_table_info(

    let table_options = TableOptions::try_from_iter(&create_table.table_options)
        .context(UnrecognizedTableOptionSnafu)?;
    let table_options = merge_options(table_options, schema_opts);

    let meta = RawTableMeta {
        schema: raw_schema,
@@ -1495,12 +1487,6 @@ fn convert_value(
        .context(ParseSqlValueSnafu)
}

/// Merge table level table options with schema level table options.
fn merge_options(mut table_opts: TableOptions, schema_opts: SchemaNameValue) -> TableOptions {
    table_opts.ttl = table_opts.ttl.or(schema_opts.ttl);
    table_opts
}

#[cfg(test)]
mod test {
    use session::context::{QueryContext, QueryContextBuilder};
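The `merge_options` helper in this hunk encodes the precedence rule for database-level options: a TTL set on the table always wins, and the schema's TTL is only used as a fallback. A minimal, self-contained sketch of that rule, using stand-in structs in place of the real `TableOptions` and `SchemaNameValue` types:

```rust
// Stand-in structs; the real `TableOptions` / `SchemaNameValue` carry more fields.
use std::time::Duration;

#[derive(Debug, Default)]
struct TableOptions {
    ttl: Option<Duration>,
}

#[derive(Debug, Default)]
struct SchemaNameValue {
    ttl: Option<Duration>,
}

fn merge_options(mut table_opts: TableOptions, schema_opts: SchemaNameValue) -> TableOptions {
    // A TTL set on the table wins; the database-level TTL is only a fallback.
    table_opts.ttl = table_opts.ttl.or(schema_opts.ttl);
    table_opts
}

fn main() {
    let schema = SchemaNameValue { ttl: Some(Duration::from_secs(3600)) };
    let merged = merge_options(TableOptions { ttl: None }, schema);
    assert_eq!(merged.ttl, Some(Duration::from_secs(3600)));
}
```

Only `ttl` falls back today; other table options are left untouched by the merge.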
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::schema_name::SchemaNameKey;
|
||||
use common_query::Output;
|
||||
use common_telemetry::tracing;
|
||||
use partition::manager::PartitionInfo;
|
||||
@@ -33,7 +34,7 @@ use table::TableRef;
|
||||
|
||||
use crate::error::{
|
||||
self, CatalogSnafu, ExecuteStatementSnafu, ExternalSnafu, FindViewInfoSnafu, InvalidSqlSnafu,
|
||||
Result, ViewInfoNotFoundSnafu, ViewNotFoundSnafu,
|
||||
Result, TableMetadataManagerSnafu, ViewInfoNotFoundSnafu, ViewNotFoundSnafu,
|
||||
};
|
||||
use crate::statement::StatementExecutor;
|
||||
|
||||
@@ -118,6 +119,16 @@ impl StatementExecutor {
|
||||
.fail();
|
||||
}
|
||||
|
||||
let schema_options = self
|
||||
.table_metadata_manager
|
||||
.schema_manager()
|
||||
.get(SchemaNameKey {
|
||||
catalog: &table_name.catalog_name,
|
||||
schema: &table_name.schema_name,
|
||||
})
|
||||
.await
|
||||
.context(TableMetadataManagerSnafu)?;
|
||||
|
||||
let partitions = self
|
||||
.partition_manager
|
||||
.find_table_partitions(table.table_info().table_id())
|
||||
@@ -128,7 +139,8 @@ impl StatementExecutor {
|
||||
|
||||
let partitions = create_partitions_stmt(partitions)?;
|
||||
|
||||
query::sql::show_create_table(table, partitions, query_ctx).context(ExecuteStatementSnafu)
|
||||
query::sql::show_create_table(table, schema_options, partitions, query_ctx)
|
||||
.context(ExecuteStatementSnafu)
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
|
||||
@@ -274,7 +274,7 @@ impl<'a> RuleChecker<'a> {
|
||||
fn check_axis(&self) -> Result<()> {
|
||||
for (col_index, axis) in self.axis.iter().enumerate() {
|
||||
for (val, split_point) in axis {
|
||||
if !split_point.is_equal {
|
||||
if split_point.less_than_counter != 0 || !split_point.is_equal {
|
||||
UnclosedValueSnafu {
|
||||
value: format!("{val:?}"),
|
||||
column: self.rule.partition_columns[col_index].clone(),
|
||||
@@ -410,7 +410,6 @@ mod tests {
|
||||
/// b <= h b >= s
|
||||
/// ```
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn empty_expr_case_1() {
|
||||
// PARTITION ON COLUMNS (b) (
|
||||
// b <= 'h',
|
||||
@@ -452,7 +451,6 @@ mod tests {
|
||||
/// 10 20
|
||||
/// ```
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn empty_expr_case_2() {
|
||||
// PARTITION ON COLUMNS (b) (
|
||||
// a >= 100 AND b <= 10 OR a > 100 AND a <= 200 AND b <= 10 OR a >= 200 AND b > 10 AND b <= 20 OR a > 200 AND b <= 20
|
||||
@@ -582,7 +580,6 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "don't check unmatched `>` and `<` for now"]
|
||||
fn duplicate_expr_case_1() {
|
||||
// PARTITION ON COLUMNS (a) (
|
||||
// a <= 20,
|
||||
|
||||
@@ -41,6 +41,7 @@ futures.workspace = true
greptime-proto.workspace = true
itertools.workspace = true
jsonb.workspace = true
jsonpath-rust = "0.7.3"
lazy_static.workspace = true
moka = { workspace = true, features = ["sync"] }
once_cell.workspace = true
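The new `jsonpath-rust` dependency backs the `json_path` processor introduced later in this comparison. As a rough orientation, and assuming the crate's 0.7 API as the processor uses it (`JsonPath::try_from` to parse the path, `find` to collect matches), a standalone sketch against `serde_json` values might look like this; the path and sample data are made up for illustration:

```rust
use jsonpath_rust::JsonPath;
use serde_json::{json, Value};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let data = json!({"shop": {"orders": [{"id": 1, "active": true}, {"id": 2}]}});
    // Parse the path once, then `find` collects every match into an array,
    // which the processor's `result_index` option can index into.
    let path: JsonPath<Value> = JsonPath::try_from("$.shop.orders[?(@.active)].id")?;
    let matches = path.find(&data);
    println!("matches: {matches}");
    Ok(())
}
```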
@@ -570,6 +570,18 @@ pub enum Error {
        #[snafu(implicit)]
        location: Location,
    },
    #[snafu(display("Parse json path error"))]
    JsonPathParse {
        #[snafu(implicit)]
        location: Location,
        #[snafu(source)]
        error: jsonpath_rust::JsonPathParserError,
    },
    #[snafu(display("Json path result index not number"))]
    JsonPathParseResultIndex {
        #[snafu(implicit)]
        location: Location,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -19,6 +19,7 @@ pub mod dissect;
|
||||
pub mod epoch;
|
||||
pub mod gsub;
|
||||
pub mod join;
|
||||
pub mod json_path;
|
||||
pub mod letter;
|
||||
pub mod regex;
|
||||
pub mod timestamp;
|
||||
@@ -34,6 +35,7 @@ use epoch::{EpochProcessor, EpochProcessorBuilder};
|
||||
use gsub::{GsubProcessor, GsubProcessorBuilder};
|
||||
use itertools::Itertools;
|
||||
use join::{JoinProcessor, JoinProcessorBuilder};
|
||||
use json_path::{JsonPathProcessor, JsonPathProcessorBuilder};
|
||||
use letter::{LetterProcessor, LetterProcessorBuilder};
|
||||
use regex::{RegexProcessor, RegexProcessorBuilder};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -56,6 +58,8 @@ const PATTERN_NAME: &str = "pattern";
|
||||
const PATTERNS_NAME: &str = "patterns";
|
||||
const SEPARATOR_NAME: &str = "separator";
|
||||
const TARGET_FIELDS_NAME: &str = "target_fields";
|
||||
const JSON_PATH_NAME: &str = "json_path";
|
||||
const JSON_PATH_RESULT_INDEX_NAME: &str = "result_index";
|
||||
|
||||
// const IF_NAME: &str = "if";
|
||||
// const IGNORE_FAILURE_NAME: &str = "ignore_failure";
|
||||
@@ -94,6 +98,7 @@ pub enum ProcessorKind {
|
||||
UrlEncoding(UrlEncodingProcessor),
|
||||
Epoch(EpochProcessor),
|
||||
Date(DateProcessor),
|
||||
JsonPath(JsonPathProcessor),
|
||||
}
|
||||
|
||||
/// ProcessorBuilder trait defines the interface for all processor builders
|
||||
@@ -122,6 +127,7 @@ pub enum ProcessorBuilders {
|
||||
UrlEncoding(UrlEncodingProcessorBuilder),
|
||||
Epoch(EpochProcessorBuilder),
|
||||
Date(DateProcessorBuilder),
|
||||
JsonPath(JsonPathProcessorBuilder),
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
@@ -266,6 +272,9 @@ fn parse_processor(doc: &yaml_rust::Yaml) -> Result<ProcessorBuilders> {
|
||||
urlencoding::PROCESSOR_URL_ENCODING => {
|
||||
ProcessorBuilders::UrlEncoding(UrlEncodingProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
json_path::PROCESSOR_JSON_PATH => {
|
||||
ProcessorBuilders::JsonPath(json_path::JsonPathProcessorBuilder::try_from(value)?)
|
||||
}
|
||||
_ => return UnsupportedProcessorSnafu { processor: str_key }.fail(),
|
||||
};
|
||||
|
||||
|
||||
231
src/pipeline/src/etl/processor/json_path.rs
Normal file
231
src/pipeline/src/etl/processor/json_path.rs
Normal file
@@ -0,0 +1,231 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use ahash::HashSet;
|
||||
use jsonpath_rust::JsonPath;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use super::{
|
||||
yaml_bool, yaml_new_field, yaml_new_fields, yaml_string, Processor, ProcessorBuilder,
|
||||
FIELDS_NAME, FIELD_NAME, IGNORE_MISSING_NAME, JSON_PATH_NAME, JSON_PATH_RESULT_INDEX_NAME,
|
||||
};
|
||||
use crate::etl::error::{Error, Result};
|
||||
use crate::etl::field::{Fields, OneInputOneOutputField};
|
||||
use crate::etl::processor::ProcessorKind;
|
||||
use crate::etl_error::{
|
||||
JsonPathParseResultIndexSnafu, JsonPathParseSnafu, KeyMustBeStringSnafu,
|
||||
ProcessorMissingFieldSnafu,
|
||||
};
|
||||
use crate::Value;
|
||||
|
||||
pub(crate) const PROCESSOR_JSON_PATH: &str = "json_path";
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessorBuilder {
|
||||
fields: Fields,
|
||||
json_path: JsonPath<Value>,
|
||||
ignore_missing: bool,
|
||||
result_idex: Option<usize>,
|
||||
}
|
||||
|
||||
impl JsonPathProcessorBuilder {
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<JsonPathProcessor> {
|
||||
let mut real_fields = vec![];
|
||||
for field in self.fields.into_iter() {
|
||||
let input = OneInputOneOutputField::build(
|
||||
JSON_PATH_NAME,
|
||||
intermediate_keys,
|
||||
field.input_field(),
|
||||
field.target_or_input_field(),
|
||||
)?;
|
||||
real_fields.push(input);
|
||||
}
|
||||
|
||||
Ok(JsonPathProcessor {
|
||||
fields: real_fields,
|
||||
json_path: self.json_path,
|
||||
ignore_missing: self.ignore_missing,
|
||||
result_idex: self.result_idex,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ProcessorBuilder for JsonPathProcessorBuilder {
|
||||
fn output_keys(&self) -> HashSet<&str> {
|
||||
self.fields
|
||||
.iter()
|
||||
.map(|f| f.target_or_input_field())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn input_keys(&self) -> HashSet<&str> {
|
||||
self.fields.iter().map(|f| f.input_field()).collect()
|
||||
}
|
||||
|
||||
fn build(self, intermediate_keys: &[String]) -> Result<ProcessorKind> {
|
||||
self.build(intermediate_keys).map(ProcessorKind::JsonPath)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&yaml_rust::yaml::Hash> for JsonPathProcessorBuilder {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(value: &yaml_rust::yaml::Hash) -> std::result::Result<Self, Self::Error> {
|
||||
let mut fields = Fields::default();
|
||||
let mut ignore_missing = false;
|
||||
let mut json_path = None;
|
||||
let mut result_idex = None;
|
||||
|
||||
for (k, v) in value.iter() {
|
||||
let key = k
|
||||
.as_str()
|
||||
.with_context(|| KeyMustBeStringSnafu { k: k.clone() })?;
|
||||
match key {
|
||||
FIELD_NAME => {
|
||||
fields = Fields::one(yaml_new_field(v, FIELD_NAME)?);
|
||||
}
|
||||
FIELDS_NAME => {
|
||||
fields = yaml_new_fields(v, FIELDS_NAME)?;
|
||||
}
|
||||
|
||||
IGNORE_MISSING_NAME => {
|
||||
ignore_missing = yaml_bool(v, IGNORE_MISSING_NAME)?;
|
||||
}
|
||||
JSON_PATH_RESULT_INDEX_NAME => {
|
||||
result_idex = Some(v.as_i64().context(JsonPathParseResultIndexSnafu)? as usize);
|
||||
}
|
||||
|
||||
JSON_PATH_NAME => {
|
||||
let json_path_str = yaml_string(v, JSON_PATH_NAME)?;
|
||||
json_path = Some(
|
||||
JsonPath::try_from(json_path_str.as_str()).context(JsonPathParseSnafu)?,
|
||||
);
|
||||
}
|
||||
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if let Some(json_path) = json_path {
|
||||
let processor = JsonPathProcessorBuilder {
|
||||
fields,
|
||||
json_path,
|
||||
ignore_missing,
|
||||
result_idex,
|
||||
};
|
||||
|
||||
Ok(processor)
|
||||
} else {
|
||||
ProcessorMissingFieldSnafu {
|
||||
processor: PROCESSOR_JSON_PATH,
|
||||
field: JSON_PATH_NAME,
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct JsonPathProcessor {
|
||||
fields: Vec<OneInputOneOutputField>,
|
||||
json_path: JsonPath<Value>,
|
||||
ignore_missing: bool,
|
||||
result_idex: Option<usize>,
|
||||
}
|
||||
|
||||
impl Default for JsonPathProcessor {
|
||||
fn default() -> Self {
|
||||
JsonPathProcessor {
|
||||
fields: vec![],
|
||||
json_path: JsonPath::try_from("$").unwrap(),
|
||||
ignore_missing: false,
|
||||
result_idex: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl JsonPathProcessor {
|
||||
fn process_field(&self, val: &Value) -> Result<Value> {
|
||||
let processed = self.json_path.find(val);
|
||||
match processed {
|
||||
Value::Array(arr) => {
|
||||
if let Some(index) = self.result_idex {
|
||||
Ok(arr.get(index).cloned().unwrap_or(Value::Null))
|
||||
} else {
|
||||
Ok(Value::Array(arr))
|
||||
}
|
||||
}
|
||||
v => Ok(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Processor for JsonPathProcessor {
|
||||
fn kind(&self) -> &str {
|
||||
PROCESSOR_JSON_PATH
|
||||
}
|
||||
|
||||
fn ignore_missing(&self) -> bool {
|
||||
self.ignore_missing
|
||||
}
|
||||
|
||||
fn exec_mut(&self, val: &mut Vec<Value>) -> Result<()> {
|
||||
for field in self.fields.iter() {
|
||||
let index = field.input_index();
|
||||
match val.get(index) {
|
||||
Some(v) => {
|
||||
let processed = self.process_field(v)?;
|
||||
|
||||
let output_index = field.output_index();
|
||||
val[output_index] = processed;
|
||||
}
|
||||
None => {
|
||||
if !self.ignore_missing {
|
||||
return ProcessorMissingFieldSnafu {
|
||||
processor: self.kind(),
|
||||
field: field.input_name(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::Map;
|
||||
|
||||
#[test]
|
||||
fn test_json_path() {
|
||||
use super::*;
|
||||
use crate::Value;
|
||||
|
||||
let json_path = JsonPath::try_from("$.hello").unwrap();
|
||||
let processor = JsonPathProcessor {
|
||||
json_path,
|
||||
result_idex: Some(0),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let result = processor
|
||||
.process_field(&Value::Map(Map::one(
|
||||
"hello",
|
||||
Value::String("world".to_string()),
|
||||
)))
|
||||
.unwrap();
|
||||
assert_eq!(result, Value::String("world".to_string()));
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,10 @@ use std::collections::BTreeMap;
|
||||
|
||||
pub use array::Array;
|
||||
use jsonb::{Number as JsonbNumber, Object as JsonbObject, Value as JsonbValue};
|
||||
use jsonpath_rust::path::{JsonLike, Path};
|
||||
use jsonpath_rust::{jsp_idx, jsp_obj};
|
||||
pub use map::Map;
|
||||
use regex::Regex;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
pub use time::Timestamp;
|
||||
|
||||
@@ -35,10 +38,11 @@ use crate::etl::error::{Error, Result};
|
||||
/// acts as value: the enclosed value is the actual value
|
||||
/// acts as type: the enclosed value is the default value
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Default)]
|
||||
pub enum Value {
|
||||
// as value: null
|
||||
// as type: no type specified
|
||||
#[default]
|
||||
Null,
|
||||
|
||||
Int8(i8),
|
||||
@@ -230,6 +234,36 @@ impl Value {
|
||||
Value::Null => "null",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&self, key: &str) -> Option<&Self> {
|
||||
match self {
|
||||
Value::Map(map) => map.get(key),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> Option<&str> {
|
||||
match self {
|
||||
Value::String(v) => Some(v),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_f64(&self) -> Option<f64> {
|
||||
match self {
|
||||
Value::Float32(v) => Some(*v as f64),
|
||||
Value::Float64(v) => Some(*v),
|
||||
Value::Uint64(v) => Some(*v as f64),
|
||||
Value::Uint32(v) => Some(*v as f64),
|
||||
Value::Uint16(v) => Some(*v as f64),
|
||||
Value::Uint8(v) => Some(*v as f64),
|
||||
Value::Int64(v) => Some(*v as f64),
|
||||
Value::Int32(v) => Some(*v as f64),
|
||||
Value::Int16(v) => Some(*v as f64),
|
||||
Value::Int8(v) => Some(*v as f64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Value {
|
||||
@@ -410,3 +444,352 @@ impl From<Value> for JsonbValue<'_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(value: String) -> Self {
|
||||
Value::String(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Value {
|
||||
fn from(value: &str) -> Self {
|
||||
Value::String(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for Value {
|
||||
fn from(value: i64) -> Self {
|
||||
Value::Int64(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<f64> for Value {
|
||||
fn from(value: f64) -> Self {
|
||||
Value::Float64(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<String>> for Value {
|
||||
fn from(value: Vec<String>) -> Self {
|
||||
Value::Array(Array {
|
||||
values: value.into_iter().map(Value::String).collect(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Self>> for Value {
|
||||
fn from(value: Vec<Self>) -> Self {
|
||||
Value::Array(Array { values: value })
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for Value {
|
||||
fn from(value: bool) -> Self {
|
||||
Value::Boolean(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl JsonLike for Value {
|
||||
fn get(&self, key: &str) -> Option<&Self> {
|
||||
self.get(key)
|
||||
}
|
||||
|
||||
fn itre(&self, pref: String) -> Vec<jsonpath_rust::JsonPathValue<Self>> {
|
||||
let res = match self {
|
||||
Value::Array(elems) => {
|
||||
let mut res = vec![];
|
||||
for (idx, el) in elems.iter().enumerate() {
|
||||
res.push(jsonpath_rust::JsonPathValue::Slice(
|
||||
el,
|
||||
jsonpath_rust::jsp_idx(&pref, idx),
|
||||
));
|
||||
}
|
||||
res
|
||||
}
|
||||
Value::Map(elems) => {
|
||||
let mut res = vec![];
|
||||
for (key, el) in elems.iter() {
|
||||
res.push(jsonpath_rust::JsonPathValue::Slice(
|
||||
el,
|
||||
jsonpath_rust::jsp_obj(&pref, key),
|
||||
));
|
||||
}
|
||||
res
|
||||
}
|
||||
_ => vec![],
|
||||
};
|
||||
if res.is_empty() {
|
||||
vec![jsonpath_rust::JsonPathValue::NoValue]
|
||||
} else {
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
fn array_len(&self) -> jsonpath_rust::JsonPathValue<'static, Self> {
|
||||
match self {
|
||||
Value::Array(elems) => {
|
||||
jsonpath_rust::JsonPathValue::NewValue(Value::Int64(elems.len() as i64))
|
||||
}
|
||||
_ => jsonpath_rust::JsonPathValue::NoValue,
|
||||
}
|
||||
}
|
||||
|
||||
fn init_with_usize(cnt: usize) -> Self {
|
||||
Value::Int64(cnt as i64)
|
||||
}
|
||||
|
||||
fn deep_flatten(&self, pref: String) -> Vec<(&Self, String)> {
|
||||
let mut acc = vec![];
|
||||
match self {
|
||||
Value::Map(elems) => {
|
||||
for (f, v) in elems.iter() {
|
||||
let pref = jsp_obj(&pref, f);
|
||||
acc.push((v, pref.clone()));
|
||||
acc.append(&mut v.deep_flatten(pref));
|
||||
}
|
||||
}
|
||||
Value::Array(elems) => {
|
||||
for (i, v) in elems.iter().enumerate() {
|
||||
let pref = jsp_idx(&pref, i);
|
||||
acc.push((v, pref.clone()));
|
||||
acc.append(&mut v.deep_flatten(pref));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
acc
|
||||
}
|
||||
|
||||
fn deep_path_by_key<'a>(
|
||||
&'a self,
|
||||
key: jsonpath_rust::path::ObjectField<'a, Self>,
|
||||
pref: String,
|
||||
) -> Vec<(&'a Self, String)> {
|
||||
let mut result: Vec<(&'a Value, String)> = jsonpath_rust::JsonPathValue::vec_as_pair(
|
||||
key.find(jsonpath_rust::JsonPathValue::new_slice(self, pref.clone())),
|
||||
);
|
||||
match self {
|
||||
Value::Map(elems) => {
|
||||
let mut next_levels: Vec<(&'a Value, String)> = elems
|
||||
.iter()
|
||||
.flat_map(|(k, v)| v.deep_path_by_key(key.clone(), jsp_obj(&pref, k)))
|
||||
.collect();
|
||||
result.append(&mut next_levels);
|
||||
result
|
||||
}
|
||||
Value::Array(elems) => {
|
||||
let mut next_levels: Vec<(&'a Value, String)> = elems
|
||||
.iter()
|
||||
.enumerate()
|
||||
.flat_map(|(i, v)| v.deep_path_by_key(key.clone(), jsp_idx(&pref, i)))
|
||||
.collect();
|
||||
result.append(&mut next_levels);
|
||||
result
|
||||
}
|
||||
_ => result,
|
||||
}
|
||||
}
|
||||
|
||||
fn as_u64(&self) -> Option<u64> {
|
||||
match self {
|
||||
Value::Uint64(v) => Some(*v),
|
||||
Value::Uint32(v) => Some(*v as u64),
|
||||
Value::Uint16(v) => Some(*v as u64),
|
||||
Value::Uint8(v) => Some(*v as u64),
|
||||
Value::Int64(v) if *v >= 0 => Some(*v as u64),
|
||||
Value::Int32(v) if *v >= 0 => Some(*v as u64),
|
||||
Value::Int16(v) if *v >= 0 => Some(*v as u64),
|
||||
Value::Int8(v) if *v >= 0 => Some(*v as u64),
|
||||
Value::Float64(v) if *v >= 0.0 => Some(*v as u64),
|
||||
Value::Float32(v) if *v >= 0.0 => Some(*v as u64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_array(&self) -> bool {
|
||||
matches!(self, Value::Array(_))
|
||||
}
|
||||
|
||||
fn as_array(&self) -> Option<&Vec<Self>> {
|
||||
match self {
|
||||
Value::Array(arr) => Some(&arr.values),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn size(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if let Some(v) = right.first() {
|
||||
let sz = match v {
|
||||
Value::Int64(n) => *n as usize,
|
||||
Value::Int32(n) => *n as usize,
|
||||
Value::Int16(n) => *n as usize,
|
||||
Value::Int8(n) => *n as usize,
|
||||
|
||||
Value::Uint64(n) => *n as usize,
|
||||
Value::Uint32(n) => *n as usize,
|
||||
Value::Uint16(n) => *n as usize,
|
||||
Value::Uint8(n) => *n as usize,
|
||||
Value::Float32(n) => *n as usize,
|
||||
Value::Float64(n) => *n as usize,
|
||||
_ => return false,
|
||||
};
|
||||
for el in left.iter() {
|
||||
match el {
|
||||
Value::String(v) if v.len() == sz => true,
|
||||
Value::Array(elems) if elems.len() == sz => true,
|
||||
Value::Map(fields) if fields.len() == sz => true,
|
||||
_ => return false,
|
||||
};
|
||||
}
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn sub_set_of(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.is_empty() {
|
||||
return true;
|
||||
}
|
||||
if right.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(elems) = left.first().and_then(|e| e.as_array()) {
|
||||
if let Some(Value::Array(right_elems)) = right.first() {
|
||||
if right_elems.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
for el in elems {
|
||||
let mut res = false;
|
||||
|
||||
for r in right_elems.iter() {
|
||||
if el.eq(r) {
|
||||
res = true
|
||||
}
|
||||
}
|
||||
if !res {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn any_of(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.is_empty() {
|
||||
return true;
|
||||
}
|
||||
if right.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(Value::Array(elems)) = right.first() {
|
||||
if elems.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
for el in left.iter() {
|
||||
if let Some(left_elems) = el.as_array() {
|
||||
for l in left_elems.iter() {
|
||||
for r in elems.iter() {
|
||||
if l.eq(r) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for r in elems.iter() {
|
||||
if el.eq(&r) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn regex(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.is_empty() || right.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
match right.first() {
|
||||
Some(Value::String(str)) => {
|
||||
if let Ok(regex) = Regex::new(str) {
|
||||
for el in left.iter() {
|
||||
if let Some(v) = el.as_str() {
|
||||
if regex.is_match(v) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn inside(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.is_empty() {
|
||||
return false;
|
||||
}
|
||||
|
||||
match right.first() {
|
||||
Some(Value::Array(elems)) => {
|
||||
for el in left.iter() {
|
||||
if elems.contains(el) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
Some(Value::Map(elems)) => {
|
||||
for el in left.iter() {
|
||||
for r in elems.values() {
|
||||
if el.eq(&r) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn less(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.len() == 1 && right.len() == 1 {
|
||||
match (left.first(), right.first()) {
|
||||
(Some(l), Some(r)) => l
|
||||
.as_f64()
|
||||
.and_then(|v1| r.as_f64().map(|v2| v1 < v2))
|
||||
.unwrap_or(false),
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn eq(left: Vec<&Self>, right: Vec<&Self>) -> bool {
|
||||
if left.len() != right.len() {
|
||||
false
|
||||
} else {
|
||||
left.iter().zip(right).map(|(a, b)| a.eq(&b)).all(|a| a)
|
||||
}
|
||||
}
|
||||
|
||||
fn array(data: Vec<Self>) -> Self {
|
||||
Value::Array(Array { values: data })
|
||||
}
|
||||
|
||||
fn null() -> Self {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,8 +16,8 @@ use api::v1::value::ValueData;
|
||||
use api::v1::Rows;
|
||||
use common_telemetry::tracing::info;
|
||||
use greptime_proto::v1::value::ValueData::{
|
||||
BoolValue, F64Value, StringValue, TimestampNanosecondValue, TimestampSecondValue, U32Value,
|
||||
U64Value, U8Value,
|
||||
BinaryValue, BoolValue, F64Value, StringValue, TimestampNanosecondValue, TimestampSecondValue,
|
||||
U32Value, U64Value, U8Value,
|
||||
};
|
||||
use greptime_proto::v1::Value as GreptimeValue;
|
||||
use pipeline::{parse, Content, GreptimeTransformer, Pipeline};
|
||||
@@ -518,6 +518,112 @@ transform:
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_path() {
|
||||
let input_value_str = r#"
|
||||
{
|
||||
"product_object": {
|
||||
"hello": "world"
|
||||
},
|
||||
"product_array": [
|
||||
"hello",
|
||||
"world"
|
||||
],
|
||||
"complex_object": {
|
||||
"shop": {
|
||||
"orders": [
|
||||
{
|
||||
"id": 1,
|
||||
"active": true
|
||||
},
|
||||
{
|
||||
"id": 2
|
||||
},
|
||||
{
|
||||
"id": 3
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"active": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}"#;
|
||||
let input_value = serde_json::from_str::<serde_json::Value>(input_value_str).unwrap();
|
||||
|
||||
let pipeline_yaml = r#"
|
||||
processors:
|
||||
- json_path:
|
||||
fields:
|
||||
- product_object, object_target
|
||||
json_path: "$.hello"
|
||||
result_index: 0
|
||||
- json_path:
|
||||
fields:
|
||||
- product_array, array_target
|
||||
json_path: "$.[1]"
|
||||
result_index: 0
|
||||
- json_path:
|
||||
fields:
|
||||
- complex_object, complex_target1
|
||||
json_path: "$.shop.orders[?(@.active)].id"
|
||||
- json_path:
|
||||
fields:
|
||||
- complex_target1, complex_target_2
|
||||
json_path: "$.[1]"
|
||||
result_index: 0
|
||||
- json_path:
|
||||
fields:
|
||||
- complex_object, complex_target_3
|
||||
json_path: "$.shop.orders[?(@.active)].id"
|
||||
result_index: 1
|
||||
transform:
|
||||
- fields:
|
||||
- object_target
|
||||
- array_target
|
||||
type: string
|
||||
- fields:
|
||||
- complex_target_3
|
||||
- complex_target_2
|
||||
type: uint32
|
||||
- fields:
|
||||
- complex_target1
|
||||
type: json
|
||||
"#;
|
||||
|
||||
let yaml_content = Content::Yaml(pipeline_yaml.into());
|
||||
let pipeline: Pipeline<GreptimeTransformer> = parse(&yaml_content).unwrap();
|
||||
|
||||
let mut status = pipeline.init_intermediate_state();
|
||||
|
||||
pipeline.prepare(input_value, &mut status).unwrap();
|
||||
let row = pipeline.exec_mut(&mut status).unwrap();
|
||||
|
||||
let r = row
|
||||
.values
|
||||
.into_iter()
|
||||
.map(|v| v.value_data.unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let object_target = r[0].clone();
|
||||
let array_target = r[1].clone();
|
||||
let complex_target3 = r[2].clone();
|
||||
let complex_target2 = r[3].clone();
|
||||
let complex_target1 = r[4].clone();
|
||||
|
||||
assert_eq!(StringValue("world".into()), object_target);
|
||||
assert_eq!(StringValue("world".into()), array_target);
|
||||
assert_eq!(complex_target3, complex_target2);
|
||||
|
||||
assert_eq!(
|
||||
BinaryValue(
|
||||
jsonb::Value::Array(vec![jsonb::Value::from(1), jsonb::Value::from(4),]).to_vec()
|
||||
),
|
||||
complex_target1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_data() {
|
||||
let input_value_str = r#"
|
||||
|
||||
@@ -15,11 +15,8 @@
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::functions_aggregate::sum::Sum;
|
||||
use datafusion_expr::aggregate_function::AggregateFunction as BuiltInAggregateFunction;
|
||||
use datafusion_expr::expr::{AggregateFunction, AggregateFunctionDefinition};
|
||||
use datafusion_expr::utils::exprlist_to_columns;
|
||||
use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, UserDefinedLogicalNode};
|
||||
use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
|
||||
use promql::extension_plan::{
|
||||
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
|
||||
};
|
||||
@@ -28,91 +25,21 @@ use crate::dist_plan::merge_sort::{merge_sort_transformer, MergeSortLogicalPlan}
|
||||
use crate::dist_plan::MergeScanLogicalPlan;
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub enum Commutativity<T> {
|
||||
pub enum Commutativity {
|
||||
Commutative,
|
||||
PartialCommutative,
|
||||
ConditionalCommutative(Option<Transformer<T>>),
|
||||
TransformedCommutative(Option<Transformer<T>>),
|
||||
ConditionalCommutative(Option<Transformer>),
|
||||
TransformedCommutative(Option<Transformer>),
|
||||
NonCommutative,
|
||||
Unimplemented,
|
||||
/// For unrelated plans like DDL
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
impl<T> Commutativity<T> {
|
||||
/// Check if self is stricter than `lhs`
|
||||
fn is_stricter_than(&self, lhs: &Self) -> bool {
|
||||
match (lhs, self) {
|
||||
(Commutativity::Commutative, Commutativity::Commutative) => false,
|
||||
(Commutativity::Commutative, _) => true,
|
||||
|
||||
(
|
||||
Commutativity::PartialCommutative,
|
||||
Commutativity::Commutative | Commutativity::PartialCommutative,
|
||||
) => false,
|
||||
(Commutativity::PartialCommutative, _) => true,
|
||||
|
||||
(
|
||||
Commutativity::ConditionalCommutative(_),
|
||||
Commutativity::Commutative
|
||||
| Commutativity::PartialCommutative
|
||||
| Commutativity::ConditionalCommutative(_),
|
||||
) => false,
|
||||
(Commutativity::ConditionalCommutative(_), _) => true,
|
||||
|
||||
(
|
||||
Commutativity::TransformedCommutative(_),
|
||||
Commutativity::Commutative
|
||||
| Commutativity::PartialCommutative
|
||||
| Commutativity::ConditionalCommutative(_)
|
||||
| Commutativity::TransformedCommutative(_),
|
||||
) => false,
|
||||
(Commutativity::TransformedCommutative(_), _) => true,
|
||||
|
||||
(
|
||||
Commutativity::NonCommutative
|
||||
| Commutativity::Unimplemented
|
||||
| Commutativity::Unsupported,
|
||||
_,
|
||||
) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a bare commutative level without any transformer
|
||||
fn bare_level<To>(&self) -> Commutativity<To> {
|
||||
match self {
|
||||
Commutativity::Commutative => Commutativity::Commutative,
|
||||
Commutativity::PartialCommutative => Commutativity::PartialCommutative,
|
||||
Commutativity::ConditionalCommutative(_) => Commutativity::ConditionalCommutative(None),
|
||||
Commutativity::TransformedCommutative(_) => Commutativity::TransformedCommutative(None),
|
||||
Commutativity::NonCommutative => Commutativity::NonCommutative,
|
||||
Commutativity::Unimplemented => Commutativity::Unimplemented,
|
||||
Commutativity::Unsupported => Commutativity::Unsupported,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::fmt::Debug for Commutativity<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Commutativity::Commutative => write!(f, "Commutative"),
|
||||
Commutativity::PartialCommutative => write!(f, "PartialCommutative"),
|
||||
Commutativity::ConditionalCommutative(_) => write!(f, "ConditionalCommutative"),
|
||||
Commutativity::TransformedCommutative(_) => write!(f, "TransformedCommutative"),
|
||||
Commutativity::NonCommutative => write!(f, "NonCommutative"),
|
||||
Commutativity::Unimplemented => write!(f, "Unimplemented"),
|
||||
Commutativity::Unsupported => write!(f, "Unsupported"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Categorizer {}
|
||||
|
||||
impl Categorizer {
|
||||
pub fn check_plan(
|
||||
plan: &LogicalPlan,
|
||||
partition_cols: Option<Vec<String>>,
|
||||
) -> Commutativity<LogicalPlan> {
|
||||
pub fn check_plan(plan: &LogicalPlan, partition_cols: Option<Vec<String>>) -> Commutativity {
|
||||
let partition_cols = partition_cols.unwrap_or_default();
|
||||
|
||||
match plan {
|
||||
@@ -120,104 +47,21 @@ impl Categorizer {
|
||||
for expr in &proj.expr {
|
||||
let commutativity = Self::check_expr(expr);
|
||||
if !matches!(commutativity, Commutativity::Commutative) {
|
||||
return commutativity.bare_level();
|
||||
return commutativity;
|
||||
}
|
||||
}
|
||||
Commutativity::Commutative
|
||||
}
|
||||
// TODO(ruihang): Change this to Commutative once Like is supported in substrait
|
||||
LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate).bare_level(),
|
||||
LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate),
|
||||
LogicalPlan::Window(_) => Commutativity::Unimplemented,
|
||||
LogicalPlan::Aggregate(aggr) => {
|
||||
// fast path: if the group_expr is a subset of partition_cols
|
||||
if Self::check_partition(&aggr.group_expr, &partition_cols) {
|
||||
return Commutativity::Commutative;
|
||||
}
|
||||
|
||||
common_telemetry::info!("[DEBUG] aggregate plan expr: {:?}", aggr.aggr_expr);
|
||||
|
||||
// get all commutativity levels of aggregate exprs and find the strictest one
|
||||
let aggr_expr_comm = aggr
|
||||
.aggr_expr
|
||||
.iter()
|
||||
.map(Self::check_expr)
|
||||
.collect::<Vec<_>>();
|
||||
let mut strictest = Commutativity::Commutative;
|
||||
for comm in &aggr_expr_comm {
|
||||
if comm.is_stricter_than(&strictest) {
|
||||
strictest = comm.bare_level();
|
||||
}
|
||||
}
|
||||
|
||||
common_telemetry::info!("[DEBUG] aggr_expr_comm: {:?}", aggr_expr_comm);
|
||||
common_telemetry::info!("[DEBUG] strictest: {:?}", strictest);
|
||||
|
||||
// fast path: if any expr is commutative or non-commutative
|
||||
if matches!(
|
||||
strictest,
|
||||
Commutativity::Commutative
|
||||
| Commutativity::NonCommutative
|
||||
| Commutativity::Unimplemented
|
||||
| Commutativity::Unsupported
|
||||
) {
|
||||
return strictest.bare_level();
|
||||
}
|
||||
|
||||
common_telemetry::info!("[DEBUG] continue for strictest",);
|
||||
|
||||
// collect expr transformers
|
||||
let mut expr_transformer = Vec::with_capacity(aggr.aggr_expr.len());
|
||||
for expr_comm in aggr_expr_comm {
|
||||
match expr_comm {
|
||||
Commutativity::Commutative => expr_transformer.push(None),
|
||||
Commutativity::ConditionalCommutative(transformer) => {
|
||||
expr_transformer.push(transformer.clone());
|
||||
}
|
||||
Commutativity::PartialCommutative => expr_transformer
|
||||
.push(Some(Arc::new(expr_partial_commutative_transformer))),
|
||||
_ => expr_transformer.push(None),
|
||||
}
|
||||
}
|
||||
|
||||
// build plan transformer
|
||||
let transformer = Arc::new(move |plan: &LogicalPlan| {
|
||||
if let LogicalPlan::Aggregate(aggr) = plan {
|
||||
let mut new_plan = aggr.clone();
|
||||
|
||||
// transform aggr exprs
|
||||
for (expr, transformer) in
|
||||
new_plan.aggr_expr.iter_mut().zip(&expr_transformer)
|
||||
{
|
||||
if let Some(transformer) = transformer {
|
||||
let new_expr = transformer(expr)?;
|
||||
*expr = new_expr;
|
||||
}
|
||||
}
|
||||
|
||||
// transform group exprs
|
||||
for expr in new_plan.group_expr.iter_mut() {
|
||||
// if let Some(transformer) = transformer {
|
||||
// let new_expr = transformer(expr)?;
|
||||
// *expr = new_expr;
|
||||
// }
|
||||
let expr_name = expr.name_for_alias().expect("not a sort expr");
|
||||
*expr = Expr::Column(expr_name.into());
|
||||
}
|
||||
|
||||
common_telemetry::info!(
|
||||
"[DEBUG] new plan aggr expr: {:?}, group expr: {:?}",
|
||||
new_plan.aggr_expr,
|
||||
new_plan.group_expr
|
||||
);
|
||||
Some(LogicalPlan::Aggregate(new_plan))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
common_telemetry::info!("[DEBUG] done TransformedCommutative for aggr plan ");
|
||||
|
||||
Commutativity::TransformedCommutative(Some(transformer))
|
||||
// check all children exprs and uses the strictest level
|
||||
Commutativity::Unimplemented
|
||||
}
|
||||
LogicalPlan::Sort(_) => {
|
||||
if partition_cols.is_empty() {
|
||||
@@ -269,7 +113,7 @@ impl Categorizer {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_extension_plan(plan: &dyn UserDefinedLogicalNode) -> Commutativity<LogicalPlan> {
|
||||
pub fn check_extension_plan(plan: &dyn UserDefinedLogicalNode) -> Commutativity {
|
||||
match plan.name() {
|
||||
name if name == EmptyMetric::name()
|
||||
|| name == InstantManipulate::name()
|
||||
@@ -285,7 +129,7 @@ impl Categorizer {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_expr(expr: &Expr) -> Commutativity<Expr> {
|
||||
pub fn check_expr(expr: &Expr) -> Commutativity {
|
||||
match expr {
|
||||
Expr::Column(_)
|
||||
| Expr::ScalarVariable(_, _)
|
||||
@@ -311,14 +155,13 @@ impl Categorizer {
|
||||
| Expr::Case(_)
|
||||
| Expr::Cast(_)
|
||||
| Expr::TryCast(_)
|
||||
| Expr::AggregateFunction(_)
|
||||
| Expr::WindowFunction(_)
|
||||
| Expr::InList(_)
|
||||
| Expr::InSubquery(_)
|
||||
| Expr::ScalarSubquery(_)
|
||||
| Expr::Wildcard { .. } => Commutativity::Unimplemented,
|
||||
|
||||
Expr::AggregateFunction(aggr_fn) => Self::check_aggregate_fn(aggr_fn),
|
||||
|
||||
Expr::Alias(_)
|
||||
| Expr::Unnest(_)
|
||||
| Expr::GroupingSet(_)
|
||||
@@ -327,59 +170,6 @@ impl Categorizer {
|
||||
}
|
||||
}
|
||||
|
||||
fn check_aggregate_fn(aggr_fn: &AggregateFunction) -> Commutativity<Expr> {
|
||||
common_telemetry::info!("[DEBUG] checking aggr_fn: {:?}", aggr_fn);
|
||||
match &aggr_fn.func_def {
|
||||
AggregateFunctionDefinition::BuiltIn(func_def) => match func_def {
|
||||
BuiltInAggregateFunction::Max | BuiltInAggregateFunction::Min => {
|
||||
// Commutativity::PartialCommutative
|
||||
common_telemetry::info!("[DEBUG] checking min/max: {:?}", aggr_fn);
|
||||
let mut new_fn = aggr_fn.clone();
|
||||
let col_name = Expr::AggregateFunction(aggr_fn.clone())
|
||||
.name_for_alias()
|
||||
.expect("not a sort expr");
|
||||
let alias = col_name.clone();
|
||||
new_fn.args = vec![Expr::Column(col_name.into())];
|
||||
|
||||
// new_fn.func_def =
|
||||
// AggregateFunctionDefinition::BuiltIn(BuiltInAggregateFunction::Sum);
|
||||
Commutativity::ConditionalCommutative(Some(Arc::new(move |_| {
|
||||
common_telemetry::info!("[DEBUG] transforming min/max fn: {:?}", new_fn);
|
||||
Some(Expr::AggregateFunction(new_fn.clone()).alias(alias.clone()))
|
||||
})))
|
||||
}
|
||||
BuiltInAggregateFunction::Count => {
|
||||
common_telemetry::info!("[DEBUG] checking count_fn: {:?}", aggr_fn);
|
||||
let col_name = Expr::AggregateFunction(aggr_fn.clone())
|
||||
.name_for_alias()
|
||||
.expect("not a sort expr");
|
||||
let sum_udf = Arc::new(AggregateUDF::new_from_impl(Sum::new()));
|
||||
let alias = col_name.clone();
|
||||
// let sum_func = Arc::new(AggregateFunction::new_udf(
|
||||
// sum_udf,
|
||||
// vec![Expr::Column(col_name.into())],
|
||||
// false,
|
||||
// None,
|
||||
// None,
|
||||
// None,
|
||||
// ));
|
||||
let mut sum_expr = aggr_fn.clone();
|
||||
sum_expr.func_def = AggregateFunctionDefinition::UDF(sum_udf);
|
||||
sum_expr.args = vec![Expr::Column(col_name.into())];
|
||||
// let mut sum_fn = aggr_fn.clone();
|
||||
// sum_fn.func_def =
|
||||
// AggregateFunctionDefinition::BuiltIn(BuiltInAggregateFunction::Sum);
|
||||
Commutativity::ConditionalCommutative(Some(Arc::new(move |_| {
|
||||
common_telemetry::info!("[DEBUG] transforming sum_fn: {:?}", sum_expr);
|
||||
Some(Expr::AggregateFunction(sum_expr.clone()).alias(alias.clone()))
|
||||
})))
|
||||
}
|
||||
_ => Commutativity::Unimplemented,
|
||||
},
|
||||
AggregateFunctionDefinition::UDF(_) => Commutativity::Unimplemented,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return true if the given expr and partition cols satisfied the rule.
|
||||
/// In this case the plan can be treated as fully commutative.
|
||||
fn check_partition(exprs: &[Expr], partition_cols: &[String]) -> bool {
|
||||
@@ -401,16 +191,12 @@ impl Categorizer {
|
||||
}
|
||||
}
|
||||
|
||||
pub type Transformer<T> = Arc<dyn for<'a> Fn(&'a T) -> Option<T>>;
|
||||
pub type Transformer = Arc<dyn Fn(&LogicalPlan) -> Option<LogicalPlan>>;
|
||||
|
||||
pub fn partial_commutative_transformer(plan: &LogicalPlan) -> Option<LogicalPlan> {
|
||||
Some(plan.clone())
|
||||
}
|
||||
|
||||
pub fn expr_partial_commutative_transformer(expr: &Expr) -> Option<Expr> {
|
||||
Some(expr.clone())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datafusion_expr::{LogicalPlanBuilder, Sort};
|
||||
|
||||
@@ -199,6 +199,7 @@ struct PartSortStream {
|
||||
#[allow(dead_code)] // this is used under #[debug_assertions]
|
||||
partition: usize,
|
||||
cur_part_idx: usize,
|
||||
evaluating_batch: Option<DfRecordBatch>,
|
||||
metrics: BaselineMetrics,
|
||||
}
|
||||
|
||||
@@ -224,6 +225,7 @@ impl PartSortStream {
|
||||
partition_ranges,
|
||||
partition,
|
||||
cur_part_idx: 0,
|
||||
evaluating_batch: None,
|
||||
metrics: BaselineMetrics::new(&sort.metrics, partition),
|
||||
}
|
||||
}
|
||||
@@ -425,6 +427,52 @@ impl PartSortStream {
|
||||
Ok(sorted)
|
||||
}
|
||||
|
||||
fn split_batch(
|
||||
&mut self,
|
||||
batch: DfRecordBatch,
|
||||
) -> datafusion_common::Result<Option<DfRecordBatch>> {
|
||||
if batch.num_rows() == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let sort_column = self
|
||||
.expression
|
||||
.expr
|
||||
.evaluate(&batch)?
|
||||
.into_array(batch.num_rows())?;
|
||||
|
||||
let next_range_idx = self.try_find_next_range(&sort_column)?;
|
||||
let Some(idx) = next_range_idx else {
|
||||
self.buffer.push(batch);
|
||||
// keep polling input for next batch
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let this_range = batch.slice(0, idx);
|
||||
let remaining_range = batch.slice(idx, batch.num_rows() - idx);
|
||||
if this_range.num_rows() != 0 {
|
||||
self.buffer.push(this_range);
|
||||
}
|
||||
// mark end of current PartitionRange
|
||||
let sorted_batch = self.sort_buffer();
|
||||
// step to next proper PartitionRange
|
||||
self.cur_part_idx += 1;
|
||||
let next_sort_column = sort_column.slice(idx, batch.num_rows() - idx);
|
||||
if self.try_find_next_range(&next_sort_column)?.is_some() {
|
||||
// remaining batch still contains data that exceeds the current partition range
|
||||
// register the remaining batch for next polling
|
||||
self.evaluating_batch = Some(remaining_range);
|
||||
} else {
|
||||
// remaining batch is within the current partition range
|
||||
// push to the buffer and continue polling
|
||||
if remaining_range.num_rows() != 0 {
|
||||
self.buffer.push(remaining_range);
|
||||
}
|
||||
}
|
||||
|
||||
sorted_batch.map(|x| if x.num_rows() == 0 { None } else { Some(x) })
|
||||
}
|
||||
|
||||
pub fn poll_next_inner(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
@@ -439,51 +487,29 @@ impl PartSortStream {
|
||||
}
|
||||
}
|
||||
|
||||
// if there is a remaining batch being evaluated from last run,
|
||||
// split on it instead of fetching new batch
|
||||
if let Some(evaluating_batch) = self.evaluating_batch.take()
|
||||
&& evaluating_batch.num_rows() != 0
|
||||
{
|
||||
if let Some(sorted_batch) = self.split_batch(evaluating_batch)? {
|
||||
return Poll::Ready(Some(Ok(sorted_batch)));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// fetch next batch from input
|
||||
let res = self.input.as_mut().poll_next(cx);
|
||||
match res {
|
||||
Poll::Ready(Some(Ok(batch))) => {
|
||||
let sort_column = self
|
||||
.expression
|
||||
.expr
|
||||
.evaluate(&batch)?
|
||||
.into_array(batch.num_rows())?;
|
||||
let next_range_idx = self.try_find_next_range(&sort_column)?;
|
||||
// `Some` means the current range is finished, split the batch into two parts and sort
|
||||
if let Some(idx) = next_range_idx {
|
||||
let this_range = batch.slice(0, idx);
|
||||
let next_range = batch.slice(idx, batch.num_rows() - idx);
|
||||
if this_range.num_rows() != 0 {
|
||||
self.buffer.push(this_range);
|
||||
}
|
||||
// mark end of current PartitionRange
|
||||
let sorted_batch = self.sort_buffer()?;
|
||||
let next_sort_column = sort_column.slice(idx, batch.num_rows() - idx);
|
||||
// step to next proper PartitionRange
|
||||
loop {
|
||||
self.cur_part_idx += 1;
|
||||
if next_sort_column.is_empty()
|
||||
|| self.try_find_next_range(&next_sort_column)?.is_none()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// push the next range to the buffer
|
||||
if next_range.num_rows() != 0 {
|
||||
self.buffer.push(next_range);
|
||||
}
|
||||
if sorted_batch.num_rows() == 0 {
|
||||
// Current part is empty, continue polling next part.
|
||||
continue;
|
||||
}
|
||||
if let Some(sorted_batch) = self.split_batch(batch)? {
|
||||
return Poll::Ready(Some(Ok(sorted_batch)));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.buffer.push(batch);
|
||||
// keep polling until boundary(a empty RecordBatch) is reached
|
||||
continue;
|
||||
}
|
||||
// input stream end, sort the buffer and return
|
||||
// input stream end, mark and continue
|
||||
Poll::Ready(None) => {
|
||||
self.input_complete = true;
|
||||
continue;
|
||||
@@ -564,14 +590,19 @@ mod test {
|
||||
let schema = Arc::new(schema);
|
||||
|
||||
let mut input_ranged_data = vec![];
|
||||
let mut output_ranges = vec![];
|
||||
let mut output_data = vec![];
|
||||
// generate each input `PartitionRange`
|
||||
for part_id in 0..rng.usize(0..part_cnt_bound) {
|
||||
// generate each `PartitionRange`'s timestamp range
|
||||
let (start, end) = if descending {
|
||||
let end = bound_val
|
||||
.map(|i| i.checked_sub(rng.i64(0..range_offset_bound)).expect("Bad luck, fuzzy test generate data that will overflow, change seed and try again"))
|
||||
.unwrap_or_else(|| rng.i64(..));
|
||||
.map(
|
||||
|i| i
|
||||
.checked_sub(rng.i64(0..range_offset_bound))
|
||||
.expect("Bad luck, fuzzy test generate data that will overflow, change seed and try again")
|
||||
)
|
||||
.unwrap_or_else(|| rng.i64(-100000000..100000000));
|
||||
bound_val = Some(end);
|
||||
let start = end - rng.i64(1..range_size_bound);
|
||||
let start = Timestamp::new(start, unit.clone().into());
|
||||
@@ -594,13 +625,15 @@ mod test {
|
||||
for _batch_idx in 0..rng.usize(1..batch_cnt_bound) {
|
||||
let cnt = rng.usize(0..batch_size_bound) + 1;
|
||||
let iter = 0..rng.usize(0..cnt);
|
||||
let data_gen = iter
|
||||
let mut data_gen = iter
|
||||
.map(|_| rng.i64(start.value()..end.value()))
|
||||
.collect_vec();
|
||||
if data_gen.is_empty() {
|
||||
// current batch is empty, skip
|
||||
continue;
|
||||
}
|
||||
// mito always sort on ASC order
|
||||
data_gen.sort();
|
||||
per_part_sort_data.extend(data_gen.clone());
|
||||
let arr = new_ts_array(unit.clone(), data_gen.clone());
|
||||
let batch = DfRecordBatch::try_new(schema.clone(), vec![arr]).unwrap();
|
||||
@@ -615,15 +648,35 @@ mod test {
|
||||
};
|
||||
input_ranged_data.push((range, batches));
|
||||
|
||||
if descending {
|
||||
per_part_sort_data.sort_by(|a, b| b.cmp(a));
|
||||
} else {
|
||||
per_part_sort_data.sort();
|
||||
}
|
||||
output_ranges.push(range);
|
||||
if per_part_sort_data.is_empty() {
|
||||
continue;
|
||||
}
|
||||
output_data.push(per_part_sort_data);
|
||||
output_data.extend_from_slice(&per_part_sort_data);
|
||||
}
|
||||
|
||||
// adjust output data with adjacent PartitionRanges
|
||||
let mut output_data_iter = output_data.iter().peekable();
|
||||
let mut output_data = vec![];
|
||||
for range in output_ranges.clone() {
|
||||
let mut cur_data = vec![];
|
||||
while let Some(val) = output_data_iter.peek() {
|
||||
if **val < range.start.value() || **val >= range.end.value() {
|
||||
break;
|
||||
}
|
||||
cur_data.push(*output_data_iter.next().unwrap());
|
||||
}
|
||||
|
||||
if cur_data.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if descending {
|
||||
cur_data.sort_by(|a, b| b.cmp(a));
|
||||
} else {
|
||||
cur_data.sort();
|
||||
}
|
||||
output_data.push(cur_data);
|
||||
}
|
||||
|
||||
let expected_output = output_data
|
||||
@@ -658,7 +711,7 @@ mod test {
|
||||
((5, 10), vec![vec![5, 6], vec![7, 8]]),
|
||||
],
|
||||
false,
|
||||
vec![vec![1, 2, 3, 4, 5, 6, 7, 8, 9], vec![5, 6, 7, 8]],
|
||||
vec![vec![1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9]],
|
||||
),
|
||||
(
|
||||
TimeUnit::Millisecond,
|
||||
@@ -700,6 +753,25 @@ mod test {
|
||||
true,
|
||||
vec![],
|
||||
),
|
||||
(
|
||||
TimeUnit::Millisecond,
|
||||
vec![
|
||||
(
|
||||
(15, 20),
|
||||
vec![vec![15, 17, 19, 10, 11, 12, 5, 6, 7, 8, 9, 1, 2, 3, 4]],
|
||||
),
|
||||
((10, 15), vec![]),
|
||||
((5, 10), vec![]),
|
||||
((0, 10), vec![]),
|
||||
],
|
||||
true,
|
||||
vec![
|
||||
vec![19, 17, 15],
|
||||
vec![12, 11, 10],
|
||||
vec![9, 8, 7, 6, 5],
|
||||
vec![4, 3, 2, 1],
|
||||
],
|
||||
),
|
||||
];
|
||||
|
||||
for (identifier, (unit, input_ranged_data, descending, expected_output)) in
|
||||
@@ -744,10 +816,18 @@ mod test {
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
run_test(0, input_ranged_data, schema.clone(), opt, expected_output).await;
|
||||
run_test(
|
||||
identifier,
|
||||
input_ranged_data,
|
||||
schema.clone(),
|
||||
opt,
|
||||
expected_output,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
async fn run_test(
|
||||
case_id: usize,
|
||||
input_ranged_data: Vec<(PartitionRange, Vec<DfRecordBatch>)>,
|
||||
@@ -772,20 +852,36 @@ mod test {
|
||||
options: opt,
|
||||
},
|
||||
None,
|
||||
vec![ranges],
|
||||
vec![ranges.clone()],
|
||||
Arc::new(mock_input),
|
||||
);
|
||||
|
||||
let exec_stream = exec.execute(0, Arc::new(TaskContext::default())).unwrap();
|
||||
|
||||
let real_output = exec_stream.map(|r| r.unwrap()).collect::<Vec<_>>().await;
|
||||
|
||||
// a makeshift solution for compare large data
|
||||
if real_output != expected_output {
|
||||
let mut first_diff = 0;
|
||||
for (idx, (lhs, rhs)) in real_output.iter().zip(expected_output.iter()).enumerate() {
|
||||
if lhs != rhs {
|
||||
first_diff = idx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
println!("first diff batch at {}", first_diff);
|
||||
println!(
|
||||
"ranges: {:?}",
|
||||
ranges
|
||||
.into_iter()
|
||||
.map(|r| (r.start.to_chrono_datetime(), r.end.to_chrono_datetime()))
|
||||
.enumerate()
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
let mut full_msg = String::new();
|
||||
{
|
||||
let mut buf = Vec::with_capacity(10 * real_output.len());
|
||||
for batch in &real_output {
|
||||
for batch in real_output.iter().skip(first_diff) {
|
||||
let mut rb_json: Vec<u8> = Vec::new();
|
||||
let mut writer = ArrayWriter::new(&mut rb_json);
|
||||
writer.write(batch).unwrap();
|
||||
@@ -794,12 +890,12 @@ mod test {
|
||||
buf.push(b',');
|
||||
}
|
||||
// TODO(discord9): better ways to print buf
|
||||
let _buf = String::from_utf8_lossy(&buf);
|
||||
full_msg += &format!("case_id:{case_id}, real_output");
|
||||
let buf = String::from_utf8_lossy(&buf);
|
||||
full_msg += &format!("\ncase_id:{case_id}, real_output \n{buf}\n");
|
||||
}
|
||||
{
|
||||
let mut buf = Vec::with_capacity(10 * real_output.len());
|
||||
for batch in &expected_output {
|
||||
for batch in expected_output.iter().skip(first_diff) {
|
||||
let mut rb_json: Vec<u8> = Vec::new();
|
||||
let mut writer = ArrayWriter::new(&mut rb_json);
|
||||
writer.write(batch).unwrap();
|
||||
@@ -807,12 +903,16 @@ mod test {
|
||||
buf.append(&mut rb_json);
|
||||
buf.push(b',');
|
||||
}
|
||||
let _buf = String::from_utf8_lossy(&buf);
|
||||
full_msg += &format!("case_id:{case_id}, expected_output");
|
||||
let buf = String::from_utf8_lossy(&buf);
|
||||
full_msg += &format!("case_id:{case_id}, expected_output \n{buf}");
|
||||
}
|
||||
panic!(
|
||||
"case_{} failed, opt: {:?}, full msg: {}",
|
||||
case_id, opt, full_msg
|
||||
"case_{} failed, opt: {:?},\n real output has {} batches, {} rows, expected has {} batches with {} rows\nfull msg: {}",
|
||||
case_id, opt,
|
||||
real_output.len(),
|
||||
real_output.iter().map(|x|x.num_rows()).sum::<usize>(),
|
||||
expected_output.len(),
|
||||
expected_output.iter().map(|x|x.num_rows()).sum::<usize>(), full_msg
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ use common_datasource::lister::{Lister, Source};
use common_datasource::object_store::build_backend;
use common_datasource::util::find_dir_and_filename;
use common_meta::key::flow::flow_info::FlowInfoValue;
use common_meta::SchemaOptions;
use common_query::prelude::GREPTIME_TIMESTAMP;
use common_query::Output;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
@@ -703,6 +704,7 @@ pub fn show_create_database(database_name: &str, options: OptionMap) -> Result<O

pub fn show_create_table(
    table: TableRef,
    schema_options: Option<SchemaOptions>,
    partitions: Option<Partitions>,
    query_ctx: QueryContextRef,
) -> Result<Output> {
@@ -711,7 +713,7 @@ pub fn show_create_table(

    let quote_style = query_ctx.quote_style();

    let mut stmt = create_table_stmt(&table_info, quote_style)?;
    let mut stmt = create_table_stmt(&table_info, schema_options, quote_style)?;
    stmt.partitions = partitions.map(|mut p| {
        p.set_quote(quote_style);
        p
@@ -16,6 +16,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use common_meta::SchemaOptions;
|
||||
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaRef, COMMENT_KEY};
|
||||
use humantime::format_duration;
|
||||
use snafu::ResultExt;
|
||||
@@ -36,7 +37,8 @@ use crate::error::{
|
||||
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu,
|
||||
};
|
||||
|
||||
fn create_sql_options(table_meta: &TableMeta) -> OptionMap {
|
||||
/// Generates CREATE TABLE options from given table metadata and schema-level options.
|
||||
fn create_sql_options(table_meta: &TableMeta, schema_options: Option<SchemaOptions>) -> OptionMap {
|
||||
let table_opts = &table_meta.options;
|
||||
let mut options = OptionMap::default();
|
||||
if let Some(write_buffer_size) = table_opts.write_buffer_size {
|
||||
@@ -47,7 +49,12 @@ fn create_sql_options(table_meta: &TableMeta) -> OptionMap {
|
||||
}
|
||||
if let Some(ttl) = table_opts.ttl {
|
||||
options.insert(TTL_KEY.to_string(), format_duration(ttl).to_string());
|
||||
}
|
||||
} else if let Some(database_ttl) = schema_options.and_then(|o| o.ttl) {
|
||||
options.insert(
|
||||
TTL_KEY.to_string(),
|
||||
format_duration(database_ttl).to_string(),
|
||||
);
|
||||
};
|
||||
for (k, v) in table_opts
|
||||
.extra_options
|
||||
.iter()
|
||||
@@ -169,7 +176,11 @@ fn create_table_constraints(
|
||||
}
|
||||
|
||||
/// Create a CreateTable statement from table info.
|
||||
pub fn create_table_stmt(table_info: &TableInfoRef, quote_style: char) -> Result<CreateTable> {
|
||||
pub fn create_table_stmt(
|
||||
table_info: &TableInfoRef,
|
||||
schema_options: Option<SchemaOptions>,
|
||||
quote_style: char,
|
||||
) -> Result<CreateTable> {
|
||||
let table_meta = &table_info.meta;
|
||||
let table_name = &table_info.name;
|
||||
let schema = &table_info.meta.schema;
|
||||
@@ -195,7 +206,7 @@ pub fn create_table_stmt(table_info: &TableInfoRef, quote_style: char) -> Result
|
||||
columns,
|
||||
engine: table_meta.engine.clone(),
|
||||
constraints,
|
||||
options: create_sql_options(table_meta),
|
||||
options: create_sql_options(table_meta, schema_options),
|
||||
partitions: None,
|
||||
})
|
||||
}
|
||||
@@ -271,7 +282,7 @@ mod tests {
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let stmt = create_table_stmt(&info, '"').unwrap();
|
||||
let stmt = create_table_stmt(&info, None, '"').unwrap();
|
||||
|
||||
let sql = format!("\n{}", stmt);
|
||||
assert_eq!(
|
||||
@@ -337,7 +348,7 @@ ENGINE=mito
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let stmt = create_table_stmt(&info, '"').unwrap();
|
||||
let stmt = create_table_stmt(&info, None, '"').unwrap();
|
||||
|
||||
let sql = format!("\n{}", stmt);
|
||||
assert_eq!(
|
||||
|
||||
@@ -527,6 +527,14 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Invalid table name"))]
    InvalidTableName {
        #[snafu(source)]
        error: tonic::metadata::errors::ToStrError,
        #[snafu(implicit)]
        location: Location,
    },

    #[snafu(display("Failed to initialize a watcher for file {}", path))]
    FileWatch {
        path: String,
@@ -620,7 +628,8 @@ impl ErrorExt for Error {
            | UnsupportedContentType { .. }
            | TimestampOverflow { .. }
            | OpenTelemetryLog { .. }
            | UnsupportedJsonDataTypeForTag { .. } => StatusCode::InvalidArguments,
            | UnsupportedJsonDataTypeForTag { .. }
            | InvalidTableName { .. } => StatusCode::InvalidArguments,

            Catalog { source, .. } => source.status_code(),
            RowWriter { source, .. } => source.status_code(),
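
The new InvalidTableName variant follows the usual snafu pattern in this crate: the failed HeaderValue::to_str becomes the error source and the variant maps to StatusCode::InvalidArguments. A stripped-down sketch of the same wiring, using std::str::Utf8Error as the source so it compiles without tonic (the real variant also records an implicit Location):

    use snafu::{ResultExt, Snafu};

    #[derive(Debug, Snafu)]
    enum Error {
        #[snafu(display("Invalid table name"))]
        InvalidTableName {
            #[snafu(source)]
            error: std::str::Utf8Error,
        },
    }

    fn table_name_from_header(raw: &[u8]) -> Result<String, Error> {
        // `.context(...)` converts the Utf8Error into Error::InvalidTableName,
        // mirroring how the gRPC service wraps `HeaderValue::to_str` failures.
        Ok(std::str::from_utf8(raw).context(InvalidTableNameSnafu)?.to_string())
    }

    fn main() {
        assert!(table_name_from_header(b"my_traces").is_ok());
        assert!(table_name_from_header(&[0xff, 0xfe]).is_err());
    }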
@@ -24,10 +24,12 @@ use opentelemetry_proto::tonic::collector::trace::v1::{
    ExportTraceServiceRequest, ExportTraceServiceResponse,
};
use session::context::{Channel, QueryContext};
use snafu::OptionExt;
use snafu::{OptionExt, ResultExt};
use tonic::{Request, Response, Status};

use crate::error;
use crate::http::header::constants::GREPTIME_TRACE_TABLE_NAME_HEADER_NAME;
use crate::otlp::trace::TRACE_TABLE_NAME;
use crate::query_handler::OpenTelemetryProtocolHandlerRef;

pub struct OtlpService {
@@ -46,7 +48,15 @@ impl TraceService for OtlpService {
        &self,
        request: Request<ExportTraceServiceRequest>,
    ) -> StdResult<Response<ExportTraceServiceResponse>, Status> {
        let (_headers, extensions, req) = request.into_parts();
        let (headers, extensions, req) = request.into_parts();

        let table_name = match headers.get(GREPTIME_TRACE_TABLE_NAME_HEADER_NAME) {
            Some(table_name) => table_name
                .to_str()
                .context(error::InvalidTableNameSnafu)?
                .to_string(),
            None => TRACE_TABLE_NAME.to_string(),
        };

        let mut ctx = extensions
            .get::<QueryContext>()
@@ -55,7 +65,7 @@ impl TraceService for OtlpService {
        ctx.set_channel(Channel::Otlp);
        let ctx = Arc::new(ctx);

        let _ = self.handler.traces(req, ctx).await?;
        let _ = self.handler.traces(req, table_name, ctx).await?;

        Ok(Response::new(ExportTraceServiceResponse {
            partial_success: None,
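
Because the table name now travels as ordinary gRPC metadata, any client that can attach headers can pick its target table. A hedged sketch of a hand-rolled tonic request; the table name is made up, and opentelemetry-proto with its tonic codegen is assumed as a dependency:

    use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
    use tonic::Request;

    fn build_request() -> Request<ExportTraceServiceRequest> {
        // An empty export request, just to show where the header goes.
        let mut request = Request::new(ExportTraceServiceRequest { resource_spans: vec![] });
        // Route these spans into a custom table instead of the default trace table.
        request
            .metadata_mut()
            .insert("x-greptime-trace-table-name", "my_app_traces".parse().unwrap());
        request
    }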
@@ -48,6 +48,7 @@ pub mod constants {
    pub const GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME: &str = "x-greptime-log-pipeline-version";
    pub const GREPTIME_LOG_TABLE_NAME_HEADER_NAME: &str = "x-greptime-log-table-name";
    pub const GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME: &str = "x-greptime-log-extract-keys";
    pub const GREPTIME_TRACE_TABLE_NAME_HEADER_NAME: &str = "x-greptime-trace-table-name";
}

pub static GREPTIME_DB_HEADER_FORMAT: HeaderName =
@@ -24,6 +24,7 @@ use axum::response::IntoResponse;
use axum::{async_trait, Extension};
use bytes::Bytes;
use common_telemetry::tracing;
use http::HeaderMap;
use opentelemetry_proto::tonic::collector::logs::v1::{
    ExportLogsServiceRequest, ExportLogsServiceResponse,
};
@@ -41,11 +42,13 @@ use snafu::prelude::*;

use super::header::constants::GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME;
use super::header::{write_cost_header_map, CONTENT_TYPE_PROTOBUF};
use crate::error::{self, PipelineSnafu, Result};
use crate::error::{self, InvalidUtf8ValueSnafu, PipelineSnafu, Result};
use crate::http::header::constants::{
    GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME, GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME,
    GREPTIME_LOG_TABLE_NAME_HEADER_NAME,
    GREPTIME_LOG_TABLE_NAME_HEADER_NAME, GREPTIME_TRACE_TABLE_NAME_HEADER_NAME,
};
use crate::otlp::logs::LOG_TABLE_NAME;
use crate::otlp::trace::TRACE_TABLE_NAME;
use crate::query_handler::OpenTelemetryProtocolHandlerRef;

#[axum_macros::debug_handler]
@@ -80,10 +83,18 @@ pub async fn metrics(
#[tracing::instrument(skip_all, fields(protocol = "otlp", request_type = "traces"))]
pub async fn traces(
    State(handler): State<OpenTelemetryProtocolHandlerRef>,
    header: HeaderMap,
    Extension(mut query_ctx): Extension<QueryContext>,
    bytes: Bytes,
) -> Result<OtlpResponse<ExportTraceServiceResponse>> {
    let db = query_ctx.get_db_string();
    let table_name = extract_string_value_from_header(
        &header,
        GREPTIME_TRACE_TABLE_NAME_HEADER_NAME,
        Some(TRACE_TABLE_NAME),
    )?
    // safety here, we provide default value for table_name
    .unwrap();
    query_ctx.set_channel(Channel::Otlp);
    let query_ctx = Arc::new(query_ctx);
    let _timer = crate::metrics::METRIC_HTTP_OPENTELEMETRY_TRACES_ELAPSED
@@ -92,7 +103,7 @@ pub async fn traces(
    let request =
        ExportTraceServiceRequest::decode(bytes).context(error::DecodeOtlpRequestSnafu)?;
    handler
        .traces(request, query_ctx)
        .traces(request, table_name, query_ctx)
        .await
        .map(|o| OtlpResponse {
            resp_body: ExportTraceServiceResponse {
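
The HTTP handler reads the same choice from a request header. A hedged sketch using reqwest; the endpoint path, port, and table name are assumptions for illustration, and the body must already be a protobuf-encoded ExportTraceServiceRequest:

    use reqwest::Client;

    async fn send_traces(body: Vec<u8>) -> Result<(), reqwest::Error> {
        // Assumed endpoint; adjust host, port, and path to your deployment.
        let url = "http://127.0.0.1:4000/v1/otlp/v1/traces";
        Client::new()
            .post(url)
            .header("content-type", "application/x-protobuf")
            // Without this header the server falls back to the default trace table.
            .header("x-greptime-trace-table-name", "my_app_traces")
            .body(body)
            .send()
            .await?
            .error_for_status()?;
        Ok(())
    }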
@@ -107,17 +118,31 @@ pub struct PipelineInfo {
    pub pipeline_version: Option<String>,
}

fn pipeline_header_error(
    header: &HeaderValue,
    key: &str,
) -> StdResult<String, (http::StatusCode, String)> {
    let header_utf8 = str::from_utf8(header.as_bytes());
    match header_utf8 {
        Ok(s) => Ok(s.to_string()),
        Err(_) => Err((
fn parse_header_value_to_string(header: &HeaderValue) -> Result<String> {
    String::from_utf8(header.as_bytes().to_vec()).context(InvalidUtf8ValueSnafu)
}

fn extract_string_value_from_header(
    headers: &HeaderMap,
    header: &str,
    default_table_name: Option<&str>,
) -> Result<Option<String>> {
    let table_name = headers.get(header);
    match table_name {
        Some(name) => parse_header_value_to_string(name).map(Some),
        None => match default_table_name {
            Some(name) => Ok(Some(name.to_string())),
            None => Ok(None),
        },
    }
}

fn utf8_error(header_name: &str) -> impl Fn(error::Error) -> (StatusCode, String) + use<'_> {
    move |_| {
        (
            StatusCode::BAD_REQUEST,
            format!("`{}` header is not valid UTF-8 string type.", key),
        )),
            format!("`{}` header is not valid UTF-8 string type.", header_name),
        )
    }
}
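
The helper above centralizes a "header value, else default, else nothing" rule that the extractors below reuse. A simplified, self-contained sketch of the same resolution over an http::HeaderMap, with error handling reduced to Option and the default strings standing in for LOG_TABLE_NAME / TRACE_TABLE_NAME:

    use http::HeaderMap;

    fn resolve_header(headers: &HeaderMap, name: &str, default: Option<&str>) -> Option<String> {
        match headers.get(name) {
            // Present: take the header value (only if it is valid UTF-8 here).
            Some(value) => value.to_str().ok().map(|s| s.to_string()),
            // Absent: fall back to the default, if one was supplied.
            None => default.map(|s| s.to_string()),
        }
    }

    fn main() {
        let mut headers = HeaderMap::new();
        headers.insert("x-greptime-log-table-name", "app_logs".parse().unwrap());

        assert_eq!(
            resolve_header(&headers, "x-greptime-log-table-name", Some("opentelemetry_logs")),
            Some("app_logs".to_string())
        );
        assert_eq!(
            resolve_header(&headers, "x-greptime-trace-table-name", Some("opentelemetry_traces")),
            Some("opentelemetry_traces".to_string())
        );
        assert_eq!(resolve_header(&headers, "x-greptime-log-pipeline-name", None), None);
    }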
@@ -129,28 +154,27 @@ where
    type Rejection = (StatusCode, String);

    async fn from_request_parts(parts: &mut Parts, _state: &S) -> StdResult<Self, Self::Rejection> {
        let pipeline_name = parts.headers.get(GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME);
        let pipeline_version = parts.headers.get(GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME);
        let headers = &parts.headers;
        let pipeline_name =
            extract_string_value_from_header(headers, GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME, None)
                .map_err(utf8_error(GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME))?;
        let pipeline_version = extract_string_value_from_header(
            headers,
            GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME,
            None,
        )
        .map_err(utf8_error(GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME))?;
        match (pipeline_name, pipeline_version) {
            (Some(name), Some(version)) => Ok(PipelineInfo {
                pipeline_name: Some(pipeline_header_error(
                    name,
                    GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME,
                )?),
                pipeline_version: Some(pipeline_header_error(
                    version,
                    GREPTIME_LOG_PIPELINE_VERSION_HEADER_NAME,
                )?),
                pipeline_name: Some(name),
                pipeline_version: Some(version),
            }),
            (None, _) => Ok(PipelineInfo {
                pipeline_name: None,
                pipeline_version: None,
            }),
            (Some(name), None) => Ok(PipelineInfo {
                pipeline_name: Some(pipeline_header_error(
                    name,
                    GREPTIME_LOG_PIPELINE_NAME_HEADER_NAME,
                )?),
                pipeline_name: Some(name),
                pipeline_version: None,
            }),
        }
@@ -169,16 +193,16 @@ where
    type Rejection = (StatusCode, String);

    async fn from_request_parts(parts: &mut Parts, _state: &S) -> StdResult<Self, Self::Rejection> {
        let table_name = parts.headers.get(GREPTIME_LOG_TABLE_NAME_HEADER_NAME);
        let table_name = extract_string_value_from_header(
            &parts.headers,
            GREPTIME_LOG_TABLE_NAME_HEADER_NAME,
            Some(LOG_TABLE_NAME),
        )
        .map_err(utf8_error(GREPTIME_LOG_TABLE_NAME_HEADER_NAME))?
        // safety here, we provide default value for table_name
        .unwrap();

        match table_name {
            Some(name) => Ok(TableInfo {
                table_name: pipeline_header_error(name, GREPTIME_LOG_TABLE_NAME_HEADER_NAME)?,
            }),
            None => Ok(TableInfo {
                table_name: "opentelemetry_logs".to_string(),
            }),
        }
        Ok(TableInfo { table_name })
    }
}
@@ -192,16 +216,19 @@ where
    type Rejection = (StatusCode, String);

    async fn from_request_parts(parts: &mut Parts, _state: &S) -> StdResult<Self, Self::Rejection> {
        let select = parts.headers.get(GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME);
        let select = extract_string_value_from_header(
            &parts.headers,
            GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME,
            None,
        )
        .map_err(utf8_error(GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME))?;

        match select {
            Some(name) => {
                let select_header =
                    pipeline_header_error(name, GREPTIME_LOG_EXTRACT_KEYS_HEADER_NAME)?;
                if select_header.is_empty() {
                if name.is_empty() {
                    Ok(SelectInfoWrapper(Default::default()))
                } else {
                    Ok(SelectInfoWrapper(SelectInfo::from(select_header)))
                    Ok(SelectInfoWrapper(SelectInfo::from(name)))
                }
            }
            None => Ok(SelectInfoWrapper(Default::default())),
@@ -75,6 +75,8 @@ pub mod handler {

        info!("finish pprof");

        info!("Dump data success, size: {}", body.len());

        Ok((StatusCode::OK, body))
    }
}
@@ -201,6 +201,7 @@ pub trait PromQueryInterceptor {
    fn pre_execute(
        &self,
        _query: &PromQuery,
        _plan: Option<&LogicalPlan>,
        _query_ctx: QueryContextRef,
    ) -> Result<(), Self::Error> {
        Ok(())
@@ -229,10 +230,11 @@ where
    fn pre_execute(
        &self,
        query: &PromQuery,
        plan: Option<&LogicalPlan>,
        query_ctx: QueryContextRef,
    ) -> Result<(), Self::Error> {
        if let Some(this) = self {
            this.pre_execute(query, query_ctx)
            this.pre_execute(query, plan, query_ctx)
        } else {
            Ok(())
        }
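
With the extra parameter, interceptors can inspect the planned query, not just the PromQL text. A standalone illustration (not an implementation of the trait itself) of how an implementor's pre_execute might use the optional DataFusion LogicalPlan:

    use datafusion_expr::LogicalPlan;

    /// The kind of check a `pre_execute` hook could now run: look at the plan
    /// when the caller supplies one, fall back to the raw query text otherwise.
    fn inspect(query: &str, plan: Option<&LogicalPlan>) {
        match plan {
            Some(plan) => println!("PromQL {:?} planned as:\n{}", query, plan.display_indent()),
            None => println!("PromQL {:?} (no logical plan available)", query),
        }
    }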
@@ -302,7 +302,7 @@ mod test {
        let result = check(query, QueryContext::arc(), session.clone());
        assert!(result.is_none());

        let query = "select versiona";
        let query = "select version";
        let output = check(query, QueryContext::arc(), session.clone());
        assert!(output.is_none());
@@ -249,7 +249,7 @@ impl<W: AsyncWrite + Send + Sync + Unpin> AsyncMysqlShim<W> for MysqlInstanceShi
        self.auth_plugin()
    }

    async fn auth_plugin_for_username(&self, _user: &[u8]) -> &str {
    async fn auth_plugin_for_username<'a, 'user>(&'a self, _user: &'user [u8]) -> &'a str {
        self.auth_plugin()
    }
@@ -14,5 +14,5 @@

pub mod logs;
pub mod metrics;
pub mod plugin;
pub mod trace;
mod utils;
Some files were not shown because too many files have changed in this diff.