Compare commits

...

27 Commits

Author SHA1 Message Date
zyy17
0847ff36ce fix: config test failed and use similar_asserts::assert_eq to replace assert_eq for long string compare (#4731)
* fix: config test failed and use 'similar_asserts::assert_eq' to replace 'assert_eq' for long string compare

* Update Cargo.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/cmd/tests/load_config_test.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-09-18 07:41:25 +00:00
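For readers unfamiliar with the crate added here: `similar_asserts::assert_eq!` is a drop-in replacement for the standard `assert_eq!` that prints a diff on mismatch instead of dumping both values in full, which is much easier to read when comparing long strings such as rendered config files. A minimal, illustrative test (not code from the PR):

```rust
// The `use` shadows the std `assert_eq!` macro with the diff-printing one
// from the `similar-asserts` crate added in this compare.
use similar_asserts::assert_eq;

#[test]
fn compare_long_strings_with_diff_output() {
    let expected = "[wal]\nprovider = \"raft_engine\"\nfile_size = \"256MB\"\n";
    let actual = "[wal]\nprovider = \"raft_engine\"\nfile_size = \"256MB\"\n";
    // On failure this prints a unified diff of the two strings rather than
    // printing both strings in full, as std's assert_eq! would.
    assert_eq!(expected, actual);
}
```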
shuiyisong
c014e875f3 chore: add auto-decompression layer for otlp http request (#4723)
* chore: add auto-decompression for http request

* test: otlp
2024-09-18 04:32:00 +00:00
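The decompression layer itself is wired into the HTTP server setup; as a rough sketch of the operation it performs (not the actual middleware, and the function below is made up), a gzip-encoded request body is inflated with flate2 before being handed to the protobuf decoder, while uncompressed bodies pass through untouched. (`flate2` also appears in the test dependencies in the Cargo.lock diff below.)

```rust
use std::io::Read;

use flate2::read::GzDecoder;

/// Illustrative only: inflate the body when the client sent
/// `Content-Encoding: gzip`; otherwise return the bytes unchanged.
fn maybe_decompress(body: &[u8], content_encoding: Option<&str>) -> std::io::Result<Vec<u8>> {
    match content_encoding {
        Some("gzip") => {
            let mut decoder = GzDecoder::new(body);
            let mut decompressed = Vec::new();
            decoder.read_to_end(&mut decompressed)?;
            Ok(decompressed)
        }
        _ => Ok(body.to_vec()),
    }
}
```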
Zhenchi
3b5b906543 feat(index): add explicit adapter between RangeReader and AsyncRead (#4724)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-09-18 03:33:55 +00:00
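The shape of such an adapter, sketched with hypothetical names (these are not the crate's actual trait or struct definitions): a range-based reader can serve arbitrary byte ranges of an object, and the adapter turns it into a sequential `futures::io::AsyncRead` by tracking the current read offset. For simplicity this sketch assumes the underlying range read is synchronous; the real adapter has to drive an async read future inside `poll_read`.

```rust
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll};

use futures::io::AsyncRead;

/// Hypothetical stand-in for a range-based reader: it can serve any byte
/// range of an object and knows the object's total length.
pub trait BlockingRangeReader {
    fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<usize>;
    fn total_len(&self) -> u64;
}

/// Adapter exposing a range reader as a sequential `AsyncRead` by keeping
/// track of the current offset.
pub struct AsyncReadAdapter<R> {
    inner: R,
    offset: u64,
}

impl<R: BlockingRangeReader + Unpin> AsyncRead for AsyncReadAdapter<R> {
    fn poll_read(
        self: Pin<&mut Self>,
        _cx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        let this = self.get_mut();
        if this.offset >= this.inner.total_len() {
            return Poll::Ready(Ok(0)); // EOF
        }
        let result = this.inner.read_at(this.offset, buf);
        if let Ok(n) = &result {
            this.offset += *n as u64;
        }
        Poll::Ready(result)
    }
}
```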
Weny Xu
d1dfffcdaf chore: enable fuzz test for append table (#4702)
* chore: enable fuzz test for append table

* fix: fix mysql translator
2024-09-18 03:01:30 +00:00
localhost
36b1bafbf0 fix: pipeline dissect error is returned directly to the user, instead of printing a warn log (#4709)
* fix: pipeline dissect error is returned directly to the user, instead of printing a warn log

* chore: add more test for pipeline
2024-09-12 18:21:05 +00:00
Yohan Wal
67fb3d003e feat: add respective get_by_path UDFs for JSON type (#4720)
* feat: add respective get_by_path UDF for JSON type

* Apply review comments

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fix: fix compile error

* refactor: change name of UDFs, add some tests

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-09-11 08:17:57 +00:00
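Conceptually, these UDFs extract a nested value from a JSON column by path, backed by the binary jsonb encoding added elsewhere in this compare. A rough illustration of the idea using `serde_json`'s JSON Pointer support (not the UDF code; the jsonb-based implementation uses its own path syntax):

```rust
use serde_json::Value;

/// Illustrative only: parse a JSON document and extract the value at a
/// JSON Pointer path, returning None when the path does not match.
fn get_by_pointer(doc: &str, pointer: &str) -> Option<Value> {
    let value: Value = serde_json::from_str(doc).ok()?;
    value.pointer(pointer).cloned()
}

fn main() {
    let doc = r#"{"host": {"name": "dn-1", "tags": ["prod", "db"]}}"#;
    assert_eq!(
        get_by_pointer(doc, "/host/tags/0"),
        Some(Value::String("prod".to_string()))
    );
}
```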
zyy17
aa03d3b11c docs: use docs comment prefix and bump toml2docs version (#4711) 2024-09-11 07:49:23 +00:00
discord9
a3d567f0c9 perf(flow): use batch mode for flow (#4599)
* generic bundle trait

* feat: impl get/let

* fix: drop batch

* test: tumble batch

* feat: use batch eval flow

* fix: div use arrow::div not mul

* perf: not append batch

* perf: use bool mask for reduce

* perf: tiny opt

* perf: refactor slow path

* feat: opt if then

* fix: WIP

* perf: if then

* chore: use trace instead

* fix: reduce missing non-first batch

* perf: flow if then using interleave

* docs: add TODO

* perf: remove unnecessary eq

* chore: remove unused import

* fix: run_available no longer loop forever

* feat: blocking on high input buf

* chore: increase threshold

* chore: after rebase

* chore: per review

* chore: per review

* fix: allow empty values in reduce&test

* tests: more flow doc example tests

* chore: per review

* chore: per review
2024-09-11 03:31:52 +00:00
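One bullet above, "perf: flow if then using interleave", refers to arrow's `interleave` kernel, which assembles an output array by picking, for each output row, an element from one of several source arrays. A hedged sketch of how an if-then over two pre-evaluated branch batches can be built that way (the function and data here are made up for illustration):

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, BooleanArray, Int64Array};
use arrow::compute::interleave;
use arrow::error::ArrowError;

/// Illustrative if-then over pre-evaluated branches: for each row, take the
/// value from `then_branch` when the predicate is true, else from `else_branch`.
fn if_then(
    predicate: &BooleanArray,
    then_branch: &ArrayRef,
    else_branch: &ArrayRef,
) -> Result<ArrayRef, ArrowError> {
    // `interleave` consumes (array_index, row_index) pairs:
    // array 0 is the then-branch, array 1 is the else-branch.
    let indices: Vec<(usize, usize)> = predicate
        .iter()
        .enumerate()
        .map(|(row, cond)| if cond.unwrap_or(false) { (0, row) } else { (1, row) })
        .collect();
    interleave(&[then_branch.as_ref(), else_branch.as_ref()], &indices)
}

fn main() -> Result<(), ArrowError> {
    let predicate = BooleanArray::from(vec![true, false, true]);
    let then_branch: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
    let else_branch: ArrayRef = Arc::new(Int64Array::from(vec![10, 20, 30]));
    let result = if_then(&predicate, &then_branch, &else_branch)?;
    println!("{result:?}"); // values: 1, 20, 3
    Ok(())
}
```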
Zhenchi
f252599ac6 feat(index): add RangeReader trait (#4718)
* feat(index): add `RangeReader` trait

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: return content_length as read bytes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: remove buffer & use `BufMut`

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-09-10 15:24:06 +00:00
Ruihang Xia
ff40d512bd fix: support append-only physical table (#4716)
* fix: support append-only physical table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/metric-engine/src/engine/create.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2024-09-10 12:23:23 +00:00
jeremyhi
dcae21208b chore: refresh route table (#4673)
* chore: remove error::

* chore: avoid to use get_raw if unnecessary

* chore: clearer method name

* feat: remap node addresses in table route

* chore: add unit test for remap address

* feat: refresh node address mapping via heartbeat

* feat: broadcast table cache invalidate on new epoch

* chore: clarify heartbeat log

* chore: remove InvalidHeartbeatRequest

* chore: add log

* feat: add role into NodeAddressKey

* chore: fix test

* Update src/common/meta/src/key/table_route.rs

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

* chore: simplify code

---------

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-09-10 12:08:59 +00:00
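The "remap node addresses in table route" step amounts to rewriting stale peer addresses from the freshest id-to-address mapping reported via heartbeats. A hedged sketch with made-up types (the actual code operates on the metasrv's table route metadata):

```rust
use std::collections::HashMap;

/// Made-up peer type for illustration: a datanode id plus its advertised address.
#[derive(Debug, Clone, PartialEq)]
struct Peer {
    id: u64,
    addr: String,
}

/// Rewrite peer addresses using the latest id -> address mapping; peers
/// without a newer address are left untouched.
fn remap_addresses(peers: &mut [Peer], latest: &HashMap<u64, String>) {
    for peer in peers.iter_mut() {
        if let Some(addr) = latest.get(&peer.id) {
            peer.addr.clone_from(addr);
        }
    }
}

fn main() {
    let mut peers = vec![Peer { id: 1, addr: "10.0.0.1:4001".to_string() }];
    let latest = HashMap::from([(1u64, "10.0.0.9:4001".to_string())]);
    remap_addresses(&mut peers, &latest);
    assert_eq!(peers[0].addr, "10.0.0.9:4001");
}
```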
Weny Xu
d0fd79ac7f chore: remove validate_request_with_table (#4710)
perf: remove `validate_request_with_table`
2024-09-10 11:56:18 +00:00
Yingwen
3e17c09e45 feat: skip caching uncompressed pages if they are large (#4705)
* feat: cache each uncompressed page

* chore: remove unused function

* chore: log

* chore: log

* chore: row group pages cache kv

* feat: also support row group level cache

* chore: fix range count

* feat: don't cache compressed page for row group cache

* feat: use function to get part

* chore: log whether scan is from compaction

* chore: avoid get column

* feat: add timer metrics

* chore: Revert "feat: add timer metrics"

This reverts commit 4618f57fa2ba13b1e1a8dec83afd01c00ae4c867.

* feat: don't cache individual uncompressed page

* feat: append in row group level under append mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: fetch pages cost

* perf: yield

* Update src/mito2/src/sst/parquet/row_group.rs

* refactor: cache key

* feat: print file num and row groups num in explain

* test: update sqlness test

* chore: Update src/mito2/src/sst/parquet/page_reader.rs

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-10 11:52:16 +00:00
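The key idea in the title is a size cutoff: an uncompressed page is only worth caching while it is small, since one huge page can evict many small, frequently-hit pages. A simplified sketch of that decision with made-up names and threshold (the real cache in this PR works at row-group granularity, as the bullets above describe):

```rust
use std::collections::HashMap;

/// Illustrative page cache that skips large uncompressed pages entirely.
struct PageCache {
    max_uncompressed_page_size: usize,
    pages: HashMap<(u64, usize), Vec<u8>>, // (file_id, page_index) -> page bytes
}

impl PageCache {
    fn maybe_insert(&mut self, key: (u64, usize), uncompressed: Vec<u8>) {
        if uncompressed.len() <= self.max_uncompressed_page_size {
            self.pages.insert(key, uncompressed);
        }
        // Otherwise skip caching: the page can be re-read (and decoded again)
        // on the next access instead of crowding out smaller pages.
    }
}
```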
jeremyhi
04de3ed929 chore: avoid schema check when auto_create_table_hint is disabled (#4712)
chore: avoid schema check when auto-create-table-hint is disabled
2024-09-10 07:13:28 +00:00
Ruihang Xia
29f215531a feat: parallel in row group level under append mode (#4704)
feat: append in row group level under append mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-10 07:12:23 +00:00
jeremyhi
545a80c6e0 chore: remove unused method (#4703) 2024-09-09 12:14:17 +00:00
Yohan Wal
04e7dd6fd5 feat: add json data type (#4619)
* feat: add json type and vector

* fix: allow to create and insert json data

* feat: udf to query json as string

* refactor: remove JsonbValue and JsonVector

* feat: show json value as strings

* chore: make ci happy

* test: add unit test and sqlness test

* refactor: use binary as grpc value of json

* fix: use non-preserve-order jsonb

* test: revert changed test

* refactor: change udf get_by_path to jq

* chore: make ci happy

* fix: distinguish binary and json in proto

* chore: delete udf for future pr

* refactor: remove Value(Json)

* chore: follow review comments

* test: some tests and checks

* test: fix unit tests

* chore: follow review comments

* chore: corresponding changes to proto

* fix: change grpc and pgsql server behavior alongside with sqlness/crud tests

* chore: follow review comments

* feat: udf of conversions between json and strings, used for grpc server

* refactor: rename to_string to json_to_string

* test: add more sqlness test for json

* chore: thanks for review :)

* Apply suggestions from code review

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-09-09 11:41:36 +00:00
jeremyhi
dc89944570 feat: gRPC auto create table hint (#4700)
* feat: gRPC auto create table hint

* chore: remove the checking of auto_create_table_hint
2024-09-09 09:07:07 +00:00
Weny Xu
8bf549c2fa chore: print downgraded region last_entry_id (#4701) 2024-09-09 08:14:55 +00:00
Lei, HUANG
208afe402b feat(wal): increase recovery parallelism (#4689)
* Refactor RaftEngineLogStore to use references for config

 - Updated `RaftEngineLogStore::try_new` to accept a reference to `RaftEngineConfig` instead of taking ownership.
 - Replaced direct usage of `config` with individual fields (`sync_write`, `sync_period`, `read_batch_size`).
 - Adjusted test cases to pass references to `RaftEngineConfig`.

* Add parallelism configuration for WAL recovery

 - Introduced `recovery_parallelism` setting in `datanode.example.toml` and `standalone.example.toml` for configuring parallelism during WAL recovery.
 - Updated `Cargo.lock` and `Cargo.toml` to include `num_cpus` dependency.
 - Modified `RaftEngineConfig` to include `recovery_parallelism` with a default value set to the number of CPU cores.

* feat/wal-recovery-parallelism:
 Add `wal.recovery_parallelism` configuration option

 - Introduced `wal.recovery_parallelism` to config.md for specifying parallelism during WAL recovery.
 - Updated `RaftEngineLogStore` to include `recovery_threads` from the new configuration.

* fix: ut
2024-09-09 04:25:24 +00:00
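The new `wal.recovery_parallelism` option also appears in the config.md table further down (documented default `2`). As a hedged sketch of how such a knob is typically declared with a serde default, using approximate field names rather than the exact GreptimeDB structs:

```rust
use serde::Deserialize;

fn default_recovery_parallelism() -> usize {
    2
}

/// Approximate shape of the raft-engine WAL options with the new knob; the
/// real struct lives in the log-store crate and carries many more fields.
#[derive(Debug, Deserialize)]
struct RaftEngineWalConfig {
    /// Number of threads used to replay WAL entries on startup.
    #[serde(default = "default_recovery_parallelism")]
    recovery_parallelism: usize,
}

fn main() {
    // The field falls back to its default when omitted from the TOML file.
    let cfg: RaftEngineWalConfig = toml::from_str("").unwrap();
    assert_eq!(cfg.recovery_parallelism, 2);
}
```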
Ning Sun
c22a398f59 fix: return version string based on request protocol (#4680)
* fix: return version string based on request protocol

* fix: resolve lint issue
2024-09-09 03:36:54 +00:00
JohnsonLee
a8477e4142 fix: table resolving logic related to pg_catalog (#4580)
* fix: table resolving logic related to pg_catalog

refer to
https://github.com/GreptimeTeam/greptimedb/issues/3560#issuecomment-2287794348
and #4543

* refactor: remove CatalogProtocol type

* fix: sqlness

* fix: forbid create database pg_catalog with mysql client

* refactor: use QueryContext as arguments rather than Channel

* refactor: pass None as default behaviour in information_schema

* test: fix test
2024-09-09 00:47:59 +00:00
Yiran
b950e705f5 chore: update the document link in README.md (#4690) 2024-09-07 15:27:32 +00:00
Ruihang Xia
d2d62e0c6f fix: unconditional statistics (#4694)
* fix: unconditional statistics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-07 04:28:11 +00:00
localhost
5d9f8a3be7 feat: add test pipeline api (#4667)
* chore: add test pipeline api

* chore: add test for test pipeline api

* chore: fix taplo check

* chore: change pipeline dryrun api path

* chore: add more info for pipeline dryrun api
2024-09-06 08:36:49 +00:00
jeremyhi
e88465840d feat: add extension field to HeartbeatRequest (#4688)
* feat: add extension field to HeartbeatRequest

* chore: extension to extensions

* chore: upgrade proto
2024-09-06 08:29:20 +00:00
localhost
67d95d2088 refactor!: add processor builder and transform builder (#4571)
* chore: add processor builder and transform builder

* chore: in process

* chore: intermediate state from hashmap to vector in pipeline

* chore: remove useless code and rename some struct

* chore: fix typos

* chore: format code

* chore: add error handling and optimize code readability

* chore: fix typos

* chore: remove useless code

* chore: add some doc

* chore: fix by pr commit

* chore: remove useless code and change struct name

* chore: modify the location of the find_key_index function.
2024-09-06 07:51:08 +00:00
218 changed files with 8931 additions and 4416 deletions

Cargo.lock (generated)

@@ -1175,6 +1175,17 @@ dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "bstr"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [
"memchr",
"regex-automata 0.4.7",
"serde",
]
[[package]]
name = "btoi"
version = "0.4.3"
@@ -1761,6 +1772,7 @@ dependencies = [
"serde_json",
"servers",
"session",
"similar-asserts",
"snafu 0.8.4",
"store-api",
"substrait 0.9.3",
@@ -1812,10 +1824,12 @@ name = "common-base"
version = "0.9.3"
dependencies = [
"anymap",
"async-trait",
"bitvec",
"bytes",
"common-error",
"common-macro",
"futures",
"paste",
"serde",
"snafu 0.8.4",
@@ -1952,6 +1966,7 @@ dependencies = [
"datatypes",
"geohash",
"h3o",
"jsonb",
"num",
"num-traits",
"once_cell",
@@ -2293,6 +2308,7 @@ dependencies = [
"common-telemetry",
"futures-util",
"humantime-serde",
"num_cpus",
"rskafka",
"rustls 0.23.10",
"rustls-native-certs",
@@ -3156,6 +3172,7 @@ dependencies = [
"arrow",
"arrow-array",
"arrow-schema",
"base64 0.21.7",
"common-base",
"common-decimal",
"common-error",
@@ -3164,6 +3181,8 @@ dependencies = [
"common-time",
"datafusion-common",
"enum_dispatch",
"greptime-proto",
"jsonb",
"num",
"num-traits",
"ordered-float 3.9.2",
@@ -3696,6 +3715,12 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fast-float"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c"
[[package]]
name = "fastdivide"
version = "0.4.1"
@@ -4300,7 +4325,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=c437b55725b7f5224fe9d46db21072b4a682ee4b#c437b55725b7f5224fe9d46db21072b4a682ee4b"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=973f49cde88a582fb65755cc572ebcf6fb93ccf7#973f49cde88a582fb65755cc572ebcf6fb93ccf7"
dependencies = [
"prost 0.12.6",
"serde",
@@ -5407,6 +5432,21 @@ dependencies = [
"serde",
]
[[package]]
name = "jsonb"
version = "0.4.1"
source = "git+https://github.com/CookiePieWw/jsonb.git?rev=d0166c130fce903bf6c58643417a3173a6172d31#d0166c130fce903bf6c58643417a3173a6172d31"
dependencies = [
"byteorder",
"fast-float",
"itoa",
"nom",
"ordered-float 4.2.0",
"rand",
"ryu",
"serde_json",
]
[[package]]
name = "jsonpath-rust"
version = "0.5.1"
@@ -7200,9 +7240,11 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8fddc9b68f5b80dae9d6f510b88e02396f006ad48cac349411fbecc80caae4"
dependencies = [
"hex",
"opentelemetry 0.22.0",
"opentelemetry_sdk 0.22.1",
"prost 0.12.6",
"serde",
"tonic 0.11.0",
]
@@ -8060,6 +8102,8 @@ dependencies = [
"chrono",
"fallible-iterator",
"postgres-protocol",
"serde",
"serde_json",
]
[[package]]
@@ -9625,7 +9669,7 @@ source = "git+https://github.com/discord9/RustPython?rev=9ed5137412#9ed51374125b
dependencies = [
"ascii",
"bitflags 1.3.2",
"bstr",
"bstr 0.2.17",
"cfg-if",
"hexf-parse",
"itertools 0.10.5",
@@ -9660,7 +9704,7 @@ version = "0.2.0"
source = "git+https://github.com/discord9/RustPython?rev=9ed5137412#9ed51374125b5f1a9e5cee5dd7e27023b8591f1e"
dependencies = [
"bitflags 1.3.2",
"bstr",
"bstr 0.2.17",
"itertools 0.10.5",
"lz4_flex 0.9.5",
"num-bigint",
@@ -9813,7 +9857,7 @@ dependencies = [
"ascii",
"atty",
"bitflags 1.3.2",
"bstr",
"bstr 0.2.17",
"caseless",
"cfg-if",
"chrono",
@@ -10398,6 +10442,7 @@ dependencies = [
"hyper 0.14.29",
"influxdb_line_protocol",
"itertools 0.10.5",
"jsonb",
"lazy_static",
"mime_guess",
"mysql_async",
@@ -10580,6 +10625,26 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "similar"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e"
dependencies = [
"bstr 1.10.0",
"unicode-segmentation",
]
[[package]]
name = "similar-asserts"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe85670573cd6f0fa97940f26e7e6601213c3b0555246c24234131f88c5709e"
dependencies = [
"console",
"similar",
]
[[package]]
name = "simple_asn1"
version = "0.6.2"
@@ -10777,6 +10842,7 @@ dependencies = [
"hex",
"iso8601",
"itertools 0.10.5",
"jsonb",
"lazy_static",
"regex",
"serde_json",
@@ -11743,6 +11809,7 @@ dependencies = [
"datanode",
"datatypes",
"dotenv",
"flate2",
"flow",
"frontend",
"futures",
@@ -11766,6 +11833,7 @@ dependencies = [
"serde_json",
"servers",
"session",
"similar-asserts",
"snafu 0.8.4",
"sql",
"sqlx",

Cargo.toml

@@ -120,10 +120,11 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c437b55725b7f5224fe9d46db21072b4a682ee4b" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
mockall = "0.11.4"
@@ -135,6 +136,7 @@ opentelemetry-proto = { version = "0.5", features = [
"gen-tonic",
"metrics",
"trace",
"with-serde",
] }
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
@@ -167,6 +169,7 @@ shadow-rs = "0.31"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
similar-asserts = "1.6.0"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
"visitor",

Makefile

@@ -221,7 +221,7 @@ config-docs: ## Generate configuration documentation from toml files.
docker run --rm \
-v ${PWD}:/greptimedb \
-w /greptimedb/config \
toml2docs/toml2docs:v0.1.1 \
toml2docs/toml2docs:v0.1.3 \
-p '##' \
-t ./config-docs-template.md \
-o ./config.md

README.md

@@ -74,7 +74,7 @@ Our core developers have been building time-series data platforms for years. Bas
* **Compatible with InfluxDB, Prometheus and more protocols**
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/clients/overview).
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/protocols/overview).
## Try GreptimeDB

config.md

@@ -14,7 +14,7 @@
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `runtime` | -- | -- | The runtime options. |
@@ -29,8 +29,8 @@
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
| `grpc.tls.cert_path` | String | `None` | Certificate file path. |
| `grpc.tls.key_path` | String | `None` | Private key file path. |
| `grpc.tls.cert_path` | String | Unset | Certificate file path. |
| `grpc.tls.key_path` | String | Unset | Private key file path. |
| `grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
| `mysql` | -- | -- | MySQL server options. |
| `mysql.enable` | Bool | `true` | Whether to enable. |
@@ -38,8 +38,8 @@
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
| `mysql.tls.key_path` | String | `None` | Private key file path. |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
| `mysql.tls.key_path` | String | Unset | Private key file path. |
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `postgres` | -- | -- | PostgresSQL server options. |
| `postgres.enable` | Bool | `true` | Whether to enable |
@@ -47,8 +47,8 @@
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
| `postgres.tls.key_path` | String | `None` | Private key file path. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
| `postgres.tls.key_path` | String | Unset | Private key file path. |
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
@@ -59,7 +59,7 @@
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -68,6 +68,7 @@
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
@@ -90,22 +91,22 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | Unset | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | Unset | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | Unset | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | Unset | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | Unset | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | Unset | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | Unset | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | Unset | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | Unset | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | Unset | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | Unset | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
| `region_engine.mito` | -- | -- | The Mito engine options. |
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -115,16 +116,16 @@
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
| `region_engine.mito.sst_meta_cache_size` | String | Auto | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
@@ -154,7 +155,7 @@
| `region_engine.file` | -- | -- | Enable the file engine. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
@@ -164,13 +165,13 @@
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended to collect metrics generated by itself<br/>You must create the database before enabling it. |
| `export_metrics.self_import.db` | String | Unset | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | `None` | The tokio console address. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
## Distributed Mode
@@ -179,7 +180,7 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `default_timezone` | String | `None` | The default timezone of the server. |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -196,8 +197,8 @@
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
| `grpc.tls.cert_path` | String | `None` | Certificate file path. |
| `grpc.tls.key_path` | String | `None` | Private key file path. |
| `grpc.tls.cert_path` | String | Unset | Certificate file path. |
| `grpc.tls.key_path` | String | Unset | Private key file path. |
| `grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
| `mysql` | -- | -- | MySQL server options. |
| `mysql.enable` | Bool | `true` | Whether to enable. |
@@ -205,8 +206,8 @@
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | `None` | Certificate file path. |
| `mysql.tls.key_path` | String | `None` | Private key file path. |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
| `mysql.tls.key_path` | String | Unset | Private key file path. |
| `mysql.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `postgres` | -- | -- | PostgresSQL server options. |
| `postgres.enable` | Bool | `true` | Whether to enable |
@@ -214,8 +215,8 @@
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | `None` | Certificate file path. |
| `postgres.tls.key_path` | String | `None` | Private key file path. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
| `postgres.tls.key_path` | String | Unset | Private key file path. |
| `postgres.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload |
| `opentsdb` | -- | -- | OpenTSDB protocol options. |
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
@@ -240,7 +241,7 @@
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
@@ -251,12 +252,12 @@
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.self_import.db` | String | Unset | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | `None` | The tokio console address. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
### Metasrv
@@ -305,7 +306,7 @@
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
@@ -316,12 +317,12 @@
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommend to collect metrics generated by itself<br/>You must create the database before enabling it. |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.self_import.db` | String | Unset | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The url the metrics send to. The url example can be: `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers of Prometheus remote-write carry. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | `None` | The tokio console address. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
### Datanode
@@ -329,16 +330,16 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `node_id` | Integer | `None` | The datanode identifier and should be unique in the cluster. |
| `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `rpc_addr` | String | `None` | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | `None` | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | `None` | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | `None` | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | `None` | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
@@ -351,8 +352,8 @@
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
| `grpc.tls.cert_path` | String | `None` | Certificate file path. |
| `grpc.tls.key_path` | String | `None` | Private key file path. |
| `grpc.tls.cert_path` | String | Unset | Certificate file path. |
| `grpc.tls.key_path` | String | Unset | Private key file path. |
| `grpc.tls.watch` | Bool | `false` | Watch for Certificate and key file change and auto reload.<br/>For now, gRPC tls config does not support auto reload. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -372,7 +373,7 @@
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | `None` | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `256MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -381,6 +382,7 @@
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.recovery_parallelism` | Integer | `2` | Parallelism during WAL recovery. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
| `wal.consumer_wait_timeout` | String | `100ms` | The consumer wait timeout.<br/>**It's only used when the provider is `kafka`**. |
@@ -393,22 +395,22 @@
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | `None` | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | `None` | The local file cache capacity in bytes. |
| `storage.bucket` | String | `None` | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | `None` | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | `None` | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | `None` | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | `None` | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | `None` | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | `None` | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | `None` | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | `None` | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | `None` | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | `None` | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | `None` | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | `None` | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
| `storage.secret_access_key` | String | Unset | The secret access key of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3`**. |
| `storage.access_key_secret` | String | Unset | The secret access key of the aliyun account.<br/>**It's only used when the storage type is `Oss`**. |
| `storage.account_name` | String | Unset | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.account_key` | String | Unset | The account key of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.scope` | String | Unset | The scope of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential_path` | String | Unset | The credential path of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.credential` | String | Unset | The credential of the google cloud storage.<br/>**It's only used when the storage type is `Gcs`**. |
| `storage.container` | String | Unset | The container of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.sas_token` | String | Unset | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
| `storage.endpoint` | String | Unset | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `storage.region` | String | Unset | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
| `region_engine.mito` | -- | -- | The Mito engine options. |
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
@@ -418,16 +420,16 @@
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | `1GB` | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | Auto | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
@@ -455,7 +457,7 @@
| `region_engine.file` | -- | -- | Enable the file engine. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
@@ -466,12 +468,12 @@
| `export_metrics.enable` | Bool | `false` | Whether to enable exporting metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval at which metrics are exported. |
| `export_metrics.self_import` | -- | -- | For `standalone` mode, `self_import` is recommended for collecting the metrics generated by the instance itself.<br/>You must create the database before enabling it. |
| `export_metrics.self_import.db` | String | `None` | -- |
| `export_metrics.self_import.db` | String | Unset | -- |
| `export_metrics.remote_write` | -- | -- | -- |
| `export_metrics.remote_write.url` | String | `""` | The URL to send metrics to, for example `http://127.0.0.1:4000/v1/prometheus/write?db=greptime_metrics`. |
| `export_metrics.remote_write.headers` | InlineTable | -- | HTTP headers carried by Prometheus remote-write requests. |
| `tracing` | -- | -- | The tracing options. Only takes effect when compiled with the `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | `None` | The tokio console address. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |
### Flownode
@@ -479,7 +481,7 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `distributed` | The running mode of the flownode. It can be `standalone` or `distributed`. |
| `node_id` | Integer | `None` | The flownode identifier; it should be unique in the cluster. |
| `node_id` | Integer | Unset | The flownode identifier; it should be unique in the cluster. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
@@ -501,7 +503,7 @@
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | `None` | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
@@ -509,4 +511,4 @@
| `logging.tracing_sample_ratio` | -- | -- | The percentage of traces that will be sampled and exported.<br/>Valid range `[0, 1]`: 1 means all traces are sampled, 0 means no traces are sampled; the default value is 1.<br/>Ratios > 1 are treated as 1 and ratios < 0 are treated as 0. |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `tracing` | -- | -- | The tracing options. Only takes effect when compiled with the `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | `None` | The tokio console address. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |

View File

@@ -2,7 +2,7 @@
mode = "standalone"
## The datanode identifier and should be unique in the cluster.
## +toml2docs:none-default
## @toml2docs:none-default
node_id = 42
## Start services after regions have obtained leases.
@@ -20,23 +20,23 @@ enable_telemetry = true
init_regions_parallelism = 16
## Deprecated, use `grpc.addr` instead.
## +toml2docs:none-default
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"
## Deprecated, use `grpc.hostname` instead.
## +toml2docs:none-default
## @toml2docs:none-default
rpc_hostname = "127.0.0.1"
## Deprecated, use `grpc.runtime_size` instead.
## +toml2docs:none-default
## @toml2docs:none-default
rpc_runtime_size = 8
## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
## +toml2docs:none-default
## @toml2docs:none-default
rpc_max_recv_message_size = "512MB"
## Deprecated, use `grpc.rpc_max_send_message_size` instead.
## +toml2docs:none-default
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"
@@ -71,11 +71,11 @@ max_send_message_size = "512MB"
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload.
@@ -83,11 +83,11 @@ key_path = ""
watch = false
## The runtime options.
[runtime]
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
global_rt_size = 8
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
compact_rt_size = 4
#+ compact_rt_size = 4
## The heartbeat options.
[heartbeat]
@@ -135,7 +135,7 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## +toml2docs:none-default
## @toml2docs:none-default
dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
@@ -170,6 +170,9 @@ prefill_log_files = false
## **It's only used when the provider is `raft_engine`**.
sync_period = "10s"
## Parallelism during WAL recovery.
recovery_parallelism = 2
## The Kafka broker endpoints.
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
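The `provider` key decides which of the surrounding options apply (`raft_engine` vs. `kafka`). As an illustration only, here is a hypothetical, heavily reduced sketch of how such a provider-tagged section can be modeled with `serde`'s internally tagged enums and the `toml` crate; the real `DatanodeWalConfig` in `common-wal` is richer than this.

```rust
use serde::Deserialize;

/// Hypothetical, heavily reduced mirror of the WAL section: the `provider`
/// key selects which variant (and therefore which options) applies.
#[derive(Debug, Deserialize)]
#[serde(tag = "provider", rename_all = "snake_case")]
enum WalConfig {
    RaftEngine {
        dir: Option<String>,
        sync_period: Option<String>,
        /// Unset means "use the engine default" (2 in the example above).
        recovery_parallelism: Option<usize>,
    },
    Kafka {
        broker_endpoints: Vec<String>,
    },
}

fn main() {
    let raft = r#"
        provider = "raft_engine"
        dir = "/tmp/greptimedb/wal"
        sync_period = "10s"
    "#;
    let kafka = r#"
        provider = "kafka"
        broker_endpoints = ["127.0.0.1:9092"]
    "#;
    let a: WalConfig = toml::from_str(raft).expect("valid TOML");
    let b: WalConfig = toml::from_str(kafka).expect("valid TOML");
    println!("{a:?}\n{b:?}");
}
```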
@@ -279,83 +282,83 @@ type = "File"
## Cache configuration for object storage such as 'S3' etc.
## The local file cache directory.
## +toml2docs:none-default
## @toml2docs:none-default
cache_path = "/path/local_cache"
## The local file cache capacity in bytes.
## +toml2docs:none-default
## @toml2docs:none-default
cache_capacity = "256MB"
## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
bucket = "greptimedb"
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
root = "greptimedb"
## The access key id of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3` and `Oss`**.
## +toml2docs:none-default
## @toml2docs:none-default
access_key_id = "test"
## The secret access key of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3`**.
## +toml2docs:none-default
## @toml2docs:none-default
secret_access_key = "test"
## The secret access key of the aliyun account.
## **It's only used when the storage type is `Oss`**.
## +toml2docs:none-default
## @toml2docs:none-default
access_key_secret = "test"
## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
account_name = "test"
## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
account_key = "test"
## The scope of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
scope = "test"
## The credential path of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
credential_path = "test"
## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
credential = "base64-credential"
## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
container = "greptimedb"
## The sas token of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
sas_token = ""
## The endpoint of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
endpoint = "https://s3.amazonaws.com"
## The region of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
region = "us-west-2"
# Custom storage options
@@ -385,7 +388,7 @@ region = "us-west-2"
[region_engine.mito]
## Number of region workers.
num_workers = 8
#+ num_workers = 8
## Request channel size of each worker.
worker_channel_size = 128
@@ -406,26 +409,32 @@ max_background_jobs = 4
auto_flush_interval = "1h"
## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
global_write_buffer_size = "1GB"
## @toml2docs:none-default="Auto"
#+ global_write_buffer_size = "1GB"
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
global_write_buffer_reject_size = "2GB"
## @toml2docs:none-default="Auto"
#+ global_write_buffer_reject_size = "2GB"
## Cache size for SST metadata. Setting it to 0 to disable the cache.
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
sst_meta_cache_size = "128MB"
## @toml2docs:none-default="Auto"
#+ sst_meta_cache_size = "128MB"
## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
vector_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -437,7 +446,7 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
## +toml2docs:none-default
## @toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
@@ -553,7 +562,7 @@ fork_dictionary_bytes = "1GiB"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## +toml2docs:none-default
## @toml2docs:none-default
level = "info"
## Enable OTLP tracing.
@@ -587,7 +596,7 @@ write_interval = "30s"
## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
## You must create the database before enabling it.
[export_metrics.self_import]
## +toml2docs:none-default
## @toml2docs:none-default
db = "greptime_metrics"
[export_metrics.remote_write]
@@ -600,5 +609,5 @@ headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.
## +toml2docs:none-default
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"

View File

@@ -2,7 +2,7 @@
mode = "distributed"
## The flownode identifier and should be unique in the cluster.
## +toml2docs:none-default
## @toml2docs:none-default
node_id = 14
## The gRPC server options.
@@ -63,7 +63,7 @@ retry_interval = "3s"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## +toml2docs:none-default
## @toml2docs:none-default
level = "info"
## Enable OTLP tracing.
@@ -87,6 +87,6 @@ default_ratio = 1.0
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.
## +toml2docs:none-default
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"

View File

@@ -1,13 +1,13 @@
## The default timezone of the server.
## +toml2docs:none-default
## @toml2docs:none-default
default_timezone = "UTC"
## The runtime options.
[runtime]
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
global_rt_size = 8
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
compact_rt_size = 4
#+ compact_rt_size = 4
## The heartbeat options.
[heartbeat]
@@ -44,11 +44,11 @@ runtime_size = 8
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload.
@@ -76,11 +76,11 @@ runtime_size = 2
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload
@@ -101,11 +101,11 @@ runtime_size = 2
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload
@@ -170,7 +170,7 @@ tcp_nodelay = true
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## +toml2docs:none-default
## @toml2docs:none-default
level = "info"
## Enable OTLP tracing.
@@ -204,7 +204,7 @@ write_interval = "30s"
## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
## You must create the database before enabling it.
[export_metrics.self_import]
## +toml2docs:none-default
## @toml2docs:none-default
db = "greptime_metrics"
[export_metrics.remote_write]
@@ -217,5 +217,5 @@ headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.
## +toml2docs:none-default
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"

View File

@@ -36,11 +36,11 @@ enable_region_failover = false
backend = "EtcdStore"
## The runtime options.
[runtime]
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
global_rt_size = 8
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
compact_rt_size = 4
#+ compact_rt_size = 4
## Procedure storage options.
[procedure]
@@ -157,7 +157,7 @@ backoff_deadline = "5mins"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## +toml2docs:none-default
## @toml2docs:none-default
level = "info"
## Enable OTLP tracing.
@@ -191,7 +191,7 @@ write_interval = "30s"
## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
## You must create the database before enabling it.
[export_metrics.self_import]
## +toml2docs:none-default
## @toml2docs:none-default
db = "greptime_metrics"
[export_metrics.remote_write]
@@ -204,5 +204,5 @@ headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.
## +toml2docs:none-default
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"

View File

@@ -5,7 +5,7 @@ mode = "standalone"
enable_telemetry = true
## The default timezone of the server.
## +toml2docs:none-default
## @toml2docs:none-default
default_timezone = "UTC"
## Initialize all regions in the background during the startup.
@@ -16,11 +16,11 @@ init_regions_in_background = false
init_regions_parallelism = 16
## The runtime options.
[runtime]
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
global_rt_size = 8
#+ global_rt_size = 8
## The number of threads to execute the runtime for global write operations.
compact_rt_size = 4
#+ compact_rt_size = 4
## The HTTP server options.
[http]
@@ -46,11 +46,11 @@ runtime_size = 8
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload.
@@ -78,11 +78,11 @@ runtime_size = 2
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload
@@ -103,11 +103,11 @@ runtime_size = 2
mode = "disable"
## Certificate file path.
## +toml2docs:none-default
## @toml2docs:none-default
cert_path = ""
## Private key file path.
## +toml2docs:none-default
## @toml2docs:none-default
key_path = ""
## Watch for Certificate and key file change and auto reload
@@ -139,7 +139,7 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## +toml2docs:none-default
## @toml2docs:none-default
dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
@@ -174,6 +174,9 @@ prefill_log_files = false
## **It's only used when the provider is `raft_engine`**.
sync_period = "10s"
## Parallelism during WAL recovery.
recovery_parallelism = 2
## The Kafka broker endpoints.
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
@@ -317,83 +320,83 @@ type = "File"
## Cache configuration for object storage such as 'S3' etc.
## The local file cache directory.
## +toml2docs:none-default
## @toml2docs:none-default
cache_path = "/path/local_cache"
## The local file cache capacity in bytes.
## +toml2docs:none-default
## @toml2docs:none-default
cache_capacity = "256MB"
## The S3 bucket name.
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
bucket = "greptimedb"
## The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.
## **It's only used when the storage type is `S3`, `Oss` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
root = "greptimedb"
## The access key id of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3` and `Oss`**.
## +toml2docs:none-default
## @toml2docs:none-default
access_key_id = "test"
## The secret access key of the aws account.
## It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.
## **It's only used when the storage type is `S3`**.
## +toml2docs:none-default
## @toml2docs:none-default
secret_access_key = "test"
## The secret access key of the aliyun account.
## **It's only used when the storage type is `Oss`**.
## +toml2docs:none-default
## @toml2docs:none-default
access_key_secret = "test"
## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
account_name = "test"
## The account key of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
account_key = "test"
## The scope of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
scope = "test"
## The credential path of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
credential_path = "test"
## The credential of the google cloud storage.
## **It's only used when the storage type is `Gcs`**.
## +toml2docs:none-default
## @toml2docs:none-default
credential = "base64-credential"
## The container of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
container = "greptimedb"
## The sas token of the azure account.
## **It's only used when the storage type is `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
sas_token = ""
## The endpoint of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
endpoint = "https://s3.amazonaws.com"
## The region of the S3 service.
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
## +toml2docs:none-default
## @toml2docs:none-default
region = "us-west-2"
# Custom storage options
@@ -423,7 +426,7 @@ region = "us-west-2"
[region_engine.mito]
## Number of region workers.
num_workers = 8
#+ num_workers = 8
## Request channel size of each worker.
worker_channel_size = 128
@@ -444,26 +447,32 @@ max_background_jobs = 4
auto_flush_interval = "1h"
## Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
global_write_buffer_size = "1GB"
## @toml2docs:none-default="Auto"
#+ global_write_buffer_size = "1GB"
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
global_write_buffer_reject_size = "2GB"
## Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`.
## @toml2docs:none-default="Auto"
#+ global_write_buffer_reject_size = "2GB"
## Cache size for SST metadata. Setting it to 0 to disable the cache.
## If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
sst_meta_cache_size = "128MB"
## @toml2docs:none-default="Auto"
#+ sst_meta_cache_size = "128MB"
## Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
vector_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## @toml2docs:none-default="Auto"
#+ selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -475,7 +484,7 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
## +toml2docs:none-default
## @toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
@@ -597,7 +606,7 @@ fork_dictionary_bytes = "1GiB"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## +toml2docs:none-default
## @toml2docs:none-default
level = "info"
## Enable OTLP tracing.
@@ -628,10 +637,10 @@ enable = false
## The interval of export metrics.
write_interval = "30s"
## For `standalone` mode, `self_import` is recommend to collect metrics generated by itself
## For `standalone` mode, `self_import` is recommended to collect metrics generated by itself
## You must create the database before enabling it.
[export_metrics.self_import]
## +toml2docs:none-default
## @toml2docs:none-default
db = "greptime_metrics"
[export_metrics.remote_write]
@@ -644,5 +653,5 @@ headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.
## +toml2docs:none-default
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"

View File

@@ -42,7 +42,8 @@ use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, QueryRequest, Row, SemanticType,
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest,
Row, SemanticType,
};
use paste::paste;
use snafu::prelude::*;
@@ -103,7 +104,17 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ColumnDataType::Uint64 => ConcreteDataType::uint64_datatype(),
ColumnDataType::Float32 => ConcreteDataType::float32_datatype(),
ColumnDataType::Float64 => ConcreteDataType::float64_datatype(),
ColumnDataType::Binary => ConcreteDataType::binary_datatype(),
ColumnDataType::Binary => {
if let Some(TypeExt::JsonType(_)) = datatype_wrapper
.datatype_ext
.as_ref()
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
{
ConcreteDataType::json_datatype()
} else {
ConcreteDataType::binary_datatype()
}
}
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
@@ -236,7 +247,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary,
ConcreteDataType::String(_) => ColumnDataType::String,
ConcreteDataType::Date(_) => ColumnDataType::Date,
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
@@ -276,6 +287,16 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
})),
})
}
ColumnDataType::Binary => {
if datatype == ConcreteDataType::json_datatype() {
// Json is the same as binary in proto. The extension marks the binary in proto is actually a json.
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
})
} else {
None
}
}
_ => None,
};
Ok(Self {
@@ -649,7 +670,8 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_) => {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
unreachable!()
}
}
@@ -813,7 +835,8 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_) => {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
unreachable!()
}
}
@@ -831,7 +854,13 @@ pub fn is_column_type_value_eq(
expect_type: &ConcreteDataType,
) -> bool {
ColumnDataTypeWrapper::try_new(type_value, type_extension)
.map(|wrapper| ConcreteDataType::from(wrapper) == *expect_type)
.map(|wrapper| {
let datatype = ConcreteDataType::from(wrapper);
(datatype == *expect_type)
// Json type leverage binary type in pb, so this is valid.
|| (datatype == ConcreteDataType::binary_datatype()
&& *expect_type == ConcreteDataType::json_datatype())
})
.unwrap_or(false)
}
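In short, JSON columns reuse the Binary data type on the wire, and a `JsonTypeExtension` is what tells the two apart when converting back. A self-contained sketch of that round trip, using hypothetical stand-in enums rather than the real `greptime_proto` / `ConcreteDataType` types:

```rust
/// Hypothetical stand-ins for the proto-side and engine-side type enums.
#[derive(Clone, Copy, PartialEq, Debug)]
enum WireType { Binary, String }

#[derive(Clone, Copy, PartialEq, Debug)]
enum WireTypeExt { Json }

#[derive(Clone, Copy, PartialEq, Debug)]
enum ConcreteType { Binary, Json, String }

/// Engine type -> (wire type, optional extension): JSON is sent as Binary
/// plus an extension flag.
fn to_wire(ty: ConcreteType) -> (WireType, Option<WireTypeExt>) {
    match ty {
        ConcreteType::Json => (WireType::Binary, Some(WireTypeExt::Json)),
        ConcreteType::Binary => (WireType::Binary, None),
        ConcreteType::String => (WireType::String, None),
    }
}

/// Wire type + extension -> engine type: the extension disambiguates Binary.
fn from_wire(ty: WireType, ext: Option<WireTypeExt>) -> ConcreteType {
    match (ty, ext) {
        (WireType::Binary, Some(WireTypeExt::Json)) => ConcreteType::Json,
        (WireType::Binary, None) => ConcreteType::Binary,
        (WireType::String, _) => ConcreteType::String,
    }
}

fn main() {
    let (wire, ext) = to_wire(ConcreteType::Json);
    assert_eq!(from_wire(wire, ext), ConcreteType::Json);
    // Without the extension, the same wire type is plain binary.
    assert_eq!(from_wire(WireType::Binary, None), ConcreteType::Binary);
}
```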

View File

@@ -21,14 +21,14 @@ use greptime_proto::v1::region::RegionResponse as RegionResponseV1;
#[derive(Debug)]
pub struct RegionResponse {
pub affected_rows: AffectedRows,
pub extension: HashMap<String, Vec<u8>>,
pub extensions: HashMap<String, Vec<u8>>,
}
impl RegionResponse {
pub fn from_region_response(region_response: RegionResponseV1) -> Self {
Self {
affected_rows: region_response.affected_rows as _,
extension: region_response.extension,
extensions: region_response.extensions,
}
}
@@ -36,7 +36,7 @@ impl RegionResponse {
pub fn new(affected_rows: AffectedRows) -> Self {
Self {
affected_rows,
extension: Default::default(),
extensions: Default::default(),
}
}
}
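A small illustrative sketch of the renamed field, with a local stand-in struct rather than the real `RegionResponse`: the plural `extensions` reflects that one response can carry several opaque key/value entries (the key names below are made up for the example).

```rust
use std::collections::HashMap;

/// Hypothetical stand-in mirroring the shape of the struct above.
#[derive(Debug, Default)]
struct RegionResponse {
    affected_rows: usize,
    extensions: HashMap<String, Vec<u8>>,
}

fn main() {
    let mut resp = RegionResponse { affected_rows: 3, ..Default::default() };
    // More than one extension entry can ride on a single response.
    resp.extensions.insert("example-key".to_string(), b"payload".to_vec());
    resp.extensions.insert("another-key".to_string(), vec![0x01, 0x02]);
    assert_eq!(resp.affected_rows, 3);
    assert_eq!(resp.extensions.len(), 2);
}
```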

View File

@@ -36,6 +36,7 @@ use futures_util::{StreamExt, TryStreamExt};
use meta_client::client::MetaClient;
use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
@@ -152,7 +153,11 @@ impl CatalogManager for KvBackendCatalogManager {
Ok(keys)
}
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
async fn schema_names(
&self,
catalog: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
let stream = self
.table_metadata_manager
.schema_manager()
@@ -163,12 +168,17 @@ impl CatalogManager for KvBackendCatalogManager {
.map_err(BoxedError::new)
.context(ListSchemasSnafu { catalog })?;
keys.extend(self.system_catalog.schema_names());
keys.extend(self.system_catalog.schema_names(query_ctx));
Ok(keys.into_iter().collect())
}
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
async fn table_names(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
let stream = self
.table_metadata_manager
.table_name_manager()
@@ -181,7 +191,7 @@ impl CatalogManager for KvBackendCatalogManager {
.into_iter()
.map(|(k, _)| k)
.collect::<Vec<_>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema));
tables.extend_from_slice(&self.system_catalog.table_names(schema, query_ctx));
Ok(tables.into_iter().collect())
}
@@ -194,8 +204,13 @@ impl CatalogManager for KvBackendCatalogManager {
.context(TableMetadataManagerSnafu)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
if self.system_catalog.schema_exists(schema) {
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool> {
if self.system_catalog.schema_exists(schema, query_ctx) {
return Ok(true);
}
@@ -206,8 +221,14 @@ impl CatalogManager for KvBackendCatalogManager {
.context(TableMetadataManagerSnafu)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
if self.system_catalog.table_exists(schema, table) {
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool> {
if self.system_catalog.table_exists(schema, table, query_ctx) {
return Ok(true);
}
@@ -225,10 +246,12 @@ impl CatalogManager for KvBackendCatalogManager {
catalog_name: &str,
schema_name: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>> {
if let Some(table) = self
.system_catalog
.table(catalog_name, schema_name, table_name)
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if let Some(table) =
self.system_catalog
.table(catalog_name, schema_name, table_name, query_ctx)
{
return Ok(Some(table));
}
@@ -236,23 +259,45 @@ impl CatalogManager for KvBackendCatalogManager {
let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu {
name: "table_cache",
})?;
table_cache
if let Some(table) = table_cache
.get_by_ref(&TableName {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
})
.await
.context(GetTableCacheSnafu)
.context(GetTableCacheSnafu)?
{
return Ok(Some(table));
}
if channel == Channel::Postgres {
// falldown to pg_catalog
if let Some(table) =
self.system_catalog
.table(catalog_name, PG_CATALOG_NAME, table_name, query_ctx)
{
return Ok(Some(table));
}
}
return Ok(None);
}
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
query_ctx: Option<&'a QueryContext>,
) -> BoxStream<'a, Result<TableRef>> {
let sys_tables = try_stream!({
// System tables
let sys_table_names = self.system_catalog.table_names(schema);
let sys_table_names = self.system_catalog.table_names(schema, query_ctx);
for table_name in sys_table_names {
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
if let Some(table) =
self.system_catalog
.table(catalog, schema, &table_name, query_ctx)
{
yield table;
}
}
@@ -320,18 +365,27 @@ struct SystemCatalog {
}
impl SystemCatalog {
// TODO(j0hn50n133): remove the duplicated hard-coded table names logic
fn schema_names(&self) -> Vec<String> {
vec![
INFORMATION_SCHEMA_NAME.to_string(),
PG_CATALOG_NAME.to_string(),
]
fn schema_names(&self, query_ctx: Option<&QueryContext>) -> Vec<String> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match channel {
// pg_catalog only visible under postgres protocol
Channel::Postgres => vec![
INFORMATION_SCHEMA_NAME.to_string(),
PG_CATALOG_NAME.to_string(),
],
_ => {
vec![INFORMATION_SCHEMA_NAME.to_string()]
}
}
}
fn table_names(&self, schema: &str) -> Vec<String> {
fn table_names(&self, schema: &str, query_ctx: Option<&QueryContext>) -> Vec<String> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match schema {
INFORMATION_SCHEMA_NAME => self.information_schema_provider.table_names(),
PG_CATALOG_NAME => self.pg_catalog_provider.table_names(),
PG_CATALOG_NAME if channel == Channel::Postgres => {
self.pg_catalog_provider.table_names()
}
DEFAULT_SCHEMA_NAME => {
vec![NUMBERS_TABLE_NAME.to_string()]
}
@@ -339,23 +393,35 @@ impl SystemCatalog {
}
}
fn schema_exists(&self, schema: &str) -> bool {
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
fn schema_exists(&self, schema: &str, query_ctx: Option<&QueryContext>) -> bool {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
match channel {
Channel::Postgres => schema == PG_CATALOG_NAME || schema == INFORMATION_SCHEMA_NAME,
_ => schema == INFORMATION_SCHEMA_NAME,
}
}
fn table_exists(&self, schema: &str, table: &str) -> bool {
fn table_exists(&self, schema: &str, table: &str, query_ctx: Option<&QueryContext>) -> bool {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if schema == INFORMATION_SCHEMA_NAME {
self.information_schema_provider.table(table).is_some()
} else if schema == DEFAULT_SCHEMA_NAME {
table == NUMBERS_TABLE_NAME
} else if schema == PG_CATALOG_NAME {
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
self.pg_catalog_provider.table(table).is_some()
} else {
false
}
}
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Option<TableRef> {
let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel());
if schema == INFORMATION_SCHEMA_NAME {
let information_schema_provider =
self.catalog_cache.get_with_by_ref(catalog, move || {
@@ -366,7 +432,7 @@ impl SystemCatalog {
))
});
information_schema_provider.table(table_name)
} else if schema == PG_CATALOG_NAME {
} else if schema == PG_CATALOG_NAME && channel == Channel::Postgres {
if catalog == DEFAULT_CATALOG_NAME {
self.pg_catalog_provider.table(table_name)
} else {
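To summarize the rule these changes thread through `schema_names`, `table_names`, `schema_exists`, `table_exists`, and `table`: `pg_catalog` is only visible to sessions whose channel is Postgres. A standalone sketch of that gating, with hypothetical stand-ins for `Channel` and `QueryContext` from the `session` crate:

```rust
#[derive(Clone, Copy, Debug)]
enum Channel { Mysql, Postgres, Unknown }

/// Hypothetical stand-in for the session context carried with each request.
struct QueryContext { channel: Channel }

const INFORMATION_SCHEMA_NAME: &str = "information_schema";
const PG_CATALOG_NAME: &str = "pg_catalog";

/// Mirrors the shape of `SystemCatalog::schema_names`: with no context the
/// caller is treated as `Channel::Unknown` and does not see `pg_catalog`.
fn system_schema_names(query_ctx: Option<&QueryContext>) -> Vec<&'static str> {
    let channel = query_ctx.map_or(Channel::Unknown, |ctx| ctx.channel);
    match channel {
        Channel::Postgres => vec![INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME],
        _ => vec![INFORMATION_SCHEMA_NAME],
    }
}

fn main() {
    let pg = QueryContext { channel: Channel::Postgres };
    let mysql = QueryContext { channel: Channel::Mysql };
    assert!(system_schema_names(Some(&pg)).contains(&PG_CATALOG_NAME));
    assert!(!system_schema_names(Some(&mysql)).contains(&PG_CATALOG_NAME));
    assert!(!system_schema_names(None).contains(&PG_CATALOG_NAME));
}
```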

View File

@@ -20,8 +20,10 @@ use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use api::v1::CreateTableExpr;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, PG_CATALOG_NAME};
use futures::future::BoxFuture;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use table::metadata::TableId;
use table::TableRef;
@@ -44,15 +46,35 @@ pub trait CatalogManager: Send + Sync {
async fn catalog_names(&self) -> Result<Vec<String>>;
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>;
async fn schema_names(
&self,
catalog: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>>;
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>;
async fn table_names(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>>;
async fn catalog_exists(&self, catalog: &str) -> Result<bool>;
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool>;
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool>;
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
query_ctx: Option<&QueryContext>,
) -> Result<bool>;
/// Returns the table by catalog, schema and table name.
async fn table(
@@ -60,10 +82,25 @@ pub trait CatalogManager: Send + Sync {
catalog: &str,
schema: &str,
table_name: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>>;
/// Returns all tables with a stream by catalog and schema.
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>>;
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
query_ctx: Option<&'a QueryContext>,
) -> BoxStream<'a, Result<TableRef>>;
/// Check if `schema` is a reserved schema name
fn is_reserved_schema_name(&self, schema: &str) -> bool {
// We have to check whether a schema name is reserved before create schema.
// We need this rather than use schema_exists directly because `pg_catalog` is
// only visible via postgres protocol. So if we don't check, a mysql client may
// create a schema named `pg_catalog` which is somehow malformed.
schema == INFORMATION_SCHEMA_NAME || schema == PG_CATALOG_NAME
}
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
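Across the trait, the added `query_ctx: Option<&QueryContext>` parameter is the only signature change: internal callers without a session pass `None`, and protocol frontends forward their context. A minimal sketch of that calling convention with toy stand-ins for the real `CatalogManager` and `QueryContext`, including the new `is_reserved_schema_name` default method:

```rust
/// Hypothetical, heavily reduced stand-ins for the real trait and context.
struct QueryContext;

trait CatalogManager {
    fn schema_names(&self, catalog: &str, query_ctx: Option<&QueryContext>) -> Vec<String>;

    /// Reserved names must be rejected at CREATE SCHEMA time even when they
    /// are not visible to the current protocol (e.g. `pg_catalog` over MySQL).
    fn is_reserved_schema_name(&self, schema: &str) -> bool {
        schema == "information_schema" || schema == "pg_catalog"
    }
}

struct MemoryCatalog;

impl CatalogManager for MemoryCatalog {
    fn schema_names(&self, _catalog: &str, _query_ctx: Option<&QueryContext>) -> Vec<String> {
        vec!["public".to_string(), "information_schema".to_string()]
    }
}

fn main() {
    let manager = MemoryCatalog;
    // Background tasks (e.g. the information_schema builders) pass `None`.
    let schemas = manager.schema_names("greptime", None);
    // A frontend handling a client request forwards its session context.
    let ctx = QueryContext;
    let _ = manager.schema_names("greptime", Some(&ctx));
    assert!(schemas.contains(&"public".to_string()));
    assert!(manager.is_reserved_schema_name("pg_catalog"));
}
```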

View File

@@ -26,6 +26,7 @@ use common_catalog::consts::{
use common_meta::key::flow::FlowMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use snafu::OptionExt;
use table::TableRef;
@@ -53,7 +54,11 @@ impl CatalogManager for MemoryCatalogManager {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
async fn schema_names(
&self,
catalog: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
@@ -67,7 +72,12 @@ impl CatalogManager for MemoryCatalogManager {
.collect())
}
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
async fn table_names(
&self,
catalog: &str,
schema: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
@@ -87,11 +97,22 @@ impl CatalogManager for MemoryCatalogManager {
self.catalog_exist_sync(catalog)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
async fn schema_exists(
&self,
catalog: &str,
schema: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<bool> {
self.schema_exist_sync(catalog, schema)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
async fn table_exists(
&self,
catalog: &str,
schema: &str,
table: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
@@ -108,6 +129,7 @@ impl CatalogManager for MemoryCatalogManager {
catalog: &str,
schema: &str,
table_name: &str,
_query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>> {
let result = try {
self.catalogs
@@ -121,7 +143,12 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}
fn tables<'a>(&'a self, catalog: &'a str, schema: &'a str) -> BoxStream<'a, Result<TableRef>> {
fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
_query_ctx: Option<&QueryContext>,
) -> BoxStream<'a, Result<TableRef>> {
let catalogs = self.catalogs.read().unwrap();
let Some(schemas) = catalogs.get(catalog) else {
@@ -371,11 +398,12 @@ mod tests {
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
NUMBERS_TABLE_NAME,
None,
)
.await
.unwrap()
.unwrap();
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
let stream = catalog_list.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, None);
let tables = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(tables.len(), 1);
assert_eq!(
@@ -384,7 +412,12 @@ mod tests {
);
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
"not_exists",
None
)
.await
.unwrap()
.is_none());
@@ -411,7 +444,7 @@ mod tests {
};
catalog.register_table_sync(register_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
.await
.unwrap()
.is_some());
@@ -423,7 +456,7 @@ mod tests {
};
catalog.deregister_table_sync(deregister_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name, None)
.await
.unwrap()
.is_none());

View File

@@ -257,8 +257,8 @@ impl InformationSchemaColumnsBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let keys = &table.table_info().meta.primary_key_indices;

View File

@@ -212,8 +212,8 @@ impl InformationSchemaKeyColumnUsageBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let mut primary_constraints = vec![];

View File

@@ -240,9 +240,9 @@ impl InformationSchemaPartitionsBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let table_info_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.tables(&catalog_name, &schema_name, None)
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {

View File

@@ -176,9 +176,9 @@ impl InformationSchemaRegionPeersBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let table_id_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.tables(&catalog_name, &schema_name, None)
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {

View File

@@ -171,7 +171,7 @@ impl InformationSchemaSchemataBuilder {
let table_metadata_manager = utils::table_meta_manager(&self.catalog_manager)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let opts = if let Some(table_metadata_manager) = &table_metadata_manager {
table_metadata_manager
.schema_manager()

View File

@@ -176,8 +176,8 @@ impl InformationSchemaTableConstraintsBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let keys = &table.table_info().meta.primary_key_indices;

View File

@@ -234,8 +234,8 @@ impl InformationSchemaTablesBuilder {
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();

View File

@@ -192,8 +192,8 @@ impl InformationSchemaViewsBuilder {
.context(CastManagerSnafu)?
.view_info_cache()?;
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();

View File

@@ -18,15 +18,16 @@ mod pg_namespace;
mod table_names;
use std::collections::HashMap;
use std::sync::{Arc, Weak};
use std::sync::{Arc, LazyLock, Weak};
use common_catalog::consts::{self, PG_CATALOG_NAME};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, PG_CATALOG_NAME};
use datatypes::schema::ColumnSchema;
use lazy_static::lazy_static;
use paste::paste;
use pg_catalog_memory_table::get_schema_columns;
use pg_class::PGClass;
use pg_namespace::PGNamespace;
use session::context::{Channel, QueryContext};
use table::TableRef;
pub use table_names::*;
@@ -142,3 +143,12 @@ impl SystemSchemaProviderInner for PGCatalogProvider {
&self.catalog_name
}
}
/// Provide query context to call the [`CatalogManager`]'s method.
static PG_QUERY_CTX: LazyLock<QueryContext> = LazyLock::new(|| {
QueryContext::with_channel(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, Channel::Postgres)
});
fn query_ctx() -> Option<&'static QueryContext> {
Some(&PG_QUERY_CTX)
}
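`std::sync::LazyLock` (stable since Rust 1.80) initializes the static on first access and shares it for the process lifetime. A self-contained sketch of the same pattern, with a simplified context type standing in for the real `QueryContext`:

```rust
use std::sync::LazyLock;

/// Hypothetical stand-in for the session context the pg_catalog tables need.
#[derive(Debug)]
struct QueryContext {
    catalog: &'static str,
    schema: &'static str,
    channel: &'static str,
}

/// Built once, on first access, then shared for the process lifetime.
static PG_QUERY_CTX: LazyLock<QueryContext> = LazyLock::new(|| QueryContext {
    catalog: "greptime",
    schema: "public",
    channel: "postgres",
});

fn query_ctx() -> Option<&'static QueryContext> {
    Some(&PG_QUERY_CTX)
}

fn main() {
    // Every call sees the same initialized instance.
    let ctx = query_ctx().expect("always present");
    assert_eq!(ctx.channel, "postgres");
    println!("{ctx:?}");
}
```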

View File

@@ -32,7 +32,7 @@ use store_api::storage::ScanRequest;
use table::metadata::TableType;
use super::pg_namespace::oid_map::PGNamespaceOidMapRef;
use super::{OID_COLUMN_NAME, PG_CLASS};
use super::{query_ctx, OID_COLUMN_NAME, PG_CLASS};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
@@ -202,8 +202,11 @@ impl PGClassBuilder {
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name);
for schema_name in catalog_manager
.schema_names(&catalog_name, query_ctx())
.await?
{
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, query_ctx());
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();
self.add_class(

View File

@@ -31,7 +31,7 @@ use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt};
use store_api::storage::ScanRequest;
use super::{PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use super::{query_ctx, PGNamespaceOidMapRef, OID_COLUMN_NAME, PG_NAMESPACE};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
@@ -180,7 +180,10 @@ impl PGNamespaceBuilder {
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
for schema_name in catalog_manager
.schema_names(&catalog_name, query_ctx())
.await?
{
self.add_namespace(&predicates, &schema_name);
}
self.finish()

View File

@@ -23,7 +23,7 @@ use datafusion::datasource::view::ViewTable;
use datafusion::datasource::{provider_as_source, TableProvider};
use datafusion::logical_expr::TableSource;
use itertools::Itertools;
use session::context::QueryContext;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableType;
use table::table::adapter::DfTableProviderAdapter;
@@ -45,6 +45,7 @@ pub struct DfTableSourceProvider {
disallow_cross_catalog_query: bool,
default_catalog: String,
default_schema: String,
query_ctx: QueryContextRef,
plan_decoder: SubstraitPlanDecoderRef,
enable_ident_normalization: bool,
}
@@ -53,7 +54,7 @@ impl DfTableSourceProvider {
pub fn new(
catalog_manager: CatalogManagerRef,
disallow_cross_catalog_query: bool,
query_ctx: &QueryContext,
query_ctx: QueryContextRef,
plan_decoder: SubstraitPlanDecoderRef,
enable_ident_normalization: bool,
) -> Self {
@@ -63,6 +64,7 @@ impl DfTableSourceProvider {
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog().to_owned(),
default_schema: query_ctx.current_schema(),
query_ctx,
plan_decoder,
enable_ident_normalization,
}
@@ -71,8 +73,7 @@ impl DfTableSourceProvider {
pub fn resolve_table_ref(&self, table_ref: TableReference) -> Result<ResolvedTableReference> {
if self.disallow_cross_catalog_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { .. } => {}
TableReference::Bare { .. } | TableReference::Partial { .. } => {}
TableReference::Full {
catalog, schema, ..
} => {
@@ -107,7 +108,7 @@ impl DfTableSourceProvider {
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.table(catalog_name, schema_name, table_name, Some(&self.query_ctx))
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(catalog_name, schema_name, table_name),
@@ -210,12 +211,12 @@ mod tests {
#[test]
fn test_validate_table_ref() {
let query_ctx = &QueryContext::with("greptime", "public");
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
let table_provider = DfTableSourceProvider::new(
MemoryCatalogManager::with_default_setup(),
true,
query_ctx,
query_ctx.clone(),
DummyDecoder::arc(),
true,
);
@@ -308,7 +309,7 @@ mod tests {
#[tokio::test]
async fn test_resolve_view() {
let query_ctx = &QueryContext::with("greptime", "public");
let query_ctx = Arc::new(QueryContext::with("greptime", "public"));
let backend = Arc::new(MemoryKvBackend::default());
let layered_cache_builder = LayeredCacheRegistryBuilder::default()
.add_cache_registry(CacheRegistryBuilder::default().build());
@@ -344,8 +345,13 @@ mod tests {
.await
.unwrap();
let mut table_provider =
DfTableSourceProvider::new(catalog_manager, true, query_ctx, MockDecoder::arc(), true);
let mut table_provider = DfTableSourceProvider::new(
catalog_manager,
true,
query_ctx.clone(),
MockDecoder::arc(),
true,
);
// View not found
let table_ref = TableReference::bare("not_exists_view");
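The switch from `&QueryContext` to `QueryContextRef` (an `Arc`) lets the provider keep its own handle to the session context rather than borrowing it from the caller. A generic sketch of that ownership pattern, with a placeholder context type instead of the real `session::QueryContext`:

```rust
use std::sync::Arc;

/// Placeholder context type; the real one is `session::QueryContext`.
struct QueryContext {
    current_catalog: String,
    current_schema: String,
}

type QueryContextRef = Arc<QueryContext>;

/// Mirrors the ownership change: the provider keeps its own `Arc` handle to
/// the context instead of borrowing it from the caller.
struct TableSourceProvider {
    default_catalog: String,
    default_schema: String,
    query_ctx: QueryContextRef,
}

impl TableSourceProvider {
    fn new(query_ctx: QueryContextRef) -> Self {
        Self {
            default_catalog: query_ctx.current_catalog.clone(),
            default_schema: query_ctx.current_schema.clone(),
            query_ctx,
        }
    }
}

fn main() {
    let ctx: QueryContextRef = Arc::new(QueryContext {
        current_catalog: "greptime".to_string(),
        current_schema: "public".to_string(),
    });
    // Cloning the `Arc` is cheap; caller and provider share one context.
    let provider = TableSourceProvider::new(ctx.clone());
    assert_eq!(Arc::strong_count(&ctx), 2);
    assert_eq!(provider.default_catalog, "greptime");
    assert_eq!(provider.default_schema, "public");
    assert_eq!(provider.query_ctx.current_catalog, ctx.current_catalog);
}
```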

View File

@@ -112,7 +112,7 @@ impl SchemaProvider for DummySchemaProvider {
async fn table(&self, name: &str) -> datafusion::error::Result<Option<Arc<dyn TableProvider>>> {
let table = self
.catalog_manager
.table(&self.catalog_name, &self.schema_name, name)
.table(&self.catalog_name, &self.schema_name, name, None)
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(&self.catalog_name, &self.schema_name, name),

View File

@@ -70,6 +70,7 @@ serde.workspace = true
serde_json.workspace = true
servers.workspace = true
session.workspace = true
similar-asserts.workspace = true
snafu.workspace = true
store-api.workspace = true
substrait.workspace = true

View File

@@ -16,12 +16,10 @@ use std::time::Duration;
use cmd::options::GreptimeOptions;
use cmd::standalone::StandaloneOptions;
use common_base::readable_size::ReadableSize;
use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_runtime::global::RuntimeOptions;
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
@@ -45,10 +43,6 @@ fn test_load_datanode_example_config() {
.unwrap();
let expected = GreptimeOptions::<DatanodeOptions> {
runtime: RuntimeOptions {
global_rt_size: 8,
compact_rt_size: 4,
},
component: DatanodeOptions {
node_id: Some(42),
meta_client: Some(MetaClientOptions {
@@ -65,6 +59,7 @@ fn test_load_datanode_example_config() {
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
}),
storage: StorageConfig {
@@ -73,16 +68,8 @@ fn test_load_datanode_example_config() {
},
region_engine: vec![
RegionEngineConfig::Mito(MitoConfig {
num_workers: 8,
auto_flush_interval: Duration::from_secs(3600),
scan_parallelism: 0,
global_write_buffer_size: ReadableSize::gb(1),
global_write_buffer_reject_size: ReadableSize::gb(2),
sst_meta_cache_size: ReadableSize::mb(128),
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
selector_result_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
@@ -107,9 +94,10 @@ fn test_load_datanode_example_config() {
rpc_max_send_message_size: Some(DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE),
..Default::default()
},
..Default::default()
};
assert_eq!(options, expected);
similar_asserts::assert_eq!(options, expected);
}
#[test]
@@ -119,10 +107,6 @@ fn test_load_frontend_example_config() {
GreptimeOptions::<FrontendOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<FrontendOptions> {
runtime: RuntimeOptions {
global_rt_size: 8,
compact_rt_size: 4,
},
component: FrontendOptions {
default_timezone: Some("UTC".to_string()),
meta_client: Some(MetaClientOptions {
@@ -155,8 +139,9 @@ fn test_load_frontend_example_config() {
},
..Default::default()
},
..Default::default()
};
assert_eq!(options, expected);
similar_asserts::assert_eq!(options, expected);
}
#[test]
@@ -166,10 +151,6 @@ fn test_load_metasrv_example_config() {
GreptimeOptions::<MetasrvOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<MetasrvOptions> {
runtime: RuntimeOptions {
global_rt_size: 8,
compact_rt_size: 4,
},
component: MetasrvOptions {
selector: SelectorType::default(),
data_home: "/tmp/metasrv/".to_string(),
@@ -187,8 +168,9 @@ fn test_load_metasrv_example_config() {
},
..Default::default()
},
..Default::default()
};
assert_eq!(options, expected);
similar_asserts::assert_eq!(options, expected);
}
#[test]
@@ -198,30 +180,19 @@ fn test_load_standalone_example_config() {
GreptimeOptions::<StandaloneOptions>::load_layered_options(example_config.to_str(), "")
.unwrap();
let expected = GreptimeOptions::<StandaloneOptions> {
runtime: RuntimeOptions {
global_rt_size: 8,
compact_rt_size: 4,
},
component: StandaloneOptions {
default_timezone: Some("UTC".to_string()),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
}),
region_engine: vec![
RegionEngineConfig::Mito(MitoConfig {
num_workers: 8,
auto_flush_interval: Duration::from_secs(3600),
scan_parallelism: 0,
global_write_buffer_size: ReadableSize::gb(1),
global_write_buffer_reject_size: ReadableSize::gb(2),
sst_meta_cache_size: ReadableSize::mb(128),
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
selector_result_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
scan_parallelism: 0,
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),
@@ -243,6 +214,7 @@ fn test_load_standalone_example_config() {
},
..Default::default()
},
..Default::default()
};
assert_eq!(options, expected);
similar_asserts::assert_eq!(options, expected);
}
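`similar_asserts::assert_eq!` is a drop-in replacement for `assert_eq!` that prints a readable diff of the two values on failure, which is far easier to scan when the compared values are large option structs or long strings. A minimal usage sketch (the struct and field names below are made up; the compared types only need `Debug` and `PartialEq`, and the `similar-asserts` crate must be listed as a dev-dependency):

```rust
// [dev-dependencies]
// similar-asserts = "1"

#[derive(Debug, PartialEq, Default)]
struct Options {
    data_home: String,
    num_workers: usize,
    enable_telemetry: bool,
}

#[test]
fn options_round_trip() {
    let loaded = Options {
        data_home: "/tmp/greptimedb".to_string(),
        num_workers: 8,
        ..Default::default()
    };
    let expected = Options {
        data_home: "/tmp/greptimedb".to_string(),
        num_workers: 8,
        ..Default::default()
    };
    // On mismatch this prints a unified diff of the two Debug representations
    // instead of dumping both values onto one long line.
    similar_asserts::assert_eq!(loaded, expected);
}
```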

View File

@@ -9,10 +9,12 @@ workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
async-trait.workspace = true
bitvec = "1.0"
bytes.workspace = true
common-error.workspace = true
common-macro.workspace = true
futures.workspace = true
paste = "1.0"
serde = { version = "1.0", features = ["derive"] }
snafu.workspace = true

View File

@@ -1,242 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::io::{Read, Write};
use bytes::{Buf, BufMut, BytesMut};
use common_error::ext::ErrorExt;
use common_macro::stack_trace_debug;
use paste::paste;
use snafu::{ensure, Location, ResultExt, Snafu};
#[derive(Snafu)]
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display(
"Destination buffer overflow, src_len: {}, dst_len: {}",
src_len,
dst_len
))]
Overflow {
src_len: usize,
dst_len: usize,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Buffer underflow"))]
Underflow {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("IO operation reach EOF"))]
Eof {
#[snafu(source)]
error: std::io::Error,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn as_any(&self) -> &dyn Any {
self
}
}
macro_rules! impl_read_le {
( $($num_ty: ty), *) => {
$(
paste!{
// TODO(hl): default implementation requires allocating a
// temp buffer. maybe use more efficient impls in concrete buffers.
// see https://github.com/GrepTimeTeam/greptimedb/pull/97#discussion_r930798941
fn [<read_ $num_ty _le>](&mut self) -> Result<$num_ty> {
let mut buf = [0u8; std::mem::size_of::<$num_ty>()];
self.read_to_slice(&mut buf)?;
Ok($num_ty::from_le_bytes(buf))
}
fn [<peek_ $num_ty _le>](&mut self) -> Result<$num_ty> {
let mut buf = [0u8; std::mem::size_of::<$num_ty>()];
self.peek_to_slice(&mut buf)?;
Ok($num_ty::from_le_bytes(buf))
}
}
)*
}
}
macro_rules! impl_write_le {
( $($num_ty: ty), *) => {
$(
paste!{
fn [<write_ $num_ty _le>](&mut self, n: $num_ty) -> Result<()> {
self.write_from_slice(&n.to_le_bytes())?;
Ok(())
}
}
)*
}
}
pub trait Buffer {
/// Returns remaining data size for read.
fn remaining_size(&self) -> usize;
/// Returns true if buffer has no data for read.
fn is_empty(&self) -> bool {
self.remaining_size() == 0
}
/// Peeks data into dst. This method should not change internal cursor,
/// invoke `advance_by` if needed.
/// # Panics
/// This method **may** panic if buffer does not have enough data to be copied to dst.
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()>;
/// Reads data into dst. This method will change internal cursor.
/// # Panics
/// This method **may** panic if buffer does not have enough data to be copied to dst.
fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> {
self.peek_to_slice(dst)?;
self.advance_by(dst.len());
Ok(())
}
/// Advances internal cursor for next read.
/// # Panics
/// This method **may** panic if the offset after advancing exceeds the length of underlying buffer.
fn advance_by(&mut self, by: usize);
impl_read_le![u8, i8, u16, i16, u32, i32, u64, i64, f32, f64];
}
macro_rules! impl_buffer_for_bytes {
( $($buf_ty:ty), *) => {
$(
impl Buffer for $buf_ty {
fn remaining_size(&self) -> usize{
self.len()
}
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> {
let dst_len = dst.len();
ensure!(self.remaining() >= dst.len(), OverflowSnafu {
src_len: self.remaining_size(),
dst_len,
}
);
dst.copy_from_slice(&self[0..dst_len]);
Ok(())
}
#[inline]
fn advance_by(&mut self, by: usize) {
self.advance(by);
}
}
)*
};
}
impl_buffer_for_bytes![bytes::Bytes, bytes::BytesMut];
impl Buffer for &[u8] {
fn remaining_size(&self) -> usize {
self.len()
}
fn peek_to_slice(&self, dst: &mut [u8]) -> Result<()> {
let dst_len = dst.len();
ensure!(
self.len() >= dst.len(),
OverflowSnafu {
src_len: self.remaining_size(),
dst_len,
}
);
dst.copy_from_slice(&self[0..dst_len]);
Ok(())
}
fn read_to_slice(&mut self, dst: &mut [u8]) -> Result<()> {
ensure!(
self.len() >= dst.len(),
OverflowSnafu {
src_len: self.remaining_size(),
dst_len: dst.len(),
}
);
self.read_exact(dst).context(EofSnafu)
}
fn advance_by(&mut self, by: usize) {
*self = &self[by..];
}
}
/// Mutable buffer.
pub trait BufferMut {
fn as_slice(&self) -> &[u8];
fn write_from_slice(&mut self, src: &[u8]) -> Result<()>;
impl_write_le![i8, u8, i16, u16, i32, u32, i64, u64, f32, f64];
}
impl BufferMut for BytesMut {
fn as_slice(&self) -> &[u8] {
self
}
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
self.put_slice(src);
Ok(())
}
}
impl BufferMut for &mut [u8] {
fn as_slice(&self) -> &[u8] {
self
}
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
// see std::io::Write::write_all
// https://doc.rust-lang.org/src/std/io/impls.rs.html#363
self.write_all(src).map_err(|_| {
OverflowSnafu {
src_len: src.len(),
dst_len: self.as_slice().len(),
}
.build()
})
}
}
impl BufferMut for Vec<u8> {
fn as_slice(&self) -> &[u8] {
self
}
fn write_from_slice(&mut self, src: &[u8]) -> Result<()> {
self.extend_from_slice(src);
Ok(())
}
}

View File

@@ -44,6 +44,12 @@ impl From<Vec<u8>> for Bytes {
}
}
impl From<Bytes> for Vec<u8> {
fn from(bytes: Bytes) -> Vec<u8> {
bytes.0.into()
}
}
impl Deref for Bytes {
type Target = [u8];

View File

@@ -13,9 +13,9 @@
// limitations under the License.
pub mod bit_vec;
pub mod buffer;
pub mod bytes;
pub mod plugins;
pub mod range_read;
#[allow(clippy::all)]
pub mod readable_size;
pub mod secrets;

View File

@@ -0,0 +1,105 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io;
use std::ops::Range;
use async_trait::async_trait;
use bytes::{BufMut, Bytes};
use futures::{AsyncReadExt, AsyncSeekExt};
/// `Metadata` contains the metadata of a source.
pub struct Metadata {
/// The length of the source in bytes.
pub content_length: u64,
}
/// `RangeReader` reads a range of bytes from a source.
#[async_trait]
pub trait RangeReader: Send + Unpin {
/// Returns the metadata of the source.
async fn metadata(&mut self) -> io::Result<Metadata>;
/// Reads the bytes in the given range.
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes>;
/// Reads the bytes in the given range into the buffer.
///
/// Handles the buffer based on its capacity:
/// - If the buffer is insufficient to hold the bytes, it will either:
/// - Allocate additional space (e.g., for `Vec<u8>`)
/// - Panic (e.g., for `&mut [u8]`)
async fn read_into(
&mut self,
range: Range<u64>,
buf: &mut (impl BufMut + Send),
) -> io::Result<()> {
let bytes = self.read(range).await?;
buf.put_slice(&bytes);
Ok(())
}
/// Reads the bytes in the given ranges.
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
let mut result = Vec::with_capacity(ranges.len());
for range in ranges {
result.push(self.read(range.clone()).await?);
}
Ok(result)
}
}
#[async_trait]
impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
async fn metadata(&mut self) -> io::Result<Metadata> {
(*self).metadata().await
}
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
(*self).read(range).await
}
async fn read_into(
&mut self,
range: Range<u64>,
buf: &mut (impl BufMut + Send),
) -> io::Result<()> {
(*self).read_into(range, buf).await
}
async fn read_vec(&mut self, ranges: &[Range<u64>]) -> io::Result<Vec<Bytes>> {
(*self).read_vec(ranges).await
}
}
/// `RangeReaderAdapter` bridges `RangeReader` and `AsyncRead + AsyncSeek`.
pub struct RangeReaderAdapter<R>(pub R);
/// Implements `RangeReader` for a type that implements `AsyncRead + AsyncSeek`.
///
/// TODO(zhongzc): This is a temporary bridge for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`.
/// Remove this implementation once the codebase is fully ported to `RangeReader`.
#[async_trait]
impl<R: futures::AsyncRead + futures::AsyncSeek + Send + Unpin> RangeReader
for RangeReaderAdapter<R>
{
async fn metadata(&mut self) -> io::Result<Metadata> {
let content_length = self.0.seek(io::SeekFrom::End(0)).await?;
Ok(Metadata { content_length })
}
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
let mut buf = vec![0; (range.end - range.start) as usize];
self.0.seek(io::SeekFrom::Start(range.start)).await?;
self.0.read_exact(&mut buf).await?;
Ok(Bytes::from(buf))
}
}
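A hedged usage sketch (an editor's example, not part of the diff) for the `RangeReader` trait and `RangeReaderAdapter` above. It assumes the module is exposed as `common_base::range_read` and that the `futures` executor is available; `futures::io::Cursor` implements `AsyncRead + AsyncSeek`, so it can stand in for a real source.

use common_base::range_read::{RangeReader, RangeReaderAdapter};
use futures::io::Cursor;

fn main() -> std::io::Result<()> {
    futures::executor::block_on(async {
        // Wrap an in-memory cursor so it can be driven through the RangeReader API.
        let mut reader = RangeReaderAdapter(Cursor::new(b"hello, range reader".to_vec()));

        // content_length is obtained by seeking to the end of the source.
        let meta = reader.metadata().await?;
        assert_eq!(meta.content_length, 19);

        // Read bytes 7..12, i.e. "range".
        let bytes = reader.read(7..12).await?;
        assert_eq!(&bytes[..], b"range");

        // read_vec fetches several ranges sequentially via the default implementation.
        let parts = reader.read_vec(&[0..5, 14..19]).await?;
        assert_eq!(&parts[0][..], b"hello");
        assert_eq!(&parts[1][..], b"eader");
        Ok(())
    })
}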

View File

@@ -1,182 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(assert_matches)]
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use bytes::{Buf, Bytes, BytesMut};
use common_base::buffer::Error::Overflow;
use common_base::buffer::{Buffer, BufferMut};
use paste::paste;
#[test]
pub fn test_buffer_read_write() {
let mut buf = BytesMut::with_capacity(16);
buf.write_u64_le(1234u64).unwrap();
let result = buf.peek_u64_le().unwrap();
assert_eq!(1234u64, result);
buf.advance_by(8);
buf.write_from_slice("hello, world".as_bytes()).unwrap();
let mut content = vec![0u8; 5];
buf.peek_to_slice(&mut content).unwrap();
let read = String::from_utf8_lossy(&content);
assert_eq!("hello", read);
buf.advance_by(5);
// after read, buffer should still have 7 bytes to read.
assert_eq!(7, buf.remaining());
let mut content = vec![0u8; 6];
buf.read_to_slice(&mut content).unwrap();
let read = String::from_utf8_lossy(&content);
assert_eq!(", worl", read);
// after read, buffer should still have 1 byte to read.
assert_eq!(1, buf.remaining());
}
#[test]
pub fn test_buffer_read() {
let mut bytes = Bytes::from_static("hello".as_bytes());
assert_eq!(5, bytes.remaining_size());
assert_eq!(b'h', bytes.peek_u8_le().unwrap());
bytes.advance_by(1);
assert_eq!(4, bytes.remaining_size());
}
macro_rules! test_primitive_read_write {
( $($num_ty: ty), *) => {
$(
paste!{
#[test]
fn [<test_read_write_ $num_ty>]() {
assert_eq!($num_ty::MAX,(&mut $num_ty::MAX.to_le_bytes() as &[u8]).[<read_ $num_ty _le>]().unwrap());
assert_eq!($num_ty::MIN,(&mut $num_ty::MIN.to_le_bytes() as &[u8]).[<read_ $num_ty _le>]().unwrap());
}
}
)*
}
}
test_primitive_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
#[test]
pub fn test_read_write_from_slice_buffer() {
let mut buf = "hello".as_bytes();
assert_eq!(104, buf.peek_u8_le().unwrap());
buf.advance_by(1);
assert_eq!(101, buf.peek_u8_le().unwrap());
buf.advance_by(1);
assert_eq!(108, buf.peek_u8_le().unwrap());
buf.advance_by(1);
assert_eq!(108, buf.peek_u8_le().unwrap());
buf.advance_by(1);
assert_eq!(111, buf.peek_u8_le().unwrap());
buf.advance_by(1);
assert_matches!(buf.peek_u8_le(), Err(Overflow { .. }));
}
#[test]
pub fn test_read_u8_from_slice_buffer() {
let mut buf = "hello".as_bytes();
assert_eq!(104, buf.read_u8_le().unwrap());
assert_eq!(101, buf.read_u8_le().unwrap());
assert_eq!(108, buf.read_u8_le().unwrap());
assert_eq!(108, buf.read_u8_le().unwrap());
assert_eq!(111, buf.read_u8_le().unwrap());
assert_matches!(buf.read_u8_le(), Err(Overflow { .. }));
}
#[test]
pub fn test_read_write_numbers() {
let mut buf: Vec<u8> = vec![];
buf.write_u64_le(1234).unwrap();
assert_eq!(1234, (&buf[..]).read_u64_le().unwrap());
buf.write_u32_le(4242).unwrap();
let mut p = &buf[..];
assert_eq!(1234, p.read_u64_le().unwrap());
assert_eq!(4242, p.read_u32_le().unwrap());
}
macro_rules! test_primitive_vec_read_write {
( $($num_ty: ty), *) => {
$(
paste!{
#[test]
fn [<test_read_write_ $num_ty _from_vec_buffer>]() {
let mut buf = vec![];
let _ = buf.[<write_ $num_ty _le>]($num_ty::MAX).unwrap();
assert_eq!($num_ty::MAX, buf.as_slice().[<read_ $num_ty _le>]().unwrap());
}
}
)*
}
}
test_primitive_vec_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
#[test]
pub fn test_peek_write_from_vec_buffer() {
let mut buf: Vec<u8> = vec![];
buf.write_from_slice("hello".as_bytes()).unwrap();
let mut slice = buf.as_slice();
assert_eq!(104, slice.peek_u8_le().unwrap());
slice.advance_by(1);
assert_eq!(101, slice.peek_u8_le().unwrap());
slice.advance_by(1);
assert_eq!(108, slice.peek_u8_le().unwrap());
slice.advance_by(1);
assert_eq!(108, slice.peek_u8_le().unwrap());
slice.advance_by(1);
assert_eq!(111, slice.peek_u8_le().unwrap());
slice.advance_by(1);
assert_matches!(slice.read_u8_le(), Err(Overflow { .. }));
}
macro_rules! test_primitive_bytes_read_write {
( $($num_ty: ty), *) => {
$(
paste!{
#[test]
fn [<test_read_write_ $num_ty _from_bytes>]() {
let mut bytes = bytes::Bytes::from($num_ty::MAX.to_le_bytes().to_vec());
assert_eq!($num_ty::MAX, bytes.[<read_ $num_ty _le>]().unwrap());
let mut bytes = bytes::Bytes::from($num_ty::MIN.to_le_bytes().to_vec());
assert_eq!($num_ty::MIN, bytes.[<read_ $num_ty _le>]().unwrap());
}
}
)*
}
}
test_primitive_bytes_read_write![u8, u16, u32, u64, i8, i16, i32, i64, f32, f64];
#[test]
pub fn test_write_overflow() {
let mut buf = [0u8; 4];
assert_matches!(
(&mut buf[..]).write_from_slice("hell".as_bytes()),
Ok { .. }
);
assert_matches!(
(&mut buf[..]).write_from_slice("hello".as_bytes()),
Err(common_base::buffer::Error::Overflow { .. })
);
}
}

View File

@@ -29,6 +29,7 @@ datafusion.workspace = true
datatypes.workspace = true
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
once_cell.workspace = true

View File

@@ -22,6 +22,7 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::numpy::NumpyFunction;
@@ -116,6 +117,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
SystemFunction::register(&function_registry);
TableFunction::register(&function_registry);
// Json related functions
JsonFunction::register(&function_registry);
// Geo functions
#[cfg(feature = "geo")]
crate::scalars::geo::GeoFunctions::register(&function_registry);

View File

@@ -17,9 +17,11 @@ pub(crate) mod date;
pub mod expression;
#[cfg(feature = "geo")]
pub mod geo;
pub mod json;
pub mod matches;
pub mod math;
pub mod numpy;
#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;

View File

@@ -0,0 +1,38 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
mod json_get;
mod json_to_string;
mod to_json;
use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
use json_to_string::JsonToStringFunction;
use to_json::ToJsonFunction;
use crate::function_registry::FunctionRegistry;
pub(crate) struct JsonFunction;
impl JsonFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(JsonToStringFunction));
registry.register(Arc::new(ToJsonFunction));
registry.register(Arc::new(JsonGetInt));
registry.register(Arc::new(JsonGetFloat));
registry.register(Arc::new(JsonGetString));
registry.register(Arc::new(JsonGetBool));
}
}

View File

@@ -0,0 +1,454 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{
BooleanVectorBuilder, Float64VectorBuilder, Int64VectorBuilder, MutableVector,
StringVectorBuilder,
};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
match json_path {
Ok(json_path) => {
let mut sub_jsonb = Vec::new();
let mut sub_offsets = Vec::new();
match jsonb::get_by_path(json, json_path, &mut sub_jsonb, &mut sub_offsets) {
Ok(_) => Some(sub_jsonb),
Err(_) => None,
}
}
_ => None,
}
}
/// Get the value from the JSONB by the given path and return it as the specified type.
/// If the path does not exist or the value is not of the specified type, return `NULL`.
macro_rules! json_get {
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]
pub struct $name;
impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::[<$type:snake _datatype>]())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = [<$type VectorBuilder>]::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
get_json_by_path(json, path)
.and_then(|json| { jsonb::[<to_ $rust_type>](&json).ok() })
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: stringify!([<$name:snake>]),
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
}
}
}
};
}
json_get!(
JsonGetInt,
Int64,
i64,
"Get the value from the JSONB by the given path and return it as an integer."
);
json_get!(
JsonGetFloat,
Float64,
f64,
"Get the value from the JSONB by the given path and return it as a float."
);
json_get!(
JsonGetBool,
Boolean,
bool,
"Get the value from the JSONB by the given path and return it as a boolean."
);
/// Get the value from the JSONB by the given path and return it as a string.
#[derive(Clone, Debug, Default)]
pub struct JsonGetString;
impl Function for JsonGetString {
fn name(&self) -> &str {
"json_get_string"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = StringVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
get_json_by_path(json, path).and_then(|json| jsonb::to_str(&json).ok())
}
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: "json_get_string",
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for JsonGetString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", "json_get_string".to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{BinaryVector, StringVector};
use super::*;
#[test]
fn test_json_get_int() {
let json_get_int = JsonGetInt;
assert_eq!("json_get_int", json_get_int.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
json_get_int
.return_type(&[
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype()
])
.unwrap()
);
assert!(matches!(json_get_int.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
];
let paths = vec!["$.a.b", "$.a", "$.c"];
let results = [Some(2), Some(4), None];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_get_int
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(3, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_i64().unwrap();
assert_eq!(*gt, result);
}
}
#[test]
fn test_json_get_float() {
let json_get_float = JsonGetFloat;
assert_eq!("json_get_float", json_get_float.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
json_get_float
.return_type(&[
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype()
])
.unwrap()
);
assert!(matches!(json_get_float.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2.1}, "b": 2.2, "c": 3.3}"#,
r#"{"a": 4.4, "b": {"c": 6.6}, "c": 6.6}"#,
r#"{"a": 7.7, "b": 8.8, "c": {"a": 7.7}}"#,
];
let paths = vec!["$.a.b", "$.a", "$.c"];
let results = [Some(2.1), Some(4.4), None];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_get_float
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(3, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_f64().unwrap();
assert_eq!(*gt, result);
}
}
#[test]
fn test_json_get_bool() {
let json_get_bool = JsonGetBool;
assert_eq!("json_get_bool", json_get_bool.name());
assert_eq!(
ConcreteDataType::boolean_datatype(),
json_get_bool
.return_type(&[
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype()
])
.unwrap()
);
assert!(matches!(json_get_bool.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": true}, "b": false, "c": true}"#,
r#"{"a": false, "b": {"c": true}, "c": false}"#,
r#"{"a": true, "b": false, "c": {"a": true}}"#,
];
let paths = vec!["$.a.b", "$.a", "$.c"];
let results = [Some(true), Some(false), None];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_get_bool
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(3, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap();
assert_eq!(*gt, result);
}
}
#[test]
fn test_json_get_string() {
let json_get_string = JsonGetString;
assert_eq!("json_get_string", json_get_string.name());
assert_eq!(
ConcreteDataType::string_datatype(),
json_get_string
.return_type(&[
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype()
])
.unwrap()
);
assert!(matches!(json_get_string.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": "a"}, "b": "b", "c": "c"}"#,
r#"{"a": "d", "b": {"c": "e"}, "c": "f"}"#,
r#"{"a": "g", "b": "h", "c": {"a": "g"}}"#,
];
let paths = vec!["$.a.b", "$.a", ""];
let results = [Some("a"), Some("d"), None];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_get_string
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(3, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_string().unwrap();
assert_eq!(*gt, result);
}
}
}

View File

@@ -0,0 +1,174 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{MutableVector, StringVectorBuilder};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Converts the `JSONB` into `String`. It's useful for displaying JSONB content.
#[derive(Clone, Debug, Default)]
pub struct JsonToStringFunction;
const NAME: &str = "json_to_string";
impl Function for JsonToStringFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![ConcreteDataType::json_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly one, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = StringVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let json = json.as_binary();
let result = match json {
Ok(Some(json)) => match jsonb::from_slice(json) {
Ok(json) => {
let json = json.to_string();
Some(json)
}
Err(_) => {
return InvalidFuncArgsSnafu {
err_msg: format!("Illegal json binary: {:?}", json),
}
.fail()
}
},
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for JsonToStringFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "JSON_TO_STRING")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::BinaryVector;
use super::*;
#[test]
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;
assert_eq!("json_to_string", json_to_string.name());
assert_eq!(
ConcreteDataType::string_datatype(),
json_to_string
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(json_to_string.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
let vector = json_to_string
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(3, vector.len());
for (i, gt) in json_strings.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_string().unwrap().unwrap();
// remove whitespaces
assert_eq!(gt.replace(" ", ""), result);
}
let invalid_jsonb = vec![b"invalid json"];
let invalid_json_vector = BinaryVector::from_vec(invalid_jsonb);
let args: Vec<VectorRef> = vec![Arc::new(invalid_json_vector)];
let vector = json_to_string.eval(FunctionContext::default(), &args);
assert!(vector.is_err());
}
}

View File

@@ -0,0 +1,165 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ToJsonFunction;
const NAME: &str = "to_json";
impl Function for ToJsonFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::json_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![ConcreteDataType::string_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly one, have: {}",
columns.len()
),
}
);
let json_strings = &columns[0];
let size = json_strings.len();
let datatype = json_strings.data_type();
let mut results = BinaryVectorBuilder::with_capacity(size);
match datatype {
ConcreteDataType::String(_) => {
for i in 0..size {
let json_string = json_strings.get_ref(i);
let json_string = json_string.as_string();
let result = match json_string {
Ok(Some(json_string)) => match jsonb::parse_value(json_string.as_bytes()) {
Ok(json) => Some(json.to_vec()),
Err(_) => {
return InvalidFuncArgsSnafu {
err_msg: format!(
"Cannot convert the string to json, have: {}",
json_string
),
}
.fail()
}
},
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for ToJsonFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TO_JSON")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::StringVector;
use super::*;
#[test]
fn test_to_json_function() {
let to_json = ToJsonFunction;
assert_eq!("to_json", to_json.name());
assert_eq!(
ConcreteDataType::json_datatype(),
to_json
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(to_json.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(3, vector.len());
for (i, gt) in jsonbs.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_binary().unwrap().unwrap();
// compare the encoded JSONB bytes
assert_eq!(gt, result);
}
}
}

View File

@@ -19,6 +19,7 @@ use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};
use session::context::Channel;
use crate::function::{Function, FunctionContext};
@@ -44,11 +45,22 @@ impl Function for VersionFunction {
Signature::exact(vec![], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let result = StringVector::from(vec![format!(
"5.7.20-greptimedb-{}",
env!("CARGO_PKG_VERSION")
)]);
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let version = match func_ctx.query_ctx.channel() {
Channel::Mysql => {
format!(
"{}-greptimedb-{}",
std::env::var("GREPTIMEDB_MYSQL_SERVER_VERSION")
.unwrap_or_else(|_| "8.4.2".to_string()),
env!("CARGO_PKG_VERSION")
)
}
Channel::Postgres => {
format!("16.3-greptimedb-{}", env!("CARGO_PKG_VERSION"))
}
_ => env!("CARGO_PKG_VERSION").to_string(),
};
let result = StringVector::from(vec![version]);
Ok(Arc::new(result))
}
}

View File

@@ -14,11 +14,10 @@
use api::helper;
use api::v1::column::Values;
use api::v1::{AddColumns, Column, CreateTableExpr};
use api::v1::{Column, CreateTableExpr};
use common_base::BitVec;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::VectorRef;
use datatypes::schema::SchemaRef;
use snafu::{ensure, ResultExt};
use table::metadata::TableId;
use table::table_reference::TableReference;
@@ -27,11 +26,6 @@ use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu};
use crate::util;
use crate::util::ColumnExpr;
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
let column_exprs = ColumnExpr::from_columns(columns);
util::extract_new_columns(schema, column_exprs)
}
/// Try to build create table request from insert data.
pub fn build_create_expr_from_insertion(
catalog_name: &str,
@@ -114,7 +108,6 @@ mod tests {
use super::*;
use crate::error;
use crate::error::ColumnDataTypeSnafu;
use crate::insert::find_new_columns;
#[inline]
fn build_column_schema(
@@ -281,11 +274,18 @@ mod tests {
let schema = Arc::new(SchemaBuilder::try_from(columns).unwrap().build().unwrap());
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
assert!(
util::extract_new_columns(&schema, ColumnExpr::from_columns(&[]))
.unwrap()
.is_none()
);
let insert_batch = mock_insert_batch();
let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();
let add_columns =
util::extract_new_columns(&schema, ColumnExpr::from_columns(&insert_batch.0))
.unwrap()
.unwrap();
assert_eq!(5, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];

View File

@@ -19,4 +19,4 @@ pub mod insert;
pub mod util;
pub use alter::{alter_expr_to_request, create_table_schema};
pub use insert::{build_create_expr_from_insertion, find_new_columns};
pub use insert::build_create_expr_from_insertion;

View File

@@ -70,7 +70,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
return Ok(vals);
},
)+
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) | ConcreteDataType::Duration(_) | ConcreteDataType::Json(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
}
}};
}

View File

@@ -39,7 +39,7 @@ use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::find_leaders;
use crate::{cache_invalidator, metrics, ClusterId};
use crate::{metrics, ClusterId};
pub struct AlterLogicalTablesProcedure {
pub context: DdlContext,
@@ -131,7 +131,7 @@ impl AlterLogicalTablesProcedure {
let phy_raw_schemas = future::join_all(alter_region_tasks)
.await
.into_iter()
.map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.collect::<Result<Vec<_>>>()?;
if phy_raw_schemas.is_empty() {
@@ -170,12 +170,11 @@ impl AlterLogicalTablesProcedure {
}
pub(crate) async fn on_invalidate_table_cache(&mut self) -> Result<Status> {
let ctx = cache_invalidator::Context::default();
let to_invalidate = self.build_table_cache_keys_to_invalidate();
self.context
.cache_invalidator
.invalidate(&ctx, &to_invalidate)
.invalidate(&Default::default(), &to_invalidate)
.await?;
Ok(Status::done())
}

View File

@@ -157,7 +157,7 @@ impl CreateLogicalTablesProcedure {
let phy_raw_schemas = join_all(create_region_tasks)
.await
.into_iter()
.map(|res| res.map(|mut res| res.extension.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.map(|res| res.map(|mut res| res.extensions.remove(ALTER_PHYSICAL_EXTENSION_KEY)))
.collect::<Result<Vec<_>>>()?;
if phy_raw_schemas.is_empty() {

View File

@@ -441,11 +441,9 @@ async fn handle_alter_table_task(
.table_metadata_manager()
.table_route_manager()
.table_route_storage()
.get_raw(table_id)
.get(table_id)
.await?
.context(TableRouteNotFoundSnafu { table_id })?
.into_inner();
.context(TableRouteNotFoundSnafu { table_id })?;
ensure!(
table_route_value.is_physical(),
UnexpectedLogicalRouteTableSnafu {

View File

@@ -90,6 +90,7 @@
pub mod catalog_name;
pub mod datanode_table;
pub mod flow;
pub mod node_address;
pub mod schema_name;
pub mod table_info;
pub mod table_name;
@@ -102,7 +103,7 @@ pub mod view_info;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use std::ops::Deref;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use bytes::Bytes;
@@ -134,6 +135,7 @@ use self::table_route::{TableRouteManager, TableRouteValue};
use self::tombstone::TombstoneManager;
use crate::ddl::utils::region_storage_path;
use crate::error::{self, Result, SerdeJsonSnafu};
use crate::key::node_address::NodeAddressValue;
use crate::key::table_route::TableRouteKey;
use crate::key::txn_helper::TxnOpGetResponseSet;
use crate::kv_backend::txn::{Txn, TxnOp};
@@ -152,12 +154,15 @@ pub const TABLE_NAME_KEY_PREFIX: &str = "__table_name";
pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
pub const TABLE_ROUTE_PREFIX: &str = "__table_route";
pub const NODE_ADDRESS_PREFIX: &str = "__node_address";
pub const CACHE_KEY_PREFIXES: [&str; 4] = [
/// The keys with these prefixes will be loaded into the cache when the leader starts.
pub const CACHE_KEY_PREFIXES: [&str; 5] = [
TABLE_NAME_KEY_PREFIX,
CATALOG_NAME_KEY_PREFIX,
SCHEMA_NAME_KEY_PREFIX,
TABLE_ROUTE_PREFIX,
NODE_ADDRESS_PREFIX,
];
pub type RegionDistribution = BTreeMap<DatanodeId, Vec<RegionNumber>>;
@@ -210,6 +215,11 @@ lazy_static! {
.unwrap();
}
lazy_static! {
static ref NODE_ADDRESS_PATTERN: Regex =
Regex::new(&format!("^{NODE_ADDRESS_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap();
}
/// The key of metadata.
pub trait MetadataKey<'a, T> {
fn to_bytes(&self) -> Vec<u8>;
@@ -306,6 +316,12 @@ impl<T: DeserializeOwned + Serialize> Deref for DeserializedValueWithBytes<T> {
}
}
impl<T: DeserializeOwned + Serialize> DerefMut for DeserializedValueWithBytes<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
impl<T: DeserializeOwned + Serialize + Debug> Debug for DeserializedValueWithBytes<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
@@ -1230,7 +1246,8 @@ impl_metadata_value! {
FlowInfoValue,
FlowNameValue,
FlowRouteValue,
TableFlowValue
TableFlowValue,
NodeAddressValue
}
impl_optional_metadata_value! {
@@ -1952,7 +1969,7 @@ mod tests {
let table_route_value = table_metadata_manager
.table_route_manager
.table_route_storage()
.get_raw(table_id)
.get_with_raw_bytes(table_id)
.await
.unwrap()
.unwrap();
@@ -2005,7 +2022,7 @@ mod tests {
let table_route_value = table_metadata_manager
.table_route_manager
.table_route_storage()
.get_raw(table_id)
.get_with_raw_bytes(table_id)
.await
.unwrap()
.unwrap();

View File

@@ -0,0 +1,114 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use api::v1::meta::Role;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::error::{InvalidMetadataSnafu, Result};
use crate::key::{MetadataKey, NODE_ADDRESS_PATTERN, NODE_ADDRESS_PREFIX};
use crate::peer::Peer;
/// The key that stores a node's address.
///
/// The layout: `__node_address/{role}/{node_id}`
#[derive(Debug, PartialEq)]
pub struct NodeAddressKey {
pub role: Role,
pub node_id: u64,
}
impl NodeAddressKey {
pub fn new(role: Role, node_id: u64) -> Self {
Self { role, node_id }
}
pub fn with_datanode(node_id: u64) -> Self {
Self::new(Role::Datanode, node_id)
}
}
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
pub struct NodeAddressValue {
pub peer: Peer,
}
impl NodeAddressValue {
pub fn new(peer: Peer) -> Self {
Self { peer }
}
}
impl<'a> MetadataKey<'a, NodeAddressKey> for NodeAddressKey {
fn to_bytes(&self) -> Vec<u8> {
self.to_string().into_bytes()
}
fn from_bytes(bytes: &[u8]) -> Result<NodeAddressKey> {
let key = std::str::from_utf8(bytes).map_err(|e| {
InvalidMetadataSnafu {
err_msg: format!(
"NodeAddressKey '{}' is not a valid UTF8 string: {e}",
String::from_utf8_lossy(bytes)
),
}
.build()
})?;
let captures = NODE_ADDRESS_PATTERN
.captures(key)
.context(InvalidMetadataSnafu {
err_msg: format!("Invalid NodeAddressKey '{key}'"),
})?;
// Safety: pass the regex check above
let role = captures[1].parse::<i32>().unwrap();
let role = Role::try_from(role).map_err(|_| {
InvalidMetadataSnafu {
err_msg: format!("Invalid Role value: {role}"),
}
.build()
})?;
let node_id = captures[2].parse::<u64>().unwrap();
Ok(NodeAddressKey::new(role, node_id))
}
}
impl Display for NodeAddressKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}/{}/{}",
NODE_ADDRESS_PREFIX, self.role as i32, self.node_id
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_node_address_key() {
let key = NodeAddressKey::new(Role::Datanode, 1);
let bytes = key.to_bytes();
let key2 = NodeAddressKey::from_bytes(&bytes).unwrap();
assert_eq!(key, key2);
let key = NodeAddressKey::new(Role::Flownode, 3);
let bytes = key.to_bytes();
let key2 = NodeAddressKey::from_bytes(&bytes).unwrap();
assert_eq!(key, key2);
}
}
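A small hedged sketch (not part of the diff) showing the rendered key layout described above; the `common_meta::key::node_address::NodeAddressKey` path is an assumption based on the module additions in this diff.

use common_meta::key::node_address::NodeAddressKey;

fn main() {
    let key = NodeAddressKey::with_datanode(42);
    // to_bytes() renders the Display form: "__node_address/<role as i32>/<node_id>".
    let rendered = String::from_utf8(key.to_bytes()).unwrap();
    assert!(rendered.starts_with("__node_address/"));
    assert!(rendered.ends_with("/42"));
}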

View File

@@ -22,9 +22,10 @@ use store_api::storage::{RegionId, RegionNumber};
use table::metadata::TableId;
use crate::error::{
self, InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu,
TableRouteNotFoundSnafu, UnexpectedLogicalRouteTableSnafu,
InvalidMetadataSnafu, MetadataCorruptionSnafu, Result, SerdeJsonSnafu, TableRouteNotFoundSnafu,
UnexpectedLogicalRouteTableSnafu,
};
use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
use crate::key::txn_helper::TxnOpGetResponseSet;
use crate::key::{
DeserializedValueWithBytes, MetadataKey, MetadataValue, RegionDistribution,
@@ -85,7 +86,7 @@ impl TableRouteValue {
debug_assert_eq!(region.region.id.table_id(), physical_table_id);
RegionId::new(table_id, region.region.id.region_number())
})
.collect::<Vec<_>>();
.collect();
TableRouteValue::logical(physical_table_id, region_routes)
}
}
@@ -189,12 +190,12 @@ impl TableRouteValue {
.region_routes
.iter()
.map(|region_route| region_route.region.id.region_number())
.collect::<Vec<_>>(),
.collect(),
TableRouteValue::Logical(x) => x
.region_ids()
.iter()
.map(|region_id| region_id.region_number())
.collect::<Vec<_>>(),
.collect(),
}
}
}
@@ -301,7 +302,7 @@ impl TableRouteManager {
Some(route) => {
ensure!(
route.is_physical(),
error::UnexpectedLogicalRouteTableSnafu {
UnexpectedLogicalRouteTableSnafu {
err_msg: format!("{route:?} is a non-physical TableRouteValue.")
}
);
@@ -321,7 +322,7 @@ impl TableRouteManager {
) -> Result<TableId> {
let table_route = self
.storage
.get(logical_or_physical_table_id)
.get_inner(logical_or_physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: logical_or_physical_table_id,
@@ -335,7 +336,7 @@ impl TableRouteManager {
/// Returns the [TableRouteValue::Physical] recursively.
///
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
/// Returns a [TableRouteNotFound](error::Error::TableRouteNotFound) Error if:
/// - the physical table(`logical_or_physical_table_id`) does not exist
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
pub async fn get_physical_table_route(
@@ -528,6 +529,15 @@ impl TableRouteStorage {
/// Returns the [`TableRouteValue`].
pub async fn get(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
let mut table_route = self.get_inner(table_id).await?;
if let Some(table_route) = &mut table_route {
self.remap_route_address(table_route).await?;
};
Ok(table_route)
}
async fn get_inner(&self, table_id: TableId) -> Result<Option<TableRouteValue>> {
let key = TableRouteKey::new(table_id);
self.kv_backend
.get(&key.to_bytes())
@@ -537,7 +547,19 @@ impl TableRouteStorage {
}
/// Returns the [`TableRouteValue`] wrapped with [`DeserializedValueWithBytes`].
pub async fn get_raw(
pub async fn get_with_raw_bytes(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
let mut table_route = self.get_with_raw_bytes_inner(table_id).await?;
if let Some(table_route) = &mut table_route {
self.remap_route_address(table_route).await?;
};
Ok(table_route)
}
async fn get_with_raw_bytes_inner(
&self,
table_id: TableId,
) -> Result<Option<DeserializedValueWithBytes<TableRouteValue>>> {
@@ -554,27 +576,27 @@ impl TableRouteStorage {
/// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if:
/// - the physical table(`logical_or_physical_table_id`) does not exist
/// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist.
pub async fn get_raw_physical_table_route(
pub async fn get_physical_table_route_with_raw_bytes(
&self,
logical_or_physical_table_id: TableId,
) -> Result<(TableId, DeserializedValueWithBytes<TableRouteValue>)> {
let table_route =
self.get_raw(logical_or_physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: logical_or_physical_table_id,
})?;
let table_route = self
.get_with_raw_bytes(logical_or_physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: logical_or_physical_table_id,
})?;
match table_route.get_inner_ref() {
TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)),
TableRouteValue::Logical(x) => {
let physical_table_id = x.physical_table_id();
let physical_table_route =
self.get_raw(physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
let physical_table_route = self
.get_with_raw_bytes(physical_table_id)
.await?
.context(TableRouteNotFoundSnafu {
table_id: physical_table_id,
})?;
Ok((physical_table_id, physical_table_route))
}
}
@@ -582,6 +604,13 @@ impl TableRouteStorage {
/// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`.
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
let mut table_routes = self.batch_get_inner(table_ids).await?;
self.remap_routes_addresses(&mut table_routes).await?;
Ok(table_routes)
}
async fn batch_get_inner(&self, table_ids: &[TableId]) -> Result<Vec<Option<TableRouteValue>>> {
let keys = table_ids
.iter()
.map(|id| TableRouteKey::new(*id).to_bytes())
@@ -604,8 +633,107 @@ impl TableRouteStorage {
Ok(None)
}
})
.collect::<Result<Vec<_>>>()
.collect()
}
async fn remap_routes_addresses(
&self,
table_routes: &mut [Option<TableRouteValue>],
) -> Result<()> {
let keys = table_routes
.iter()
.flat_map(|table_route| {
table_route
.as_ref()
.map(extract_address_keys)
.unwrap_or_default()
})
.collect::<HashSet<_>>()
.into_iter()
.collect();
let node_addrs = self.get_node_addresses(keys).await?;
for table_route in table_routes.iter_mut().flatten() {
set_addresses(&node_addrs, table_route)?;
}
Ok(())
}
async fn remap_route_address(&self, table_route: &mut TableRouteValue) -> Result<()> {
let keys = extract_address_keys(table_route).into_iter().collect();
let node_addrs = self.get_node_addresses(keys).await?;
set_addresses(&node_addrs, table_route)?;
Ok(())
}
async fn get_node_addresses(
&self,
keys: Vec<Vec<u8>>,
) -> Result<HashMap<u64, NodeAddressValue>> {
if keys.is_empty() {
return Ok(HashMap::default());
}
self.kv_backend
.batch_get(BatchGetRequest { keys })
.await?
.kvs
.into_iter()
.map(|kv| {
let node_id = NodeAddressKey::from_bytes(&kv.key)?.node_id;
let node_addr = NodeAddressValue::try_from_raw_value(&kv.value)?;
Ok((node_id, node_addr))
})
.collect()
}
}
fn set_addresses(
node_addrs: &HashMap<u64, NodeAddressValue>,
table_route: &mut TableRouteValue,
) -> Result<()> {
let TableRouteValue::Physical(physical_table_route) = table_route else {
return Ok(());
};
for region_route in &mut physical_table_route.region_routes {
if let Some(leader) = &mut region_route.leader_peer {
if let Some(node_addr) = node_addrs.get(&leader.id) {
leader.addr = node_addr.peer.addr.clone();
}
}
for follower in &mut region_route.follower_peers {
if let Some(node_addr) = node_addrs.get(&follower.id) {
follower.addr = node_addr.peer.addr.clone();
}
}
}
Ok(())
}
fn extract_address_keys(table_route: &TableRouteValue) -> HashSet<Vec<u8>> {
let TableRouteValue::Physical(physical_table_route) = table_route else {
return HashSet::default();
};
physical_table_route
.region_routes
.iter()
.flat_map(|region_route| {
region_route
.follower_peers
.iter()
.map(|peer| NodeAddressKey::with_datanode(peer.id).to_bytes())
.chain(
region_route
.leader_peer
.as_ref()
.map(|leader| NodeAddressKey::with_datanode(leader.id).to_bytes()),
)
})
.collect()
}
#[cfg(test)]
@@ -614,7 +742,9 @@ mod tests {
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::TxnService;
use crate::kv_backend::{KvBackend, TxnService};
use crate::peer::Peer;
use crate::rpc::store::PutRequest;
#[test]
fn test_table_route_compatibility() {
@@ -643,18 +773,18 @@ mod tests {
}
#[tokio::test]
async fn test_table_route_storage_get_raw_empty() {
async fn test_table_route_storage_get_with_raw_bytes_empty() {
let kv = Arc::new(MemoryKvBackend::default());
let table_route_storage = TableRouteStorage::new(kv);
let table_route = table_route_storage.get_raw(1024).await.unwrap();
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
assert!(table_route.is_none());
}
#[tokio::test]
async fn test_table_route_storage_get_raw() {
async fn test_table_route_storage_get_with_raw_bytes() {
let kv = Arc::new(MemoryKvBackend::default());
let table_route_storage = TableRouteStorage::new(kv.clone());
let table_route = table_route_storage.get_raw(1024).await.unwrap();
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
assert!(table_route.is_none());
let table_route_manager = TableRouteManager::new(kv.clone());
let table_route_value = TableRouteValue::Logical(LogicalTableRouteValue {
@@ -667,7 +797,7 @@ mod tests {
.unwrap();
let r = kv.txn(txn).await.unwrap();
assert!(r.succeeded);
let table_route = table_route_storage.get_raw(1024).await.unwrap();
let table_route = table_route_storage.get_with_raw_bytes(1024).await.unwrap();
assert!(table_route.is_some());
let got = table_route.unwrap().inner;
assert_eq!(got, table_route_value);
@@ -718,4 +848,61 @@ mod tests {
assert!(results[2].is_none());
assert_eq!(results[3].as_ref().unwrap(), &routes[0].1);
}
#[tokio::test]
async fn remap_route_address_updates_addresses() {
let kv = Arc::new(MemoryKvBackend::default());
let table_route_storage = TableRouteStorage::new(kv.clone());
let mut table_route = TableRouteValue::Physical(PhysicalTableRouteValue {
region_routes: vec![RegionRoute {
leader_peer: Some(Peer {
id: 1,
..Default::default()
}),
follower_peers: vec![Peer {
id: 2,
..Default::default()
}],
..Default::default()
}],
version: 0,
});
kv.put(PutRequest {
key: NodeAddressKey::with_datanode(1).to_bytes(),
value: NodeAddressValue {
peer: Peer {
addr: "addr1".to_string(),
..Default::default()
},
}
.try_as_raw_value()
.unwrap(),
..Default::default()
})
.await
.unwrap();
table_route_storage
.remap_route_address(&mut table_route)
.await
.unwrap();
if let TableRouteValue::Physical(physical_table_route) = table_route {
assert_eq!(
physical_table_route.region_routes[0]
.leader_peer
.as_ref()
.unwrap()
.addr,
"addr1"
);
assert_eq!(
physical_table_route.region_routes[0].follower_peers[0].addr,
""
);
} else {
panic!("Expected PhysicalTableRouteValue");
}
}
}

View File

@@ -17,6 +17,7 @@ common-macro.workspace = true
common-telemetry.workspace = true
futures-util.workspace = true
humantime-serde.workspace = true
num_cpus.workspace = true
rskafka.workspace = true
rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }
rustls-native-certs = "0.7"

View File

@@ -41,6 +41,8 @@ pub struct RaftEngineConfig {
/// Duration for fsyncing log files.
#[serde(with = "humantime_serde")]
pub sync_period: Option<Duration>,
/// Parallelism during log recovery.
pub recovery_parallelism: usize,
}
impl Default for RaftEngineConfig {
@@ -55,6 +57,7 @@ impl Default for RaftEngineConfig {
enable_log_recycle: true,
prefill_log_files: false,
sync_period: None,
recovery_parallelism: num_cpus::get(),
}
}
}
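For reference, a hedged sketch of overriding the new recovery_parallelism knob in code rather than taking the num_cpus default. Only the fields shown above are relied on; the use path below is an assumption for illustration.
// Cap log-recovery parallelism at 4 threads instead of one per core.
// The module path of RaftEngineConfig is assumed here, not taken from the diff.
use common_wal::config::raft_engine::RaftEngineConfig;
fn limited_recovery_config() -> RaftEngineConfig {
    RaftEngineConfig {
        recovery_parallelism: num_cpus::get().min(4),
        ..Default::default()
    }
}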

View File

@@ -454,7 +454,7 @@ impl DatanodeBuilder {
"Creating raft-engine logstore with config: {:?} and storage path: {}",
config, &wal_dir
);
let logstore = RaftEngineLogStore::try_new(wal_dir, config.clone())
let logstore = RaftEngineLogStore::try_new(wal_dir, config)
.await
.map_err(Box::new)
.context(OpenLogStoreSnafu)?;

View File

@@ -192,7 +192,7 @@ impl HeartbeatTask {
let (outgoing_tx, mut outgoing_rx) = mpsc::channel(16);
let mailbox = Arc::new(HeartbeatMailbox::new(outgoing_tx));
let quit_signal = Arc::new(tokio::sync::Notify::new());
let quit_signal = Arc::new(Notify::new());
let mut tx = Self::create_streams(
&meta_client,
@@ -324,10 +324,12 @@ impl HeartbeatTask {
region_id: stat.region_id.as_u64(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
// TODO(jeremy): w/rcus
// TODO(weny): w/rcus
rcus: 0,
wcus: 0,
approximate_bytes: region_server.region_disk_usage(stat.region_id).unwrap_or(0),
// TODO(weny): add extensions
extensions: Default::default(),
})
.collect()
}

View File

@@ -366,10 +366,10 @@ impl RegionServerHandler for RegionServer {
// merge results by sum up affected rows and merge extensions.
let mut affected_rows = 0;
let mut extension = HashMap::new();
let mut extensions = HashMap::new();
for result in results {
affected_rows += result.affected_rows;
extension.extend(result.extension);
extensions.extend(result.extensions);
}
Ok(RegionResponseV1 {
@@ -380,7 +380,7 @@ impl RegionServerHandler for RegionServer {
}),
}),
affected_rows: affected_rows as _,
extension,
extensions,
})
}
}
@@ -708,7 +708,7 @@ impl RegionServerInner {
.await?;
Ok(RegionResponse {
affected_rows: result.affected_rows,
extension: result.extension,
extensions: result.extensions,
})
}
Err(err) => {

View File

@@ -15,6 +15,7 @@ workspace = true
arrow.workspace = true
arrow-array.workspace = true
arrow-schema.workspace = true
base64.workspace = true
common-base.workspace = true
common-decimal.workspace = true
common-error.workspace = true
@@ -23,6 +24,8 @@ common-telemetry.workspace = true
common-time.workspace = true
datafusion-common.workspace = true
enum_dispatch = "0.3"
greptime-proto.workspace = true
jsonb.workspace = true
num = "0.4"
num-traits = "0.2"
ordered-float = { version = "3.0", features = ["serde"] }

View File

@@ -33,8 +33,8 @@ use crate::types::{
BinaryType, BooleanType, DateTimeType, DateType, Decimal128Type, DictionaryType,
DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType, DurationSecondType,
DurationType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, ListType,
NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType,
ListType, NullType, StringType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
@@ -81,6 +81,9 @@ pub enum ConcreteDataType {
// Compound types:
List(ListType),
Dictionary(DictionaryType),
// JSON type:
Json(JsonType),
}
impl fmt::Display for ConcreteDataType {
@@ -128,6 +131,7 @@ impl fmt::Display for ConcreteDataType {
ConcreteDataType::Decimal128(v) => write!(f, "{}", v.name()),
ConcreteDataType::List(v) => write!(f, "{}", v.name()),
ConcreteDataType::Dictionary(v) => write!(f, "{}", v.name()),
ConcreteDataType::Json(v) => write!(f, "{}", v.name()),
}
}
}
@@ -162,6 +166,7 @@ impl ConcreteDataType {
| ConcreteDataType::Duration(_)
| ConcreteDataType::Decimal128(_)
| ConcreteDataType::Binary(_)
| ConcreteDataType::Json(_)
)
}
@@ -216,6 +221,10 @@ impl ConcreteDataType {
matches!(self, ConcreteDataType::Decimal128(_))
}
pub fn is_json(&self) -> bool {
matches!(self, ConcreteDataType::Json(_))
}
pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
@@ -404,7 +413,7 @@ macro_rules! impl_new_concrete_type_functions {
impl_new_concrete_type_functions!(
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
Binary, Date, DateTime, String
Binary, Date, DateTime, String, Json
);
impl ConcreteDataType {

View File

@@ -25,6 +25,7 @@ use datafusion_common::DFSchemaRef;
use snafu::{ensure, ResultExt};
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
use crate::prelude::DataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
TIME_INDEX_KEY,
@@ -34,6 +35,8 @@ pub use crate::schema::raw::RawSchema;
/// Key used to store version number of the schema in metadata.
pub const VERSION_KEY: &str = "greptime:version";
/// Key used to store actual column type in field metadata.
pub const TYPE_KEY: &str = "greptime:type";
/// A common schema, should be immutable.
#[derive(Clone, PartialEq, Eq)]
@@ -256,7 +259,13 @@ fn collect_fields(column_schemas: &[ColumnSchema]) -> Result<FieldsAndIndices> {
if column_schema.is_time_index() && timestamp_index.is_none() {
timestamp_index = Some(index);
}
let field = Field::try_from(column_schema)?;
let mut field = Field::try_from(column_schema)?;
// A Json column behaves the same as a binary column in Arrow, so we need to mark it
if column_schema.data_type.is_json() {
let metadata = HashMap::from([(TYPE_KEY.to_string(), column_schema.data_type.name())]);
field = field.with_metadata(metadata);
}
fields.push(field);
ensure!(
name_to_index

View File

@@ -22,6 +22,8 @@ use snafu::{ensure, ResultExt};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, Result};
use crate::schema::constraint::ColumnDefaultConstraint;
use crate::schema::TYPE_KEY;
use crate::types::JSON_TYPE_NAME;
use crate::value::Value;
use crate::vectors::VectorRef;
@@ -268,7 +270,14 @@ impl TryFrom<&Field> for ColumnSchema {
type Error = Error;
fn try_from(field: &Field) -> Result<ColumnSchema> {
let data_type = ConcreteDataType::try_from(field.data_type())?;
let mut data_type = ConcreteDataType::try_from(field.data_type())?;
// Override the data type if it is specified in the metadata.
if field.metadata().contains_key(TYPE_KEY) {
data_type = match field.metadata().get(TYPE_KEY).unwrap().as_str() {
JSON_TYPE_NAME => ConcreteDataType::json_datatype(),
_ => data_type,
};
}
let mut metadata = field.metadata().clone();
let default_constraint = match metadata.remove(DEFAULT_CONSTRAINT_KEY) {
Some(json) => {
@@ -528,4 +537,32 @@ mod tests {
assert_eq!(formatted_int8, "test_column_1 Int8 null");
assert_eq!(formatted_int32, "test_column_2 Int32 not null");
}
#[test]
fn test_from_field_to_column_schema() {
let field = Field::new("test", ArrowDataType::Int32, true);
let column_schema = ColumnSchema::try_from(&field).unwrap();
assert_eq!("test", column_schema.name);
assert_eq!(ConcreteDataType::int32_datatype(), column_schema.data_type);
assert!(column_schema.is_nullable);
assert!(!column_schema.is_time_index);
assert!(column_schema.default_constraint.is_none());
assert!(column_schema.metadata.is_empty());
let field = Field::new("test", ArrowDataType::Binary, true);
let field = field.with_metadata(Metadata::from([(
TYPE_KEY.to_string(),
ConcreteDataType::json_datatype().name(),
)]));
let column_schema = ColumnSchema::try_from(&field).unwrap();
assert_eq!("test", column_schema.name);
assert_eq!(ConcreteDataType::json_datatype(), column_schema.data_type);
assert!(column_schema.is_nullable);
assert!(!column_schema.is_time_index);
assert!(column_schema.default_constraint.is_none());
assert_eq!(
column_schema.metadata.get(TYPE_KEY).unwrap(),
&ConcreteDataType::json_datatype().name()
);
}
}

View File

@@ -68,6 +68,8 @@ pub enum LogicalTypeId {
List,
Dictionary,
Json,
}
impl LogicalTypeId {
@@ -126,6 +128,7 @@ impl LogicalTypeId {
LogicalTypeId::DurationMicrosecond => ConcreteDataType::duration_microsecond_datatype(),
LogicalTypeId::DurationNanosecond => ConcreteDataType::duration_nanosecond_datatype(),
LogicalTypeId::Decimal128 => ConcreteDataType::decimal128_default_datatype(),
LogicalTypeId::Json => ConcreteDataType::json_datatype(),
}
}
}

View File

@@ -21,6 +21,7 @@ mod decimal_type;
mod dictionary_type;
mod duration_type;
mod interval_type;
mod json_type;
mod list_type;
mod null_type;
mod primitive_type;
@@ -42,6 +43,7 @@ pub use duration_type::{
pub use interval_type::{
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType,
};
pub use json_type::{JsonType, JSON_TYPE_NAME};
pub use list_type::ListType;
pub use null_type::NullType;
pub use primitive_type::{

View File

@@ -0,0 +1,67 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use common_base::bytes::Bytes;
use serde::{Deserialize, Serialize};
use crate::data_type::{DataType, DataTypeRef};
use crate::scalars::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
use crate::vectors::{BinaryVectorBuilder, MutableVector};
pub const JSON_TYPE_NAME: &str = "Json";
/// JsonType is a data type for JSON data. It is stored as binary data in the jsonb format.
/// It reuses the existing binary value and vector implementations.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType;
impl JsonType {
pub fn arc() -> DataTypeRef {
Arc::new(Self)
}
}
impl DataType for JsonType {
fn name(&self) -> String {
JSON_TYPE_NAME.to_string()
}
fn logical_type_id(&self) -> LogicalTypeId {
LogicalTypeId::Json
}
fn default_value(&self) -> Value {
Bytes::default().into()
}
fn as_arrow_type(&self) -> ArrowDataType {
ArrowDataType::Binary
}
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
Box::new(BinaryVectorBuilder::with_capacity(capacity))
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Binary(v) => Some(Value::Binary(v)),
_ => None,
}
}
}
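To make the mapping concrete, here is a hedged sketch (not part of this diff) of building a one-element Json vector: the JSON text is encoded to jsonb bytes and pushed through the plain binary builder that create_mutable_vector returns. It assumes ConcreteDataType dispatches the DataType trait shown above and reuses the jsonb::parse_value helper that appears in the value tests later in this diff.
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::DataType;
use datatypes::value::Value;
fn build_json_vector() {
    let json_type = ConcreteDataType::json_datatype();
    // Json is represented as plain Binary at the Arrow level.
    assert_eq!(json_type.as_arrow_type(), arrow::datatypes::DataType::Binary);
    // Encode the JSON text to jsonb bytes, then push it as a binary value.
    let encoded = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
        .unwrap()
        .to_vec();
    let mut builder = json_type.create_mutable_vector(1);
    builder
        .try_push_value_ref(Value::Binary(encoded.into()).as_value_ref())
        .unwrap();
    assert_eq!(builder.to_vector().len(), 1);
}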

View File

@@ -18,6 +18,8 @@ use std::sync::Arc;
use arrow::datatypes::{DataType as ArrowDataType, Field};
use arrow_array::{Array, ListArray};
use base64::engine::general_purpose::URL_SAFE;
use base64::Engine as _;
use common_base::bytes::{Bytes, StringBytes};
use common_decimal::Decimal128;
use common_telemetry::error;
@@ -28,8 +30,10 @@ use common_time::time::Time;
use common_time::timestamp::{TimeUnit, Timestamp};
use common_time::{Duration, Interval, Timezone};
use datafusion_common::ScalarValue;
use greptime_proto::v1::value::ValueData;
pub use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize, Serializer};
use serde_json::{Number, Value as JsonValue};
use snafu::{ensure, ResultExt};
use crate::error::{self, ConvertArrowArrayToScalarsSnafu, Error, Result, TryFromValueSnafu};
@@ -338,7 +342,8 @@ impl Value {
let value_type_id = self.logical_type_id();
let output_type_id = output_type.logical_type_id();
ensure!(
output_type_id == value_type_id || self.is_null(),
// Json type leverages Value(Binary) for storage.
output_type_id == value_type_id || self.is_null() || (output_type_id == LogicalTypeId::Json && value_type_id == LogicalTypeId::Binary),
error::ToScalarValueSnafu {
reason: format!(
"expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}",
@@ -480,7 +485,7 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
ConcreteDataType::Binary(_) => ScalarValue::Binary(None),
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ScalarValue::Binary(None),
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
ConcreteDataType::DateTime(_) => ScalarValue::Date64(None),
@@ -1364,15 +1369,179 @@ impl<'a> ValueRef<'a> {
}
}
pub fn column_data_to_json(data: ValueData) -> JsonValue {
match data {
ValueData::BinaryValue(b) => JsonValue::String(URL_SAFE.encode(b)),
ValueData::BoolValue(b) => JsonValue::Bool(b),
ValueData::U8Value(i) => JsonValue::Number(i.into()),
ValueData::U16Value(i) => JsonValue::Number(i.into()),
ValueData::U32Value(i) => JsonValue::Number(i.into()),
ValueData::U64Value(i) => JsonValue::Number(i.into()),
ValueData::I8Value(i) => JsonValue::Number(i.into()),
ValueData::I16Value(i) => JsonValue::Number(i.into()),
ValueData::I32Value(i) => JsonValue::Number(i.into()),
ValueData::I64Value(i) => JsonValue::Number(i.into()),
ValueData::F32Value(f) => Number::from_f64(f as f64)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null),
ValueData::F64Value(f) => Number::from_f64(f)
.map(JsonValue::Number)
.unwrap_or(JsonValue::Null),
ValueData::StringValue(s) => JsonValue::String(s),
ValueData::DateValue(d) => JsonValue::String(Date::from(d).to_string()),
ValueData::DatetimeValue(d) => JsonValue::String(DateTime::from(d).to_string()),
ValueData::TimeSecondValue(d) => JsonValue::String(Time::new_second(d).to_iso8601_string()),
ValueData::TimeMillisecondValue(d) => {
JsonValue::String(Time::new_millisecond(d).to_iso8601_string())
}
ValueData::TimeMicrosecondValue(d) => {
JsonValue::String(Time::new_microsecond(d).to_iso8601_string())
}
ValueData::TimeNanosecondValue(d) => {
JsonValue::String(Time::new_nanosecond(d).to_iso8601_string())
}
ValueData::TimestampMicrosecondValue(d) => {
JsonValue::String(Timestamp::new_microsecond(d).to_iso8601_string())
}
ValueData::TimestampMillisecondValue(d) => {
JsonValue::String(Timestamp::new_millisecond(d).to_iso8601_string())
}
ValueData::TimestampNanosecondValue(d) => {
JsonValue::String(Timestamp::new_nanosecond(d).to_iso8601_string())
}
ValueData::TimestampSecondValue(d) => {
JsonValue::String(Timestamp::new_second(d).to_iso8601_string())
}
ValueData::IntervalYearMonthValue(d) => JsonValue::String(format!("interval year [{}]", d)),
ValueData::IntervalMonthDayNanoValue(d) => JsonValue::String(format!(
"interval month [{}][{}][{}]",
d.months, d.days, d.nanoseconds
)),
ValueData::IntervalDayTimeValue(d) => JsonValue::String(format!("interval day [{}]", d)),
ValueData::Decimal128Value(d) => {
JsonValue::String(format!("decimal128 [{}][{}]", d.hi, d.lo))
}
}
}
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType as ArrowDataType;
use common_time::timezone::set_default_timezone;
use greptime_proto::v1::{Decimal128 as ProtoDecimal128, IntervalMonthDayNano};
use num_traits::Float;
use super::*;
use crate::vectors::ListVectorBuilder;
#[test]
fn test_column_data_to_json() {
assert_eq!(
column_data_to_json(ValueData::BinaryValue(b"hello".to_vec())),
JsonValue::String("aGVsbG8=".to_string())
);
assert_eq!(
column_data_to_json(ValueData::BoolValue(true)),
JsonValue::Bool(true)
);
assert_eq!(
column_data_to_json(ValueData::U8Value(1)),
JsonValue::Number(1.into())
);
assert_eq!(
column_data_to_json(ValueData::U16Value(2)),
JsonValue::Number(2.into())
);
assert_eq!(
column_data_to_json(ValueData::U32Value(3)),
JsonValue::Number(3.into())
);
assert_eq!(
column_data_to_json(ValueData::U64Value(4)),
JsonValue::Number(4.into())
);
assert_eq!(
column_data_to_json(ValueData::I8Value(5)),
JsonValue::Number(5.into())
);
assert_eq!(
column_data_to_json(ValueData::I16Value(6)),
JsonValue::Number(6.into())
);
assert_eq!(
column_data_to_json(ValueData::I32Value(7)),
JsonValue::Number(7.into())
);
assert_eq!(
column_data_to_json(ValueData::I64Value(8)),
JsonValue::Number(8.into())
);
assert_eq!(
column_data_to_json(ValueData::F32Value(9.0)),
JsonValue::Number(Number::from_f64(9.0_f64).unwrap())
);
assert_eq!(
column_data_to_json(ValueData::F64Value(10.0)),
JsonValue::Number(Number::from_f64(10.0_f64).unwrap())
);
assert_eq!(
column_data_to_json(ValueData::StringValue("hello".to_string())),
JsonValue::String("hello".to_string())
);
assert_eq!(
column_data_to_json(ValueData::DateValue(123)),
JsonValue::String("1970-05-04".to_string())
);
assert_eq!(
column_data_to_json(ValueData::DatetimeValue(456)),
JsonValue::String("1970-01-01 00:00:00.456+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeSecondValue(789)),
JsonValue::String("00:13:09+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeMillisecondValue(789)),
JsonValue::String("00:00:00.789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimeMicrosecondValue(789)),
JsonValue::String("00:00:00.000789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampMillisecondValue(1234567890)),
JsonValue::String("1970-01-15 06:56:07.890+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampNanosecondValue(1234567890123456789)),
JsonValue::String("2009-02-13 23:31:30.123456789+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::TimestampSecondValue(1234567890)),
JsonValue::String("2009-02-13 23:31:30+0000".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalYearMonthValue(12)),
JsonValue::String("interval year [12]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalMonthDayNanoValue(IntervalMonthDayNano {
months: 1,
days: 2,
nanoseconds: 3,
})),
JsonValue::String("interval month [1][2][3]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::IntervalDayTimeValue(4)),
JsonValue::String("interval day [4]".to_string())
);
assert_eq!(
column_data_to_json(ValueData::Decimal128Value(ProtoDecimal128 { hi: 5, lo: 6 })),
JsonValue::String("decimal128 [5][6]".to_string())
);
}
#[test]
fn test_try_from_scalar_value() {
assert_eq!(
@@ -1826,6 +1995,10 @@ mod tests {
&ConcreteDataType::duration_nanosecond_datatype(),
&Value::Duration(Duration::new_nanosecond(1)),
);
check_type_and_value(
&ConcreteDataType::decimal128_datatype(38, 10),
&Value::Decimal128(Decimal128::new(1, 38, 10)),
);
}
#[test]
@@ -2010,6 +2183,14 @@ mod tests {
ValueRef::List(ListValueRef::Ref { val: &list }),
Value::List(list.clone()).as_value_ref()
);
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
.unwrap()
.to_vec();
assert_eq!(
ValueRef::Binary(jsonb_value.clone().as_slice()),
Value::Binary(jsonb_value.into()).as_value_ref()
);
}
#[test]
@@ -2223,6 +2404,16 @@ mod tests {
.try_to_scalar_value(&ConcreteDataType::binary_datatype())
.unwrap()
);
let jsonb_value = jsonb::parse_value(r#"{"key": "value"}"#.as_bytes())
.unwrap()
.to_vec();
assert_eq!(
ScalarValue::Binary(Some(jsonb_value.clone())),
Value::Binary(jsonb_value.into())
.try_to_scalar_value(&ConcreteDataType::json_datatype())
.unwrap()
);
}
#[test]
@@ -2355,6 +2546,12 @@ mod tests {
.try_to_scalar_value(&ConcreteDataType::duration_nanosecond_datatype())
.unwrap()
);
assert_eq!(
ScalarValue::Binary(None),
Value::Null
.try_to_scalar_value(&ConcreteDataType::json_datatype())
.unwrap()
);
}
#[test]

View File

@@ -80,7 +80,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
match lhs.data_type() {
Null(_) => true,
Boolean(_) => is_vector_eq!(BooleanVector, lhs, rhs),
Binary(_) => is_vector_eq!(BinaryVector, lhs, rhs),
Binary(_) | Json(_) => is_vector_eq!(BinaryVector, lhs, rhs),
String(_) => is_vector_eq!(StringVector, lhs, rhs),
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),

View File

@@ -50,11 +50,9 @@ use crate::adapter::util::column_schemas_to_proto;
use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{ExternalSnafu, InternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
use crate::expr::GlobalId;
use crate::metrics::{
METRIC_FLOW_INPUT_BUF_SIZE, METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_RUN_INTERVAL_MS,
};
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, TableNotFoundSnafu, UnexpectedSnafu};
use crate::expr::{Batch, GlobalId};
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::repr::{self, DiffRow, Row, BATCH_SIZE};
mod flownode_impl;
@@ -227,11 +225,24 @@ pub fn diff_row_to_request(rows: Vec<DiffRow>) -> Vec<DiffRequest> {
reqs
}
pub fn batches_to_rows_req(batches: Vec<Batch>) -> Result<Vec<DiffRequest>, Error> {
let mut reqs = Vec::new();
for batch in batches {
let mut rows = Vec::with_capacity(batch.row_count());
for i in 0..batch.row_count() {
let row = batch.get_row(i).context(EvalSnafu)?;
rows.push((Row::new(row), 0));
}
reqs.push(DiffRequest::Insert(rows));
}
Ok(reqs)
}
/// This impl block contains methods to send writeback requests to frontend
impl FlowWorkerManager {
/// Return the number of requests it made
pub async fn send_writeback_requests(&self) -> Result<usize, Error> {
let all_reqs = self.generate_writeback_request().await;
let all_reqs = self.generate_writeback_request().await?;
if all_reqs.is_empty() || all_reqs.iter().all(|v| v.1.is_empty()) {
return Ok(0);
}
@@ -242,122 +253,16 @@ impl FlowWorkerManager {
}
let (catalog, schema) = (table_name[0].clone(), table_name[1].clone());
let ctx = Arc::new(QueryContext::with(&catalog, &schema));
// TODO(discord9): instead of auto-building the table from the request schema, actually build the table
// before `create flow` so that pk, ts, etc. can be assigned.
let (primary_keys, schema, is_ts_placeholder) = if let Some(table_id) = self
.table_info_source
.get_table_id_from_name(&table_name)
.await?
{
let table_info = self
.table_info_source
.get_table_info_value(&table_id)
.await?
.unwrap();
let meta = table_info.table_info.meta;
let primary_keys = meta
.primary_key_indices
.into_iter()
.map(|i| meta.schema.column_schemas[i].name.clone())
.collect_vec();
let schema = meta.schema.column_schemas;
// check whether the last column is the auto-created timestamp column, which means the table was
// auto-created from the flow's plan type
let is_auto_create = {
let correct_name = schema
.last()
.map(|s| s.name == AUTO_CREATED_PLACEHOLDER_TS_COL)
.unwrap_or(false);
let correct_time_index = meta.schema.timestamp_index == Some(schema.len() - 1);
correct_name && correct_time_index
};
(primary_keys, schema, is_auto_create)
} else {
// TODO(discord9): consider removing the buggy auto create by schema
let node_ctx = self.node_context.read().await;
let gid: GlobalId = node_ctx
.table_repr
.get_by_name(&table_name)
.map(|x| x.1)
.unwrap();
let schema = node_ctx
.schema
.get(&gid)
.with_context(|| TableNotFoundSnafu {
name: format!("Table name = {:?}", table_name),
})?
.clone();
// TODO(discord9): use default key from schema
let primary_keys = schema
.typ()
.keys
.first()
.map(|v| {
v.column_indices
.iter()
.map(|i| {
schema
.get_name(*i)
.clone()
.unwrap_or_else(|| format!("col_{i}"))
})
.collect_vec()
})
.unwrap_or_default();
let update_at = ColumnSchema::new(
UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
let (is_ts_placeholder, proto_schema) =
self.try_fetch_or_create_table(&table_name).await?;
let schema_len = proto_schema.len();
let original_schema = schema
.typ()
.column_types
.clone()
.into_iter()
.enumerate()
.map(|(idx, typ)| {
let name = schema
.names
.get(idx)
.cloned()
.flatten()
.unwrap_or(format!("col_{}", idx));
let ret = ColumnSchema::new(name, typ.scalar_type, typ.nullable);
if schema.typ().time_index == Some(idx) {
ret.with_time_index(true)
} else {
ret
}
})
.collect_vec();
let mut with_auto_added_col = original_schema.clone();
with_auto_added_col.push(update_at);
// if no time index, add one as placeholder
let no_time_index = schema.typ().time_index.is_none();
if no_time_index {
let ts_col = ColumnSchema::new(
AUTO_CREATED_PLACEHOLDER_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true);
with_auto_added_col.push(ts_col);
}
(primary_keys, with_auto_added_col, no_time_index)
};
let schema_len = schema.len();
let proto_schema = column_schemas_to_proto(schema, &primary_keys)?;
debug!(
"Sending {} writeback requests to table {}, reqs={:?}",
trace!(
"Sending {} writeback requests to table {}, reqs total rows={}",
reqs.len(),
table_name.join("."),
reqs
reqs.iter().map(|r| r.len()).sum::<usize>()
);
let now = self.tick_manager.tick();
for req in reqs {
@@ -450,8 +355,12 @@ impl FlowWorkerManager {
}
/// Generate writeback requests for all sink tables
pub async fn generate_writeback_request(&self) -> BTreeMap<TableName, Vec<DiffRequest>> {
pub async fn generate_writeback_request(
&self,
) -> Result<BTreeMap<TableName, Vec<DiffRequest>>, Error> {
trace!("Start to generate writeback request");
let mut output = BTreeMap::new();
let mut total_row_count = 0;
for (name, sink_recv) in self
.node_context
.write()
@@ -460,14 +369,133 @@ impl FlowWorkerManager {
.iter_mut()
.map(|(n, (_s, r))| (n, r))
{
let mut rows = Vec::new();
while let Ok(row) = sink_recv.try_recv() {
rows.push(row);
let mut batches = Vec::new();
while let Ok(batch) = sink_recv.try_recv() {
total_row_count += batch.row_count();
batches.push(batch);
}
let reqs = diff_row_to_request(rows);
let reqs = batches_to_rows_req(batches)?;
output.insert(name.clone(), reqs);
}
output
trace!("Prepare writeback req: total row count={}", total_row_count);
Ok(output)
}
/// Fetch table info or create table from flow's schema if not exist
async fn try_fetch_or_create_table(
&self,
table_name: &TableName,
) -> Result<(bool, Vec<api::v1::ColumnSchema>), Error> {
// TODO(discord9): instead of auto-building the table from the request schema, actually build the table
// before `create flow` so that pk, ts, etc. can be assigned.
let (primary_keys, schema, is_ts_placeholder) = if let Some(table_id) = self
.table_info_source
.get_table_id_from_name(table_name)
.await?
{
let table_info = self
.table_info_source
.get_table_info_value(&table_id)
.await?
.unwrap();
let meta = table_info.table_info.meta;
let primary_keys = meta
.primary_key_indices
.into_iter()
.map(|i| meta.schema.column_schemas[i].name.clone())
.collect_vec();
let schema = meta.schema.column_schemas;
// check whether the last column is the auto-created timestamp column, which means the table was
// auto-created from the flow's plan type
let is_auto_create = {
let correct_name = schema
.last()
.map(|s| s.name == AUTO_CREATED_PLACEHOLDER_TS_COL)
.unwrap_or(false);
let correct_time_index = meta.schema.timestamp_index == Some(schema.len() - 1);
correct_name && correct_time_index
};
(primary_keys, schema, is_auto_create)
} else {
// TODO(discord9): consider removing the buggy auto create by schema
let node_ctx = self.node_context.read().await;
let gid: GlobalId = node_ctx
.table_repr
.get_by_name(table_name)
.map(|x| x.1)
.unwrap();
let schema = node_ctx
.schema
.get(&gid)
.with_context(|| TableNotFoundSnafu {
name: format!("Table name = {:?}", table_name),
})?
.clone();
// TODO(discord9): use default key from schema
let primary_keys = schema
.typ()
.keys
.first()
.map(|v| {
v.column_indices
.iter()
.map(|i| {
schema
.get_name(*i)
.clone()
.unwrap_or_else(|| format!("col_{i}"))
})
.collect_vec()
})
.unwrap_or_default();
let update_at = ColumnSchema::new(
UPDATE_AT_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
);
let original_schema = schema
.typ()
.column_types
.clone()
.into_iter()
.enumerate()
.map(|(idx, typ)| {
let name = schema
.names
.get(idx)
.cloned()
.flatten()
.unwrap_or(format!("col_{}", idx));
let ret = ColumnSchema::new(name, typ.scalar_type, typ.nullable);
if schema.typ().time_index == Some(idx) {
ret.with_time_index(true)
} else {
ret
}
})
.collect_vec();
let mut with_auto_added_col = original_schema.clone();
with_auto_added_col.push(update_at);
// if no time index, add one as placeholder
let no_time_index = schema.typ().time_index.is_none();
if no_time_index {
let ts_col = ColumnSchema::new(
AUTO_CREATED_PLACEHOLDER_TS_COL,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true);
with_auto_added_col.push(ts_col);
}
(primary_keys, with_auto_added_col, no_time_index)
};
let proto_schema = column_schemas_to_proto(schema, &primary_keys)?;
Ok((is_ts_placeholder, proto_schema))
}
}
@@ -498,10 +526,6 @@ impl FlowWorkerManager {
}
}
async fn get_buf_size(&self) -> usize {
self.node_context.read().await.get_send_buf_size().await
}
/// Trigger dataflow running, and then send writeback requests to the source sender
///
/// note that this method doesn't handle input mirror requests, as those should be handled by the grpc server
@@ -575,43 +599,37 @@ impl FlowWorkerManager {
/// TODO(discord9): add a flag for subgraphs that have received input since the last run
pub async fn run_available(&self, blocking: bool) -> Result<usize, Error> {
let mut row_cnt = 0;
loop {
let now = self.tick_manager.tick();
for worker in self.worker_handles.iter() {
// TODO(discord9): consider how to handle error in individual worker
if blocking {
worker.lock().await.run_available(now, blocking).await?;
} else if let Ok(worker) = worker.try_lock() {
worker.run_available(now, blocking).await?;
} else {
return Ok(row_cnt);
}
}
// check rows sent and rows remaining in the send buf
let (flush_res, _buf_len) = if blocking {
let ctx = self.node_context.read().await;
(ctx.flush_all_sender().await, ctx.get_send_buf_size().await)
let now = self.tick_manager.tick();
for worker in self.worker_handles.iter() {
// TODO(discord9): consider how to handle error in individual worker
if blocking {
worker.lock().await.run_available(now, blocking).await?;
} else if let Ok(worker) = worker.try_lock() {
worker.run_available(now, blocking).await?;
} else {
match self.node_context.try_read() {
Ok(ctx) => (ctx.flush_all_sender().await, ctx.get_send_buf_size().await),
Err(_) => return Ok(row_cnt),
}
};
match flush_res {
Ok(r) => {
common_telemetry::trace!("Flushed {} rows", r);
row_cnt += r;
// send buf is likely to be somewhere empty now, wait
if r < BATCH_SIZE / 2 {
break;
}
}
Err(err) => {
common_telemetry::error!("Flush send buf errors: {:?}", err);
break;
}
};
return Ok(row_cnt);
}
}
// check rows sent and rows remaining in the send buf
let flush_res = if blocking {
let ctx = self.node_context.read().await;
ctx.flush_all_sender().await
} else {
match self.node_context.try_read() {
Ok(ctx) => ctx.flush_all_sender().await,
Err(_) => return Ok(row_cnt),
}
};
match flush_res {
Ok(r) => {
common_telemetry::trace!("Total flushed {} rows", r);
row_cnt += r;
}
Err(err) => {
common_telemetry::error!("Flush send buf errors: {:?}", err);
}
};
Ok(row_cnt)
}
@@ -624,14 +642,14 @@ impl FlowWorkerManager {
) -> Result<(), Error> {
let rows_len = rows.len();
let table_id = region_id.table_id();
METRIC_FLOW_INPUT_BUF_SIZE.add(rows_len as _);
let _timer = METRIC_FLOW_INSERT_ELAPSED
.with_label_values(&[table_id.to_string().as_str()])
.start_timer();
self.node_context.read().await.send(table_id, rows).await?;
debug!(
trace!(
"Handling write request for table_id={} with {} rows",
table_id, rows_len
table_id,
rows_len
);
Ok(())
}
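A hedged usage sketch for the batches_to_rows_req helper introduced near the top of this file. Batch, Row, DiffRequest, Error and EvalSnafu are the flow-crate items shown in this diff; Value::from(1i64) and the snafu ResultExt import are assumptions made for brevity.
fn example_batches_to_rows_req() -> Result<(), Error> {
    // One single-column, single-row batch; the value is arbitrary.
    let batch = Batch::try_from_rows(vec![Row::new(vec![Value::from(1i64)])])
        .context(EvalSnafu)?;
    let reqs = batches_to_rows_req(vec![batch])?;
    // Every batch row becomes one (Row, ts) pair inside a DiffRequest::Insert.
    assert!(matches!(&reqs[0], DiffRequest::Insert(rows) if rows.len() == 1));
    Ok(())
}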

View File

@@ -23,7 +23,7 @@ use api::v1::region::InsertRequests;
use common_error::ext::BoxedError;
use common_meta::error::{ExternalSnafu, Result, UnexpectedSnafu};
use common_meta::node_manager::Flownode;
use common_telemetry::debug;
use common_telemetry::{debug, trace};
use itertools::Itertools;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
@@ -189,7 +189,7 @@ impl Flownode for FlowWorkerManager {
})
.try_collect()?;
if !fetch_order.iter().enumerate().all(|(i, &v)| i == v) {
debug!("Reordering columns: {:?}", fetch_order)
trace!("Reordering columns: {:?}", fetch_order)
}
fetch_order
};

View File

@@ -15,10 +15,10 @@
//! Node context, prone to change with every incoming requests
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use common_telemetry::debug;
use common_telemetry::trace;
use session::context::QueryContext;
use snafu::{OptionExt, ResultExt};
use table::metadata::TableId;
@@ -27,9 +27,9 @@ use tokio::sync::{broadcast, mpsc, RwLock};
use crate::adapter::{FlowId, TableName, TableSource};
use crate::error::{Error, EvalSnafu, TableNotFoundSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::GlobalId;
use crate::expr::{Batch, GlobalId};
use crate::metrics::METRIC_FLOW_INPUT_BUF_SIZE;
use crate::repr::{DiffRow, RelationDesc, BROADCAST_CAP, SEND_BUF_CAP};
use crate::repr::{DiffRow, RelationDesc, BATCH_SIZE, BROADCAST_CAP, SEND_BUF_CAP};
/// A context that holds the information of the dataflow
#[derive(Default, Debug)]
@@ -47,13 +47,8 @@ pub struct FlownodeContext {
///
/// and send it back to the client, since we are mocking the sink table as a client, we should use table name as the key
/// note that the sink receiver should only have one, and we are using broadcast as mpsc channel here
pub sink_receiver: BTreeMap<
TableName,
(
mpsc::UnboundedSender<DiffRow>,
mpsc::UnboundedReceiver<DiffRow>,
),
>,
pub sink_receiver:
BTreeMap<TableName, (mpsc::UnboundedSender<Batch>, mpsc::UnboundedReceiver<Batch>)>,
/// the schema of the table, query from metasrv or inferred from TypedPlan
pub schema: HashMap<GlobalId, RelationDesc>,
/// All the tables that have been registered in the worker
@@ -61,25 +56,27 @@ pub struct FlownodeContext {
pub query_context: Option<Arc<QueryContext>>,
}
/// a simple broadcast sender with backpressure and unbound capacity
/// a simple broadcast sender with backpressure, bounded capacity and blocking on send when send buf is full
/// note that it wouldn't evict old data, so it's possible to block forever if the receiver is slow
///
/// the receiver still uses a tokio broadcast channel, since only the sender side needs to know about
/// backpressure and adjust the dataflow running duration to avoid blocking
#[derive(Debug)]
pub struct SourceSender {
// TODO(discord9): make it all Vec<DiffRow>?
sender: broadcast::Sender<DiffRow>,
send_buf_tx: mpsc::Sender<Vec<DiffRow>>,
send_buf_rx: RwLock<mpsc::Receiver<Vec<DiffRow>>>,
sender: broadcast::Sender<Batch>,
send_buf_tx: mpsc::Sender<Batch>,
send_buf_rx: RwLock<mpsc::Receiver<Batch>>,
send_buf_row_cnt: AtomicUsize,
}
impl Default for SourceSender {
fn default() -> Self {
// TODO(discord9): the capacity is arbitrary, we can adjust it later, might also want to limit the max number of rows in send buf
let (send_buf_tx, send_buf_rx) = mpsc::channel(SEND_BUF_CAP);
Self {
// TODO(discord9): find a better way than increasing this to prevent lagging and hence missing input data
sender: broadcast::Sender::new(BROADCAST_CAP * 2),
sender: broadcast::Sender::new(SEND_BUF_CAP),
send_buf_tx,
send_buf_rx: RwLock::new(send_buf_rx),
send_buf_row_cnt: AtomicUsize::new(0),
@@ -90,7 +87,7 @@ impl Default for SourceSender {
impl SourceSender {
/// max number of iterations to try flush send buf
const MAX_ITERATIONS: usize = 16;
pub fn get_receiver(&self) -> broadcast::Receiver<DiffRow> {
pub fn get_receiver(&self) -> broadcast::Receiver<Batch> {
self.sender.subscribe()
}
@@ -106,30 +103,27 @@ impl SourceSender {
break;
}
// TODO(discord9): send rows instead so it's just moving a pointer
if let Some(rows) = send_buf.recv().await {
let len = rows.len();
self.send_buf_row_cnt
.fetch_sub(len, std::sync::atomic::Ordering::SeqCst);
for row in rows {
self.sender
.send(row)
.map_err(|err| {
InternalSnafu {
reason: format!("Failed to send row, error = {:?}", err),
}
.build()
})
.with_context(|_| EvalSnafu)?;
row_cnt += 1;
}
if let Some(batch) = send_buf.recv().await {
let len = batch.row_count();
self.send_buf_row_cnt.fetch_sub(len, Ordering::SeqCst);
row_cnt += len;
self.sender
.send(batch)
.map_err(|err| {
InternalSnafu {
reason: format!("Failed to send row, error = {:?}", err),
}
.build()
})
.with_context(|_| EvalSnafu)?;
}
}
if row_cnt > 0 {
debug!("Send {} rows", row_cnt);
trace!("Source Flushed {} rows", row_cnt);
METRIC_FLOW_INPUT_BUF_SIZE.sub(row_cnt as _);
debug!(
"Remaining Send buf.len() = {}",
self.send_buf_rx.read().await.len()
trace!(
"Remaining Source Send buf.len() = {}",
METRIC_FLOW_INPUT_BUF_SIZE.get()
);
}
@@ -138,12 +132,23 @@ impl SourceSender {
/// return the number of rows it actually sent (including what's in the buffer)
pub async fn send_rows(&self, rows: Vec<DiffRow>) -> Result<usize, Error> {
self.send_buf_tx.send(rows).await.map_err(|e| {
METRIC_FLOW_INPUT_BUF_SIZE.add(rows.len() as _);
while self.send_buf_row_cnt.load(Ordering::SeqCst) >= BATCH_SIZE * 4 {
tokio::task::yield_now().await;
}
// row count metrics is approx so relaxed order is ok
self.send_buf_row_cnt
.fetch_add(rows.len(), Ordering::SeqCst);
let batch = Batch::try_from_rows(rows.into_iter().map(|(row, _, _)| row).collect())
.context(EvalSnafu)?;
common_telemetry::trace!("Send one batch to worker with {} rows", batch.row_count());
self.send_buf_tx.send(batch).await.map_err(|e| {
crate::error::InternalSnafu {
reason: format!("Failed to send row, error = {:?}", e),
}
.build()
})?;
Ok(0)
}
}
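The new send path applies simple backpressure before enqueueing. Below is a standalone, hedged sketch of the same pattern with plain std/tokio types; the names are illustrative, not Greptime APIs.
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
const MAX_BUFFERED_ROWS: usize = 4096; // stand-in for BATCH_SIZE * 4
async fn enqueue_with_backpressure(in_flight: Arc<AtomicUsize>, rows: usize) {
    // Spin-yield until the buffer drains enough; the flush loop decrements the
    // counter with fetch_sub once it forwards a batch, mirroring the code above.
    while in_flight.load(Ordering::SeqCst) >= MAX_BUFFERED_ROWS {
        tokio::task::yield_now().await;
    }
    in_flight.fetch_add(rows, Ordering::SeqCst);
    // ...send the batch over the bounded channel here...
}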
@@ -159,8 +164,6 @@ impl FlownodeContext {
.with_context(|| TableNotFoundSnafu {
name: table_id.to_string(),
})?;
debug!("FlownodeContext::send: trying to send {} rows", rows.len());
sender.send_rows(rows).await
}
@@ -174,16 +177,6 @@ impl FlownodeContext {
}
Ok(sum)
}
/// Return the sum number of rows in all send buf
/// TODO(discord9): remove this since we can't get correct row cnt anyway
pub async fn get_send_buf_size(&self) -> usize {
let mut sum = 0;
for sender in self.source_sender.values() {
sum += sender.send_buf_rx.read().await.len();
}
sum
}
}
impl FlownodeContext {
@@ -230,7 +223,7 @@ impl FlownodeContext {
pub fn add_sink_receiver(&mut self, table_name: TableName) {
self.sink_receiver
.entry(table_name)
.or_insert_with(mpsc::unbounded_channel::<DiffRow>);
.or_insert_with(mpsc::unbounded_channel);
}
pub fn get_source_by_global_id(&self, id: &GlobalId) -> Result<&SourceSender, Error> {
@@ -254,7 +247,7 @@ impl FlownodeContext {
pub fn get_sink_by_global_id(
&self,
id: &GlobalId,
) -> Result<mpsc::UnboundedSender<DiffRow>, Error> {
) -> Result<mpsc::UnboundedSender<Batch>, Error> {
let table_name = self
.table_repr
.get_by_global_id(id)

View File

@@ -27,7 +27,7 @@ use tokio::sync::{broadcast, mpsc, oneshot, Mutex};
use crate::adapter::FlowId;
use crate::compute::{Context, DataflowState, ErrCollector};
use crate::error::{Error, FlowAlreadyExistSnafu, InternalSnafu, UnexpectedSnafu};
use crate::expr::GlobalId;
use crate::expr::{Batch, GlobalId};
use crate::plan::TypedPlan;
use crate::repr::{self, DiffRow};
@@ -89,6 +89,8 @@ impl<'subgraph> ActiveDataflowState<'subgraph> {
err_collector: self.err_collector.clone(),
input_collection: Default::default(),
local_scope: Default::default(),
input_collection_batch: Default::default(),
local_scope_batch: Default::default(),
}
}
@@ -156,13 +158,13 @@ impl WorkerHandle {
///
/// the returned error is unrecoverable, and the worker should be shutdown/rebooted
pub async fn run_available(&self, now: repr::Timestamp, blocking: bool) -> Result<(), Error> {
common_telemetry::debug!("Running available with blocking={}", blocking);
common_telemetry::trace!("Running available with blocking={}", blocking);
if blocking {
let resp = self
.itc_client
.call_with_resp(Request::RunAvail { now, blocking })
.await?;
common_telemetry::debug!("Running available with response={:?}", resp);
common_telemetry::trace!("Running available with response={:?}", resp);
Ok(())
} else {
self.itc_client
@@ -225,9 +227,9 @@ impl<'s> Worker<'s> {
flow_id: FlowId,
plan: TypedPlan,
sink_id: GlobalId,
sink_sender: mpsc::UnboundedSender<DiffRow>,
sink_sender: mpsc::UnboundedSender<Batch>,
source_ids: &[GlobalId],
src_recvs: Vec<broadcast::Receiver<DiffRow>>,
src_recvs: Vec<broadcast::Receiver<Batch>>,
// TODO(discord9): set expire duration for all arrangement and compare to sys timestamp instead
expire_after: Option<repr::Duration>,
create_if_not_exists: bool,
@@ -249,12 +251,12 @@ impl<'s> Worker<'s> {
{
let mut ctx = cur_task_state.new_ctx(sink_id);
for (source_id, src_recv) in source_ids.iter().zip(src_recvs) {
let bundle = ctx.render_source(src_recv)?;
ctx.insert_global(*source_id, bundle);
let bundle = ctx.render_source_batch(src_recv)?;
ctx.insert_global_batch(*source_id, bundle);
}
let rendered = ctx.render_plan(plan)?;
ctx.render_unbounded_sink(rendered, sink_sender);
let rendered = ctx.render_plan_batch(plan)?;
ctx.render_unbounded_sink_batch(rendered, sink_sender);
}
self.task_states.insert(flow_id, cur_task_state);
Ok(Some(flow_id))
@@ -370,9 +372,9 @@ pub enum Request {
flow_id: FlowId,
plan: TypedPlan,
sink_id: GlobalId,
sink_sender: mpsc::UnboundedSender<DiffRow>,
sink_sender: mpsc::UnboundedSender<Batch>,
source_ids: Vec<GlobalId>,
src_recvs: Vec<broadcast::Receiver<DiffRow>>,
src_recvs: Vec<broadcast::Receiver<Batch>>,
expire_after: Option<repr::Duration>,
create_if_not_exists: bool,
err_collector: ErrCollector,
@@ -472,7 +474,7 @@ mod test {
use super::*;
use crate::expr::Id;
use crate::plan::Plan;
use crate::repr::{RelationType, Row};
use crate::repr::RelationType;
#[test]
fn drop_handle() {
@@ -497,8 +499,8 @@ mod test {
});
let handle = rx.await.unwrap();
let src_ids = vec![GlobalId::User(1)];
let (tx, rx) = broadcast::channel::<DiffRow>(1024);
let (sink_tx, mut sink_rx) = mpsc::unbounded_channel::<DiffRow>();
let (tx, rx) = broadcast::channel::<Batch>(1024);
let (sink_tx, mut sink_rx) = mpsc::unbounded_channel::<Batch>();
let (flow_id, plan) = (
1,
TypedPlan {
@@ -523,9 +525,9 @@ mod test {
handle.create_flow(create_reqs).await.unwrap(),
Some(flow_id)
);
tx.send((Row::empty(), 0, 0)).unwrap();
tx.send(Batch::empty()).unwrap();
handle.run_available(0, true).await.unwrap();
assert_eq!(sink_rx.recv().await.unwrap().0, Row::empty());
assert_eq!(sink_rx.recv().await.unwrap(), Batch::empty());
drop(handle);
worker_thread_handle.join().unwrap();
}

View File

@@ -49,6 +49,14 @@ pub struct Context<'referred, 'df> {
///
/// TODO(discord9): consider whether to use Vec<(LocalId, CollectionBundle)> instead
pub local_scope: Vec<BTreeMap<LocalId, CollectionBundle>>,
/// a list of all collections being used in the operator
///
/// TODO(discord9): remove extra clone by counting usage and remove it on last usage?
pub input_collection_batch: BTreeMap<GlobalId, CollectionBundle<Batch>>,
/// used by `Get`/`Let` Plan for getting/setting local variables
///
/// TODO(discord9): consider whether to use Vec<(LocalId, CollectionBundle)> instead
pub local_scope_batch: Vec<BTreeMap<LocalId, CollectionBundle<Batch>>>,
// Collect all errors in this operator's evaluation
pub err_collector: ErrCollector,
}
@@ -67,6 +75,19 @@ impl<'referred, 'df> Drop for Context<'referred, 'df> {
bundle.collection.into_inner().drop(self.df);
drop(bundle.arranged);
}
for bundle in std::mem::take(&mut self.input_collection_batch)
.into_values()
.chain(
std::mem::take(&mut self.local_scope_batch)
.into_iter()
.flat_map(|v| v.into_iter())
.map(|(_k, v)| v),
)
{
bundle.collection.into_inner().drop(self.df);
drop(bundle.arranged);
}
// The automatically generated "drop glue" which recursively calls the destructors of all the fields (including the now empty `input_collection`)
}
}
@@ -84,6 +105,19 @@ impl<'referred, 'df> Context<'referred, 'df> {
self.local_scope.push(first);
}
}
pub fn insert_global_batch(&mut self, id: GlobalId, collection: CollectionBundle<Batch>) {
self.input_collection_batch.insert(id, collection);
}
pub fn insert_local_batch(&mut self, id: LocalId, collection: CollectionBundle<Batch>) {
if let Some(last) = self.local_scope_batch.last_mut() {
last.insert(id, collection);
} else {
let first = BTreeMap::from([(id, collection)]);
self.local_scope_batch.push(first);
}
}
}
impl<'referred, 'df> Context<'referred, 'df> {
@@ -91,14 +125,8 @@ impl<'referred, 'df> Context<'referred, 'df> {
pub fn render_plan_batch(&mut self, plan: TypedPlan) -> Result<CollectionBundle<Batch>, Error> {
match plan.plan {
Plan::Constant { rows } => Ok(self.render_constant_batch(rows)),
Plan::Get { .. } => NotImplementedSnafu {
reason: "Get is still WIP in batchmode",
}
.fail(),
Plan::Let { .. } => NotImplementedSnafu {
reason: "Let is still WIP in batchmode",
}
.fail(),
Plan::Get { id } => self.get_batch_by_id(id),
Plan::Let { id, value, body } => self.eval_batch_let(id, value, body),
Plan::Mfp { input, mfp } => self.render_mfp_batch(input, mfp),
Plan::Reduce {
input,
@@ -225,6 +253,32 @@ impl<'referred, 'df> Context<'referred, 'df> {
CollectionBundle::from_collection(Collection::from_port(recv_port))
}
pub fn get_batch_by_id(&mut self, id: expr::Id) -> Result<CollectionBundle<Batch>, Error> {
let ret = match id {
expr::Id::Local(local) => {
let bundle = self
.local_scope_batch
.iter()
.rev()
.find_map(|scope| scope.get(&local))
.with_context(|| InvalidQuerySnafu {
reason: format!("Local variable {:?} not found", local),
})?;
bundle.clone(self.df)
}
expr::Id::Global(id) => {
let bundle =
self.input_collection_batch
.get(&id)
.with_context(|| InvalidQuerySnafu {
reason: format!("Collection {:?} not found", id),
})?;
bundle.clone(self.df)
}
};
Ok(ret)
}
pub fn get_by_id(&mut self, id: expr::Id) -> Result<CollectionBundle, Error> {
let ret = match id {
expr::Id::Local(local) => {
@@ -251,6 +305,21 @@ impl<'referred, 'df> Context<'referred, 'df> {
Ok(ret)
}
/// Eval `Let` operator, useful for assigning a value to a local variable
pub fn eval_batch_let(
&mut self,
id: LocalId,
value: Box<TypedPlan>,
body: Box<TypedPlan>,
) -> Result<CollectionBundle<Batch>, Error> {
let value = self.render_plan_batch(*value)?;
self.local_scope_batch.push(Default::default());
self.insert_local_batch(id, value);
let ret = self.render_plan_batch(*body)?;
Ok(ret)
}
/// Eval `Let` operator, useful for assigning a value to a local variable
pub fn eval_let(
&mut self,
@@ -268,11 +337,11 @@ impl<'referred, 'df> Context<'referred, 'df> {
}
/// The Common argument for all `Subgraph` in the render process
struct SubgraphArg<'a> {
struct SubgraphArg<'a, T = Toff> {
now: repr::Timestamp,
err_collector: &'a ErrCollector,
scheduler: &'a Scheduler,
send: &'a PortCtx<SEND, Toff>,
send: &'a PortCtx<SEND, T>,
}
#[cfg(test)]
@@ -345,6 +414,8 @@ mod test {
compute_state: state,
input_collection: BTreeMap::new(),
local_scope: Default::default(),
input_collection_batch: BTreeMap::new(),
local_scope_batch: Default::default(),
err_collector,
}
}
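To illustrate the Get/Let scoping that get_batch_by_id and eval_batch_let implement above, here is a toy, self-contained sketch of the innermost-scope-wins lookup over a stack of maps (plain std types, not flow-crate code):
use std::collections::BTreeMap;
fn lookup<'a>(scopes: &'a [BTreeMap<u64, &'a str>], id: u64) -> Option<&'a str> {
    // Search from the innermost scope outwards, as get_batch_by_id does.
    scopes.iter().rev().find_map(|scope| scope.get(&id).copied())
}
fn main() {
    let outer = BTreeMap::from([(0u64, "outer binding")]);
    // Let pushes a fresh scope before rendering its body, so the inner
    // binding shadows the outer one for the same id.
    let inner = BTreeMap::from([(0u64, "inner binding")]);
    assert_eq!(lookup(&[outer, inner], 0), Some("inner binding"));
}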

View File

@@ -12,14 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::ops::Range;
use std::sync::Arc;
use common_telemetry::trace;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::DataType;
use datatypes::value::{ListValue, Value};
use datatypes::vectors::NullVector;
use datatypes::vectors::{BooleanVector, NullVector};
use hydroflow::scheduled::graph_ext::GraphExt;
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
@@ -27,8 +28,8 @@ use snafu::{ensure, OptionExt, ResultExt};
use crate::compute::render::{Context, SubgraphArg};
use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector, Toff};
use crate::error::{Error, NotImplementedSnafu, PlanSnafu};
use crate::expr::error::{DataAlreadyExpiredSnafu, DataTypeSnafu, InternalSnafu};
use crate::expr::{Batch, EvalError, ScalarExpr};
use crate::expr::error::{ArrowSnafu, DataAlreadyExpiredSnafu, DataTypeSnafu, InternalSnafu};
use crate::expr::{Accum, Accumulator, Batch, EvalError, ScalarExpr, VectorDiff};
use crate::plan::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan, TypedPlan};
use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row};
use crate::utils::{ArrangeHandler, ArrangeReader, ArrangeWriter, KeyExpiryManager};
@@ -93,152 +94,39 @@ impl<'referred, 'df> Context<'referred, 'df> {
// TODO(discord9): better way to schedule future run
let scheduler = self.compute_state.get_scheduler();
let scheduler_inner = scheduler.clone();
let (out_send_port, out_recv_port) =
self.df.make_edge::<_, Toff<Batch>>(Self::REDUCE_BATCH);
let subgraph =
self.df.add_subgraph_in_out(
Self::REDUCE_BATCH,
input.collection.into_inner(),
out_send_port,
move |_ctx, recv, send| {
let now = *(now.borrow());
let arrange = arrange_handler_inner.clone();
// mfp only need to passively receive updates from recvs
let src_data = recv
.take_inner()
.into_iter()
.flat_map(|v| v.into_iter())
.collect_vec();
let subgraph = self.df.add_subgraph_in_out(
Self::REDUCE_BATCH,
input.collection.into_inner(),
out_send_port,
move |_ctx, recv, send| {
let now = *(now.borrow());
let arrange = arrange_handler_inner.clone();
// mfp only need to passively receive updates from recvs
let src_data = recv
.take_inner()
.into_iter()
.flat_map(|v| v.into_iter())
.collect_vec();
let mut key_to_many_vals = BTreeMap::<Row, Batch>::new();
for batch in src_data {
err_collector.run(|| {
let (key_batch, val_batch) =
batch_split_by_key_val(&batch, &key_val_plan, &err_collector);
ensure!(
key_batch.row_count() == val_batch.row_count(),
InternalSnafu {
reason: format!(
"Key and val batch should have the same row count, found {} and {}",
key_batch.row_count(),
val_batch.row_count()
)
}
);
for row_idx in 0..key_batch.row_count() {
let key_row = key_batch.get_row(row_idx).unwrap();
let val_row = val_batch.slice(row_idx, 1)?;
let val_batch =
key_to_many_vals.entry(Row::new(key_row)).or_default();
val_batch.append_batch(val_row)?;
}
Ok(())
});
}
// write lock the arrange for the rest of the function body
// to prevent wired race condition
let mut arrange = arrange.write();
let mut all_arrange_updates = Vec::with_capacity(key_to_many_vals.len());
let mut all_output_rows = Vec::with_capacity(key_to_many_vals.len());
for (key, val_batch) in key_to_many_vals {
err_collector.run(|| -> Result<(), _> {
let (accums, _, _) = arrange.get(now, &key).unwrap_or_default();
let accum_list = from_accum_values_to_live_accums(
accums.unpack(),
accum_plan.simple_aggrs.len(),
)?;
let mut accum_output = AccumOutput::new();
for AggrWithIndex {
expr,
input_idx,
output_idx,
} in accum_plan.simple_aggrs.iter()
{
let cur_old_accum = accum_list.get(*output_idx).cloned().unwrap_or_default();
// if batch is empty, input null instead
let cur_input = val_batch.batch().get(*input_idx).cloned().unwrap_or_else(||Arc::new(NullVector::new(val_batch.row_count())));
let (output, new_accum) =
expr.func.eval_batch(cur_old_accum, cur_input, None)?;
accum_output.insert_accum(*output_idx, new_accum);
accum_output.insert_output(*output_idx, output);
}
let (new_accums, res_val_row) = accum_output.into_accum_output()?;
let arrange_update = ((key.clone(), Row::new(new_accums)), now, 1);
all_arrange_updates.push(arrange_update);
let mut key_val = key;
key_val.extend(res_val_row);
all_output_rows.push((key_val, now, 1));
Ok(())
});
}
err_collector.run(|| {
arrange.apply_updates(now, all_arrange_updates)?;
arrange.compact_to(now)
});
// this output part is not supposed to be resource intensive
// (because for every batch there usually wouldn't be that many output rows),
// so we can do some costly operations here
let output_types = all_output_rows.first().map(|(row, _, _)| {
row.iter()
.map(|v| v.data_type())
.collect::<Vec<ConcreteDataType>>()
});
if let Some(output_types) = output_types {
err_collector.run(|| {
let column_cnt = output_types.len();
let row_cnt = all_output_rows.len();
let mut output_builder = output_types
.into_iter()
.map(|t| t.create_mutable_vector(row_cnt))
.collect_vec();
for (row, _, _) in all_output_rows {
for (i, v) in row.into_iter().enumerate() {
output_builder
.get_mut(i)
.context(InternalSnafu{
reason: format!(
"Output builder should have the same length as the row, expected at most {} but got {}",
column_cnt-1,
i
)
})?
.try_push_value_ref(v.as_value_ref())
.context(DataTypeSnafu {
msg: "Failed to push value",
})?;
}
}
let output_columns = output_builder
.into_iter()
.map(|mut b| b.to_vector())
.collect_vec();
let output_batch = Batch::try_new(output_columns, row_cnt)?;
send.give(vec![output_batch]);
Ok(())
});
}
},
);
reduce_batch_subgraph(
&arrange,
src_data,
&key_val_plan,
&accum_plan,
SubgraphArg {
now,
err_collector: &err_collector,
scheduler: &scheduler_inner,
send,
},
)
},
);
scheduler.set_cur_subgraph(subgraph);
@@ -461,6 +349,245 @@ fn split_rows_to_key_val(
)
}
fn reduce_batch_subgraph(
arrange: &ArrangeHandler,
src_data: impl IntoIterator<Item = Batch>,
key_val_plan: &KeyValPlan,
accum_plan: &AccumulablePlan,
SubgraphArg {
now,
err_collector,
scheduler: _,
send,
}: SubgraphArg<Toff<Batch>>,
) {
let mut key_to_many_vals = BTreeMap::<Row, Vec<Batch>>::new();
let mut input_row_count = 0;
let mut input_batch_count = 0;
for batch in src_data {
input_batch_count += 1;
input_row_count += batch.row_count();
err_collector.run(|| {
let (key_batch, val_batch) =
batch_split_by_key_val(&batch, key_val_plan, err_collector);
ensure!(
key_batch.row_count() == val_batch.row_count(),
InternalSnafu {
reason: format!(
"Key and val batch should have the same row count, found {} and {}",
key_batch.row_count(),
val_batch.row_count()
)
}
);
let mut distinct_keys = BTreeSet::new();
for row_idx in 0..key_batch.row_count() {
let key_row = key_batch.get_row(row_idx)?;
let key_row = Row::new(key_row);
if distinct_keys.contains(&key_row) {
continue;
} else {
distinct_keys.insert(key_row.clone());
}
}
// TODO: reduce the number of `eq` comparisons to a minimum by progressively slicing the key/val batch
for key_row in distinct_keys {
let key_scalar_value = {
let mut key_scalar_value = Vec::with_capacity(key_row.len());
for key in key_row.iter() {
let v =
key.try_to_scalar_value(&key.data_type())
.context(DataTypeSnafu {
msg: "can't convert key values to datafusion value",
})?;
let arrow_value =
v.to_scalar().context(crate::expr::error::DatafusionSnafu {
context: "can't convert key values to arrow value",
})?;
key_scalar_value.push(arrow_value);
}
key_scalar_value
};
// first compute equal from separate columns
let eq_results = key_scalar_value
.into_iter()
.zip(key_batch.batch().iter())
.map(|(key, col)| {
// TODO(discord9): this takes half of the CPU! And this is a redundant amount of `eq` calls!
arrow::compute::kernels::cmp::eq(&key, &col.to_arrow_array().as_ref() as _)
})
.try_collect::<_, Vec<_>, _>()
.context(ArrowSnafu {
context: "Failed to compare key values",
})?;
// then combine all equal results to finally found equal key rows
let opt_eq_mask = eq_results
.into_iter()
.fold(None, |acc, v| match acc {
Some(Ok(acc)) => Some(arrow::compute::kernels::boolean::and(&acc, &v)),
Some(Err(_)) => acc,
None => Some(Ok(v)),
})
.transpose()
.context(ArrowSnafu {
context: "Failed to combine key comparison results",
})?;
let key_eq_mask = if let Some(eq_mask) = opt_eq_mask {
BooleanVector::from(eq_mask)
} else {
// if None, the key_batch has zero columns, which means the key is empty,
// so we just return an all-true mask,
// i.e. take all values
BooleanVector::from(vec![true; key_batch.row_count()])
};
// TODO: both slice and mutate remaining batch
let cur_val_batch = val_batch.filter(&key_eq_mask)?;
key_to_many_vals
.entry(key_row)
.or_default()
.push(cur_val_batch);
}
Ok(())
});
}
trace!(
"Reduce take {} batches, {} rows",
input_batch_count,
input_row_count
);
// write lock the arrange for the rest of the function body
// to prevent a weird race condition
let mut arrange = arrange.write();
let mut all_arrange_updates = Vec::with_capacity(key_to_many_vals.len());
let mut all_output_dict = BTreeMap::new();
for (key, val_batches) in key_to_many_vals {
err_collector.run(|| -> Result<(), _> {
let (accums, _, _) = arrange.get(now, &key).unwrap_or_default();
let accum_list =
from_accum_values_to_live_accums(accums.unpack(), accum_plan.simple_aggrs.len())?;
let mut accum_output = AccumOutput::new();
for AggrWithIndex {
expr,
input_idx,
output_idx,
} in accum_plan.simple_aggrs.iter()
{
let cur_accum_value = accum_list.get(*output_idx).cloned().unwrap_or_default();
let mut cur_accum = if cur_accum_value.is_empty() {
Accum::new_accum(&expr.func.clone())?
} else {
Accum::try_into_accum(&expr.func, cur_accum_value)?
};
for val_batch in val_batches.iter() {
// if batch is empty, input null instead
let cur_input = val_batch
.batch()
.get(*input_idx)
.cloned()
.unwrap_or_else(|| Arc::new(NullVector::new(val_batch.row_count())));
let len = cur_input.len();
cur_accum.update_batch(&expr.func, VectorDiff::from(cur_input))?;
trace!("Reduce accum after take {} rows: {:?}", len, cur_accum);
}
let final_output = cur_accum.eval(&expr.func)?;
trace!("Reduce accum final output: {:?}", final_output);
accum_output.insert_output(*output_idx, final_output);
let cur_accum_value = cur_accum.into_state();
accum_output.insert_accum(*output_idx, cur_accum_value);
}
let (new_accums, res_val_row) = accum_output.into_accum_output()?;
let arrange_update = ((key.clone(), Row::new(new_accums)), now, 1);
all_arrange_updates.push(arrange_update);
all_output_dict.insert(key, Row::from(res_val_row));
Ok(())
});
}
err_collector.run(|| {
arrange.apply_updates(now, all_arrange_updates)?;
arrange.compact_to(now)
});
// release the lock
drop(arrange);
// this output part is not supposed to be resource intensive
// (each batch usually doesn't produce that many output rows),
// so we can afford some costly operations here
let output_types = all_output_dict.first_entry().map(|entry| {
entry
.key()
.iter()
.chain(entry.get().iter())
.map(|v| v.data_type())
.collect::<Vec<ConcreteDataType>>()
});
if let Some(output_types) = output_types {
err_collector.run(|| {
let column_cnt = output_types.len();
let row_cnt = all_output_dict.len();
let mut output_builder = output_types
.into_iter()
.map(|t| t.create_mutable_vector(row_cnt))
.collect_vec();
for (key, val) in all_output_dict {
for (i, v) in key.into_iter().chain(val.into_iter()).enumerate() {
output_builder
.get_mut(i)
.context(InternalSnafu{
reason: format!(
"Output builder should have the same length as the row, expected at most {} but got {}",
column_cnt - 1,
i
)
})?
.try_push_value_ref(v.as_value_ref())
.context(DataTypeSnafu {
msg: "Failed to push value",
})?;
}
}
let output_columns = output_builder
.into_iter()
.map(|mut b| b.to_vector())
.collect_vec();
let output_batch = Batch::try_new(output_columns, row_cnt)?;
trace!("Reduce output batch: {:?}", output_batch);
send.give(vec![output_batch]);
Ok(())
});
}
}
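// A minimal, standalone sketch (plain arrow-rs arrays rather than this crate's
// `Batch`/`Row` types) of the mask-per-distinct-key idea used above: compare each
// key column against the key's scalar with `cmp::eq`, AND the per-column masks
// together with the boolean kernel, then filter the value column with the result.
use arrow::array::{Array, BooleanArray, Int64Array, Scalar, StringArray};
use arrow::compute::filter;
use arrow::compute::kernels::{boolean::and, cmp::eq};
fn main() -> Result<(), arrow::error::ArrowError> {
    // two key columns and one value column, three rows
    let key_a = StringArray::from(vec!["x", "x", "y"]);
    let key_b = Int64Array::from(vec![1, 2, 1]);
    let vals = Int64Array::from(vec![10, 20, 30]);
    // the distinct key currently being processed: ("x", 1)
    let want_a = Scalar::new(StringArray::from(vec!["x"]));
    let want_b = Scalar::new(Int64Array::from(vec![1]));
    // per-column equality, combined into a single boolean mask
    let mask: BooleanArray = and(&eq(&key_a, &want_a)?, &eq(&key_b, &want_b)?)?;
    // rows of the value column that belong to this key
    let for_this_key = filter(&vals, &mask)?;
    assert_eq!(for_this_key.len(), 1);
    Ok(())
}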
/// Reduce subgraph: reduces the input data into a single row.
/// The output is the concatenation of key and val.
fn reduce_subgraph(
@@ -856,6 +983,9 @@ impl AccumOutput {
/// return (accums, output)
fn into_accum_output(self) -> Result<(Vec<Value>, Vec<Value>), EvalError> {
if self.accum.is_empty() && self.output.is_empty() {
return Ok((vec![], vec![]));
}
ensure!(
!self.accum.is_empty() && self.accum.len() == self.output.len(),
InternalSnafu {


@@ -16,7 +16,7 @@
use std::collections::{BTreeMap, VecDeque};
use common_telemetry::debug;
use common_telemetry::{debug, trace};
use hydroflow::scheduled::graph_ext::GraphExt;
use itertools::Itertools;
use snafu::OptionExt;
@@ -48,10 +48,13 @@ impl<'referred, 'df> Context<'referred, 'df> {
let sub = self
.df
.add_subgraph_source("source_batch", send_port, move |_ctx, send| {
let mut total_batches = vec![];
let mut total_row_count = 0;
loop {
match src_recv.try_recv() {
Ok(batch) => {
send.give(vec![batch]);
total_row_count += batch.row_count();
total_batches.push(batch);
}
Err(TryRecvError::Empty) => {
break;
@@ -78,6 +81,13 @@ impl<'referred, 'df> Context<'referred, 'df> {
}
}
trace!(
"Send {} rows in {} batches",
total_row_count,
total_batches.len()
);
send.give(total_batches);
let now = *now.borrow();
// always schedule source to run at now so we can
// repeatedly run source if needed
@@ -185,13 +195,18 @@ impl<'referred, 'df> Context<'referred, 'df> {
collection.into_inner(),
move |_ctx, recv| {
let data = recv.take_inner();
let mut row_count = 0;
let mut batch_count = 0;
for batch in data.into_iter().flat_map(|i| i.into_iter()) {
row_count += batch.row_count();
batch_count += 1;
// if the sender is closed unexpectedly, stop sending
if sender.is_closed() || sender.send(batch).is_err() {
common_telemetry::error!("UnboundedSinkBatch is closed");
break;
}
}
trace!("sink send {} rows in {} batches", row_count, batch_count);
},
);
}


@@ -24,7 +24,7 @@ use hydroflow::scheduled::SubgraphId;
use itertools::Itertools;
use tokio::sync::Mutex;
use crate::expr::{EvalError, ScalarExpr};
use crate::expr::{Batch, EvalError, ScalarExpr};
use crate::repr::DiffRow;
use crate::utils::ArrangeHandler;
@@ -123,6 +123,38 @@ pub struct CollectionBundle<T: 'static = DiffRow> {
pub arranged: BTreeMap<Vec<ScalarExpr>, Arranged>,
}
pub trait GenericBundle {
fn is_batch(&self) -> bool;
fn try_as_batch(&self) -> Option<&CollectionBundle<Batch>> {
None
}
fn try_as_row(&self) -> Option<&CollectionBundle<DiffRow>> {
None
}
}
impl GenericBundle for CollectionBundle<Batch> {
fn is_batch(&self) -> bool {
true
}
fn try_as_batch(&self) -> Option<&CollectionBundle<Batch>> {
Some(self)
}
}
impl GenericBundle for CollectionBundle<DiffRow> {
fn is_batch(&self) -> bool {
false
}
fn try_as_row(&self) -> Option<&CollectionBundle<DiffRow>> {
Some(self)
}
}
impl<T: 'static> CollectionBundle<T> {
pub fn from_collection(collection: Collection<T>) -> Self {
Self {


@@ -16,27 +16,29 @@
mod df_func;
pub(crate) mod error;
mod func;
pub(crate) mod func;
mod id;
mod linear;
mod relation;
pub(crate) mod relation;
mod scalar;
mod signature;
use arrow::compute::FilterBuilder;
use datatypes::prelude::DataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use datatypes::vectors::{BooleanVector, Helper, VectorRef};
pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn};
pub(crate) use error::{EvalError, InvalidArgumentSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
use itertools::Itertools;
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use relation::{Accum, Accumulator, AggregateExpr, AggregateFunc};
pub(crate) use scalar::{ScalarExpr, TypedExpr};
use snafu::{ensure, ResultExt};
use crate::expr::error::DataTypeSnafu;
use crate::expr::error::{ArrowSnafu, DataTypeSnafu};
use crate::repr::Diff;
pub const TUMBLE_START: &str = "tumble_start";
pub const TUMBLE_END: &str = "tumble_end";
@@ -179,7 +181,9 @@ impl Batch {
)
}
);
Ok(self.batch.iter().map(|v| v.get(idx)).collect_vec())
let mut ret = Vec::with_capacity(self.column_count());
ret.extend(self.batch.iter().map(|v| v.get(idx)));
Ok(ret)
}
/// Slices the `Batch`, returning a new `Batch`.
@@ -248,4 +252,97 @@ impl Batch {
self.row_count = self_row_count + other_row_count;
Ok(())
}
/// Filters the batch with the given predicate
pub fn filter(&self, predicate: &BooleanVector) -> Result<Self, EvalError> {
let len = predicate.as_boolean_array().true_count();
let filter_builder = FilterBuilder::new(predicate.as_boolean_array()).optimize();
let filter_pred = filter_builder.build();
let filtered = self
.batch()
.iter()
.map(|col| filter_pred.filter(col.to_arrow_array().as_ref()))
.try_collect::<_, Vec<_>, _>()
.context(ArrowSnafu {
context: "Failed to filter val batches",
})?;
let res_vector = Helper::try_into_vectors(&filtered).context(DataTypeSnafu {
msg: "can't convert arrow array to vector",
})?;
Self::try_new(res_vector, len)
}
}
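// A small standalone sketch of the `FilterBuilder` pattern the `filter` method
// above relies on: build (and optimize) the predicate once, then apply the same
// `FilterPredicate` to every column of the batch. Plain arrow arrays stand in
// for this crate's vector types here.
use arrow::array::{Array, BooleanArray, Int64Array, StringArray};
use arrow::compute::FilterBuilder;
fn main() -> Result<(), arrow::error::ArrowError> {
    let predicate = BooleanArray::from(vec![true, false, true, false]);
    // `optimize` computes an optimized representation of the predicate up front
    let filter = FilterBuilder::new(&predicate).optimize().build();
    let col_a = Int64Array::from(vec![1, 2, 3, 4]);
    let col_b = StringArray::from(vec!["a", "b", "c", "d"]);
    // the same predicate is applied column by column, as in `Batch::filter`
    let filtered_a = filter.filter(&col_a)?;
    let filtered_b = filter.filter(&col_b)?;
    assert_eq!((filtered_a.len(), filtered_b.len()), (2, 2));
    Ok(())
}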
/// Vector with an optional diff column marking inserts (+1) and deletes (-1)
pub(crate) struct VectorDiff {
vector: VectorRef,
diff: Option<VectorRef>,
}
impl From<VectorRef> for VectorDiff {
fn from(vector: VectorRef) -> Self {
Self { vector, diff: None }
}
}
impl VectorDiff {
fn len(&self) -> usize {
self.vector.len()
}
fn try_new(vector: VectorRef, diff: Option<VectorRef>) -> Result<Self, EvalError> {
ensure!(
diff.as_ref()
.map_or(true, |diff| diff.len() == vector.len()),
InvalidArgumentSnafu {
reason: "Length of vector and diff should be the same"
}
);
Ok(Self { vector, diff })
}
}
impl IntoIterator for VectorDiff {
type Item = (Value, Diff);
type IntoIter = VectorDiffIter;
fn into_iter(self) -> Self::IntoIter {
VectorDiffIter {
vector: self.vector,
diff: self.diff,
idx: 0,
}
}
}
/// iterator for VectorDiff
pub(crate) struct VectorDiffIter {
vector: VectorRef,
diff: Option<VectorRef>,
idx: usize,
}
impl std::iter::Iterator for VectorDiffIter {
type Item = (Value, Diff);
fn next(&mut self) -> Option<Self::Item> {
if self.idx >= self.vector.len() {
return None;
}
let value = self.vector.get(self.idx);
// +1 means insert, -1 means delete; defaults to +1 (insert) when no diff is provided
let diff = if let Some(diff) = self.diff.as_ref() {
if let Ok(diff_at) = diff.get(self.idx).try_into() {
diff_at
} else {
common_telemetry::warn!("Invalid diff value at index {}", self.idx);
return None;
}
} else {
1
};
self.idx += 1;
Some((value, diff))
}
}
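// A plain-Rust sketch of the (value, diff) contract `VectorDiff` iterates over,
// using hypothetical stand-in slices instead of `VectorRef`: one way a sum-style
// accumulator could consume the pairs is to apply +1 diffs as inserts and -1
// diffs as retractions, defaulting to +1 when no diff column is provided.
fn accumulate_sum(values: &[i64], diffs: Option<&[i64]>) -> i64 {
    values
        .iter()
        .enumerate()
        // default every row to an insert (+1) when there is no diff column
        .map(|(idx, v)| v * diffs.map_or(1, |d| d[idx]))
        .sum()
}
fn main() {
    // two inserts and one retraction of 10: 10 + 5 - 10 = 5
    assert_eq!(accumulate_sum(&[10, 5, 10], Some(&[1, 1, -1])), 5);
    // without a diff column every row counts as an insert: 10 + 5 + 10 = 25
    assert_eq!(accumulate_sum(&[10, 5, 10], None), 25);
}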


@@ -92,12 +92,8 @@ impl DfScalarFunction {
let len = rb.num_rows();
let res = self.fn_impl.evaluate(&rb).map_err(|err| {
EvalDatafusionSnafu {
raw: err,
context: "Failed to evaluate datafusion scalar function",
}
.build()
let res = self.fn_impl.evaluate(&rb).context(EvalDatafusionSnafu {
context: "Failed to evaluate datafusion scalar function",
})?;
let res = common_query::columnar_value::ColumnarValue::try_from(&res)
.map_err(BoxedError::new)
@@ -157,12 +153,8 @@ impl DfScalarFunction {
.into_error(err)
})?;
let res = self.fn_impl.evaluate(&rb).map_err(|err| {
EvalDatafusionSnafu {
raw: err,
context: "Failed to evaluate datafusion scalar function",
}
.build()
let res = self.fn_impl.evaluate(&rb).context(EvalDatafusionSnafu {
context: "Failed to evaluate datafusion scalar function",
})?;
let res = common_query::columnar_value::ColumnarValue::try_from(&res)
.map_err(BoxedError::new)


@@ -106,18 +106,19 @@ pub enum EvalError {
location: Location,
},
#[snafu(display("Arrow error: {raw:?}, context: {context}"))]
#[snafu(display("Arrow error: {error:?}, context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
error: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("DataFusion error: {raw:?}, context: {context}"))]
#[snafu(display("DataFusion error: {error:?}, context: {context}"))]
Datafusion {
raw: DataFusionError,
#[snafu(source)]
error: DataFusionError,
context: String,
#[snafu(implicit)]
location: Location,


@@ -967,7 +967,7 @@ impl BinaryFunc {
| Self::DivUInt32
| Self::DivUInt64
| Self::DivFloat32
| Self::DivFloat64 => arrow::compute::kernels::numeric::mul(&left, &right)
| Self::DivFloat64 => arrow::compute::kernels::numeric::div(&left, &right)
.context(ArrowSnafu { context: "div" })?,
Self::ModInt16
@@ -1280,119 +1280,183 @@ where
Ok(Value::from(left % right))
}
#[test]
fn test_num_ops() {
let left = Value::from(10);
let right = Value::from(3);
let res = add::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(13));
let res = sub::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(7));
let res = mul::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(30));
let res = div::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(3));
let res = rem::<i32>(left, right).unwrap();
assert_eq!(res, Value::from(1));
#[cfg(test)]
mod test {
use std::sync::Arc;
let values = vec![Value::from(true), Value::from(false)];
let exprs = vec![ScalarExpr::Column(0), ScalarExpr::Column(1)];
let res = and(&values, &exprs).unwrap();
assert_eq!(res, Value::from(false));
let res = or(&values, &exprs).unwrap();
assert_eq!(res, Value::from(true));
}
use common_time::Interval;
use datatypes::vectors::Vector;
use pretty_assertions::assert_eq;
/// Test that the binary function specialization works,
/// whether from a direct type or from a literal expression
#[test]
fn test_binary_func_spec() {
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[
Some(ConcreteDataType::int32_datatype()),
Some(ConcreteDataType::int32_datatype())
]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
use super::*;
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
#[test]
fn test_tumble_batch() {
let datetime_vector = DateTimeVector::from_vec(vec![1, 2, 10, 13, 14, 20, 25]);
let tumble_start = UnaryFunc::TumbleWindowFloor {
window_size: Interval::from_day_time(0, 10),
start_time: None,
};
let tumble_end = UnaryFunc::TumbleWindowCeiling {
window_size: Interval::from_day_time(0, 10),
start_time: None,
};
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
let len = datetime_vector.len();
let batch = Batch::try_new(vec![Arc::new(datetime_vector)], len).unwrap();
let arg = ScalarExpr::Column(0);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
let start = tumble_start.eval_batch(&batch, &arg).unwrap();
let end = tumble_end.eval_batch(&batch, &arg).unwrap();
assert_eq!(
start.to_arrow_array().as_ref(),
TimestampMillisecondVector::from_vec(vec![0, 0, 10, 10, 10, 20, 20])
.to_arrow_array()
.as_ref()
);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
ScalarExpr::Column(0)
],
&[None, None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
assert_eq!(
end.to_arrow_array().as_ref(),
TimestampMillisecondVector::from_vec(vec![10, 10, 20, 20, 20, 30, 30])
.to_arrow_array()
.as_ref()
);
// this testcase makes sure the specialization can find the actual type from the expression and fill in the signature
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"equal",
&[
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
ScalarExpr::Column(0)
],
&[None, None]
)
.unwrap(),
(
BinaryFunc::Eq,
Signature {
input: smallvec![
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype()
let ts_ms_vector = TimestampMillisecondVector::from_vec(vec![1, 2, 10, 13, 14, 20, 25]);
let batch = Batch::try_new(vec![Arc::new(ts_ms_vector)], len).unwrap();
let start = tumble_start.eval_batch(&batch, &arg).unwrap();
let end = tumble_end.eval_batch(&batch, &arg).unwrap();
assert_eq!(
start.to_arrow_array().as_ref(),
TimestampMillisecondVector::from_vec(vec![0, 0, 10, 10, 10, 20, 20])
.to_arrow_array()
.as_ref()
);
assert_eq!(
end.to_arrow_array().as_ref(),
TimestampMillisecondVector::from_vec(vec![10, 10, 20, 20, 20, 30, 30])
.to_arrow_array()
.as_ref()
);
}
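// A tiny worked example of the tumble-window arithmetic the assertions above
// imply, assuming a 10 ms window aligned to 0 (start_time: None):
// floor = ts - ts % window and ceiling = floor + window.
fn main() {
    let window = 10i64;
    for (ts, expected_start, expected_end) in [(1i64, 0, 10), (13, 10, 20), (25, 20, 30)] {
        let start = ts - ts % window;
        let end = start + window;
        assert_eq!((start, end), (expected_start, expected_end));
    }
}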
#[test]
fn test_num_ops() {
let left = Value::from(10);
let right = Value::from(3);
let res = add::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(13));
let res = sub::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(7));
let res = mul::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(30));
let res = div::<i32>(left.clone(), right.clone()).unwrap();
assert_eq!(res, Value::from(3));
let res = rem::<i32>(left, right).unwrap();
assert_eq!(res, Value::from(1));
let values = vec![Value::from(true), Value::from(false)];
let exprs = vec![ScalarExpr::Column(0), ScalarExpr::Column(1)];
let res = and(&values, &exprs).unwrap();
assert_eq!(res, Value::from(false));
let res = or(&values, &exprs).unwrap();
assert_eq!(res, Value::from(true));
}
/// Test that the binary function specialization works,
/// whether from a direct type or from a literal expression
#[test]
fn test_binary_func_spec() {
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[
Some(ConcreteDataType::int32_datatype()),
Some(ConcreteDataType::int32_datatype())
]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[Some(ConcreteDataType::int32_datatype()), None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"add",
&[
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
ScalarExpr::Column(0)
],
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::Eq
}
)
);
&[None, None]
)
.unwrap(),
(BinaryFunc::AddInt32, BinaryFunc::AddInt32.signature())
);
matches!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[None, None]
),
Err(Error::InvalidQuery { .. })
);
// this testcase makes sure the specialization can find the actual type from the expression and fill in the signature
assert_eq!(
BinaryFunc::from_str_expr_and_type(
"equal",
&[
ScalarExpr::Literal(Value::from(1i32), ConcreteDataType::int32_datatype()),
ScalarExpr::Column(0)
],
&[None, None]
)
.unwrap(),
(
BinaryFunc::Eq,
Signature {
input: smallvec![
ConcreteDataType::int32_datatype(),
ConcreteDataType::int32_datatype()
],
output: ConcreteDataType::boolean_datatype(),
generic_fn: GenericFn::Eq
}
)
);
matches!(
BinaryFunc::from_str_expr_and_type(
"add",
&[ScalarExpr::Column(0), ScalarExpr::Column(0)],
&[None, None]
),
Err(Error::InvalidQuery { .. })
);
}
}


@@ -17,8 +17,9 @@
use std::collections::{BTreeMap, BTreeSet};
use arrow::array::BooleanArray;
use arrow::buffer::BooleanBuffer;
use arrow::compute::FilterBuilder;
use common_telemetry::debug;
use common_telemetry::trace;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{BooleanVector, Helper};
@@ -500,7 +501,7 @@ impl SafeMfpPlan {
for col in batch.batch() {
let filtered = pred
.filter(col.to_arrow_array().as_ref())
.context(ArrowSnafu {
.with_context(|_| ArrowSnafu {
context: format!("failed to filter column for mfp operator {:?}", self),
})?;
result.push(Helper::try_into_vector(filtered).context(DataTypeSnafu {
@@ -523,7 +524,9 @@ impl SafeMfpPlan {
// mark the columns that have been evaluated and appended to the `batch`
let mut expression = 0;
// preds default to true and will be updated as we evaluate each predicate
let mut all_preds = BooleanVector::from(vec![Some(true); batch.row_count()]);
let buf = BooleanBuffer::new_set(batch.row_count());
let arr = BooleanArray::new(buf, None);
let mut all_preds = BooleanVector::from(arr);
// to compute predicate, need to first compute all expressions used in predicates
for (support, predicate) in self.mfp.predicates.iter() {
@@ -793,7 +796,7 @@ impl MfpPlan {
if Some(lower_bound) != upper_bound && !null_eval {
if self.mfp.mfp.projection.iter().any(|c| values.len() <= *c) {
debug!("values={:?}, mfp={:?}", &values, &self.mfp.mfp);
trace!("values={:?}, mfp={:?}", &values, &self.mfp.mfp);
let err = InternalSnafu {
reason: format!(
"Index out of bound for mfp={:?} and values={:?}",


@@ -14,6 +14,7 @@
//! Describes an aggregation function and its input expression.
pub(crate) use accum::{Accum, Accumulator};
pub(crate) use func::AggregateFunc;
use crate::expr::ScalarExpr;


@@ -21,14 +21,14 @@ use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use serde::{Deserialize, Serialize};
use smallvec::smallvec;
use snafu::{ensure, IntoError, OptionExt};
use snafu::{IntoError, OptionExt};
use strum::{EnumIter, IntoEnumIterator};
use crate::error::{DatafusionSnafu, Error, InvalidQuerySnafu};
use crate::expr::error::EvalError;
use crate::expr::relation::accum::{Accum, Accumulator};
use crate::expr::signature::{GenericFn, Signature};
use crate::expr::InvalidArgumentSnafu;
use crate::expr::VectorDiff;
use crate::repr::Diff;
/// Aggregate functions that can be applied to a group of rows.
@@ -161,72 +161,6 @@ impl AggregateFunc {
}
}
struct VectorDiff {
vector: VectorRef,
diff: Option<VectorRef>,
}
impl VectorDiff {
fn len(&self) -> usize {
self.vector.len()
}
fn try_new(vector: VectorRef, diff: Option<VectorRef>) -> Result<Self, EvalError> {
ensure!(
diff.as_ref()
.map_or(true, |diff| diff.len() == vector.len()),
InvalidArgumentSnafu {
reason: "Length of vector and diff should be the same"
}
);
Ok(Self { vector, diff })
}
}
impl IntoIterator for VectorDiff {
type Item = (Value, Diff);
type IntoIter = VectorDiffIter;
fn into_iter(self) -> Self::IntoIter {
VectorDiffIter {
vector: self.vector,
diff: self.diff,
idx: 0,
}
}
}
struct VectorDiffIter {
vector: VectorRef,
diff: Option<VectorRef>,
idx: usize,
}
impl std::iter::Iterator for VectorDiffIter {
type Item = (Value, Diff);
fn next(&mut self) -> Option<Self::Item> {
if self.idx >= self.vector.len() {
return None;
}
let value = self.vector.get(self.idx);
// +1 means insert, -1 means delete; defaults to +1 (insert) when no diff is provided
let diff = if let Some(diff) = self.diff.as_ref() {
if let Ok(diff_at) = diff.get(self.idx).try_into() {
diff_at
} else {
common_telemetry::warn!("Invalid diff value at index {}", self.idx);
return None;
}
} else {
1
};
self.idx += 1;
Some((value, diff))
}
}
/// Generate signature for each aggregate function
macro_rules! generate_signature {
($value:ident,


@@ -16,17 +16,20 @@
use std::collections::{BTreeMap, BTreeSet};
use arrow::array::{make_array, ArrayData, ArrayRef};
use common_error::ext::BoxedError;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::value::Value;
use datatypes::vectors::{BooleanVector, Helper, NullVector, Vector, VectorRef};
use datatypes::vectors::{BooleanVector, Helper, VectorRef};
use hydroflow::lattices::cc_traits::Iter;
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{
DatafusionSnafu, Error, InvalidQuerySnafu, UnexpectedSnafu, UnsupportedTemporalFilterSnafu,
};
use crate::expr::error::{
DataTypeSnafu, EvalError, InternalSnafu, InvalidArgumentSnafu, OptimizeSnafu, TypeMismatchSnafu,
ArrowSnafu, DataTypeSnafu, EvalError, InvalidArgumentSnafu, OptimizeSnafu, TypeMismatchSnafu,
};
use crate::expr::func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
use crate::expr::{Batch, DfScalarFunction};
@@ -222,6 +225,8 @@ impl ScalarExpr {
}
}
/// NOTE: this if-then eval impl assumes all given exprs are pure and will not change the state of the world,
/// since it evaluates both the then and else branches and filters the result
fn eval_if_then(
batch: &Batch,
cond: &ScalarExpr,
@@ -240,130 +245,69 @@ impl ScalarExpr {
})?
.as_boolean_array();
let mut then_input_batch = None;
let mut else_input_batch = None;
let mut null_input_batch = None;
let indices = bool_conds
.into_iter()
.enumerate()
.map(|(idx, b)| {
(
match b {
Some(true) => 0, // then branch vector
Some(false) => 1, // else branch vector
None => 2, // null vector
},
idx,
)
})
.collect_vec();
// instructions for how to reassemble the result vector:
// iterate over (type of vec, offset, length) and append to the resulting vec
let mut assembly_idx = vec![];
let then_input_vec = then.eval_batch(batch)?;
let else_input_vec = els.eval_batch(batch)?;
// append batch, returning appended batch's slice in (offset, length)
fn append_batch(
batch: &mut Option<Batch>,
to_be_append: Batch,
) -> Result<(usize, usize), EvalError> {
let len = to_be_append.row_count();
if let Some(batch) = batch {
let offset = batch.row_count();
batch.append_batch(to_be_append)?;
Ok((offset, len))
} else {
*batch = Some(to_be_append);
Ok((0, len))
ensure!(
then_input_vec.data_type() == else_input_vec.data_type(),
TypeMismatchSnafu {
expected: then_input_vec.data_type(),
actual: else_input_vec.data_type(),
}
);
ensure!(
then_input_vec.len() == else_input_vec.len() && then_input_vec.len() == batch.row_count(),
InvalidArgumentSnafu {
reason: format!(
"then and else branch must have the same length(found {} and {}) which equals input batch's row count(which is {})",
then_input_vec.len(),
else_input_vec.len(),
batch.row_count()
)
}
);
fn new_nulls(dt: &arrow_schema::DataType, len: usize) -> ArrayRef {
let data = ArrayData::new_null(dt, len);
make_array(data)
}
let mut prev_cond: Option<Option<bool>> = None;
let mut prev_start_idx: Option<usize> = None;
// first put different conds' vector into different batches
for (idx, cond) in bool_conds.iter().enumerate() {
// if it belongs to the same slice and is not the last one, continue
if prev_cond == Some(cond) {
continue;
} else if let Some(prev_cond_idx) = prev_start_idx {
let prev_cond = prev_cond.unwrap();
let null_input_vec = new_nulls(
&then_input_vec.data_type().as_arrow_type(),
batch.row_count(),
);
// put a slice to corresponding batch
let slice_offset = prev_cond_idx;
let slice_length = idx - prev_cond_idx;
let to_be_append = batch.slice(slice_offset, slice_length)?;
let interleave_values = vec![
then_input_vec.to_arrow_array(),
else_input_vec.to_arrow_array(),
null_input_vec,
];
let int_ref: Vec<_> = interleave_values.iter().map(|x| x.as_ref()).collect();
let to_put_back = match prev_cond {
Some(true) => (
Some(true),
append_batch(&mut then_input_batch, to_be_append)?,
),
Some(false) => (
Some(false),
append_batch(&mut else_input_batch, to_be_append)?,
),
None => (None, append_batch(&mut null_input_batch, to_be_append)?),
};
assembly_idx.push(to_put_back);
}
prev_cond = Some(cond);
prev_start_idx = Some(idx);
}
// deal with empty and last slice case
if let Some(slice_offset) = prev_start_idx {
let prev_cond = prev_cond.unwrap();
let slice_length = bool_conds.len() - slice_offset;
let to_be_append = batch.slice(slice_offset, slice_length)?;
let to_put_back = match prev_cond {
Some(true) => (
Some(true),
append_batch(&mut then_input_batch, to_be_append)?,
),
Some(false) => (
Some(false),
append_batch(&mut else_input_batch, to_be_append)?,
),
None => (None, append_batch(&mut null_input_batch, to_be_append)?),
};
assembly_idx.push(to_put_back);
}
let then_output_vec = then_input_batch
.map(|batch| then.eval_batch(&batch))
.transpose()?;
let else_output_vec = else_input_batch
.map(|batch| els.eval_batch(&batch))
.transpose()?;
let null_output_vec = null_input_batch
.map(|null| NullVector::new(null.row_count()).slice(0, null.row_count()));
let dt = then_output_vec
.as_ref()
.map(|v| v.data_type())
.or(else_output_vec.as_ref().map(|v| v.data_type()))
.unwrap_or(ConcreteDataType::null_datatype());
let mut builder = dt.create_mutable_vector(conds.len());
for (cond, (offset, length)) in assembly_idx {
let slice = match cond {
Some(true) => then_output_vec.as_ref(),
Some(false) => else_output_vec.as_ref(),
None => null_output_vec.as_ref(),
}
.context(InternalSnafu {
reason: "Expect corresponding output vector to exist",
let interleave_res_arr =
arrow::compute::interleave(&int_ref, &indices).context(ArrowSnafu {
context: "Failed to interleave output arrays",
})?;
// TODO(discord9): seems `extend_slice_of` doesn't support NullVector or ConstantVector
// consider adding it maybe?
if slice.data_type().is_null() {
builder.push_nulls(length);
} else if slice.is_const() {
let arr = slice.slice(offset, length).to_arrow_array();
let vector = Helper::try_into_vector(arr).context(DataTypeSnafu {
msg: "Failed to convert arrow array to vector",
})?;
builder
.extend_slice_of(vector.as_ref(), 0, vector.len())
.context(DataTypeSnafu {
msg: "Failed to build result vector for if-then expression",
})?;
} else {
builder
.extend_slice_of(slice.as_ref(), offset, length)
.context(DataTypeSnafu {
msg: "Failed to build result vector for if-then expression",
})?;
}
}
let result_vec = builder.to_vector();
Ok(result_vec)
let res_vec = Helper::try_into_vector(interleave_res_arr).context(DataTypeSnafu {
msg: "Failed to convert arrow array to vector",
})?;
Ok(res_vec)
}
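// A minimal standalone sketch (plain arrow-rs, hypothetical inputs) of the
// interleave-based if-then strategy above: evaluate the then/else branches over
// the whole batch, add a null array for rows whose condition is NULL, and let
// `arrow::compute::interleave` pick a (source array, row) pair per output row.
use std::sync::Arc;
use arrow::array::{new_null_array, Array, ArrayRef, BooleanArray, Int64Array};
use arrow::compute::interleave;
use arrow::datatypes::DataType;
fn main() -> Result<(), arrow::error::ArrowError> {
    // condition per row: Some(true) -> then, Some(false) -> else, None -> null
    let cond = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
    let then_vals: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3, 4]));
    let else_vals: ArrayRef = Arc::new(Int64Array::from(vec![-1, -2, -3, -4]));
    let null_vals: ArrayRef = new_null_array(&DataType::Int64, cond.len());
    // (index of the source array, row index) for every output row
    let indices: Vec<(usize, usize)> = cond
        .iter()
        .enumerate()
        .map(|(row, b)| match b {
            Some(true) => (0, row),
            Some(false) => (1, row),
            None => (2, row),
        })
        .collect();
    let sources: Vec<&dyn Array> = vec![then_vals.as_ref(), else_vals.as_ref(), null_vals.as_ref()];
    let out = interleave(&sources, &indices)?;
    assert_eq!(out.len(), cond.len());
    Ok(())
}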
/// Eval this expression with the given values.
@@ -685,7 +629,7 @@ impl ScalarExpr {
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use datatypes::vectors::{Int32Vector, Vector};
use pretty_assertions::assert_eq;
use super::*;
@@ -781,7 +725,7 @@ mod test {
}
#[test]
fn test_eval_batch() {
fn test_eval_batch_if_then() {
// TODO(discord9): add more tests
{
let expr = ScalarExpr::If {
@@ -840,7 +784,7 @@ mod test {
let vectors = vec![Int32Vector::from(raw).slice(0, raw_len)];
let batch = Batch::try_new(vectors, raw_len).unwrap();
let expected = NullVector::new(raw_len).slice(0, raw_len);
let expected = Int32Vector::from(vec![]).slice(0, raw_len);
assert_eq!(expr.eval_batch(&batch).unwrap(), expected);
}
}


@@ -50,12 +50,13 @@ pub type KeyValDiffRow = ((Row, Row), Timestamp, Diff);
/// Broadcast channel capacity; this matters for memory consumption, since it influences how many
/// updates can be buffered in memory in the entire dataflow
/// TODO(discord9): add config for this, so cpu&mem usage can be balanced and configured by this
pub const BROADCAST_CAP: usize = 65535;
pub const BROADCAST_CAP: usize = 1024;
/// The maximum capacity of the send buffer, to prevent the buffer from growing too large
pub const SEND_BUF_CAP: usize = BROADCAST_CAP * 2;
pub const BATCH_SIZE: usize = BROADCAST_CAP / 2;
/// Flow worker will try to accumulate at least this many rows before processing them (if one second hasn't passed)
pub const BATCH_SIZE: usize = 32 * 16384;
/// Convert a value that is or can be converted to Datetime to internal timestamp
///


@@ -18,7 +18,7 @@ use std::collections::{BTreeMap, BTreeSet};
use std::ops::Bound;
use std::sync::Arc;
use common_telemetry::debug;
use common_telemetry::trace;
use smallvec::{smallvec, SmallVec};
use tokio::sync::RwLock;
@@ -235,9 +235,11 @@ impl Arrangement {
if let Some(s) = &mut self.expire_state {
if let Some(expired_by) = s.get_expire_duration_and_update_event_ts(now, &key)? {
max_expired_by = max_expired_by.max(Some(expired_by));
debug!(
trace!(
"Expired key: {:?}, expired by: {:?} with time being now={}",
key, expired_by, now
key,
expired_by,
now
);
continue;
}


@@ -356,9 +356,10 @@ impl SqlQueryHandler for Instance {
async fn is_valid_schema(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalog_manager
.schema_exists(catalog, schema)
.schema_exists(catalog, schema, None)
.await
.context(error::CatalogSnafu)
.map(|b| b && !self.catalog_manager.is_reserved_schema_name(schema))
}
}


@@ -87,7 +87,7 @@ impl OpenTelemetryProtocolHandler for Instance {
OTLP_TRACES_ROWS.inc_by(rows as u64);
self.handle_row_inserts(requests, ctx)
self.handle_log_inserts(requests, ctx)
.await
.map_err(BoxedError::new)
.context(error::ExecuteGrpcQuerySnafu)


@@ -102,7 +102,7 @@ impl Instance {
) -> Result<Output> {
let table = self
.catalog_manager
.table(catalog_name, schema_name, table_name)
.table(catalog_name, schema_name, table_name, Some(ctx))
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {


@@ -152,7 +152,12 @@ mod python {
if let Some(table) = self
.catalog_manager
.table(&expr.catalog_name, &expr.schema_name, &expr.table_name)
.table(
&expr.catalog_name,
&expr.schema_name,
&expr.table_name,
None,
)
.await
.context(CatalogSnafu)?
{
@@ -185,6 +190,7 @@ mod python {
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
None,
)
.await
.context(CatalogSnafu)?


@@ -12,15 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::SeekFrom;
use std::sync::Arc;
use async_trait::async_trait;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use common_base::range_read::RangeReader;
use greptime_proto::v1::index::InvertedIndexMetas;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{ReadSnafu, Result, SeekSnafu, UnexpectedBlobSizeSnafu};
use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu};
use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader;
use crate::inverted_index::format::reader::InvertedIndexReader;
use crate::inverted_index::format::MIN_BLOB_SIZE;
@@ -49,28 +48,28 @@ impl<R> InvertedIndexBlobReader<R> {
}
#[async_trait]
impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIndexBlobReader<R> {
impl<R: RangeReader> InvertedIndexReader for InvertedIndexBlobReader<R> {
async fn read_all(&mut self, dest: &mut Vec<u8>) -> Result<usize> {
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
self.source
.seek(SeekFrom::Start(0))
.read_into(0..metadata.content_length, dest)
.await
.context(SeekSnafu)?;
self.source.read_to_end(dest).await.context(ReadSnafu)
.context(CommonIoSnafu)?;
Ok(metadata.content_length as usize)
}
async fn seek_read(&mut self, offset: u64, size: u32) -> Result<Vec<u8>> {
self.source
.seek(SeekFrom::Start(offset))
let buf = self
.source
.read(offset..offset + size as u64)
.await
.context(SeekSnafu)?;
let mut buf = vec![0u8; size as usize];
self.source.read(&mut buf).await.context(ReadSnafu)?;
Ok(buf)
.context(CommonIoSnafu)?;
Ok(buf.into())
}
async fn metadata(&mut self) -> Result<Arc<InvertedIndexMetas>> {
let end = SeekFrom::End(0);
let blob_size = self.source.seek(end).await.context(SeekSnafu)?;
let metadata = self.source.metadata().await.context(CommonIoSnafu)?;
let blob_size = metadata.content_length;
Self::validate_blob_size(blob_size)?;
let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size);
@@ -81,6 +80,7 @@ impl<R: AsyncRead + AsyncSeek + Unpin + Send> InvertedIndexReader for InvertedIn
#[cfg(test)]
mod tests {
use common_base::bit_vec::prelude::*;
use common_base::range_read::RangeReaderAdapter;
use fst::MapBuilder;
use futures::io::Cursor;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
@@ -163,7 +163,8 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_metadata() {
let blob = create_inverted_index_blob();
let mut blob_reader = InvertedIndexBlobReader::new(Cursor::new(blob));
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let metas = blob_reader.metadata().await.unwrap();
assert_eq!(metas.metas.len(), 2);
@@ -190,7 +191,8 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_fst() {
let blob = create_inverted_index_blob();
let mut blob_reader = InvertedIndexBlobReader::new(Cursor::new(blob));
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();
@@ -222,7 +224,8 @@ mod tests {
#[tokio::test]
async fn test_inverted_index_blob_reader_bitmap() {
let blob = create_inverted_index_blob();
let mut blob_reader = InvertedIndexBlobReader::new(Cursor::new(blob));
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut blob_reader = InvertedIndexBlobReader::new(cursor);
let metas = blob_reader.metadata().await.unwrap();
let meta = metas.metas.get("tag0").unwrap();


@@ -12,15 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::io::SeekFrom;
use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
use common_base::range_read::RangeReader;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};
use prost::Message;
use snafu::{ensure, ResultExt};
use crate::inverted_index::error::{
DecodeProtoSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedFooterPayloadSizeSnafu,
CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu,
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
};
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;
@@ -37,7 +35,7 @@ impl<R> InvertedIndeFooterReader<R> {
}
}
impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {
impl<R: RangeReader> InvertedIndeFooterReader<R> {
pub async fn metadata(&mut self) -> Result<InvertedIndexMetas> {
let payload_size = self.read_payload_size().await?;
let metas = self.read_payload(payload_size).await?;
@@ -45,26 +43,26 @@ impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {
}
async fn read_payload_size(&mut self) -> Result<u64> {
let size_offset = SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE);
self.source.seek(size_offset).await.context(SeekSnafu)?;
let size_buf = &mut [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
self.source.read_exact(size_buf).await.context(ReadSnafu)?;
let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize];
let end = self.blob_size;
let start = end - FOOTER_PAYLOAD_SIZE_SIZE;
self.source
.read_into(start..end, &mut &mut size_buf[..])
.await
.context(CommonIoSnafu)?;
let payload_size = u32::from_le_bytes(*size_buf) as u64;
let payload_size = u32::from_le_bytes(size_buf) as u64;
self.validate_payload_size(payload_size)?;
Ok(payload_size)
}
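// An in-memory sketch of the footer layout the reads above assume, as a
// hypothetical helper rather than this crate's API: the blob ends with the
// payload followed by its size as a little-endian u32 (the 4-byte field implied
// by `u32::from_le_bytes`), so the size is read from the last 4 bytes and the
// payload from the `size` bytes immediately before it.
fn footer_payload(blob: &[u8]) -> Option<&[u8]> {
    const FOOTER_PAYLOAD_SIZE_SIZE: usize = 4;
    let end = blob.len().checked_sub(FOOTER_PAYLOAD_SIZE_SIZE)?;
    // trailing bytes: payload size as little-endian u32
    let size = u32::from_le_bytes(blob[end..].try_into().ok()?) as usize;
    // the payload sits immediately before the size field
    let start = end.checked_sub(size)?;
    Some(&blob[start..end])
}
fn main() {
    let payload = b"protobuf-metas".to_vec();
    let mut blob = payload.clone();
    blob.extend_from_slice(&(payload.len() as u32).to_le_bytes());
    assert_eq!(footer_payload(&blob), Some(&payload[..]));
}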
async fn read_payload(&mut self, payload_size: u64) -> Result<InvertedIndexMetas> {
let payload_offset =
SeekFrom::Start(self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE - payload_size);
self.source.seek(payload_offset).await.context(SeekSnafu)?;
let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE;
let start = end - payload_size;
let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?;
let payload = &mut vec![0u8; payload_size as usize];
self.source.read_exact(payload).await.context(ReadSnafu)?;
let metas = InvertedIndexMetas::decode(&payload[..]).context(DecodeProtoSnafu)?;
let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?;
self.validate_metas(&metas, payload_size)?;
Ok(metas)
@@ -115,6 +113,7 @@ impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReaderAdapter;
use futures::io::Cursor;
use prost::Message;
@@ -144,7 +143,8 @@ mod tests {
let payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let metas = reader.read_payload(payload_size).await.unwrap();
@@ -164,7 +164,8 @@ mod tests {
let mut payload_buf = create_test_payload(meta);
payload_buf.push(0xff); // Add an extra byte to corrupt the footer
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let payload_size_result = reader.read_payload_size().await;
assert!(payload_size_result.is_err());
@@ -181,7 +182,8 @@ mod tests {
let payload_buf = create_test_payload(meta);
let blob_size = payload_buf.len() as u64;
let mut reader = InvertedIndeFooterReader::new(Cursor::new(payload_buf), blob_size);
let cursor = RangeReaderAdapter(Cursor::new(payload_buf));
let mut reader = InvertedIndeFooterReader::new(cursor, blob_size);
let payload_size = reader.read_payload_size().await.unwrap();
let payload_result = reader.read_payload(payload_size).await;


@@ -99,6 +99,7 @@ impl<W: AsyncWrite + Send + Unpin> InvertedIndexBlobWriter<W> {
#[cfg(test)]
mod tests {
use common_base::range_read::RangeReaderAdapter;
use futures::io::Cursor;
use futures::stream;
@@ -119,7 +120,7 @@ mod tests {
.await
.unwrap();
let cursor = Cursor::new(blob);
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut reader = InvertedIndexBlobReader::new(cursor);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);
@@ -160,7 +161,7 @@ mod tests {
.await
.unwrap();
let cursor = Cursor::new(blob);
let cursor = RangeReaderAdapter(Cursor::new(blob));
let mut reader = InvertedIndexBlobReader::new(cursor);
let metadata = reader.metadata().await.unwrap();
assert_eq!(metadata.total_row_count, 8);


@@ -16,6 +16,7 @@ use std::collections::{hash_map, HashMap};
use std::fmt::{Debug, Formatter};
use std::sync::atomic::{AtomicI64, Ordering};
use std::sync::Arc;
use std::time::Duration;
use async_stream::stream;
use common_runtime::{RepeatedTask, TaskFunction};
@@ -40,7 +41,9 @@ use crate::raft_engine::protos::logstore::{EntryImpl, NamespaceImpl};
const NAMESPACE_PREFIX: &str = "$sys/";
pub struct RaftEngineLogStore {
config: RaftEngineConfig,
sync_write: bool,
sync_period: Option<Duration>,
read_batch_size: usize,
engine: Arc<Engine>,
gc_task: RepeatedTask<Error>,
last_sync_time: AtomicI64,
@@ -76,7 +79,7 @@ impl TaskFunction<Error> for PurgeExpiredFilesFunction {
}
impl RaftEngineLogStore {
pub async fn try_new(dir: String, config: RaftEngineConfig) -> Result<Self> {
pub async fn try_new(dir: String, config: &RaftEngineConfig) -> Result<Self> {
let raft_engine_config = Config {
dir,
purge_threshold: ReadableSize(config.purge_threshold.0),
@@ -85,6 +88,7 @@ impl RaftEngineLogStore {
target_file_size: ReadableSize(config.file_size.0),
enable_log_recycle: config.enable_log_recycle,
prefill_for_recycle: config.prefill_log_files,
recovery_threads: config.recovery_parallelism,
..Default::default()
};
let engine = Arc::new(Engine::open(raft_engine_config).context(RaftEngineSnafu)?);
@@ -96,7 +100,9 @@ impl RaftEngineLogStore {
);
let log_store = Self {
config,
sync_write: config.sync_write,
sync_period: config.sync_period,
read_batch_size: config.read_batch_size,
engine,
gc_task,
last_sync_time: AtomicI64::new(0),
@@ -196,7 +202,9 @@ impl RaftEngineLogStore {
impl Debug for RaftEngineLogStore {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RaftEngineLogsStore")
.field("config", &self.config)
.field("sync_write", &self.sync_write)
.field("sync_period", &self.sync_period)
.field("read_batch_size", &self.read_batch_size)
.field("started", &self.gc_task.started())
.finish()
}
@@ -228,9 +236,9 @@ impl LogStore for RaftEngineLogStore {
let (mut batch, last_entry_ids) = self.entries_to_batch(entries)?;
let mut sync = self.config.sync_write;
let mut sync = self.sync_write;
if let Some(sync_period) = &self.config.sync_period {
if let Some(sync_period) = &self.sync_period {
let now = common_time::util::current_time_millis();
if now - self.last_sync_time.load(Ordering::Relaxed) >= sync_period.as_millis() as i64 {
self.last_sync_time.store(now, Ordering::Relaxed);
@@ -276,7 +284,7 @@ impl LogStore for RaftEngineLogStore {
entry_id,
self.span(ns)
);
let max_batch_size = self.config.read_batch_size;
let max_batch_size = self.read_batch_size;
let (tx, mut rx) = tokio::sync::mpsc::channel(max_batch_size);
let _handle = common_runtime::spawn_global(async move {
while start_index <= last_index {
@@ -489,7 +497,7 @@ mod tests {
let dir = create_temp_dir("raft-engine-logstore-test");
let logstore = RaftEngineLogStore::try_new(
dir.path().to_str().unwrap().to_string(),
RaftEngineConfig::default(),
&RaftEngineConfig::default(),
)
.await
.unwrap();
@@ -502,7 +510,7 @@ mod tests {
let dir = create_temp_dir("raft-engine-logstore-test");
let logstore = RaftEngineLogStore::try_new(
dir.path().to_str().unwrap().to_string(),
RaftEngineConfig::default(),
&RaftEngineConfig::default(),
)
.await
.unwrap();
@@ -528,7 +536,7 @@ mod tests {
let dir = create_temp_dir("raft-engine-logstore-test");
let logstore = RaftEngineLogStore::try_new(
dir.path().to_str().unwrap().to_string(),
RaftEngineConfig::default(),
&RaftEngineConfig::default(),
)
.await
.unwrap();
@@ -570,7 +578,7 @@ mod tests {
{
let logstore = RaftEngineLogStore::try_new(
dir.path().to_str().unwrap().to_string(),
RaftEngineConfig::default(),
&RaftEngineConfig::default(),
)
.await
.unwrap();
@@ -590,7 +598,7 @@ mod tests {
let logstore = RaftEngineLogStore::try_new(
dir.path().to_str().unwrap().to_string(),
RaftEngineConfig::default(),
&RaftEngineConfig::default(),
)
.await
.unwrap();
@@ -634,7 +642,7 @@ mod tests {
..Default::default()
};
RaftEngineLogStore::try_new(path, config).await.unwrap()
RaftEngineLogStore::try_new(path, &config).await.unwrap()
}
#[tokio::test]


@@ -29,7 +29,7 @@ pub async fn create_tmp_local_file_log_store<P: AsRef<Path>>(path: P) -> RaftEng
file_size: ReadableSize::kb(128),
..Default::default()
};
RaftEngineLogStore::try_new(path, cfg).await.unwrap()
RaftEngineLogStore::try_new(path, &cfg).await.unwrap()
}
/// Create a [KafkaLogStore].


@@ -655,13 +655,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid heartbeat request: {}", err_msg))]
InvalidHeartbeatRequest {
err_msg: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to publish message"))]
PublishMessage {
#[snafu(source)]
@@ -809,7 +802,6 @@ impl ErrorExt for Error {
| Error::UnsupportedSelectorType { .. }
| Error::InvalidArguments { .. }
| Error::InitExportMetricsTask { .. }
| Error::InvalidHeartbeatRequest { .. }
| Error::ProcedureNotFound { .. }
| Error::TooManyPartitions { .. }
| Error::TomlFormat { .. } => StatusCode::InvalidArguments,

View File

@@ -15,8 +15,12 @@
use std::cmp::Ordering;
use api::v1::meta::{HeartbeatRequest, Role};
use common_meta::instruction::CacheIdent;
use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
use common_meta::key::{MetadataKey, MetadataValue};
use common_meta::peer::Peer;
use common_meta::rpc::store::PutRequest;
use common_telemetry::warn;
use common_telemetry::{error, warn};
use dashmap::DashMap;
use snafu::ResultExt;
@@ -120,6 +124,13 @@ impl HeartbeatHandler for CollectStatsHandler {
true
};
// Need to refresh the [datanode -> address] mapping
if refresh {
// Safety: `epoch_stats.stats` is not empty
let last = epoch_stats.stats.last().unwrap();
rewrite_node_address(ctx, last).await;
}
if !refresh && epoch_stats.len() < MAX_CACHED_STATS_PER_KEY {
return Ok(HandleControl::Continue);
}
@@ -131,7 +142,7 @@ impl HeartbeatHandler for CollectStatsHandler {
let put = PutRequest {
key,
value,
..Default::default()
prev_kv: false,
};
let _ = ctx
@@ -144,6 +155,44 @@ impl HeartbeatHandler for CollectStatsHandler {
}
}
async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {
let peer = Peer {
id: stat.id,
addr: stat.addr.clone(),
};
let key = NodeAddressKey::with_datanode(peer.id).to_bytes();
if let Ok(value) = NodeAddressValue::new(peer.clone()).try_as_raw_value() {
let put = PutRequest {
key,
value,
prev_kv: false,
};
match ctx.leader_cached_kv_backend.put(put).await {
Ok(_) => {
// broadcast cache invalidation
let cache_idents = stat
.table_ids()
.into_iter()
.map(CacheIdent::TableId)
.collect::<Vec<_>>();
if let Err(e) = ctx
.cache_invalidator
.invalidate(&Default::default(), &cache_idents)
.await
{
error!(e; "Failed to invalidate {} `NodeAddressKey` cache, peer: {:?}", cache_idents.len(), peer);
}
}
Err(e) => {
error!(e; "Failed to update NodeAddressValue: {:?}", peer);
}
}
} else {
warn!("Failed to serialize NodeAddressValue: {:?}", peer);
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;


@@ -13,7 +13,7 @@
// limitations under the License.
use api::v1::meta::{HeartbeatRequest, Role};
use common_telemetry::warn;
use common_telemetry::{info, warn};
use super::node_stat::Stat;
use crate::error::Result;
@@ -40,12 +40,15 @@ impl HeartbeatHandler for ExtractStatHandler {
return Ok(HandleControl::Continue);
}
match Stat::try_from(req.clone()) {
match Stat::try_from(req) {
Ok(stat) => {
let _ = acc.stat.insert(stat);
}
Err(err) => {
warn!(err; "Incomplete heartbeat data: {:?}", req);
Err(Some(header)) => {
info!("New handshake request: {:?}", header);
}
Err(_) => {
warn!("Incomplete heartbeat data: {:?}", req);
}
};


@@ -93,6 +93,7 @@ mod tests {
approximate_bytes: 0,
engine: default_engine().to_string(),
role: RegionRole::Follower,
extensions: Default::default(),
}
}
acc.stat = Some(Stat {


@@ -12,16 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use api::v1::meta::HeartbeatRequest;
use api::v1::meta::{HeartbeatRequest, RequestHeader};
use common_meta::ClusterId;
use common_time::util as time_util;
use serde::{Deserialize, Serialize};
use store_api::region_engine::RegionRole;
use store_api::storage::RegionId;
use table::metadata::TableId;
use crate::error::{Error, InvalidHeartbeatRequestSnafu};
use crate::key::DatanodeStatKey;
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
@@ -57,6 +57,8 @@ pub struct RegionStat {
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The extension info of this region
pub extensions: HashMap<String, Vec<u8>>,
}
impl Stat {
@@ -77,6 +79,11 @@ impl Stat {
self.region_stats.iter().map(|s| (s.id, s.role)).collect()
}
/// Returns all table ids in the region stats.
pub fn table_ids(&self) -> HashSet<TableId> {
self.region_stats.iter().map(|s| s.id.table_id()).collect()
}
pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
if inactive_region_ids.is_empty() {
return;
@@ -90,10 +97,10 @@ impl Stat {
}
}
impl TryFrom<HeartbeatRequest> for Stat {
type Error = Error;
impl TryFrom<&HeartbeatRequest> for Stat {
type Error = Option<RequestHeader>;
fn try_from(value: HeartbeatRequest) -> Result<Self, Self::Error> {
fn try_from(value: &HeartbeatRequest) -> Result<Self, Self::Error> {
let HeartbeatRequest {
header,
peer,
@@ -105,9 +112,9 @@ impl TryFrom<HeartbeatRequest> for Stat {
match (header, peer) {
(Some(header), Some(peer)) => {
let region_stats = region_stats
.into_iter()
.map(RegionStat::try_from)
.collect::<Result<Vec<_>, _>>()?;
.iter()
.map(RegionStat::from)
.collect::<Vec<_>>();
Ok(Self {
timestamp_millis: time_util::current_time_millis(),
@@ -115,34 +122,30 @@ impl TryFrom<HeartbeatRequest> for Stat {
// datanode id
id: peer.id,
// datanode address
addr: peer.addr,
addr: peer.addr.clone(),
rcus: region_stats.iter().map(|s| s.rcus).sum(),
wcus: region_stats.iter().map(|s| s.wcus).sum(),
region_num: region_stats.len() as u64,
region_stats,
node_epoch,
node_epoch: *node_epoch,
})
}
_ => InvalidHeartbeatRequestSnafu {
err_msg: "missing header or peer",
}
.fail(),
(header, _) => Err(header.clone()),
}
}
}
impl TryFrom<api::v1::meta::RegionStat> for RegionStat {
type Error = Error;
fn try_from(value: api::v1::meta::RegionStat) -> Result<Self, Self::Error> {
Ok(Self {
impl From<&api::v1::meta::RegionStat> for RegionStat {
fn from(value: &api::v1::meta::RegionStat) -> Self {
Self {
id: RegionId::from_u64(value.region_id),
rcus: value.rcus,
wcus: value.wcus,
approximate_bytes: value.approximate_bytes,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
})
extensions: value.extensions.clone(),
}
}
}


@@ -135,6 +135,7 @@ mod test {
wcus: 0,
approximate_bytes: 0,
engine: String::new(),
extensions: Default::default(),
}
}

Some files were not shown because too many files have changed in this diff.