mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-24 07:00:00 +00:00
Compare commits
175 Commits
cache-logi
...
v0.11.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
03a28320d6 | ||
|
|
ce86ba3425 | ||
|
|
2fcb95f50a | ||
|
|
1b642ea6a9 | ||
|
|
b35221ccb6 | ||
|
|
bac7e7bac9 | ||
|
|
903da8f4cb | ||
|
|
c0f498b00c | ||
|
|
19373d806d | ||
|
|
3133f3fb4e | ||
|
|
8b944268da | ||
|
|
dc83b0aa15 | ||
|
|
2b699e735c | ||
|
|
7a3d6f2bd5 | ||
|
|
f9ebb58a12 | ||
|
|
c732016fa0 | ||
|
|
01a308fe6b | ||
|
|
cf0c84bed1 | ||
|
|
66c0445974 | ||
|
|
7d8b256942 | ||
|
|
5092f5f451 | ||
|
|
ff4c153d4b | ||
|
|
a51853846a | ||
|
|
51c6eafb16 | ||
|
|
5bdea1a755 | ||
|
|
bcadce3988 | ||
|
|
0f116c8501 | ||
|
|
c049ce6ab1 | ||
|
|
6308e86e21 | ||
|
|
36263830bb | ||
|
|
d931389a4c | ||
|
|
8bdef776b3 | ||
|
|
91e933517a | ||
|
|
a617e0dbef | ||
|
|
6130c70b63 | ||
|
|
fae141ad0a | ||
|
|
57f31d14c8 | ||
|
|
1cd6abb61f | ||
|
|
e3927ea6f7 | ||
|
|
a6571d3392 | ||
|
|
1255638e84 | ||
|
|
1578c004b0 | ||
|
|
5f8d849981 | ||
|
|
3029b47a89 | ||
|
|
14d997e2d1 | ||
|
|
0aab68c23b | ||
|
|
027284ed1b | ||
|
|
6a958e2c36 | ||
|
|
db345c92df | ||
|
|
55ced9aa71 | ||
|
|
3633f25d0c | ||
|
|
63bbfd04c7 | ||
|
|
2f260d8b27 | ||
|
|
4d8fe29ea8 | ||
|
|
dbb3f2d98d | ||
|
|
9926e3bc78 | ||
|
|
0dd02e93cf | ||
|
|
73e6bf399d | ||
|
|
4402f638cd | ||
|
|
c199604ece | ||
|
|
2b72e66536 | ||
|
|
7c135c0ef9 | ||
|
|
9289265f54 | ||
|
|
485782af51 | ||
|
|
4b263ef1cc | ||
|
|
08f59008cc | ||
|
|
a2852affeb | ||
|
|
cdba7b442f | ||
|
|
42bf7e9965 | ||
|
|
a70b4d7eba | ||
|
|
408013c22b | ||
|
|
22c8a7656b | ||
|
|
35898f0b2e | ||
|
|
1101e98651 | ||
|
|
0089cf1b4f | ||
|
|
d7c3c8e124 | ||
|
|
f4b9eac465 | ||
|
|
aa6c2de42a | ||
|
|
175fddb3b5 | ||
|
|
6afc4e778a | ||
|
|
3bbcde8e58 | ||
|
|
3bf9981aab | ||
|
|
c47ad548a4 | ||
|
|
0b6d78a527 | ||
|
|
d616bd92ef | ||
|
|
84aa5b7b22 | ||
|
|
cbf21e53a9 | ||
|
|
6248a6ccf5 | ||
|
|
0e0c4faf0d | ||
|
|
1a02fc31c2 | ||
|
|
8efbafa538 | ||
|
|
fcd0ceea94 | ||
|
|
22f31f5929 | ||
|
|
5d20acca44 | ||
|
|
e3733344fe | ||
|
|
305767e226 | ||
|
|
22a662f6bc | ||
|
|
1431393fc8 | ||
|
|
dfe8cf25f9 | ||
|
|
cccd25ddbb | ||
|
|
ac387bd2af | ||
|
|
2e9737c01d | ||
|
|
a8b426aebe | ||
|
|
f3509fa312 | ||
|
|
3dcd6b8e51 | ||
|
|
f221ee30fd | ||
|
|
fb822987a9 | ||
|
|
4ab6dc2825 | ||
|
|
191755fc42 | ||
|
|
1676d02149 | ||
|
|
edc49623de | ||
|
|
9405d1c578 | ||
|
|
7a4276c24a | ||
|
|
be72d3bedb | ||
|
|
1ff29d8fde | ||
|
|
39ab1a6415 | ||
|
|
758ad0a8c5 | ||
|
|
8b60c27c2e | ||
|
|
ea6df9ba49 | ||
|
|
69420793e2 | ||
|
|
0da112b335 | ||
|
|
dcc08f6b3e | ||
|
|
a34035a1f2 | ||
|
|
fd8eba36a8 | ||
|
|
9712295177 | ||
|
|
d275cdd570 | ||
|
|
83eb777d21 | ||
|
|
8ed5bc5305 | ||
|
|
9ded314905 | ||
|
|
702a55a235 | ||
|
|
f3e5a5a7aa | ||
|
|
9c79baca4b | ||
|
|
03f2fa219d | ||
|
|
0ee455a980 | ||
|
|
eab9e3a48d | ||
|
|
1008af5324 | ||
|
|
2485f66077 | ||
|
|
4f3afb13b6 | ||
|
|
32a0023010 | ||
|
|
4e9c251041 | ||
|
|
e328c7067c | ||
|
|
8b307e4548 | ||
|
|
ff38abde2e | ||
|
|
aa9a265984 | ||
|
|
9d3ee6384a | ||
|
|
fcde0a4874 | ||
|
|
5d42e63ab0 | ||
|
|
0c01532a37 | ||
|
|
6d503b047a | ||
|
|
5d28f7a912 | ||
|
|
a50eea76a6 | ||
|
|
2ee1ce2ba1 | ||
|
|
c02b5dae93 | ||
|
|
081c6d9e74 | ||
|
|
ca6e02980e | ||
|
|
74bdba4613 | ||
|
|
2e0e82ddc8 | ||
|
|
e0c4157ad8 | ||
|
|
613e07afb4 | ||
|
|
0ce93f0b88 | ||
|
|
c231eee7c1 | ||
|
|
176f2df5b3 | ||
|
|
4622412dfe | ||
|
|
59ec90299b | ||
|
|
16b8cdc3d5 | ||
|
|
3197b8b535 | ||
|
|
972c2441af | ||
|
|
bb8b54b5d3 | ||
|
|
b5233e500b | ||
|
|
b61a388d04 | ||
|
|
06e565d25a | ||
|
|
3b2ce31a19 | ||
|
|
a889ea88ca | ||
|
|
2f2b4b306c | ||
|
|
856c0280f5 |
@@ -40,7 +40,7 @@ runs:
|
||||
|
||||
- name: Install PyArrow Package
|
||||
shell: pwsh
|
||||
run: pip install pyarrow
|
||||
run: pip install pyarrow numpy
|
||||
|
||||
- name: Install WSL distribution
|
||||
uses: Vampire/setup-wsl@v2
|
||||
|
||||
@@ -18,7 +18,7 @@ runs:
|
||||
--set replicaCount=${{ inputs.etcd-replicas }} \
|
||||
--set resources.requests.cpu=50m \
|
||||
--set resources.requests.memory=128Mi \
|
||||
--set resources.limits.cpu=1000m \
|
||||
--set resources.limits.cpu=1500m \
|
||||
--set resources.limits.memory=2Gi \
|
||||
--set auth.rbac.create=false \
|
||||
--set auth.rbac.token.enabled=false \
|
||||
|
||||
@@ -8,7 +8,7 @@ inputs:
|
||||
default: 2
|
||||
description: "Number of Datanode replicas"
|
||||
meta-replicas:
|
||||
default: 3
|
||||
default: 1
|
||||
description: "Number of Metasrv replicas"
|
||||
image-registry:
|
||||
default: "docker.io"
|
||||
@@ -58,7 +58,7 @@ runs:
|
||||
--set image.tag=${{ inputs.image-tag }} \
|
||||
--set base.podTemplate.main.resources.requests.cpu=50m \
|
||||
--set base.podTemplate.main.resources.requests.memory=256Mi \
|
||||
--set base.podTemplate.main.resources.limits.cpu=1000m \
|
||||
--set base.podTemplate.main.resources.limits.cpu=2000m \
|
||||
--set base.podTemplate.main.resources.limits.memory=2Gi \
|
||||
--set frontend.replicas=${{ inputs.frontend-replicas }} \
|
||||
--set datanode.replicas=${{ inputs.datanode-replicas }} \
|
||||
|
||||
2
.github/cargo-blacklist.txt
vendored
Normal file
2
.github/cargo-blacklist.txt
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
native-tls
|
||||
openssl
|
||||
14
.github/scripts/check-install-script.sh
vendored
Executable file
14
.github/scripts/check-install-script.sh
vendored
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
# Get the latest version of github.com/GreptimeTeam/greptimedb
|
||||
VERSION=$(curl -s https://api.github.com/repos/GreptimeTeam/greptimedb/releases/latest | jq -r '.tag_name')
|
||||
|
||||
echo "Downloading the latest version: $VERSION"
|
||||
|
||||
# Download the install script
|
||||
curl -fsSL https://raw.githubusercontent.com/greptimeteam/greptimedb/main/scripts/install.sh | sh -s $VERSION
|
||||
|
||||
# Execute the `greptime` command
|
||||
./greptime --version
|
||||
36
.github/workflows/dependency-check.yml
vendored
Normal file
36
.github/workflows/dependency-check.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: Check Dependencies
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
check-dependencies:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Rust
|
||||
uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
|
||||
- name: Run cargo tree
|
||||
run: cargo tree --prefix none > dependencies.txt
|
||||
|
||||
- name: Extract dependency names
|
||||
run: awk '{print $1}' dependencies.txt > dependency_names.txt
|
||||
|
||||
- name: Check for blacklisted crates
|
||||
run: |
|
||||
while read -r dep; do
|
||||
if grep -qFx "$dep" dependency_names.txt; then
|
||||
echo "Blacklisted crate '$dep' found in dependencies."
|
||||
exit 1
|
||||
fi
|
||||
done < .github/cargo-blacklist.txt
|
||||
echo "No blacklisted crates found."
|
||||
10
.github/workflows/develop.yml
vendored
10
.github/workflows/develop.yml
vendored
@@ -436,7 +436,7 @@ jobs:
|
||||
timeout-minutes: 60
|
||||
strategy:
|
||||
matrix:
|
||||
target: ["fuzz_migrate_mito_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
|
||||
target: ["fuzz_migrate_mito_regions", "fuzz_migrate_metric_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
|
||||
mode:
|
||||
- name: "Remote WAL"
|
||||
minio: true
|
||||
@@ -449,6 +449,12 @@ jobs:
|
||||
minio: true
|
||||
kafka: false
|
||||
values: "with-minio.yaml"
|
||||
- target: "fuzz_migrate_metric_regions"
|
||||
mode:
|
||||
name: "Local WAL"
|
||||
minio: true
|
||||
kafka: false
|
||||
values: "with-minio.yaml"
|
||||
steps:
|
||||
- name: Remove unused software
|
||||
run: |
|
||||
@@ -688,7 +694,7 @@ jobs:
|
||||
with:
|
||||
python-version: '3.10'
|
||||
- name: Install PyArrow Package
|
||||
run: pip install pyarrow
|
||||
run: pip install pyarrow numpy
|
||||
- name: Setup etcd server
|
||||
working-directory: tests-integration/fixtures/etcd
|
||||
run: docker compose -f docker-compose-standalone.yml up -d --wait
|
||||
|
||||
6
.github/workflows/nightly-ci.yml
vendored
6
.github/workflows/nightly-ci.yml
vendored
@@ -22,6 +22,10 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check install.sh
|
||||
run: ./.github/scripts/check-install-script.sh
|
||||
|
||||
- name: Run sqlness test
|
||||
uses: ./.github/actions/sqlness-test
|
||||
with:
|
||||
@@ -92,7 +96,7 @@ jobs:
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- name: Install PyArrow Package
|
||||
run: pip install pyarrow
|
||||
run: pip install pyarrow numpy
|
||||
- name: Install WSL distribution
|
||||
uses: Vampire/setup-wsl@v2
|
||||
with:
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -91,7 +91,7 @@ env:
|
||||
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
|
||||
NIGHTLY_RELEASE_PREFIX: nightly
|
||||
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
|
||||
NEXT_RELEASE_VERSION: v0.10.0
|
||||
NEXT_RELEASE_VERSION: v0.11.0
|
||||
|
||||
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
|
||||
permissions:
|
||||
|
||||
@@ -17,6 +17,6 @@ repos:
|
||||
- id: fmt
|
||||
- id: clippy
|
||||
args: ["--workspace", "--all-targets", "--all-features", "--", "-D", "warnings"]
|
||||
stages: [push]
|
||||
stages: [pre-push]
|
||||
- id: cargo-check
|
||||
args: ["--workspace", "--all-targets", "--all-features"]
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
* [NiwakaDev](https://github.com/NiwakaDev)
|
||||
* [etolbakov](https://github.com/etolbakov)
|
||||
* [irenjj](https://github.com/irenjj)
|
||||
* [tisonkun](https://github.com/tisonkun)
|
||||
* [Lanqing Yang](https://github.com/lyang24)
|
||||
|
||||
## Team Members (in alphabetical order)
|
||||
|
||||
@@ -30,7 +32,6 @@
|
||||
* [shuiyisong](https://github.com/shuiyisong)
|
||||
* [sunchanglong](https://github.com/sunchanglong)
|
||||
* [sunng87](https://github.com/sunng87)
|
||||
* [tisonkun](https://github.com/tisonkun)
|
||||
* [v0y4g3r](https://github.com/v0y4g3r)
|
||||
* [waynexia](https://github.com/waynexia)
|
||||
* [xtang](https://github.com/xtang)
|
||||
|
||||
1087
Cargo.lock
generated
1087
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
49
Cargo.toml
49
Cargo.toml
@@ -2,24 +2,28 @@
|
||||
members = [
|
||||
"src/api",
|
||||
"src/auth",
|
||||
"src/catalog",
|
||||
"src/cache",
|
||||
"src/catalog",
|
||||
"src/cli",
|
||||
"src/client",
|
||||
"src/cmd",
|
||||
"src/common/base",
|
||||
"src/common/catalog",
|
||||
"src/common/config",
|
||||
"src/common/datasource",
|
||||
"src/common/decimal",
|
||||
"src/common/error",
|
||||
"src/common/frontend",
|
||||
"src/common/function",
|
||||
"src/common/macro",
|
||||
"src/common/greptimedb-telemetry",
|
||||
"src/common/grpc",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/macro",
|
||||
"src/common/mem-prof",
|
||||
"src/common/meta",
|
||||
"src/common/options",
|
||||
"src/common/plugins",
|
||||
"src/common/pprof",
|
||||
"src/common/procedure",
|
||||
"src/common/procedure-test",
|
||||
"src/common/query",
|
||||
@@ -29,7 +33,6 @@ members = [
|
||||
"src/common/telemetry",
|
||||
"src/common/test-util",
|
||||
"src/common/time",
|
||||
"src/common/decimal",
|
||||
"src/common/version",
|
||||
"src/common/wal",
|
||||
"src/datanode",
|
||||
@@ -37,6 +40,8 @@ members = [
|
||||
"src/file-engine",
|
||||
"src/flow",
|
||||
"src/frontend",
|
||||
"src/index",
|
||||
"src/log-query",
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
"src/meta-srv",
|
||||
@@ -56,7 +61,6 @@ members = [
|
||||
"src/sql",
|
||||
"src/store-api",
|
||||
"src/table",
|
||||
"src/index",
|
||||
"tests-fuzz",
|
||||
"tests-integration",
|
||||
"tests/runner",
|
||||
@@ -64,7 +68,7 @@ members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.9.3"
|
||||
version = "0.11.0"
|
||||
edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
@@ -116,17 +120,18 @@ datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev
|
||||
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
|
||||
derive_builder = "0.12"
|
||||
dotenv = "0.15"
|
||||
etcd-client = { version = "0.13" }
|
||||
etcd-client = "0.13"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "0b4f7c8ab06399f6b90e1626e8d5b9697cb33bb9" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a875e976441188028353f7274a46a7e6e065c5d4" }
|
||||
hex = "0.4"
|
||||
humantime = "2.1"
|
||||
humantime-serde = "1.1"
|
||||
itertools = "0.10"
|
||||
jsonb = { git = "https://github.com/datafuselabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
|
||||
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
|
||||
lazy_static = "1.4"
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
|
||||
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
|
||||
mockall = "0.11.4"
|
||||
moka = "0.12"
|
||||
notify = "6.1"
|
||||
@@ -137,17 +142,20 @@ opentelemetry-proto = { version = "0.5", features = [
|
||||
"metrics",
|
||||
"trace",
|
||||
"with-serde",
|
||||
"logs",
|
||||
] }
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
|
||||
paste = "1.0"
|
||||
pin-project = "1.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
promql-parser = { version = "0.4" }
|
||||
promql-parser = { version = "0.4.3", features = ["ser"] }
|
||||
prost = "0.12"
|
||||
raft-engine = { version = "0.4.1", default-features = false }
|
||||
rand = "0.8"
|
||||
ratelimit = "0.9"
|
||||
regex = "1.8"
|
||||
regex-automata = { version = "0.4" }
|
||||
regex-automata = "0.4"
|
||||
reqwest = { version = "0.12", default-features = false, features = [
|
||||
"json",
|
||||
"rustls-tls-native-roots",
|
||||
@@ -161,11 +169,10 @@ rstest = "0.21"
|
||||
rstest_reuse = "0.7"
|
||||
rust_decimal = "1.33"
|
||||
rustc-hash = "2.0"
|
||||
schemars = "0.8"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = { version = "1.0", features = ["float_roundtrip"] }
|
||||
serde_with = "3"
|
||||
shadow-rs = "0.31"
|
||||
shadow-rs = "0.35"
|
||||
similar-asserts = "1.6.0"
|
||||
smallvec = { version = "1", features = ["serde"] }
|
||||
snafu = "0.8"
|
||||
@@ -176,13 +183,16 @@ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "5
|
||||
] }
|
||||
strum = { version = "0.25", features = ["derive"] }
|
||||
tempfile = "3"
|
||||
tokio = { version = "1.36", features = ["full"] }
|
||||
tokio = { version = "1.40", features = ["full"] }
|
||||
tokio-postgres = "0.7"
|
||||
tokio-stream = { version = "0.1" }
|
||||
tokio-stream = "0.1"
|
||||
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
|
||||
toml = "0.8.8"
|
||||
tonic = { version = "0.11", features = ["tls", "gzip", "zstd"] }
|
||||
tower = { version = "0.4" }
|
||||
tower = "0.4"
|
||||
tracing-appender = "0.2"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
|
||||
typetag = "0.2"
|
||||
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
|
||||
zstd = "0.13"
|
||||
|
||||
@@ -191,6 +201,7 @@ api = { path = "src/api" }
|
||||
auth = { path = "src/auth" }
|
||||
cache = { path = "src/cache" }
|
||||
catalog = { path = "src/catalog" }
|
||||
cli = { path = "src/cli" }
|
||||
client = { path = "src/client" }
|
||||
cmd = { path = "src/cmd", default-features = false }
|
||||
common-base = { path = "src/common/base" }
|
||||
@@ -207,7 +218,9 @@ common-grpc-expr = { path = "src/common/grpc-expr" }
|
||||
common-macro = { path = "src/common/macro" }
|
||||
common-mem-prof = { path = "src/common/mem-prof" }
|
||||
common-meta = { path = "src/common/meta" }
|
||||
common-options = { path = "src/common/options" }
|
||||
common-plugins = { path = "src/common/plugins" }
|
||||
common-pprof = { path = "src/common/pprof" }
|
||||
common-procedure = { path = "src/common/procedure" }
|
||||
common-procedure-test = { path = "src/common/procedure-test" }
|
||||
common-query = { path = "src/common/query" }
|
||||
@@ -253,10 +266,12 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
|
||||
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
|
||||
# see https://github.com/aws/aws-lc-rs/pull/526
|
||||
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
|
||||
# Apply a fix for pprof for unaligned pointer access
|
||||
pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }
|
||||
|
||||
[workspace.dependencies.meter-macros]
|
||||
git = "https://github.com/GreptimeTeam/greptime-meter.git"
|
||||
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
|
||||
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
|
||||
|
||||
[profile.release]
|
||||
debug = 1
|
||||
|
||||
2
Makefile
2
Makefile
@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
|
||||
IMAGE_REGISTRY ?= docker.io
|
||||
IMAGE_NAMESPACE ?= greptime
|
||||
IMAGE_TAG ?= latest
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-5674c14f-20240920110415
|
||||
DEV_BUILDER_IMAGE_TAG ?= 2024-10-19-a5c00e85-20241024184445
|
||||
BUILDX_MULTI_PLATFORM_BUILD ?= false
|
||||
BUILDX_BUILDER_NAME ?= gtbuilder
|
||||
BASE_IMAGE ?= ubuntu
|
||||
|
||||
76
README.md
76
README.md
@@ -6,7 +6,7 @@
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
<h2 align="center">Unified Time Series Database for Metrics, Logs, and Events</h2>
|
||||
<h2 align="center">Unified & Cost-Effective Time Series Database for Metrics, Logs, and Events</h2>
|
||||
|
||||
<div align="center">
|
||||
<h3 align="center">
|
||||
@@ -48,37 +48,51 @@
|
||||
</a>
|
||||
</div>
|
||||
|
||||
- [Introduction](#introduction)
|
||||
- [**Features: Why GreptimeDB**](#why-greptimedb)
|
||||
- [Architecture](https://docs.greptime.com/contributor-guide/overview/#architecture)
|
||||
- [Try it for free](#try-greptimedb)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Project Status](#project-status)
|
||||
- [Join the community](#community)
|
||||
- [Contributing](#contributing)
|
||||
- [Tools & Extensions](#tools--extensions)
|
||||
- [License](#license)
|
||||
- [Acknowledgement](#acknowledgement)
|
||||
|
||||
## Introduction
|
||||
|
||||
**GreptimeDB** is an open-source unified time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at any scale.
|
||||
**GreptimeDB** is an open-source unified & cost-effective time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
|
||||
|
||||
## Why GreptimeDB
|
||||
|
||||
Our core developers have been building time-series data platforms for years. Based on our best-practices, GreptimeDB is born to give you:
|
||||
Our core developers have been building time-series data platforms for years. Based on our best practices, GreptimeDB was born to give you:
|
||||
|
||||
* **Unified all kinds of time series**
|
||||
* **Unified Processing of Metrics, Logs, and Events**
|
||||
|
||||
GreptimeDB treats all time series as contextual events with timestamp, and thus unifies the processing of metrics, logs, and events. It supports analyzing metrics, logs, and events with SQL and PromQL, and doing streaming with continuous aggregation.
|
||||
GreptimeDB unifies time series data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/continuous-aggregation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
|
||||
|
||||
* **Cloud-Edge collaboration**
|
||||
* **Cloud-native Distributed Database**
|
||||
|
||||
GreptimeDB can be deployed on ARM architecture-compatible Android/Linux systems as well as cloud environments from various vendors. Both sides run the same software, providing identical APIs and control planes, so your application can run at the edge or on the cloud without modification, and data synchronization also becomes extremely easy and efficient.
|
||||
|
||||
* **Cloud-native distributed database**
|
||||
|
||||
By leveraging object storage (S3 and others), separating compute and storage, scaling stateless compute nodes arbitrarily, GreptimeDB implements seamless scalability. It also supports cross-cloud deployment with a built-in unified data access layer over different object storages.
|
||||
Built for [Kubernetes](https://docs.greptime.com/user-guide/deployments/deploy-on-kubernetes/greptimedb-operator-management). GreptimeDB achieves seamless scalability with its [cloud-native architecture](https://docs.greptime.com/user-guide/concepts/architecture) of separated compute and storage, built on object storage (AWS S3, Azure Blob Storage, etc.) while enabling cross-cloud deployment through a unified data access layer.
|
||||
|
||||
* **Performance and Cost-effective**
|
||||
|
||||
Flexible indexing capabilities and distributed, parallel-processing query engine, tackling high cardinality issues down. Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
|
||||
Written in pure Rust for superior performance and reliability. GreptimeDB features a distributed query engine with intelligent indexing to handle high cardinality data efficiently. Its optimized columnar storage achieves 50x cost efficiency on cloud object storage through advanced compression. [Benchmark reports](https://www.greptime.com/blogs/2024-09-09-report-summary).
|
||||
|
||||
* **Compatible with InfluxDB, Prometheus and more protocols**
|
||||
* **Cloud-Edge Collaboration**
|
||||
|
||||
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc. [Read more](https://docs.greptime.com/user-guide/protocols/overview).
|
||||
GreptimeDB seamlessly operates across cloud and edge (ARM/Android/Linux), providing consistent APIs and control plane for unified data management and efficient synchronization. [Learn how to run on Android](https://docs.greptime.com/user-guide/deployments/run-on-android/).
|
||||
|
||||
* **Multi-protocol Ingestion, SQL & PromQL Ready**
|
||||
|
||||
Widely adopted database protocols and APIs, including MySQL, PostgreSQL, InfluxDB, OpenTelemetry, Loki and Prometheus, etc. Effortless Adoption & Seamless Migration. [Supported Protocols Overview](https://docs.greptime.com/user-guide/protocols/overview).
|
||||
|
||||
For more detailed info please read [Why GreptimeDB](https://docs.greptime.com/user-guide/concepts/why-greptimedb).
|
||||
|
||||
## Try GreptimeDB
|
||||
|
||||
### 1. [GreptimePlay](https://greptime.com/playground)
|
||||
### 1. [Live Demo](https://greptime.com/playground)
|
||||
|
||||
Try out the features of GreptimeDB right from your browser.
|
||||
|
||||
@@ -97,9 +111,18 @@ docker pull greptime/greptimedb
|
||||
Start a GreptimeDB container with:
|
||||
|
||||
```shell
|
||||
docker run --rm --name greptime --net=host greptime/greptimedb standalone start
|
||||
docker run -p 127.0.0.1:4000-4003:4000-4003 \
|
||||
-v "$(pwd)/greptimedb:/tmp/greptimedb" \
|
||||
--name greptime --rm \
|
||||
greptime/greptimedb:latest standalone start \
|
||||
--http-addr 0.0.0.0:4000 \
|
||||
--rpc-addr 0.0.0.0:4001 \
|
||||
--mysql-addr 0.0.0.0:4002 \
|
||||
--postgres-addr 0.0.0.0:4003
|
||||
```
|
||||
|
||||
Access the dashboard via `http://localhost:4000/dashboard`.
|
||||
|
||||
Read more about [Installation](https://docs.greptime.com/getting-started/installation/overview) on docs.
|
||||
|
||||
## Getting Started
|
||||
@@ -129,7 +152,7 @@ Run a standalone server:
|
||||
cargo run -- standalone start
|
||||
```
|
||||
|
||||
## Extension
|
||||
## Tools & Extensions
|
||||
|
||||
### Dashboard
|
||||
|
||||
@@ -146,14 +169,19 @@ cargo run -- standalone start
|
||||
|
||||
### Grafana Dashboard
|
||||
|
||||
Our official Grafana dashboard is available at [grafana](grafana/README.md) directory.
|
||||
Our official Grafana dashboard for monitoring GreptimeDB is available in the [grafana](grafana/README.md) directory.
|
||||
|
||||
## Project Status
|
||||
|
||||
The current version has not yet reached the standards for General Availability.
|
||||
According to our Greptime 2024 Roadmap, we aim to achieve a production-level version with the release of v1.0 by the end of 2024. [Join Us](https://github.com/GreptimeTeam/greptimedb/issues/3412)
|
||||
GreptimeDB is currently in Beta. We are targeting GA (General Availability) with the v1.0 release by early 2025.
|
||||
|
||||
We welcome you to test and use GreptimeDB. Some users have already adopted it in their production environments. If you're interested in trying it out, please use the latest stable release available.
|
||||
While in Beta, GreptimeDB is already:
|
||||
|
||||
* Being used in production by early adopters
|
||||
* Actively maintained with regular releases (see [about version numbers](https://docs.greptime.com/nightly/reference/about-greptimedb-version))
|
||||
* Suitable for testing and evaluation
|
||||
|
||||
For production use, we recommend using the latest stable release.
|
||||
|
||||
## Community
|
||||
|
||||
@@ -172,12 +200,12 @@ In addition, you may:
|
||||
- Connect with us on [LinkedIn](https://www.linkedin.com/company/greptime/)
|
||||
- Follow us on [Twitter](https://twitter.com/greptime)
|
||||
|
||||
## Commerial Support
|
||||
## Commercial Support
|
||||
|
||||
If you are running GreptimeDB OSS in your organization, we offer additional
|
||||
enterprise addons, installation service, training and consulting. [Contact
|
||||
enterprise add-ons, installation services, training, and consulting. [Contact
|
||||
us](https://greptime.com/contactus) and we will reach out to you with more
|
||||
detail of our commerial license.
|
||||
detail of our commercial license.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -83,6 +83,7 @@
|
||||
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
|
||||
| `metadata_store` | -- | -- | Metadata storage options. |
|
||||
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
|
||||
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
|
||||
@@ -92,8 +93,8 @@
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
|
||||
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3', etc. It is configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}/object_cache/read`. An empty string disables the cache. |
|
||||
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
@@ -108,6 +109,11 @@
|
||||
| `storage.sas_token` | String | Unset | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | Unset | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | Unset | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.http_client` | -- | -- | The http client options to the storage.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.http_client.pool_max_idle_per_host` | Integer | `1024` | The maximum idle connection per host allowed in the pool. |
|
||||
| `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
|
||||
| `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
|
||||
| `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
@@ -115,7 +121,9 @@
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
|
||||
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
|
||||
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
|
||||
@@ -123,12 +131,11 @@
|
||||
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
|
||||
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
|
||||
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
|
||||
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
|
||||
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance. |
|
||||
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/object_cache/write`. |
|
||||
| `region_engine.mito.experimental_write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
|
||||
@@ -278,7 +285,7 @@
|
||||
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
|
||||
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
|
||||
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
|
||||
| `store_addr` | String | `127.0.0.1:2379` | Store server address default to etcd store. |
|
||||
| `store_addrs` | Array | -- | Store server addresses; the store defaults to etcd. |
|
||||
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
|
||||
| `use_memory_store` | Bool | `false` | Store data in memory. |
|
||||
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
|
||||
@@ -409,11 +416,12 @@
|
||||
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
|
||||
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
|
||||
| `storage` | -- | -- | The data storage options. |
|
||||
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
|
||||
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
|
||||
| `storage.cache_path` | String | Unset | Cache configuration for object storage such as 'S3' etc.<br/>The local file cache directory. |
|
||||
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. |
|
||||
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling. |
|
||||
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `storage.bucket` | String | Unset | The S3 bucket name.<br/>**It's only used when the storage type is `S3`, `Oss` and `Gcs`**. |
|
||||
| `storage.root` | String | Unset | The S3 data will be stored in the specified prefix, for example, `s3://${bucket}/${root}`.<br/>**It's only used when the storage type is `S3`, `Oss` and `Azblob`**. |
|
||||
| `storage.access_key_id` | String | Unset | The access key id of the aws account.<br/>It's **highly recommended** to use AWS IAM roles instead of hardcoding the access key id and secret key.<br/>**It's only used when the storage type is `S3` and `Oss`**. |
|
||||
@@ -428,6 +436,11 @@
|
||||
| `storage.sas_token` | String | Unset | The sas token of the azure account.<br/>**It's only used when the storage type is `Azblob`**. |
|
||||
| `storage.endpoint` | String | Unset | The endpoint of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.region` | String | Unset | The region of the S3 service.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.http_client` | -- | -- | The http client options to the storage.<br/>**It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**. |
|
||||
| `storage.http_client.pool_max_idle_per_host` | Integer | `1024` | The maximum idle connection per host allowed in the pool. |
|
||||
| `storage.http_client.connect_timeout` | String | `30s` | The timeout for only the connect phase of a http client. |
|
||||
| `storage.http_client.timeout` | String | `30s` | The total request timeout, applied from when the request starts connecting until the response body has finished.<br/>Also considered a total deadline. |
|
||||
| `storage.http_client.pool_idle_timeout` | String | `90s` | The timeout for idle sockets being kept-alive. |
|
||||
| `[[region_engine]]` | -- | -- | The region engine options. You can configure multiple region engines. |
|
||||
| `region_engine.mito` | -- | -- | The Mito engine options. |
|
||||
| `region_engine.mito.num_workers` | Integer | `8` | Number of region workers. |
|
||||
@@ -435,7 +448,9 @@
|
||||
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
|
||||
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
|
||||
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
|
||||
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
|
||||
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
|
||||
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
|
||||
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
|
||||
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
|
||||
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
|
||||
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
|
||||
@@ -443,12 +458,11 @@
|
||||
| `region_engine.mito.vector_cache_size` | String | Auto | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.page_cache_size` | String | Auto | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
|
||||
| `region_engine.mito.selector_result_cache_size` | String | Auto | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
|
||||
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
|
||||
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
|
||||
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
|
||||
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance. |
|
||||
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/object_cache/write`. |
|
||||
| `region_engine.mito.experimental_write_cache_size` | String | `5GiB` | Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger. |
|
||||
| `region_engine.mito.experimental_write_cache_ttl` | String | Unset | TTL for write cache. |
|
||||
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
|
||||
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
|
||||
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
|
||||
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
|
||||
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
|
||||
|
||||
@@ -213,6 +213,17 @@ create_index = true
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
dump_index_interval = "60s"
|
||||
|
||||
## Ignore missing entries during read WAL.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
##
|
||||
## This option ensures that when Kafka messages are deleted, the system
|
||||
## can still successfully replay memtable data without throwing an
|
||||
## out-of-range error.
|
||||
## However, enabling this option might lead to unexpected data loss,
|
||||
## as the system will skip over missing entries instead of treating
|
||||
## them as critical errors.
|
||||
overwrite_entry_start_id = false
|
||||
|
||||
# The Kafka SASL configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# Available SASL mechanisms:
|
||||
@@ -283,14 +294,14 @@ data_home = "/tmp/greptimedb/"
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.
|
||||
## A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling.
|
||||
## @toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
#+ cache_path = ""
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
|
||||
## @toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
cache_capacity = "5GiB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
@@ -364,6 +375,23 @@ endpoint = "https://s3.amazonaws.com"
|
||||
## @toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
## The http client options to the storage.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
[storage.http_client]
|
||||
|
||||
## The maximum idle connection per host allowed in the pool.
|
||||
pool_max_idle_per_host = 1024
|
||||
|
||||
## The timeout for only the connect phase of a http client.
|
||||
connect_timeout = "30s"
|
||||
|
||||
## The total request timeout, applied from when the request starts connecting until the response body has finished.
|
||||
## Also considered a total deadline.
|
||||
timeout = "30s"
|
||||
|
||||
## The timeout for idle sockets being kept-alive.
|
||||
pool_idle_timeout = "90s"
|
||||
|
||||
# Custom storage options
|
||||
# [[storage.providers]]
|
||||
# name = "S3"
|
||||
@@ -405,8 +433,17 @@ manifest_checkpoint_distance = 10
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
## Max number of running background flush jobs (default: 1/2 of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_flushes = 4
|
||||
|
||||
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_compactions = 2
|
||||
|
||||
## Max number of running background purge jobs (default: number of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_purges = 8
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
@@ -439,14 +476,14 @@ auto_flush_interval = "1h"
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ selector_result_cache_size = "512MB"
|
||||
|
||||
## Whether to enable the experimental write cache.
|
||||
## Whether to enable the experimental write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance.
|
||||
enable_experimental_write_cache = false
|
||||
|
||||
## File system path for write cache, defaults to `{data_home}/write_cache`.
|
||||
## File system path for write cache, defaults to `{data_home}/object_cache/write`.
|
||||
experimental_write_cache_path = ""
|
||||
|
||||
## Capacity for write cache.
|
||||
experimental_write_cache_size = "512MB"
|
||||
## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
|
||||
experimental_write_cache_size = "5GiB"
|
||||
|
||||
## TTL for write cache.
|
||||
## @toml2docs:none-default
|
||||
@@ -455,12 +492,6 @@ experimental_write_cache_ttl = "8h"
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
@@ -626,7 +657,7 @@ url = ""
|
||||
headers = { }
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
#+ [tracing]
|
||||
## The tokio console address.
|
||||
## @toml2docs:none-default
|
||||
tokio_console_addr = "127.0.0.1"
|
||||
#+ tokio_console_addr = "127.0.0.1"
|
||||
|
||||
@@ -101,8 +101,8 @@ threshold = "10s"
|
||||
sample_ratio = 1.0
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
#+ [tracing]
|
||||
## The tokio console address.
|
||||
## @toml2docs:none-default
|
||||
tokio_console_addr = "127.0.0.1"
|
||||
#+ tokio_console_addr = "127.0.0.1"
|
||||
|
||||
|
||||
@@ -231,7 +231,7 @@ url = ""
|
||||
headers = { }
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
#+ [tracing]
|
||||
## The tokio console address.
|
||||
## @toml2docs:none-default
|
||||
tokio_console_addr = "127.0.0.1"
|
||||
#+ tokio_console_addr = "127.0.0.1"
|
||||
|
||||
@@ -8,7 +8,7 @@ bind_addr = "127.0.0.1:3002"
|
||||
server_addr = "127.0.0.1:3002"
|
||||
|
||||
## Store server addresses; the store defaults to etcd.
|
||||
store_addr = "127.0.0.1:2379"
|
||||
store_addrs = ["127.0.0.1:2379"]
|
||||
|
||||
## Datanode selector type.
|
||||
## - `round_robin` (default value)
|
||||
@@ -218,7 +218,7 @@ url = ""
|
||||
headers = { }
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
#+ [tracing]
|
||||
## The tokio console address.
|
||||
## @toml2docs:none-default
|
||||
tokio_console_addr = "127.0.0.1"
|
||||
#+ tokio_console_addr = "127.0.0.1"
|
||||
|
||||
@@ -237,6 +237,17 @@ backoff_base = 2
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
backoff_deadline = "5mins"
|
||||
|
||||
## Ignore missing entries during read WAL.
|
||||
## **It's only used when the provider is `kafka`**.
|
||||
##
|
||||
## This option ensures that when Kafka messages are deleted, the system
|
||||
## can still successfully replay memtable data without throwing an
|
||||
## out-of-range error.
|
||||
## However, enabling this option might lead to unexpected data loss,
|
||||
## as the system will skip over missing entries instead of treating
|
||||
## them as critical errors.
|
||||
overwrite_entry_start_id = false
|
||||
|
||||
# The Kafka SASL configuration.
|
||||
# **It's only used when the provider is `kafka`**.
|
||||
# Available SASL mechanisms:
|
||||
@@ -321,14 +332,14 @@ data_home = "/tmp/greptimedb/"
|
||||
## - `Oss`: the data is stored in the Aliyun OSS.
|
||||
type = "File"
|
||||
|
||||
## Cache configuration for object storage such as 'S3' etc.
|
||||
## The local file cache directory.
|
||||
## Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.
|
||||
## A local file directory, defaults to `{data_home}/object_cache/read`. An empty string means disabling.
|
||||
## @toml2docs:none-default
|
||||
cache_path = "/path/local_cache"
|
||||
#+ cache_path = ""
|
||||
|
||||
## The local file cache capacity in bytes.
|
||||
## The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger.
|
||||
## @toml2docs:none-default
|
||||
cache_capacity = "256MB"
|
||||
cache_capacity = "5GiB"
|
||||
|
||||
## The S3 bucket name.
|
||||
## **It's only used when the storage type is `S3`, `Oss` and `Gcs`**.
|
||||
@@ -402,6 +413,23 @@ endpoint = "https://s3.amazonaws.com"
|
||||
## @toml2docs:none-default
|
||||
region = "us-west-2"
|
||||
|
||||
## The http client options to the storage.
|
||||
## **It's only used when the storage type is `S3`, `Oss`, `Gcs` and `Azblob`**.
|
||||
[storage.http_client]
|
||||
|
||||
## The maximum idle connection per host allowed in the pool.
|
||||
pool_max_idle_per_host = 1024
|
||||
|
||||
## The timeout for only the connect phase of a http client.
|
||||
connect_timeout = "30s"
|
||||
|
||||
## The total request timeout, applied from when the request starts connecting until the response body has finished.
|
||||
## Also considered a total deadline.
|
||||
timeout = "30s"
|
||||
|
||||
## The timeout for idle sockets being kept-alive.
|
||||
pool_idle_timeout = "90s"
|
||||
|
||||
# Custom storage options
|
||||
# [[storage.providers]]
|
||||
# name = "S3"
|
||||
@@ -443,8 +471,17 @@ manifest_checkpoint_distance = 10
|
||||
## Whether to compress manifest and checkpoint file by gzip (default false).
|
||||
compress_manifest = false
|
||||
|
||||
## Max number of running background jobs
|
||||
max_background_jobs = 4
|
||||
## Max number of running background flush jobs (default: 1/2 of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_flushes = 4
|
||||
|
||||
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_compactions = 2
|
||||
|
||||
## Max number of running background purge jobs (default: number of cpu cores).
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ max_background_purges = 8
|
||||
|
||||
## Interval to auto flush a region if it has not flushed yet.
|
||||
auto_flush_interval = "1h"
|
||||
@@ -477,14 +514,14 @@ auto_flush_interval = "1h"
|
||||
## @toml2docs:none-default="Auto"
|
||||
#+ selector_result_cache_size = "512MB"
|
||||
|
||||
## Whether to enable the experimental write cache.
|
||||
## Whether to enable the experimental write cache, it's enabled by default when using object storage. It is recommended to enable it when using object storage for better performance.
|
||||
enable_experimental_write_cache = false
|
||||
|
||||
## File system path for write cache, defaults to `{data_home}/write_cache`.
|
||||
## File system path for write cache, defaults to `{data_home}/object_cache/write`.
|
||||
experimental_write_cache_path = ""
|
||||
|
||||
## Capacity for write cache.
|
||||
experimental_write_cache_size = "512MB"
|
||||
## Capacity for write cache. If your disk space is sufficient, it is recommended to set it larger.
|
||||
experimental_write_cache_size = "5GiB"
|
||||
|
||||
## TTL for write cache.
|
||||
## @toml2docs:none-default
|
||||
@@ -493,12 +530,6 @@ experimental_write_cache_ttl = "8h"
|
||||
## Buffer size for SST writing.
|
||||
sst_write_buffer_size = "8MB"
|
||||
|
||||
## Parallelism to scan a region (default: 1/4 of cpu cores).
|
||||
## - `0`: using the default value (1/4 of cpu cores).
|
||||
## - `1`: scan in current thread.
|
||||
## - `n`: scan in parallelism n.
|
||||
scan_parallelism = 0
|
||||
|
||||
## Capacity of the channel to send data from parallel scan tasks to the main task.
|
||||
parallel_scan_channel_size = 32
|
||||
|
||||
@@ -670,7 +701,7 @@ url = ""
|
||||
headers = { }
|
||||
|
||||
## The tracing options. Only takes effect when compiled with the `tokio-console` feature.
|
||||
[tracing]
|
||||
#+ [tracing]
|
||||
## The tokio console address.
|
||||
## @toml2docs:none-default
|
||||
tokio_console_addr = "127.0.0.1"
|
||||
#+ tokio_console_addr = "127.0.0.1"
|
||||
|
||||
@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que
|
||||
|
||||
## Addition
|
||||
- You can tune GreptimeDB's configuration to get better performance.
|
||||
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options).
|
||||
- You can set up GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).
|
||||
|
||||
16
docs/how-to/how-to-change-log-level-on-the-fly.md
Normal file
16
docs/how-to/how-to-change-log-level-on-the-fly.md
Normal file
@@ -0,0 +1,16 @@
|
||||
# Change Log Level on the Fly
|
||||
|
||||
## HTTP API
|
||||
|
||||
example:
|
||||
```bash
|
||||
curl --data "trace,flow=debug" 127.0.0.1:4000/debug/log_level
|
||||
```
|
||||
And the database will reply with something like:
|
||||
```bash
|
||||
Log Level changed from Some("info") to "trace,flow=debug"%
|
||||
```
|
||||
|
||||
The data is a string in the format of `global_level,module1=level1,module2=level2,...` that follows the same rules as `RUST_LOG`.
|
||||
|
||||
The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off` (case insensitive).
|
||||
@@ -1,15 +1,9 @@
|
||||
# Profiling CPU
|
||||
|
||||
## Build GreptimeDB with `pprof` feature
|
||||
|
||||
```bash
|
||||
cargo build --features=pprof
|
||||
```
|
||||
|
||||
## HTTP API
|
||||
Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
|
||||
```bash
|
||||
curl -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
|
||||
curl -X POST -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
|
||||
```
|
||||
|
||||
Then you can use `pprof` command with the protobuf file.
|
||||
@@ -19,10 +13,10 @@ go tool pprof -top /tmp/pprof.out
|
||||
|
||||
Sample at 99 Hertz, for 60 seconds, output report in flamegraph format.
|
||||
```bash
|
||||
curl -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
|
||||
curl -X POST -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
|
||||
```
|
||||
|
||||
Sample at 49 Hertz, for 10 seconds, output report in text format.
|
||||
```bash
|
||||
curl -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
|
||||
curl -X POST -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
|
||||
```
|
||||
|
||||
@@ -18,24 +18,18 @@ sudo apt install libjemalloc-dev
|
||||
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
|
||||
```
|
||||
|
||||
### Build GreptimeDB with `mem-prof` feature.
|
||||
|
||||
```bash
|
||||
cargo build --features=mem-prof
|
||||
```
|
||||
|
||||
## Profiling
|
||||
|
||||
Start GreptimeDB instance with environment variables:
|
||||
|
||||
```bash
|
||||
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
|
||||
MALLOC_CONF=prof:true ./target/debug/greptime standalone start
|
||||
```
|
||||
|
||||
Dump memory profiling data through HTTP API:
|
||||
|
||||
```bash
|
||||
curl localhost:4000/debug/prof/mem > greptime.hprof
|
||||
curl -X POST localhost:4000/debug/prof/mem > greptime.hprof
|
||||
```
|
||||
|
||||
You can periodically dump profiling data and compare them to find the delta memory usage.
|
||||
|
||||
@@ -5,6 +5,13 @@ GreptimeDB's official Grafana dashboard.
|
||||
|
||||
Status notice: we are still working on this config. It's expected to change frequently in the coming days. Please feel free to submit your feedback and/or contributions to this dashboard 🤗
|
||||
|
||||
If you use Helm [chart](https://github.com/GreptimeTeam/helm-charts) to deploy GreptimeDB cluster, you can enable self-monitoring by setting the following values in your Helm chart:
|
||||
|
||||
- `monitoring.enabled=true`: Deploys a standalone GreptimeDB instance dedicated to monitoring the cluster;
|
||||
- `grafana.enabled=true`: Deploys Grafana and automatically imports the monitoring dashboard;
|
||||
|
||||
The standalone GreptimeDB instance will collect metrics from your cluster and the dashboard will be available in the Grafana UI. For detailed deployment instructions, please refer to our [Kubernetes deployment guide](https://docs.greptime.com/nightly/user-guide/deployments/deploy-on-kubernetes/getting-started).
|
||||
|
||||
# How to use
|
||||
|
||||
## `greptimedb.json`
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -409,7 +409,39 @@
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"fieldMinMax": false,
|
||||
"mappings": [],
|
||||
@@ -438,18 +470,16 @@
|
||||
},
|
||||
"id": 27,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [
|
||||
@@ -467,7 +497,7 @@
|
||||
}
|
||||
],
|
||||
"title": "CPU",
|
||||
"type": "stat"
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
@@ -477,7 +507,39 @@
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"fieldMinMax": false,
|
||||
@@ -503,18 +565,16 @@
|
||||
},
|
||||
"id": 28,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "10.2.3",
|
||||
"targets": [
|
||||
@@ -532,7 +592,7 @@
|
||||
}
|
||||
],
|
||||
"title": "Memory",
|
||||
"type": "stat"
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
@@ -3335,6 +3395,6 @@
|
||||
"timezone": "",
|
||||
"title": "GreptimeDB",
|
||||
"uid": "e7097237-669b-4f8d-b751-13067afbfb68",
|
||||
"version": 15,
|
||||
"version": 16,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
[toolchain]
|
||||
channel = "nightly-2024-06-06"
|
||||
|
||||
channel = "nightly-2024-10-19"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
#!/bin/sh
|
||||
|
||||
set -ue
|
||||
|
||||
@@ -15,7 +15,7 @@ GITHUB_ORG=GreptimeTeam
|
||||
GITHUB_REPO=greptimedb
|
||||
BIN=greptime
|
||||
|
||||
function get_os_type() {
|
||||
get_os_type() {
|
||||
os_type="$(uname -s)"
|
||||
|
||||
case "$os_type" in
|
||||
@@ -31,7 +31,7 @@ function get_os_type() {
|
||||
esac
|
||||
}
|
||||
|
||||
function get_arch_type() {
|
||||
get_arch_type() {
|
||||
arch_type="$(uname -m)"
|
||||
|
||||
case "$arch_type" in
|
||||
@@ -53,7 +53,7 @@ function get_arch_type() {
|
||||
esac
|
||||
}
|
||||
|
||||
function download_artifact() {
|
||||
download_artifact() {
|
||||
if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
|
||||
# Use the latest stable released version.
|
||||
# GitHub API reference: https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#get-the-latest-release.
|
||||
|
||||
@@ -17,10 +17,11 @@ use std::sync::Arc;
|
||||
use common_base::BitVec;
|
||||
use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
|
||||
use common_decimal::Decimal128;
|
||||
use common_time::interval::IntervalUnit;
|
||||
use common_time::time::Time;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Interval, Timestamp};
|
||||
use common_time::{
|
||||
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
|
||||
};
|
||||
use datatypes::prelude::{ConcreteDataType, ValueRef};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::types::{
|
||||
@@ -35,15 +36,14 @@ use datatypes::vectors::{
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
|
||||
UInt64Vector, VectorRef,
|
||||
};
|
||||
use greptime_proto::v1;
|
||||
use greptime_proto::v1::column_data_type_extension::TypeExt;
|
||||
use greptime_proto::v1::ddl_request::Expr;
|
||||
use greptime_proto::v1::greptime_request::Request;
|
||||
use greptime_proto::v1::query_request::Query;
|
||||
use greptime_proto::v1::value::ValueData;
|
||||
use greptime_proto::v1::{
|
||||
ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension, QueryRequest,
|
||||
Row, SemanticType,
|
||||
self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension,
|
||||
QueryRequest, Row, SemanticType, VectorTypeExtension,
|
||||
};
|
||||
use paste::paste;
|
||||
use snafu::prelude::*;
|
||||
@@ -115,6 +115,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ConcreteDataType::binary_datatype()
|
||||
}
|
||||
}
|
||||
ColumnDataType::Json => ConcreteDataType::json_datatype(),
|
||||
ColumnDataType::String => ConcreteDataType::string_datatype(),
|
||||
ColumnDataType::Date => ConcreteDataType::date_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
|
||||
@@ -148,6 +149,17 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ConcreteDataType::decimal128_default_datatype()
|
||||
}
|
||||
}
|
||||
ColumnDataType::Vector => {
|
||||
if let Some(TypeExt::VectorType(d)) = datatype_wrapper
|
||||
.datatype_ext
|
||||
.as_ref()
|
||||
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref())
|
||||
{
|
||||
ConcreteDataType::vector_datatype(d.dim)
|
||||
} else {
|
||||
ConcreteDataType::vector_default_datatype()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -229,6 +241,15 @@ impl ColumnDataTypeWrapper {
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vector_datatype(dim: u32) -> Self {
|
||||
ColumnDataTypeWrapper {
|
||||
datatype: ColumnDataType::Vector,
|
||||
datatype_ext: Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::VectorType(VectorTypeExtension { dim })),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
@@ -247,7 +268,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
ConcreteDataType::UInt64(_) => ColumnDataType::Uint64,
|
||||
ConcreteDataType::Float32(_) => ColumnDataType::Float32,
|
||||
ConcreteDataType::Float64(_) => ColumnDataType::Float64,
|
||||
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
@@ -269,6 +290,8 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
IntervalType::MonthDayNano(_) => ColumnDataType::IntervalMonthDayNano,
|
||||
},
|
||||
ConcreteDataType::Decimal128(_) => ColumnDataType::Decimal128,
|
||||
ConcreteDataType::Json(_) => ColumnDataType::Json,
|
||||
ConcreteDataType::Vector(_) => ColumnDataType::Vector,
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
@@ -287,15 +310,17 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
})),
|
||||
})
|
||||
}
|
||||
ColumnDataType::Binary => {
|
||||
if datatype == ConcreteDataType::json_datatype() {
|
||||
// Json is the same as binary in proto. The extension marks the binary in proto is actually a json.
|
||||
Some(ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
ColumnDataType::Json => datatype.as_json().map(|_| ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
|
||||
}),
|
||||
ColumnDataType::Vector => {
|
||||
datatype
|
||||
.as_vector()
|
||||
.map(|vector_type| ColumnDataTypeExtension {
|
||||
type_ext: Some(TypeExt::VectorType(VectorTypeExtension {
|
||||
dim: vector_type.dim as _,
|
||||
})),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
@@ -416,6 +441,14 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
|
||||
decimal128_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::Json => Values {
|
||||
string_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::Vector => Values {
|
||||
binary_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -456,13 +489,11 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
|
||||
TimeUnit::Microsecond => values.time_microsecond_values.push(val.value()),
|
||||
TimeUnit::Nanosecond => values.time_nanosecond_values.push(val.value()),
|
||||
},
|
||||
Value::Interval(val) => match val.unit() {
|
||||
IntervalUnit::YearMonth => values.interval_year_month_values.push(val.to_i32()),
|
||||
IntervalUnit::DayTime => values.interval_day_time_values.push(val.to_i64()),
|
||||
IntervalUnit::MonthDayNano => values
|
||||
.interval_month_day_nano_values
|
||||
.push(convert_i128_to_interval(val.to_i128())),
|
||||
},
|
||||
Value::IntervalYearMonth(val) => values.interval_year_month_values.push(val.to_i32()),
|
||||
Value::IntervalDayTime(val) => values.interval_day_time_values.push(val.to_i64()),
|
||||
Value::IntervalMonthDayNano(val) => values
|
||||
.interval_month_day_nano_values
|
||||
.push(convert_month_day_nano_to_pb(val)),
|
||||
Value::Decimal128(val) => values.decimal128_values.push(convert_to_pb_decimal128(val)),
|
||||
Value::List(_) | Value::Duration(_) => unreachable!(),
|
||||
});
|
||||
@@ -496,25 +527,24 @@ fn ddl_request_type(request: &DdlRequest) -> &'static str {
|
||||
match request.expr {
|
||||
Some(Expr::CreateDatabase(_)) => "ddl.create_database",
|
||||
Some(Expr::CreateTable(_)) => "ddl.create_table",
|
||||
Some(Expr::Alter(_)) => "ddl.alter",
|
||||
Some(Expr::AlterTable(_)) => "ddl.alter_table",
|
||||
Some(Expr::DropTable(_)) => "ddl.drop_table",
|
||||
Some(Expr::TruncateTable(_)) => "ddl.truncate_table",
|
||||
Some(Expr::CreateFlow(_)) => "ddl.create_flow",
|
||||
Some(Expr::DropFlow(_)) => "ddl.drop_flow",
|
||||
Some(Expr::CreateView(_)) => "ddl.create_view",
|
||||
Some(Expr::DropView(_)) => "ddl.drop_view",
|
||||
Some(Expr::AlterDatabase(_)) => "ddl.alter_database",
|
||||
None => "ddl.empty",
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an i128 value to google protobuf type [IntervalMonthDayNano].
|
||||
pub fn convert_i128_to_interval(v: i128) -> v1::IntervalMonthDayNano {
|
||||
let interval = Interval::from_i128(v);
|
||||
let (months, days, nanoseconds) = interval.to_month_day_nano();
|
||||
/// Converts an interval to google protobuf type [IntervalMonthDayNano].
|
||||
pub fn convert_month_day_nano_to_pb(v: IntervalMonthDayNano) -> v1::IntervalMonthDayNano {
|
||||
v1::IntervalMonthDayNano {
|
||||
months,
|
||||
days,
|
||||
nanoseconds,
|
||||
months: v.months,
|
||||
days: v.days,
|
||||
nanoseconds: v.nanoseconds,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -562,11 +592,15 @@ pub fn pb_value_to_value_ref<'a>(
|
||||
ValueData::TimeMillisecondValue(t) => ValueRef::Time(Time::new_millisecond(*t)),
|
||||
ValueData::TimeMicrosecondValue(t) => ValueRef::Time(Time::new_microsecond(*t)),
|
||||
ValueData::TimeNanosecondValue(t) => ValueRef::Time(Time::new_nanosecond(*t)),
|
||||
ValueData::IntervalYearMonthValue(v) => ValueRef::Interval(Interval::from_i32(*v)),
|
||||
ValueData::IntervalDayTimeValue(v) => ValueRef::Interval(Interval::from_i64(*v)),
|
||||
ValueData::IntervalYearMonthValue(v) => {
|
||||
ValueRef::IntervalYearMonth(IntervalYearMonth::from_i32(*v))
|
||||
}
|
||||
ValueData::IntervalDayTimeValue(v) => {
|
||||
ValueRef::IntervalDayTime(IntervalDayTime::from_i64(*v))
|
||||
}
|
||||
ValueData::IntervalMonthDayNanoValue(v) => {
|
||||
let interval = Interval::from_month_day_nano(v.months, v.days, v.nanoseconds);
|
||||
ValueRef::Interval(interval)
|
||||
let interval = IntervalMonthDayNano::new(v.months, v.days, v.nanoseconds);
|
||||
ValueRef::IntervalMonthDayNano(interval)
|
||||
}
|
||||
ValueData::Decimal128Value(v) => {
|
||||
// get precision and scale from datatype_extension
|
||||
@@ -657,7 +691,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
IntervalType::MonthDayNano(_) => {
|
||||
Arc::new(IntervalMonthDayNanoVector::from_iter_values(
|
||||
values.interval_month_day_nano_values.iter().map(|x| {
|
||||
Interval::from_month_day_nano(x.months, x.days, x.nanoseconds).to_i128()
|
||||
IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).to_i128()
|
||||
}),
|
||||
))
|
||||
}
|
||||
@@ -667,6 +701,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
|
||||
Decimal128::from_value_precision_scale(x.hi, x.lo, d.precision(), d.scale()).into()
|
||||
}),
|
||||
)),
|
||||
ConcreteDataType::Vector(_) => Arc::new(BinaryVector::from_vec(values.binary_values)),
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
@@ -802,18 +837,18 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values
|
||||
.interval_year_month_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Interval(Interval::from_i32(v)))
|
||||
.map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Interval(IntervalType::DayTime(_)) => values
|
||||
.interval_day_time_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Interval(Interval::from_i64(v)))
|
||||
.map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values
|
||||
.interval_month_day_nano_values
|
||||
.into_iter()
|
||||
.map(|v| {
|
||||
Value::Interval(Interval::from_month_day_nano(
|
||||
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(
|
||||
v.months,
|
||||
v.days,
|
||||
v.nanoseconds,
|
||||
@@ -832,6 +867,7 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
|
||||
))
|
||||
})
|
||||
.collect(),
|
||||
ConcreteDataType::Vector(_) => values.binary_values.into_iter().map(|v| v.into()).collect(),
|
||||
ConcreteDataType::Null(_)
|
||||
| ConcreteDataType::List(_)
|
||||
| ConcreteDataType::Dictionary(_)
|
||||
@@ -856,10 +892,7 @@ pub fn is_column_type_value_eq(
|
||||
ColumnDataTypeWrapper::try_new(type_value, type_extension)
|
||||
.map(|wrapper| {
|
||||
let datatype = ConcreteDataType::from(wrapper);
|
||||
(datatype == *expect_type)
|
||||
// Json type leverage binary type in pb, so this is valid.
|
||||
|| (datatype == ConcreteDataType::binary_datatype()
|
||||
&& *expect_type == ConcreteDataType::json_datatype())
|
||||
expect_type == &datatype
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
@@ -941,18 +974,16 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
|
||||
value_data: Some(ValueData::TimeNanosecondValue(v.value())),
|
||||
},
|
||||
},
|
||||
Value::Interval(v) => match v.unit() {
|
||||
IntervalUnit::YearMonth => v1::Value {
|
||||
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
|
||||
},
|
||||
IntervalUnit::DayTime => v1::Value {
|
||||
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
|
||||
},
|
||||
IntervalUnit::MonthDayNano => v1::Value {
|
||||
value_data: Some(ValueData::IntervalMonthDayNanoValue(
|
||||
convert_i128_to_interval(v.to_i128()),
|
||||
)),
|
||||
},
|
||||
Value::IntervalYearMonth(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
|
||||
},
|
||||
Value::IntervalDayTime(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
|
||||
},
|
||||
Value::IntervalMonthDayNano(v) => v1::Value {
|
||||
value_data: Some(ValueData::IntervalMonthDayNanoValue(
|
||||
convert_month_day_nano_to_pb(v),
|
||||
)),
|
||||
},
|
||||
Value::Decimal128(v) => v1::Value {
|
||||
value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
|
||||
@@ -1044,13 +1075,11 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
TimeUnit::Microsecond => ValueData::TimeMicrosecondValue(v.value()),
|
||||
TimeUnit::Nanosecond => ValueData::TimeNanosecondValue(v.value()),
|
||||
}),
|
||||
Value::Interval(v) => Some(match v.unit() {
|
||||
IntervalUnit::YearMonth => ValueData::IntervalYearMonthValue(v.to_i32()),
|
||||
IntervalUnit::DayTime => ValueData::IntervalDayTimeValue(v.to_i64()),
|
||||
IntervalUnit::MonthDayNano => {
|
||||
ValueData::IntervalMonthDayNanoValue(convert_i128_to_interval(v.to_i128()))
|
||||
}
|
||||
}),
|
||||
Value::IntervalYearMonth(v) => Some(ValueData::IntervalYearMonthValue(v.to_i32())),
|
||||
Value::IntervalDayTime(v) => Some(ValueData::IntervalDayTimeValue(v.to_i64())),
|
||||
Value::IntervalMonthDayNano(v) => Some(ValueData::IntervalMonthDayNanoValue(
|
||||
convert_month_day_nano_to_pb(v),
|
||||
)),
|
||||
Value::Decimal128(v) => Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
|
||||
Value::List(_) | Value::Duration(_) => unreachable!(),
|
||||
},
|
||||
@@ -1061,6 +1090,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::interval::IntervalUnit;
|
||||
use datatypes::types::{
|
||||
Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
|
||||
TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType,
|
||||
@@ -1149,6 +1179,10 @@ mod tests {
|
||||
let values = values_with_capacity(ColumnDataType::Decimal128, 2);
|
||||
let values = values.decimal128_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
|
||||
let values = values_with_capacity(ColumnDataType::Vector, 2);
|
||||
let values = values.binary_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1236,7 +1270,11 @@ mod tests {
|
||||
assert_eq!(
|
||||
ConcreteDataType::decimal128_datatype(10, 2),
|
||||
ColumnDataTypeWrapper::decimal128_datatype(10, 2).into()
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::vector_datatype(3),
|
||||
ColumnDataTypeWrapper::vector_datatype(3).into()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1332,6 +1370,10 @@ mod tests {
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper::vector_datatype(3),
|
||||
ConcreteDataType::vector_datatype(3).try_into().unwrap()
|
||||
);
|
||||
|
||||
let result: Result<ColumnDataTypeWrapper> = ConcreteDataType::null_datatype().try_into();
|
||||
assert!(result.is_err());
|
||||
@@ -1506,11 +1548,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_convert_i128_to_interval() {
|
||||
let i128_val = 3000;
|
||||
let interval = convert_i128_to_interval(i128_val);
|
||||
let i128_val = 3;
|
||||
let interval = convert_month_day_nano_to_pb(IntervalMonthDayNano::from_i128(i128_val));
|
||||
assert_eq!(interval.months, 0);
|
||||
assert_eq!(interval.days, 0);
|
||||
assert_eq!(interval.nanoseconds, 3000);
|
||||
assert_eq!(interval.nanoseconds, 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1590,9 +1632,9 @@ mod tests {
|
||||
},
|
||||
);
|
||||
let expect = vec![
|
||||
Value::Interval(Interval::from_year_month(1_i32)),
|
||||
Value::Interval(Interval::from_year_month(2_i32)),
|
||||
Value::Interval(Interval::from_year_month(3_i32)),
|
||||
Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)),
|
||||
Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)),
|
||||
Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)),
|
||||
];
|
||||
assert_eq!(expect, actual);
|
||||
|
||||
@@ -1605,9 +1647,9 @@ mod tests {
|
||||
},
|
||||
);
|
||||
let expect = vec![
|
||||
Value::Interval(Interval::from_i64(1_i64)),
|
||||
Value::Interval(Interval::from_i64(2_i64)),
|
||||
Value::Interval(Interval::from_i64(3_i64)),
|
||||
Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)),
|
||||
Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)),
|
||||
Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)),
|
||||
];
|
||||
assert_eq!(expect, actual);
|
||||
|
||||
@@ -1636,9 +1678,9 @@ mod tests {
|
||||
},
|
||||
);
|
||||
let expect = vec![
|
||||
Value::Interval(Interval::from_month_day_nano(1, 2, 3)),
|
||||
Value::Interval(Interval::from_month_day_nano(5, 6, 7)),
|
||||
Value::Interval(Interval::from_month_day_nano(9, 10, 11)),
|
||||
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)),
|
||||
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)),
|
||||
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)),
|
||||
];
|
||||
assert_eq!(expect, actual);
|
||||
}
|
||||
|
||||
@@ -15,8 +15,10 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use datatypes::schema::{
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY,
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
|
||||
FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
};
|
||||
use greptime_proto::v1::Analyzer;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -25,6 +27,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
|
||||
|
||||
/// Key used to store fulltext options in gRPC column options.
|
||||
const FULLTEXT_GRPC_KEY: &str = "fulltext";
|
||||
/// Key used to store inverted index options in gRPC column options.
|
||||
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
|
||||
|
||||
/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
|
||||
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
@@ -49,10 +53,13 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
if !column_def.comment.is_empty() {
|
||||
metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
|
||||
}
|
||||
if let Some(options) = column_def.options.as_ref()
|
||||
&& let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY)
|
||||
{
|
||||
metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_string());
|
||||
if let Some(options) = column_def.options.as_ref() {
|
||||
if let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) {
|
||||
metadata.insert(FULLTEXT_KEY.to_string(), fulltext.clone());
|
||||
}
|
||||
if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
|
||||
metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone());
|
||||
}
|
||||
}
|
||||
|
||||
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
|
||||
@@ -70,7 +77,12 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
|
||||
if let Some(fulltext) = column_schema.metadata().get(FULLTEXT_KEY) {
|
||||
options
|
||||
.options
|
||||
.insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.to_string());
|
||||
.insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.clone());
|
||||
}
|
||||
if let Some(inverted_index) = column_schema.metadata().get(INVERTED_INDEX_KEY) {
|
||||
options
|
||||
.options
|
||||
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
|
||||
}
|
||||
|
||||
(!options.options.is_empty()).then_some(options)
|
||||
@@ -93,6 +105,14 @@ pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<Column
|
||||
Ok((!options.options.is_empty()).then_some(options))
|
||||
}
|
||||
|
||||
/// Converts the given protobuf `Analyzer` into the corresponding `FulltextAnalyzer`.
|
||||
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
|
||||
match analyzer {
|
||||
Analyzer::English => FulltextAnalyzer::English,
|
||||
Analyzer::Chinese => FulltextAnalyzer::Chinese,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -115,10 +135,13 @@ mod tests {
|
||||
comment: "test_comment".to_string(),
|
||||
datatype_extension: None,
|
||||
options: Some(ColumnOptions {
|
||||
options: HashMap::from([(
|
||||
FULLTEXT_GRPC_KEY.to_string(),
|
||||
"{\"enable\":true}".to_string(),
|
||||
)]),
|
||||
options: HashMap::from([
|
||||
(
|
||||
FULLTEXT_GRPC_KEY.to_string(),
|
||||
"{\"enable\":true}".to_string(),
|
||||
),
|
||||
(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
|
||||
]),
|
||||
}),
|
||||
};
|
||||
|
||||
@@ -139,6 +162,7 @@ mod tests {
|
||||
..Default::default()
|
||||
}
|
||||
);
|
||||
assert!(schema.is_inverted_indexed());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -153,12 +177,17 @@ mod tests {
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
})
|
||||
.unwrap();
|
||||
.unwrap()
|
||||
.set_inverted_index(true);
|
||||
let options = options_from_column_schema(&schema).unwrap();
|
||||
assert_eq!(
|
||||
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
|
||||
);
|
||||
assert_eq!(
|
||||
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
|
||||
"true"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -33,7 +33,7 @@ impl StaticUserProvider {
|
||||
value: value.to_string(),
|
||||
msg: "StaticUserProviderOption must be in format `<option>:<value>`",
|
||||
})?;
|
||||
return match mode {
|
||||
match mode {
|
||||
"file" => {
|
||||
let users = load_credential_from_file(content)?
|
||||
.context(InvalidConfigSnafu {
|
||||
@@ -58,7 +58,7 @@ impl StaticUserProvider {
|
||||
msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
|
||||
}
|
||||
.fail(),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
62
src/cache/src/lib.rs
vendored
62
src/cache/src/lib.rs
vendored
@@ -19,9 +19,9 @@ use std::time::Duration;
|
||||
|
||||
use catalog::kvbackend::new_table_cache;
|
||||
use common_meta::cache::{
|
||||
new_table_flownode_set_cache, new_table_info_cache, new_table_name_cache,
|
||||
new_table_route_cache, new_view_info_cache, CacheRegistry, CacheRegistryBuilder,
|
||||
LayeredCacheRegistryBuilder,
|
||||
new_schema_cache, new_table_flownode_set_cache, new_table_info_cache, new_table_name_cache,
|
||||
new_table_route_cache, new_table_schema_cache, new_view_info_cache, CacheRegistry,
|
||||
CacheRegistryBuilder, LayeredCacheRegistryBuilder,
|
||||
};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use moka::future::CacheBuilder;
|
||||
@@ -37,9 +37,47 @@ pub const TABLE_INFO_CACHE_NAME: &str = "table_info_cache";
|
||||
pub const VIEW_INFO_CACHE_NAME: &str = "view_info_cache";
|
||||
pub const TABLE_NAME_CACHE_NAME: &str = "table_name_cache";
|
||||
pub const TABLE_CACHE_NAME: &str = "table_cache";
|
||||
pub const SCHEMA_CACHE_NAME: &str = "schema_cache";
|
||||
pub const TABLE_SCHEMA_NAME_CACHE_NAME: &str = "table_schema_name_cache";
|
||||
pub const TABLE_FLOWNODE_SET_CACHE_NAME: &str = "table_flownode_set_cache";
|
||||
pub const TABLE_ROUTE_CACHE_NAME: &str = "table_route_cache";
|
||||
|
||||
/// Builds cache registry for datanode, including:
|
||||
/// - Schema cache.
|
||||
/// - Table id to schema name cache.
|
||||
pub fn build_datanode_cache_registry(kv_backend: KvBackendRef) -> CacheRegistry {
|
||||
// Builds table id schema name cache that never expires.
|
||||
let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY).build();
|
||||
let table_id_schema_cache = Arc::new(new_table_schema_cache(
|
||||
TABLE_SCHEMA_NAME_CACHE_NAME.to_string(),
|
||||
cache,
|
||||
kv_backend.clone(),
|
||||
));
|
||||
|
||||
// Builds schema cache
|
||||
let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
|
||||
.time_to_live(DEFAULT_CACHE_TTL)
|
||||
.time_to_idle(DEFAULT_CACHE_TTI)
|
||||
.build();
|
||||
let schema_cache = Arc::new(new_schema_cache(
|
||||
SCHEMA_CACHE_NAME.to_string(),
|
||||
cache,
|
||||
kv_backend.clone(),
|
||||
));
|
||||
|
||||
CacheRegistryBuilder::default()
|
||||
.add_cache(table_id_schema_cache)
|
||||
.add_cache(schema_cache)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Builds cache registry for frontend and datanode, including:
|
||||
/// - Table info cache
|
||||
/// - Table name cache
|
||||
/// - Table route cache
|
||||
/// - Table flow node cache
|
||||
/// - View cache
|
||||
/// - Schema cache
|
||||
pub fn build_fundamental_cache_registry(kv_backend: KvBackendRef) -> CacheRegistry {
|
||||
// Builds table info cache
|
||||
let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
|
||||
@@ -95,12 +133,30 @@ pub fn build_fundamental_cache_registry(kv_backend: KvBackendRef) -> CacheRegist
|
||||
kv_backend.clone(),
|
||||
));
|
||||
|
||||
// Builds schema cache
|
||||
let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
|
||||
.time_to_live(DEFAULT_CACHE_TTL)
|
||||
.time_to_idle(DEFAULT_CACHE_TTI)
|
||||
.build();
|
||||
let schema_cache = Arc::new(new_schema_cache(
|
||||
SCHEMA_CACHE_NAME.to_string(),
|
||||
cache,
|
||||
kv_backend.clone(),
|
||||
));
|
||||
|
||||
let table_id_schema_cache = Arc::new(new_table_schema_cache(
|
||||
TABLE_SCHEMA_NAME_CACHE_NAME.to_string(),
|
||||
CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY).build(),
|
||||
kv_backend,
|
||||
));
|
||||
CacheRegistryBuilder::default()
|
||||
.add_cache(table_info_cache)
|
||||
.add_cache(table_name_cache)
|
||||
.add_cache(table_route_cache)
|
||||
.add_cache(view_info_cache)
|
||||
.add_cache(table_flownode_set_cache)
|
||||
.add_cache(schema_cache)
|
||||
.add_cache(table_id_schema_cache)
|
||||
.build()
|
||||
}
|
||||
|
||||
|
||||
@@ -178,6 +178,12 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Partition manager not found, it's not expected."))]
|
||||
PartitionManagerNotFound {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find table partitions"))]
|
||||
FindPartitions { source: partition::error::Error },
|
||||
|
||||
@@ -301,6 +307,7 @@ impl ErrorExt for Error {
|
||||
| Error::CastManager { .. }
|
||||
| Error::Json { .. }
|
||||
| Error::GetInformationExtension { .. }
|
||||
| Error::PartitionManagerNotFound { .. }
|
||||
| Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
92
src/catalog/src/information_extension.rs
Normal file
92
src/catalog/src/information_extension.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::meta::ProcedureStatus;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cluster::{ClusterInfo, NodeInfo};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::rpc::procedure;
|
||||
use common_procedure::{ProcedureInfo, ProcedureState};
|
||||
use meta_client::MetaClientRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error;
|
||||
use crate::information_schema::InformationExtension;
|
||||
|
||||
/// An [`InformationExtension`] implementation that serves every query through
/// the meta client it holds.
pub struct DistributedInformationExtension {
|
||||
/// Client used for the node, procedure and region-stat lookups.
meta_client: MetaClientRef,
|
||||
}
|
||||
|
||||
impl DistributedInformationExtension {
|
||||
/// Creates an extension that wraps the given `meta_client`.
pub fn new(meta_client: MetaClientRef) -> Self {
|
||||
Self { meta_client }
|
||||
}
|
||||
}
|
||||
|
||||
// Every method below forwards to `self.meta_client`, boxes the client error
// and attaches a crate-level snafu context to it.
#[async_trait::async_trait]
|
||||
impl InformationExtension for DistributedInformationExtension {
|
||||
type Error = crate::error::Error;
|
||||
|
||||
/// Lists the nodes reported by the meta client (`list_nodes(None)`).
///
/// # Errors
///
/// Client failures are boxed and returned with the `ListNodes` context.
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_nodes(None)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ListNodesSnafu)
|
||||
}
|
||||
|
||||
/// Lists procedures from the meta client and converts each one into a
/// `(status, ProcedureInfo)` pair, where `status` is the textual name of the
/// protobuf `ProcedureStatus`.
///
/// # Errors
///
/// - `ListProcedures` if the client call fails.
/// - `ProcedureIdNotFound` if a returned procedure carries no id.
/// - `ConvertProtoData` if the protobuf id cannot be converted.
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
|
||||
let procedures = self
|
||||
.meta_client
|
||||
.list_procedures(&ExecutorContext::default())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ListProceduresSnafu)?
|
||||
.procedures;
|
||||
let mut result = Vec::with_capacity(procedures.len());
|
||||
for procedure in procedures {
|
||||
// A procedure without an id aborts the whole listing.
let pid = match procedure.id {
|
||||
Some(pid) => pid,
|
||||
None => return error::ProcedureIdNotFoundSnafu {}.fail(),
|
||||
};
|
||||
let pid = procedure::pb_pid_to_pid(&pid)
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ConvertProtoDataSnafu)?;
|
||||
// Status values that `ProcedureStatus::try_from` rejects are reported as "Unknown".
let status = ProcedureStatus::try_from(procedure.status)
|
||||
.map(|v| v.as_str_name())
|
||||
.unwrap_or("Unknown")
|
||||
.to_string();
|
||||
let procedure_info = ProcedureInfo {
|
||||
id: pid,
|
||||
type_name: procedure.type_name,
|
||||
start_time_ms: procedure.start_time_ms,
|
||||
end_time_ms: procedure.end_time_ms,
|
||||
// `state` is always filled with `Running` here; the status reported by the
// meta client travels in the `status` string of the returned tuple.
// NOTE(review): presumably callers read `status` rather than `state` — confirm.
state: ProcedureState::Running,
|
||||
lock_keys: procedure.lock_keys,
|
||||
};
|
||||
result.push((status, procedure_info));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Lists region statistics via the meta client (`list_region_stats()`).
///
/// # Errors
///
/// Client failures are boxed and returned with the `ListRegionStats` context.
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_region_stats()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(error::ListRegionStatsSnafu)
|
||||
}
|
||||
}
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub use client::{CachedMetaKvBackend, CachedMetaKvBackendBuilder, MetaKvBackend};
|
||||
pub use client::{CachedKvBackend, CachedKvBackendBuilder, MetaKvBackend};
|
||||
|
||||
mod client;
|
||||
mod manager;
|
||||
|
||||
@@ -22,6 +22,7 @@ use common_error::ext::BoxedError;
|
||||
use common_meta::cache_invalidator::KvCacheInvalidator;
|
||||
use common_meta::error::Error::CacheNotGet;
|
||||
use common_meta::error::{CacheNotGetSnafu, Error, ExternalSnafu, GetKvCacheSnafu, Result};
|
||||
use common_meta::kv_backend::txn::{Txn, TxnResponse};
|
||||
use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
|
||||
use common_meta::rpc::store::{
|
||||
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
|
||||
@@ -42,20 +43,20 @@ const DEFAULT_CACHE_MAX_CAPACITY: u64 = 10000;
|
||||
const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
|
||||
const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
|
||||
|
||||
pub struct CachedMetaKvBackendBuilder {
|
||||
pub struct CachedKvBackendBuilder {
|
||||
cache_max_capacity: Option<u64>,
|
||||
cache_ttl: Option<Duration>,
|
||||
cache_tti: Option<Duration>,
|
||||
meta_client: Arc<MetaClient>,
|
||||
inner: KvBackendRef,
|
||||
}
|
||||
|
||||
impl CachedMetaKvBackendBuilder {
|
||||
pub fn new(meta_client: Arc<MetaClient>) -> Self {
|
||||
impl CachedKvBackendBuilder {
|
||||
pub fn new(inner: KvBackendRef) -> Self {
|
||||
Self {
|
||||
cache_max_capacity: None,
|
||||
cache_ttl: None,
|
||||
cache_tti: None,
|
||||
meta_client,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,7 +75,7 @@ impl CachedMetaKvBackendBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> CachedMetaKvBackend {
|
||||
pub fn build(self) -> CachedKvBackend {
|
||||
let cache_max_capacity = self
|
||||
.cache_max_capacity
|
||||
.unwrap_or(DEFAULT_CACHE_MAX_CAPACITY);
|
||||
@@ -85,14 +86,11 @@ impl CachedMetaKvBackendBuilder {
|
||||
.time_to_live(cache_ttl)
|
||||
.time_to_idle(cache_tti)
|
||||
.build();
|
||||
|
||||
let kv_backend = Arc::new(MetaKvBackend {
|
||||
client: self.meta_client,
|
||||
});
|
||||
let kv_backend = self.inner;
|
||||
let name = format!("CachedKvBackend({})", kv_backend.name());
|
||||
let version = AtomicUsize::new(0);
|
||||
|
||||
CachedMetaKvBackend {
|
||||
CachedKvBackend {
|
||||
kv_backend,
|
||||
cache,
|
||||
name,
|
||||
@@ -112,19 +110,29 @@ pub type CacheBackend = Cache<Vec<u8>, KeyValue>;
|
||||
/// Therefore, it is recommended to use CachedKvBackend to only read metadata related
|
||||
/// information. Note: If you read other information, you may read expired data, which depends on
|
||||
/// TTL and TTI for cache.
|
||||
pub struct CachedMetaKvBackend {
|
||||
pub struct CachedKvBackend {
|
||||
kv_backend: KvBackendRef,
|
||||
cache: CacheBackend,
|
||||
name: String,
|
||||
version: AtomicUsize,
|
||||
}
|
||||
|
||||
impl TxnService for CachedMetaKvBackend {
|
||||
#[async_trait::async_trait]
|
||||
impl TxnService for CachedKvBackend {
|
||||
type Error = Error;
|
||||
|
||||
async fn txn(&self, txn: Txn) -> std::result::Result<TxnResponse, Self::Error> {
|
||||
// TODO(hl): txn of CachedKvBackend simply pass through to inner backend without invalidating caches.
|
||||
self.kv_backend.txn(txn).await
|
||||
}
|
||||
|
||||
fn max_txn_ops(&self) -> usize {
|
||||
self.kv_backend.max_txn_ops()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvBackend for CachedMetaKvBackend {
|
||||
impl KvBackend for CachedKvBackend {
|
||||
fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
@@ -305,7 +313,7 @@ impl KvBackend for CachedMetaKvBackend {
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl KvCacheInvalidator for CachedMetaKvBackend {
|
||||
impl KvCacheInvalidator for CachedKvBackend {
|
||||
async fn invalidate_key(&self, key: &[u8]) {
|
||||
self.create_new_version();
|
||||
self.cache.invalidate(key).await;
|
||||
@@ -313,7 +321,7 @@ impl KvCacheInvalidator for CachedMetaKvBackend {
|
||||
}
|
||||
}
|
||||
|
||||
impl CachedMetaKvBackend {
|
||||
impl CachedKvBackend {
|
||||
// only for test
|
||||
#[cfg(test)]
|
||||
fn wrap(kv_backend: KvBackendRef) -> Self {
|
||||
@@ -466,7 +474,7 @@ mod tests {
|
||||
use common_meta::rpc::KeyValue;
|
||||
use dashmap::DashMap;
|
||||
|
||||
use super::CachedMetaKvBackend;
|
||||
use super::CachedKvBackend;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SimpleKvBackend {
|
||||
@@ -540,7 +548,7 @@ mod tests {
|
||||
async fn test_cached_kv_backend() {
|
||||
let simple_kv = Arc::new(SimpleKvBackend::default());
|
||||
let get_execute_times = simple_kv.get_execute_times.clone();
|
||||
let cached_kv = CachedMetaKvBackend::wrap(simple_kv);
|
||||
let cached_kv = CachedKvBackend::wrap(simple_kv);
|
||||
|
||||
add_some_vals(&cached_kv).await;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ use table::TableRef;
|
||||
use crate::error::Result;
|
||||
|
||||
pub mod error;
|
||||
pub mod information_extension;
|
||||
pub mod kvbackend;
|
||||
pub mod memory;
|
||||
mod metrics;
|
||||
|
||||
@@ -34,15 +34,14 @@ use datatypes::vectors::{
|
||||
};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use partition::manager::PartitionInfo;
|
||||
use partition::partition::PartitionDef;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use super::PARTITIONS;
|
||||
use crate::error::{
|
||||
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
|
||||
UpgradeWeakCatalogManagerRefSnafu,
|
||||
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, PartitionManagerNotFoundSnafu,
|
||||
Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::kvbackend::KvBackendCatalogManager;
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
@@ -236,7 +235,8 @@ impl InformationSchemaPartitionsBuilder {
|
||||
let partition_manager = catalog_manager
|
||||
.as_any()
|
||||
.downcast_ref::<KvBackendCatalogManager>()
|
||||
.map(|catalog_manager| catalog_manager.partition_manager());
|
||||
.map(|catalog_manager| catalog_manager.partition_manager())
|
||||
.context(PartitionManagerNotFoundSnafu)?;
|
||||
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
@@ -262,27 +262,10 @@ impl InformationSchemaPartitionsBuilder {
|
||||
let table_ids: Vec<TableId> =
|
||||
table_infos.iter().map(|info| info.ident.table_id).collect();
|
||||
|
||||
let mut table_partitions = if let Some(partition_manager) = &partition_manager {
|
||||
partition_manager
|
||||
.batch_find_table_partitions(&table_ids)
|
||||
.await
|
||||
.context(FindPartitionsSnafu)?
|
||||
} else {
|
||||
// Current node must be a standalone instance, contains only one partition by default.
|
||||
// TODO(dennis): change it when we support multi-regions for standalone.
|
||||
table_ids
|
||||
.into_iter()
|
||||
.map(|table_id| {
|
||||
(
|
||||
table_id,
|
||||
vec![PartitionInfo {
|
||||
id: RegionId::new(table_id, 0),
|
||||
partition: PartitionDef::new(vec![], vec![]),
|
||||
}],
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
let mut table_partitions = partition_manager
|
||||
.batch_find_table_partitions(&table_ids)
|
||||
.await
|
||||
.context(FindPartitionsSnafu)?;
|
||||
|
||||
for table_info in table_infos {
|
||||
let partitions = table_partitions
|
||||
|
||||
@@ -39,9 +39,12 @@ use crate::CatalogManager;
|
||||
const REGION_ID: &str = "region_id";
|
||||
const TABLE_ID: &str = "table_id";
|
||||
const REGION_NUMBER: &str = "region_number";
|
||||
const REGION_ROWS: &str = "region_rows";
|
||||
const DISK_SIZE: &str = "disk_size";
|
||||
const MEMTABLE_SIZE: &str = "memtable_size";
|
||||
const MANIFEST_SIZE: &str = "manifest_size";
|
||||
const SST_SIZE: &str = "sst_size";
|
||||
const INDEX_SIZE: &str = "index_size";
|
||||
const ENGINE: &str = "engine";
|
||||
const REGION_ROLE: &str = "region_role";
|
||||
|
||||
@@ -52,9 +55,12 @@ const INIT_CAPACITY: usize = 42;
|
||||
/// - `region_id`: The region id.
|
||||
/// - `table_id`: The table id.
|
||||
/// - `region_number`: The region number.
|
||||
/// - `region_rows`: The number of rows in region.
|
||||
/// - `memtable_size`: The memtable size in bytes.
|
||||
/// - `disk_size`: The approximate disk size in bytes.
|
||||
/// - `manifest_size`: The manifest size in bytes.
|
||||
/// - `sst_size`: The sst size in bytes.
|
||||
/// - `sst_size`: The sst data files size in bytes.
|
||||
/// - `index_size`: The sst index files size in bytes.
|
||||
/// - `engine`: The engine type.
|
||||
/// - `region_role`: The region role.
|
||||
///
|
||||
@@ -76,9 +82,12 @@ impl InformationSchemaRegionStatistics {
|
||||
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
|
||||
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
|
||||
ColumnSchema::new(REGION_ROWS, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(DISK_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
|
||||
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
|
||||
]))
|
||||
@@ -135,9 +144,12 @@ struct InformationSchemaRegionStatisticsBuilder {
|
||||
region_ids: UInt64VectorBuilder,
|
||||
table_ids: UInt32VectorBuilder,
|
||||
region_numbers: UInt32VectorBuilder,
|
||||
region_rows: UInt64VectorBuilder,
|
||||
disk_sizes: UInt64VectorBuilder,
|
||||
memtable_sizes: UInt64VectorBuilder,
|
||||
manifest_sizes: UInt64VectorBuilder,
|
||||
sst_sizes: UInt64VectorBuilder,
|
||||
index_sizes: UInt64VectorBuilder,
|
||||
engines: StringVectorBuilder,
|
||||
region_roles: StringVectorBuilder,
|
||||
}
|
||||
@@ -150,9 +162,12 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_rows: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
disk_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
@@ -177,9 +192,12 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
(REGION_ID, &Value::from(region_stat.id.as_u64())),
|
||||
(TABLE_ID, &Value::from(region_stat.id.table_id())),
|
||||
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
|
||||
(REGION_ROWS, &Value::from(region_stat.num_rows)),
|
||||
(DISK_SIZE, &Value::from(region_stat.approximate_bytes)),
|
||||
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
|
||||
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
|
||||
(SST_SIZE, &Value::from(region_stat.sst_size)),
|
||||
(INDEX_SIZE, &Value::from(region_stat.index_size)),
|
||||
(ENGINE, &Value::from(region_stat.engine.as_str())),
|
||||
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
|
||||
];
|
||||
@@ -192,9 +210,12 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
self.table_ids.push(Some(region_stat.id.table_id()));
|
||||
self.region_numbers
|
||||
.push(Some(region_stat.id.region_number()));
|
||||
self.region_rows.push(Some(region_stat.num_rows));
|
||||
self.disk_sizes.push(Some(region_stat.approximate_bytes));
|
||||
self.memtable_sizes.push(Some(region_stat.memtable_size));
|
||||
self.manifest_sizes.push(Some(region_stat.manifest_size));
|
||||
self.sst_sizes.push(Some(region_stat.sst_size));
|
||||
self.index_sizes.push(Some(region_stat.index_size));
|
||||
self.engines.push(Some(®ion_stat.engine));
|
||||
self.region_roles.push(Some(®ion_stat.role.to_string()));
|
||||
}
|
||||
@@ -204,9 +225,12 @@ impl InformationSchemaRegionStatisticsBuilder {
|
||||
Arc::new(self.region_ids.finish()),
|
||||
Arc::new(self.table_ids.finish()),
|
||||
Arc::new(self.region_numbers.finish()),
|
||||
Arc::new(self.region_rows.finish()),
|
||||
Arc::new(self.disk_sizes.finish()),
|
||||
Arc::new(self.memtable_sizes.finish()),
|
||||
Arc::new(self.manifest_sizes.finish()),
|
||||
Arc::new(self.sst_sizes.finish()),
|
||||
Arc::new(self.index_sizes.finish()),
|
||||
Arc::new(self.engines.finish()),
|
||||
Arc::new(self.region_roles.finish()),
|
||||
];
|
||||
|
||||
@@ -180,7 +180,7 @@ impl InformationSchemaSchemataBuilder {
|
||||
.context(TableMetadataManagerSnafu)?
|
||||
// information_schema is not available from this
|
||||
// table_metadata_manager and we return None
|
||||
.map(|schema_opts| format!("{schema_opts}"))
|
||||
.map(|schema_opts| format!("{}", schema_opts.into_inner()))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/// All table names in `information_schema`.
|
||||
//! All table names in `information_schema`.
|
||||
|
||||
pub const TABLES: &str = "tables";
|
||||
pub const COLUMNS: &str = "columns";
|
||||
|
||||
@@ -12,13 +12,16 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::sync::{Arc, Weak};
|
||||
|
||||
use arrow_schema::SchemaRef as ArrowSchemaRef;
|
||||
use common_catalog::consts::INFORMATION_SCHEMA_TABLES_TABLE_ID;
|
||||
use common_catalog::consts::{INFORMATION_SCHEMA_TABLES_TABLE_ID, MITO_ENGINE};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_recordbatch::adapter::RecordBatchStreamAdapter;
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_telemetry::error;
|
||||
use datafusion::execution::TaskContext;
|
||||
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
@@ -31,7 +34,7 @@ use datatypes::vectors::{
|
||||
};
|
||||
use futures::TryStreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use store_api::storage::{ScanRequest, TableId};
|
||||
use store_api::storage::{RegionId, ScanRequest, TableId};
|
||||
use table::metadata::{TableInfo, TableType};
|
||||
|
||||
use super::TABLES;
|
||||
@@ -39,6 +42,7 @@ use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::system_schema::information_schema::{InformationTable, Predicates};
|
||||
use crate::system_schema::utils;
|
||||
use crate::CatalogManager;
|
||||
|
||||
pub const TABLE_CATALOG: &str = "table_catalog";
|
||||
@@ -234,17 +238,51 @@ impl InformationSchemaTablesBuilder {
|
||||
.context(UpgradeWeakCatalogManagerRefSnafu)?;
|
||||
let predicates = Predicates::from_scan_request(&request);
|
||||
|
||||
let information_extension = utils::information_extension(&self.catalog_manager)?;
|
||||
|
||||
// TODO(dennis): The `region_stats` API is not stable in a distributed cluster because of network issues etc.
|
||||
// But we don't want statements such as `show tables` to fail,
|
||||
// so we use `unwrap_or_else` here instead of the `?` operator.
|
||||
let region_stats = information_extension
|
||||
.region_stats()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
error!(e; "Failed to call region_stats");
|
||||
e
|
||||
})
|
||||
.unwrap_or_else(|_| vec![]);
|
||||
|
||||
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
|
||||
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
|
||||
|
||||
while let Some(table) = stream.try_next().await? {
|
||||
let table_info = table.table_info();
|
||||
|
||||
// TODO(dennis): make it work for the metric engine
|
||||
let table_region_stats =
|
||||
if table_info.meta.engine == MITO_ENGINE || table_info.is_physical_table() {
|
||||
let region_ids = table_info
|
||||
.meta
|
||||
.region_numbers
|
||||
.iter()
|
||||
.map(|n| RegionId::new(table_info.ident.table_id, *n))
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
region_stats
|
||||
.iter()
|
||||
.filter(|stat| region_ids.contains(&stat.id))
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
self.add_table(
|
||||
&predicates,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table_info,
|
||||
table.table_type(),
|
||||
&table_region_stats,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -260,6 +298,7 @@ impl InformationSchemaTablesBuilder {
|
||||
schema_name: &str,
|
||||
table_info: Arc<TableInfo>,
|
||||
table_type: TableType,
|
||||
region_stats: &[&RegionStat],
|
||||
) {
|
||||
let table_name = table_info.name.as_ref();
|
||||
let table_id = table_info.table_id();
|
||||
@@ -273,7 +312,9 @@ impl InformationSchemaTablesBuilder {
|
||||
|
||||
let row = [
|
||||
(TABLE_CATALOG, &Value::from(catalog_name)),
|
||||
(TABLE_ID, &Value::from(table_id)),
|
||||
(TABLE_SCHEMA, &Value::from(schema_name)),
|
||||
(ENGINE, &Value::from(engine)),
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(TABLE_TYPE, &Value::from(table_type_text)),
|
||||
];
|
||||
@@ -287,21 +328,39 @@ impl InformationSchemaTablesBuilder {
|
||||
self.table_names.push(Some(table_name));
|
||||
self.table_types.push(Some(table_type_text));
|
||||
self.table_ids.push(Some(table_id));
|
||||
|
||||
let data_length = region_stats.iter().map(|stat| stat.sst_size).sum();
|
||||
let table_rows = region_stats.iter().map(|stat| stat.num_rows).sum();
|
||||
let index_length = region_stats.iter().map(|stat| stat.index_size).sum();
|
||||
|
||||
// It's not precise, but it is acceptable for long-term data storage.
|
||||
let avg_row_length = if table_rows > 0 {
|
||||
let total_data_length = data_length
|
||||
+ region_stats
|
||||
.iter()
|
||||
.map(|stat| stat.memtable_size)
|
||||
.sum::<u64>();
|
||||
|
||||
total_data_length / table_rows
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
self.data_length.push(Some(data_length));
|
||||
self.index_length.push(Some(index_length));
|
||||
self.table_rows.push(Some(table_rows));
|
||||
self.avg_row_length.push(Some(avg_row_length));
|
||||
|
||||
// TODO(sunng87): use real data for these fields
|
||||
self.data_length.push(Some(0));
|
||||
self.max_data_length.push(Some(0));
|
||||
self.index_length.push(Some(0));
|
||||
self.avg_row_length.push(Some(0));
|
||||
self.max_index_length.push(Some(0));
|
||||
self.checksum.push(Some(0));
|
||||
self.table_rows.push(Some(0));
|
||||
self.max_index_length.push(Some(0));
|
||||
self.data_free.push(Some(0));
|
||||
self.auto_increment.push(Some(0));
|
||||
self.row_format.push(Some("Fixed"));
|
||||
self.table_collation.push(Some("utf8_bin"));
|
||||
self.update_time.push(None);
|
||||
self.check_time.push(None);
|
||||
|
||||
// use mariadb default table version number here
|
||||
self.version.push(Some(11));
|
||||
self.table_comment.push(table_info.desc.as_deref());
|
||||
|
||||
@@ -74,7 +74,7 @@ impl MemoryTableBuilder {
|
||||
/// Construct the `information_schema.{table_name}` virtual table
|
||||
pub async fn memory_records(&mut self) -> Result<RecordBatch> {
|
||||
if self.columns.is_empty() {
|
||||
RecordBatch::new_empty(self.schema.clone()).context(CreateRecordBatchSnafu)
|
||||
Ok(RecordBatch::new_empty(self.schema.clone()))
|
||||
} else {
|
||||
RecordBatch::new(self.schema.clone(), std::mem::take(&mut self.columns))
|
||||
.context(CreateRecordBatchSnafu)
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! The `pg_catalog.pg_namespace` table implementation.
|
||||
//! namespace is a schema in greptime
|
||||
|
||||
pub(super) mod oid_map;
|
||||
|
||||
use std::sync::{Arc, Weak};
|
||||
@@ -40,9 +43,6 @@ use crate::system_schema::utils::tables::{string_column, u32_column};
|
||||
use crate::system_schema::SystemTable;
|
||||
use crate::CatalogManager;
|
||||
|
||||
/// The `pg_catalog.pg_namespace` table implementation.
|
||||
/// namespace is a schema in greptime
|
||||
|
||||
const NSPNAME: &str = "nspname";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
|
||||
65
src/cli/Cargo.toml
Normal file
65
src/cli/Cargo.toml
Normal file
@@ -0,0 +1,65 @@
|
||||
[package]
|
||||
name = "cli"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait.workspace = true
|
||||
auth.workspace = true
|
||||
base64.workspace = true
|
||||
cache.workspace = true
|
||||
catalog.workspace = true
|
||||
chrono.workspace = true
|
||||
clap.workspace = true
|
||||
client.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
common-config.workspace = true
|
||||
common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-options.workspace = true
|
||||
common-procedure.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
common-runtime.workspace = true
|
||||
common-telemetry = { workspace = true, features = [
|
||||
"deadlock_detection",
|
||||
] }
|
||||
common-time.workspace = true
|
||||
common-version.workspace = true
|
||||
common-wal.workspace = true
|
||||
datatypes.workspace = true
|
||||
either = "1.8"
|
||||
etcd-client.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
meta-client.workspace = true
|
||||
nu-ansi-term = "0.46"
|
||||
query.workspace = true
|
||||
rand.workspace = true
|
||||
reqwest.workspace = true
|
||||
rustyline = "10.1"
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
servers.workspace = true
|
||||
session.workspace = true
|
||||
snafu.workspace = true
|
||||
store-api.workspace = true
|
||||
substrait.workspace = true
|
||||
table.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing-appender.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
client = { workspace = true, features = ["testing"] }
|
||||
common-test-util.workspace = true
|
||||
common-version.workspace = true
|
||||
serde.workspace = true
|
||||
temp-env = "0.3"
|
||||
tempfile.workspace = true
|
||||
@@ -19,6 +19,7 @@ use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
|
||||
use common_meta::kv_backend::etcd::EtcdStore;
|
||||
use common_meta::peer::Peer;
|
||||
@@ -30,11 +31,9 @@ use rand::Rng;
|
||||
use store_api::storage::RegionNumber;
|
||||
use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};
|
||||
use table::table_name::TableName;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use self::metadata::TableMetadataBencher;
|
||||
use crate::cli::{Instance, Tool};
|
||||
use crate::error::Result;
|
||||
use crate::Tool;
|
||||
|
||||
mod metadata;
|
||||
|
||||
@@ -62,7 +61,7 @@ pub struct BenchTableMetadataCommand {
|
||||
}
|
||||
|
||||
impl BenchTableMetadataCommand {
|
||||
pub async fn build(&self, guard: Vec<WorkerGuard>) -> Result<Instance> {
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr], 128)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -73,7 +72,7 @@ impl BenchTableMetadataCommand {
|
||||
table_metadata_manager,
|
||||
count: self.count,
|
||||
};
|
||||
Ok(Instance::new(Box::new(tool), guard))
|
||||
Ok(Box::new(tool))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -84,7 +83,7 @@ struct BenchTableMetadata {
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for BenchTableMetadata {
|
||||
async fn do_work(&self) -> Result<()> {
|
||||
async fn do_work(&self) -> std::result::Result<(), BoxedError> {
|
||||
let bencher = TableMetadataBencher::new(self.table_metadata_manager.clone(), self.count);
|
||||
bencher.bench_create().await;
|
||||
bencher.bench_get().await;
|
||||
@@ -18,7 +18,7 @@ use common_meta::key::table_route::TableRouteValue;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use table::table_name::TableName;
|
||||
|
||||
use crate::cli::bench::{
|
||||
use crate::bench::{
|
||||
bench_self_recorded, create_region_routes, create_region_wal_options, create_table_info,
|
||||
};
|
||||
|
||||
@@ -12,24 +12,35 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use base64::engine::general_purpose;
|
||||
use base64::Engine;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use humantime::format_duration;
|
||||
use serde_json::Value;
|
||||
use servers::http::greptime_result_v1::GreptimedbV1Response;
|
||||
use servers::http::header::constants::GREPTIME_DB_HEADER_TIMEOUT;
|
||||
use servers::http::result::greptime_result_v1::GreptimedbV1Response;
|
||||
use servers::http::GreptimeQueryOutput;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{HttpQuerySqlSnafu, Result, SerdeJsonSnafu};
|
||||
|
||||
pub(crate) struct DatabaseClient {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DatabaseClient {
|
||||
addr: String,
|
||||
catalog: String,
|
||||
auth_header: Option<String>,
|
||||
timeout: Duration,
|
||||
}
|
||||
|
||||
impl DatabaseClient {
|
||||
pub fn new(addr: String, catalog: String, auth_basic: Option<String>) -> Self {
|
||||
pub fn new(
|
||||
addr: String,
|
||||
catalog: String,
|
||||
auth_basic: Option<String>,
|
||||
timeout: Duration,
|
||||
) -> Self {
|
||||
let auth_header = if let Some(basic) = auth_basic {
|
||||
let encoded = general_purpose::STANDARD.encode(basic);
|
||||
Some(format!("basic {}", encoded))
|
||||
@@ -41,6 +52,7 @@ impl DatabaseClient {
|
||||
addr,
|
||||
catalog,
|
||||
auth_header,
|
||||
timeout,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -63,6 +75,11 @@ impl DatabaseClient {
|
||||
request = request.header("Authorization", auth);
|
||||
}
|
||||
|
||||
request = request.header(
|
||||
GREPTIME_DB_HEADER_TIMEOUT,
|
||||
format_duration(self.timeout).to_string(),
|
||||
);
|
||||
|
||||
let response = request.send().await.with_context(|_| HttpQuerySqlSnafu {
|
||||
reason: format!("bad url: {}", url),
|
||||
})?;
|
||||
316
src/cli/src/error.rs
Normal file
316
src/cli/src/error.rs
Normal file
@@ -0,0 +1,316 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::stack_trace_debug;
|
||||
use rustyline::error::ReadlineError;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
/// Errors raised by the CLI crate (this file is `src/cli/src/error.rs`).
///
/// Every variant carries its user-facing message in a `#[snafu(display(...))]`
/// attribute. Fields named `source`, or marked `#[snafu(source)]`, hold the
/// underlying error being wrapped; `location` fields are marked
/// `#[snafu(implicit)]`. The mapping from variant to `StatusCode` lives in the
/// `ErrorExt` implementation for this type.
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
#[stack_trace_debug]
|
||||
pub enum Error {
|
||||
#[snafu(display("Failed to install ring crypto provider: {}", msg))]
|
||||
InitTlsProvider {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
msg: String,
|
||||
},
|
||||
#[snafu(display("Failed to create default catalog and schema"))]
|
||||
InitMetadata {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to init DDL manager"))]
|
||||
InitDdlManager {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to init default timezone"))]
|
||||
InitTimezone {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_time::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start procedure manager"))]
|
||||
StartProcedureManager {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to stop procedure manager"))]
|
||||
StopProcedureManager {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_procedure::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start wal options allocator"))]
|
||||
StartWalOptionsAllocator {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_meta::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing config, msg: {}", msg))]
|
||||
MissingConfig {
|
||||
msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Illegal config: {}", msg))]
|
||||
IllegalConfig {
|
||||
msg: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
// Unlike most variants, this one records neither a `location` nor a source error.
#[snafu(display("Invalid REPL command: {reason}"))]
|
||||
InvalidReplCommand { reason: String },
|
||||
|
||||
#[snafu(display("Cannot create REPL"))]
|
||||
ReplCreation {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Error reading command"))]
|
||||
Readline {
|
||||
#[snafu(source)]
|
||||
error: ReadlineError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to request database, sql: {sql}"))]
|
||||
RequestDatabase {
|
||||
sql: String,
|
||||
#[snafu(source)]
|
||||
source: client::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to collect RecordBatches"))]
|
||||
CollectRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to pretty print Recordbatches"))]
|
||||
PrettyPrintRecordBatches {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_recordbatch::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start Meta client"))]
|
||||
StartMetaClient {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to parse SQL: {}", sql))]
|
||||
ParseSql {
|
||||
sql: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to plan statement"))]
|
||||
PlanStatement {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: query::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to encode logical plan in substrait"))]
|
||||
SubstraitEncodeLogicalPlan {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: substrait::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to load layered config"))]
|
||||
LoadLayeredConfig {
|
||||
// The config error is stored boxed; `Box::new` is the conversion applied to the
// incoming `common_config::error::Error`.
#[snafu(source(from(common_config::error::Error, Box::new)))]
|
||||
source: Box<common_config::error::Error>,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to connect to Etcd at {etcd_addr}"))]
|
||||
ConnectEtcd {
|
||||
etcd_addr: String,
|
||||
#[snafu(source)]
|
||||
error: etcd_client::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serde json"))]
|
||||
SerdeJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to run http request: {reason}"))]
|
||||
HttpQuerySql {
|
||||
reason: String,
|
||||
#[snafu(source)]
|
||||
error: reqwest::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Empty result from output"))]
|
||||
EmptyResult {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to manipulate file"))]
|
||||
FileIo {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
},
|
||||
|
||||
// No `location` field on this variant, only the directory and the I/O error.
#[snafu(display("Failed to create directory {}", dir))]
|
||||
CreateDir {
|
||||
dir: String,
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
},
|
||||
|
||||
// No `location` field on this variant, only the I/O error.
#[snafu(display("Failed to spawn thread"))]
|
||||
SpawnThread {
|
||||
#[snafu(source)]
|
||||
error: std::io::Error,
|
||||
},
|
||||
|
||||
// Catch-all wrapper around an already boxed error.
#[snafu(display("Other error"))]
|
||||
Other {
|
||||
source: BoxedError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build runtime"))]
|
||||
BuildRuntime {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: common_runtime::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get cache from cache registry: {}", name))]
|
||||
CacheRequired {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
name: String,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build cache registry"))]
|
||||
BuildCacheRegistry {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: cache::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to initialize meta client"))]
|
||||
MetaClientInit {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Cannot find schema {schema} in catalog {catalog}"))]
|
||||
SchemaNotFound {
|
||||
catalog: String,
|
||||
schema: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
/// Shorthand for `std::result::Result` with this module's `Error` as the error type.
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
// Maps every `Error` variant to a `StatusCode` and exposes the error as `dyn Any`.
impl ErrorExt for Error {
|
||||
// Returns the status code for this error. Most variants that wrap a `source`
// forward to `source.status_code()`; the remaining variants return a fixed code.
// The match lists every variant explicitly (there is no `_` arm).
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::InitMetadata { source, .. } | Error::InitDdlManager { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
|
||||
// Fixed code: reported as invalid arguments. Note that `InitTimezone` and
// `LoadLayeredConfig` carry a `source` but do not delegate to it here.
Error::MissingConfig { .. }
|
||||
| Error::LoadLayeredConfig { .. }
|
||||
| Error::IllegalConfig { .. }
|
||||
| Error::InvalidReplCommand { .. }
|
||||
| Error::InitTimezone { .. }
|
||||
| Error::ConnectEtcd { .. }
|
||||
| Error::CreateDir { .. }
|
||||
| Error::EmptyResult { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Error::StartProcedureManager { source, .. }
|
||||
| Error::StopProcedureManager { source, .. } => source.status_code(),
|
||||
Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
|
||||
// Fixed code: REPL and HTTP request failures are reported as internal errors.
Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
Error::RequestDatabase { source, .. } => source.status_code(),
|
||||
Error::CollectRecordBatches { source, .. }
|
||||
| Error::PrettyPrintRecordBatches { source, .. } => source.status_code(),
|
||||
Error::StartMetaClient { source, .. } => source.status_code(),
|
||||
Error::ParseSql { source, .. } | Error::PlanStatement { source, .. } => {
|
||||
source.status_code()
|
||||
}
|
||||
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
|
||||
|
||||
// Fixed code: these variants are reported as unexpected.
Error::SerdeJson { .. }
|
||||
| Error::FileIo { .. }
|
||||
| Error::SpawnThread { .. }
|
||||
| Error::InitTlsProvider { .. } => StatusCode::Unexpected,
|
||||
|
||||
Error::Other { source, .. } => source.status_code(),
|
||||
|
||||
Error::BuildRuntime { source, .. } => source.status_code(),
|
||||
|
||||
// Fixed code: `BuildCacheRegistry` has a `source` but is still reported as internal.
Error::CacheRequired { .. } | Error::BuildCacheRegistry { .. } => StatusCode::Internal,
|
||||
Error::MetaClientInit { source, .. } => source.status_code(),
|
||||
Error::SchemaNotFound { .. } => StatusCode::DatabaseNotFound,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns `self` as `&dyn Any`.
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
@@ -15,9 +15,11 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use serde_json::Value;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -25,11 +27,10 @@ use tokio::fs::File;
|
||||
use tokio::io::{AsyncWriteExt, BufWriter};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::cli::database::DatabaseClient;
|
||||
use crate::cli::{database, Instance, Tool};
|
||||
use crate::database::DatabaseClient;
|
||||
use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::{database, Tool};
|
||||
|
||||
type TableReference = (String, String, String);
|
||||
|
||||
@@ -83,28 +84,38 @@ pub struct ExportCommand {
|
||||
/// The basic authentication for connecting to the server
|
||||
#[clap(long)]
|
||||
auth_basic: Option<String>,
|
||||
|
||||
/// The timeout of invoking the database.
|
||||
///
|
||||
/// It is used to override the server-side timeout setting.
|
||||
/// The default behavior will disable server-side default timeout(i.e. `0s`).
|
||||
#[clap(long, value_parser = humantime::parse_duration)]
|
||||
timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
impl ExportCommand {
|
||||
pub async fn build(&self, guard: Vec<WorkerGuard>) -> Result<Instance> {
|
||||
let (catalog, schema) = database::split_database(&self.database)?;
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
let (catalog, schema) =
|
||||
database::split_database(&self.database).map_err(BoxedError::new)?;
|
||||
|
||||
let database_client =
|
||||
DatabaseClient::new(self.addr.clone(), catalog.clone(), self.auth_basic.clone());
|
||||
let database_client = DatabaseClient::new(
|
||||
self.addr.clone(),
|
||||
catalog.clone(),
|
||||
self.auth_basic.clone(),
|
||||
// Treats `None` as `0s` to disable server-side default timeout.
|
||||
self.timeout.unwrap_or_default(),
|
||||
);
|
||||
|
||||
Ok(Instance::new(
|
||||
Box::new(Export {
|
||||
catalog,
|
||||
schema,
|
||||
database_client,
|
||||
output_dir: self.output_dir.clone(),
|
||||
parallelism: self.export_jobs,
|
||||
target: self.target.clone(),
|
||||
start_time: self.start_time.clone(),
|
||||
end_time: self.end_time.clone(),
|
||||
}),
|
||||
guard,
|
||||
))
|
||||
Ok(Box::new(Export {
|
||||
catalog,
|
||||
schema,
|
||||
database_client,
|
||||
output_dir: self.output_dir.clone(),
|
||||
parallelism: self.export_jobs,
|
||||
target: self.target.clone(),
|
||||
start_time: self.start_time.clone(),
|
||||
end_time: self.end_time.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -452,97 +463,22 @@ impl Export {
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for Export {
|
||||
async fn do_work(&self) -> Result<()> {
|
||||
async fn do_work(&self) -> std::result::Result<(), BoxedError> {
|
||||
match self.target {
|
||||
ExportTarget::Schema => {
|
||||
self.export_create_database().await?;
|
||||
self.export_create_table().await
|
||||
self.export_create_database()
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
self.export_create_table().await.map_err(BoxedError::new)
|
||||
}
|
||||
ExportTarget::Data => self.export_database_data().await,
|
||||
ExportTarget::Data => self.export_database_data().await.map_err(BoxedError::new),
|
||||
ExportTarget::All => {
|
||||
self.export_create_database().await?;
|
||||
self.export_create_table().await?;
|
||||
self.export_database_data().await
|
||||
self.export_create_database()
|
||||
.await
|
||||
.map_err(BoxedError::new)?;
|
||||
self.export_create_table().await.map_err(BoxedError::new)?;
|
||||
self.export_database_data().await.map_err(BoxedError::new)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
use client::{Client, Database};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_telemetry::logging::LoggingOptions;
|
||||
|
||||
use crate::error::Result as CmdResult;
|
||||
use crate::options::GlobalOptions;
|
||||
use crate::{cli, standalone, App};
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_export_create_table_with_quoted_names() -> CmdResult<()> {
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let standalone = standalone::Command::parse_from([
|
||||
"standalone",
|
||||
"start",
|
||||
"--data-home",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
]);
|
||||
|
||||
let standalone_opts = standalone.load_options(&GlobalOptions::default()).unwrap();
|
||||
let mut instance = standalone.build(standalone_opts).await?;
|
||||
instance.start().await?;
|
||||
|
||||
let client = Client::with_urls(["127.0.0.1:4001"]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
database
|
||||
.sql(r#"CREATE DATABASE "cli.export.create_table";"#)
|
||||
.await
|
||||
.unwrap();
|
||||
database
|
||||
.sql(
|
||||
r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
|
||||
ts TIMESTAMP,
|
||||
TIME INDEX (ts)
|
||||
) engine=mito;
|
||||
"#,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
let cli = cli::Command::parse_from([
|
||||
"cli",
|
||||
"export",
|
||||
"--addr",
|
||||
"127.0.0.1:4000",
|
||||
"--output-dir",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
"--target",
|
||||
"schema",
|
||||
]);
|
||||
let mut cli_app = cli.build(LoggingOptions::default()).await?;
|
||||
cli_app.start().await?;
|
||||
|
||||
instance.stop().await?;
|
||||
|
||||
let output_file = output_dir
|
||||
.path()
|
||||
.join("greptime")
|
||||
.join("cli.export.create_table")
|
||||
.join("create_tables.sql");
|
||||
let res = std::fs::read_to_string(output_file).unwrap();
|
||||
let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
|
||||
"ts" TIMESTAMP(3) NOT NULL,
|
||||
TIME INDEX ("ts")
|
||||
)
|
||||
|
||||
ENGINE=mito
|
||||
;
|
||||
"#;
|
||||
assert_eq!(res.trim(), expect.trim());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,7 @@ use rustyline::highlight::{Highlighter, MatchingBracketHighlighter};
|
||||
use rustyline::hint::{Hinter, HistoryHinter};
|
||||
use rustyline::validate::{ValidationContext, ValidationResult, Validator};
|
||||
|
||||
use crate::cli::cmd::ReplCommand;
|
||||
use crate::cmd::ReplCommand;
|
||||
|
||||
pub(crate) struct RustylineHelper {
|
||||
hinter: HistoryHinter,
|
||||
@@ -14,19 +14,20 @@
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use common_catalog::consts::DEFAULT_SCHEMA_NAME;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_telemetry::{error, info, warn};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio::time::Instant;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use crate::cli::database::DatabaseClient;
|
||||
use crate::cli::{database, Instance, Tool};
|
||||
use crate::database::DatabaseClient;
|
||||
use crate::error::{Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
|
||||
use crate::{database, Tool};
|
||||
|
||||
#[derive(Debug, Default, Clone, ValueEnum)]
|
||||
enum ImportTarget {
|
||||
@@ -68,25 +69,35 @@ pub struct ImportCommand {
|
||||
/// The basic authentication for connecting to the server
|
||||
#[clap(long)]
|
||||
auth_basic: Option<String>,
|
||||
|
||||
/// The timeout of invoking the database.
|
||||
///
|
||||
/// It is used to override the server-side timeout setting.
|
||||
/// The default behavior will disable server-side default timeout(i.e. `0s`).
|
||||
#[clap(long, value_parser = humantime::parse_duration)]
|
||||
timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
impl ImportCommand {
|
||||
pub async fn build(&self, guard: Vec<WorkerGuard>) -> Result<Instance> {
|
||||
let (catalog, schema) = database::split_database(&self.database)?;
|
||||
let database_client =
|
||||
DatabaseClient::new(self.addr.clone(), catalog.clone(), self.auth_basic.clone());
|
||||
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
|
||||
let (catalog, schema) =
|
||||
database::split_database(&self.database).map_err(BoxedError::new)?;
|
||||
let database_client = DatabaseClient::new(
|
||||
self.addr.clone(),
|
||||
catalog.clone(),
|
||||
self.auth_basic.clone(),
|
||||
// Treats `None` as `0s` to disable server-side default timeout.
|
||||
self.timeout.unwrap_or_default(),
|
||||
);
|
||||
|
||||
Ok(Instance::new(
|
||||
Box::new(Import {
|
||||
catalog,
|
||||
schema,
|
||||
database_client,
|
||||
input_dir: self.input_dir.clone(),
|
||||
parallelism: self.import_jobs,
|
||||
target: self.target.clone(),
|
||||
}),
|
||||
guard,
|
||||
))
|
||||
Ok(Box::new(Import {
|
||||
catalog,
|
||||
schema,
|
||||
database_client,
|
||||
input_dir: self.input_dir.clone(),
|
||||
parallelism: self.import_jobs,
|
||||
target: self.target.clone(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,13 +216,13 @@ impl Import {
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for Import {
|
||||
async fn do_work(&self) -> Result<()> {
|
||||
async fn do_work(&self) -> std::result::Result<(), BoxedError> {
|
||||
match self.target {
|
||||
ImportTarget::Schema => self.import_create_table().await,
|
||||
ImportTarget::Data => self.import_database_data().await,
|
||||
ImportTarget::Schema => self.import_create_table().await.map_err(BoxedError::new),
|
||||
ImportTarget::Data => self.import_database_data().await.map_err(BoxedError::new),
|
||||
ImportTarget::All => {
|
||||
self.import_create_table().await?;
|
||||
self.import_database_data().await
|
||||
self.import_create_table().await.map_err(BoxedError::new)?;
|
||||
self.import_database_data().await.map_err(BoxedError::new)
|
||||
}
|
||||
}
|
||||
}
|
||||
60
src/cli/src/lib.rs
Normal file
60
src/cli/src/lib.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod bench;
|
||||
pub mod error;
|
||||
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
|
||||
#[allow(unused)]
|
||||
mod cmd;
|
||||
mod export;
|
||||
mod helper;
|
||||
|
||||
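// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
// NOTE(review): presumably this explains the `#[allow(unused)]` on `repl` below rather than `database` - confirm.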
|
||||
mod database;
|
||||
mod import;
|
||||
#[allow(unused)]
|
||||
mod repl;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use clap::Parser;
|
||||
use common_error::ext::BoxedError;
|
||||
pub use database::DatabaseClient;
|
||||
use error::Result;
|
||||
pub use repl::Repl;
|
||||
|
||||
pub use crate::bench::BenchTableMetadataCommand;
|
||||
pub use crate::export::ExportCommand;
|
||||
pub use crate::import::ImportCommand;
|
||||
|
||||
/// A unit of CLI work that can be executed asynchronously.
///
/// Implementors must be `Send + Sync`. `do_work` runs the tool and reports any
/// failure as a type-erased `BoxedError`.
#[async_trait]
|
||||
pub trait Tool: Send + Sync {
|
||||
/// Runs the tool, returning `Ok(())` on success or a `BoxedError` on failure.
async fn do_work(&self) -> std::result::Result<(), BoxedError>;
|
||||
}
|
||||
|
||||
// Command-line arguments for the attach command; its `build` is not implemented
// yet. Plain `//` comments are used on purpose: `///` doc comments on a clap
// `Parser` type are picked up as help text.
#[derive(Debug, Parser)]
|
||||
pub(crate) struct AttachCommand {
|
||||
// Mandatory long option (plain `String`, no default given).
#[clap(long)]
|
||||
pub(crate) grpc_addr: String,
|
||||
// Optional long option; `None` when not supplied.
#[clap(long)]
|
||||
pub(crate) meta_addr: Option<String>,
|
||||
// Boolean long flag.
#[clap(long, action)]
|
||||
pub(crate) disable_helper: bool,
|
||||
}
|
||||
|
||||
impl AttachCommand {
|
||||
/// Placeholder for building the attach tool.
///
/// # Panics
///
/// Always panics via `unimplemented!`; the message points at the tracking issue.
// `dead_code` is allowed because the method is kept as a stub.
#[allow(dead_code)]
|
||||
async fn build(self) -> Result<Box<dyn Tool>> {
|
||||
unimplemented!("Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373")
|
||||
}
|
||||
}
|
||||
@@ -20,14 +20,16 @@ use cache::{
|
||||
build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME,
|
||||
TABLE_ROUTE_CACHE_NAME,
|
||||
};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::kvbackend::{
|
||||
CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
|
||||
CachedKvBackend, CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend,
|
||||
};
|
||||
use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_base::Plugins;
|
||||
use common_config::Mode;
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::kv_backend::KvBackendRef;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use common_telemetry::debug;
|
||||
@@ -43,15 +45,14 @@ use session::context::QueryContext;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
|
||||
|
||||
use crate::cli::cmd::ReplCommand;
|
||||
use crate::cli::helper::RustylineHelper;
|
||||
use crate::cli::AttachCommand;
|
||||
use crate::cmd::ReplCommand;
|
||||
use crate::error::{
|
||||
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
|
||||
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
|
||||
SubstraitEncodeLogicalPlanSnafu,
|
||||
};
|
||||
use crate::{error, DistributedInformationExtension};
|
||||
use crate::helper::RustylineHelper;
|
||||
use crate::{error, AttachCommand};
|
||||
|
||||
/// Captures the state of the repl, gathers commands and executes them one by one
|
||||
pub struct Repl {
|
||||
@@ -174,7 +175,7 @@ impl Repl {
|
||||
|
||||
let plan = query_engine
|
||||
.planner()
|
||||
.plan(stmt, query_ctx.clone())
|
||||
.plan(&stmt, query_ctx.clone())
|
||||
.await
|
||||
.context(PlanStatementSnafu)?;
|
||||
|
||||
@@ -258,8 +259,9 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
|
||||
.context(StartMetaClientSnafu)?;
|
||||
let meta_client = Arc::new(meta_client);
|
||||
|
||||
let cached_meta_backend =
|
||||
Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());
|
||||
let cached_meta_backend = Arc::new(
|
||||
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone()))).build(),
|
||||
);
|
||||
let layered_cache_builder = LayeredCacheRegistryBuilder::default().add_cache_registry(
|
||||
CacheRegistryBuilder::default()
|
||||
.add_cache(cached_meta_backend.clone())
|
||||
@@ -28,7 +28,7 @@ enum_dispatch = "0.3"
|
||||
futures-util.workspace = true
|
||||
lazy_static.workspace = true
|
||||
moka = { workspace = true, features = ["future"] }
|
||||
parking_lot = "0.12"
|
||||
parking_lot.workspace = true
|
||||
prometheus.workspace = true
|
||||
prost.workspace = true
|
||||
query.workspace = true
|
||||
@@ -45,7 +45,6 @@ common-grpc-expr.workspace = true
|
||||
datanode.workspace = true
|
||||
derive-new = "0.5"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
|
||||
[dev-dependencies.substrait_proto]
|
||||
package = "substrait"
|
||||
|
||||
@@ -18,7 +18,7 @@ use api::v1::greptime_database_client::GreptimeDatabaseClient;
|
||||
use api::v1::greptime_request::Request;
|
||||
use api::v1::query_request::Query;
|
||||
use api::v1::{
|
||||
AlterExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
|
||||
AlterTableExpr, AuthHeader, CreateTableExpr, DdlRequest, GreptimeRequest, InsertRequests,
|
||||
QueryRequest, RequestHeader,
|
||||
};
|
||||
use arrow_flight::Ticket;
|
||||
@@ -211,9 +211,9 @@ impl Database {
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
|
||||
pub async fn alter(&self, expr: AlterTableExpr) -> Result<Output> {
|
||||
self.do_get(Request::Ddl(DdlRequest {
|
||||
expr: Some(DdlExpr::Alter(expr)),
|
||||
expr: Some(DdlExpr::AlterTable(expr)),
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ name = "greptime"
|
||||
path = "src/bin/greptime.rs"
|
||||
|
||||
[features]
|
||||
default = ["python"]
|
||||
default = ["python", "servers/pprof", "servers/mem-prof"]
|
||||
tokio-console = ["common-telemetry/tokio-console"]
|
||||
python = ["frontend/python"]
|
||||
|
||||
@@ -25,6 +25,7 @@ cache.workspace = true
|
||||
catalog.workspace = true
|
||||
chrono.workspace = true
|
||||
clap.workspace = true
|
||||
cli.workspace = true
|
||||
client.workspace = true
|
||||
common-base.workspace = true
|
||||
common-catalog.workspace = true
|
||||
@@ -33,6 +34,7 @@ common-error.workspace = true
|
||||
common-grpc.workspace = true
|
||||
common-macro.workspace = true
|
||||
common-meta.workspace = true
|
||||
common-options.workspace = true
|
||||
common-procedure.workspace = true
|
||||
common-query.workspace = true
|
||||
common-recordbatch.workspace = true
|
||||
@@ -52,6 +54,7 @@ flow.workspace = true
|
||||
frontend = { workspace = true, default-features = false }
|
||||
futures.workspace = true
|
||||
human-panic = "2.0"
|
||||
humantime.workspace = true
|
||||
lazy_static.workspace = true
|
||||
meta-client.workspace = true
|
||||
meta-srv.workspace = true
|
||||
@@ -78,7 +81,7 @@ table.workspace = true
|
||||
tokio.workspace = true
|
||||
toml.workspace = true
|
||||
tonic.workspace = true
|
||||
tracing-appender = "0.2"
|
||||
tracing-appender.workspace = true
|
||||
|
||||
[target.'cfg(not(windows))'.dependencies]
|
||||
tikv-jemallocator = "0.6"
|
||||
|
||||
@@ -12,39 +12,17 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod bench;
|
||||
|
||||
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
|
||||
#[allow(unused)]
|
||||
mod cmd;
|
||||
mod export;
|
||||
mod helper;
|
||||
|
||||
// Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373
|
||||
mod database;
|
||||
mod import;
|
||||
#[allow(unused)]
|
||||
mod repl;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bench::BenchTableMetadataCommand;
|
||||
use clap::Parser;
|
||||
use cli::Tool;
|
||||
use common_telemetry::logging::{LoggingOptions, TracingOptions};
|
||||
pub use repl::Repl;
|
||||
use plugins::SubCommand;
|
||||
use snafu::ResultExt;
|
||||
use tracing_appender::non_blocking::WorkerGuard;
|
||||
|
||||
use self::export::ExportCommand;
|
||||
use crate::cli::import::ImportCommand;
|
||||
use crate::error::Result;
|
||||
use crate::options::GlobalOptions;
|
||||
use crate::App;
|
||||
|
||||
use crate::{error, App, Result};
|
||||
pub const APP_NAME: &str = "greptime-cli";
|
||||
|
||||
#[async_trait]
|
||||
pub trait Tool: Send + Sync {
|
||||
async fn do_work(&self) -> Result<()>;
|
||||
}
|
||||
use async_trait::async_trait;
|
||||
|
||||
pub struct Instance {
|
||||
tool: Box<dyn Tool>,
|
||||
@@ -54,12 +32,16 @@ pub struct Instance {
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
fn new(tool: Box<dyn Tool>, guard: Vec<WorkerGuard>) -> Self {
|
||||
pub fn new(tool: Box<dyn Tool>, guard: Vec<WorkerGuard>) -> Self {
|
||||
Self {
|
||||
tool,
|
||||
_guard: guard,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn start(&mut self) -> Result<()> {
|
||||
self.tool.do_work().await.context(error::StartCliSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -69,7 +51,8 @@ impl App for Instance {
|
||||
}
|
||||
|
||||
async fn start(&mut self) -> Result<()> {
|
||||
self.tool.do_work().await
|
||||
self.start().await.unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn wait_signal(&self) -> bool {
|
||||
@@ -96,7 +79,12 @@ impl Command {
|
||||
None,
|
||||
);
|
||||
|
||||
self.cmd.build(guard).await
|
||||
let tool = self.cmd.build().await.context(error::BuildCliSnafu)?;
|
||||
let instance = Instance {
|
||||
tool,
|
||||
_guard: guard,
|
||||
};
|
||||
Ok(instance)
|
||||
}
|
||||
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<LoggingOptions> {
|
||||
@@ -112,38 +100,81 @@ impl Command {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
enum SubCommand {
|
||||
// Attach(AttachCommand),
|
||||
Bench(BenchTableMetadataCommand),
|
||||
Export(ExportCommand),
|
||||
Import(ImportCommand),
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use clap::Parser;
|
||||
use client::{Client, Database};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_telemetry::logging::LoggingOptions;
|
||||
|
||||
impl SubCommand {
|
||||
async fn build(&self, guard: Vec<WorkerGuard>) -> Result<Instance> {
|
||||
match self {
|
||||
// SubCommand::Attach(cmd) => cmd.build().await,
|
||||
SubCommand::Bench(cmd) => cmd.build(guard).await,
|
||||
SubCommand::Export(cmd) => cmd.build(guard).await,
|
||||
SubCommand::Import(cmd) => cmd.build(guard).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub(crate) struct AttachCommand {
|
||||
#[clap(long)]
|
||||
pub(crate) grpc_addr: String,
|
||||
#[clap(long)]
|
||||
pub(crate) meta_addr: Option<String>,
|
||||
#[clap(long, action)]
|
||||
pub(crate) disable_helper: bool,
|
||||
}
|
||||
|
||||
impl AttachCommand {
|
||||
#[allow(dead_code)]
|
||||
async fn build(self) -> Result<Instance> {
|
||||
unimplemented!("Wait for https://github.com/GreptimeTeam/greptimedb/issues/2373")
|
||||
use crate::error::Result as CmdResult;
|
||||
use crate::options::GlobalOptions;
|
||||
use crate::{cli, standalone, App};
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_export_create_table_with_quoted_names() -> CmdResult<()> {
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let standalone = standalone::Command::parse_from([
|
||||
"standalone",
|
||||
"start",
|
||||
"--data-home",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
]);
|
||||
|
||||
let standalone_opts = standalone.load_options(&GlobalOptions::default()).unwrap();
|
||||
let mut instance = standalone.build(standalone_opts).await?;
|
||||
instance.start().await?;
|
||||
|
||||
let client = Client::with_urls(["127.0.0.1:4001"]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
database
|
||||
.sql(r#"CREATE DATABASE "cli.export.create_table";"#)
|
||||
.await
|
||||
.unwrap();
|
||||
database
|
||||
.sql(
|
||||
r#"CREATE TABLE "cli.export.create_table"."a.b.c"(
|
||||
ts TIMESTAMP,
|
||||
TIME INDEX (ts)
|
||||
) engine=mito;
|
||||
"#,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let output_dir = tempfile::tempdir().unwrap();
|
||||
let cli = cli::Command::parse_from([
|
||||
"cli",
|
||||
"export",
|
||||
"--addr",
|
||||
"127.0.0.1:4000",
|
||||
"--output-dir",
|
||||
&*output_dir.path().to_string_lossy(),
|
||||
"--target",
|
||||
"schema",
|
||||
]);
|
||||
let mut cli_app = cli.build(LoggingOptions::default()).await?;
|
||||
cli_app.start().await?;
|
||||
|
||||
instance.stop().await?;
|
||||
|
||||
let output_file = output_dir
|
||||
.path()
|
||||
.join("greptime")
|
||||
.join("cli.export.create_table")
|
||||
.join("create_tables.sql");
|
||||
let res = std::fs::read_to_string(output_file).unwrap();
|
||||
let expect = r#"CREATE TABLE IF NOT EXISTS "a.b.c" (
|
||||
"ts" TIMESTAMP(3) NOT NULL,
|
||||
TIME INDEX ("ts")
|
||||
)
|
||||
|
||||
ENGINE=mito
|
||||
;
|
||||
"#;
|
||||
assert_eq!(res.trim(), expect.trim());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,10 +16,12 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cache::build_datanode_cache_registry;
|
||||
use catalog::kvbackend::MetaKvBackend;
|
||||
use clap::Parser;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_meta::cache::LayeredCacheRegistryBuilder;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_telemetry::{info, warn};
|
||||
use common_version::{short_version, version};
|
||||
@@ -272,9 +274,10 @@ impl StartCommand {
|
||||
info!("Datanode start command: {:#?}", self);
|
||||
info!("Datanode options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let opts = opts.component;
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_datanode_plugins(&mut plugins, &opts)
|
||||
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
@@ -299,9 +302,17 @@ impl StartCommand {
|
||||
client: meta_client.clone(),
|
||||
});
|
||||
|
||||
// Builds cache registry for datanode.
|
||||
let layered_cache_registry = Arc::new(
|
||||
LayeredCacheRegistryBuilder::default()
|
||||
.add_cache_registry(build_datanode_cache_registry(meta_backend.clone()))
|
||||
.build(),
|
||||
);
|
||||
|
||||
let mut datanode = DatanodeBuilder::new(opts.clone(), plugins)
|
||||
.with_meta_client(meta_client)
|
||||
.with_kv_backend(meta_backend)
|
||||
.with_cache_registry(layered_cache_registry)
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
@@ -114,6 +114,20 @@ pub enum Error {
|
||||
source: frontend::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build cli"))]
|
||||
BuildCli {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start cli"))]
|
||||
StartCli {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build meta server"))]
|
||||
BuildMetaServer {
|
||||
#[snafu(implicit)]
|
||||
@@ -346,6 +360,8 @@ impl ErrorExt for Error {
|
||||
Error::ShutdownMetaServer { source, .. } => source.status_code(),
|
||||
Error::BuildMetaServer { source, .. } => source.status_code(),
|
||||
Error::UnsupportedSelectorType { source, .. } => source.status_code(),
|
||||
Error::BuildCli { source, .. } => source.status_code(),
|
||||
Error::StartCli { source, .. } => source.status_code(),
|
||||
|
||||
Error::InitMetadata { source, .. } | Error::InitDdlManager { source, .. } => {
|
||||
source.status_code()
|
||||
|
||||
@@ -15,13 +15,15 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
|
||||
use clap::Parser;
|
||||
use client::client_manager::NodeClients;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_grpc::channel_manager::ChannelConfig;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::HandlerGroupExecutor;
|
||||
use common_meta::key::flow::FlowMetadataManager;
|
||||
@@ -30,7 +32,6 @@ use common_telemetry::info;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_version::{short_version, version};
|
||||
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
|
||||
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
|
||||
use meta_client::{MetaClientOptions, MetaClientType};
|
||||
use servers::Mode;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -41,7 +42,7 @@ use crate::error::{
|
||||
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{log_versions, App, DistributedInformationExtension};
|
||||
use crate::{log_versions, App};
|
||||
|
||||
pub const APP_NAME: &str = "greptime-flownode";
|
||||
|
||||
@@ -246,11 +247,12 @@ impl StartCommand {
|
||||
let cache_tti = meta_config.metadata_cache_tti;
|
||||
|
||||
// TODO(discord9): add helper function to ease the creation of cache registry&such
|
||||
let cached_meta_backend = CachedMetaKvBackendBuilder::new(meta_client.clone())
|
||||
.cache_max_capacity(cache_max_capacity)
|
||||
.cache_ttl(cache_ttl)
|
||||
.cache_tti(cache_tti)
|
||||
.build();
|
||||
let cached_meta_backend =
|
||||
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone())))
|
||||
.cache_max_capacity(cache_max_capacity)
|
||||
.cache_ttl(cache_ttl)
|
||||
.cache_tti(cache_tti)
|
||||
.build();
|
||||
let cached_meta_backend = Arc::new(cached_meta_backend);
|
||||
|
||||
// Builds cache registry
|
||||
@@ -287,9 +289,7 @@ impl StartCommand {
|
||||
|
||||
let executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler),
|
||||
Arc::new(InvalidateTableCacheHandler::new(
|
||||
layered_cache_registry.clone(),
|
||||
)),
|
||||
Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
|
||||
]);
|
||||
|
||||
let heartbeat_task = flow::heartbeat::HeartbeatTask::new(
|
||||
|
||||
@@ -17,20 +17,21 @@ use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
|
||||
use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
|
||||
use catalog::information_extension::DistributedInformationExtension;
|
||||
use catalog::kvbackend::{CachedKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend};
|
||||
use clap::Parser;
|
||||
use client::client_manager::NodeClients;
|
||||
use common_base::Plugins;
|
||||
use common_config::Configurable;
|
||||
use common_grpc::channel_manager::ChannelConfig;
|
||||
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
|
||||
use common_meta::heartbeat::handler::invalidate_table_cache::InvalidateCacheHandler;
|
||||
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
|
||||
use common_meta::heartbeat::handler::HandlerGroupExecutor;
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::logging::TracingOptions;
|
||||
use common_time::timezone::set_default_timezone;
|
||||
use common_version::{short_version, version};
|
||||
use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHandler;
|
||||
use frontend::heartbeat::HeartbeatTask;
|
||||
use frontend::instance::builder::FrontendBuilder;
|
||||
use frontend::instance::{FrontendInstance, Instance as FeInstance};
|
||||
@@ -46,7 +47,7 @@ use crate::error::{
|
||||
Result, StartFrontendSnafu,
|
||||
};
|
||||
use crate::options::{GlobalOptions, GreptimeOptions};
|
||||
use crate::{log_versions, App, DistributedInformationExtension};
|
||||
use crate::{log_versions, App};
|
||||
|
||||
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
|
||||
|
||||
@@ -266,9 +267,10 @@ impl StartCommand {
|
||||
info!("Frontend start command: {:#?}", self);
|
||||
info!("Frontend options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let opts = opts.component;
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_frontend_plugins(&mut plugins, &opts)
|
||||
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
@@ -292,11 +294,12 @@ impl StartCommand {
|
||||
.context(MetaClientInitSnafu)?;
|
||||
|
||||
// TODO(discord9): add helper function to ease the creation of cache registry&such
|
||||
let cached_meta_backend = CachedMetaKvBackendBuilder::new(meta_client.clone())
|
||||
.cache_max_capacity(cache_max_capacity)
|
||||
.cache_ttl(cache_ttl)
|
||||
.cache_tti(cache_tti)
|
||||
.build();
|
||||
let cached_meta_backend =
|
||||
CachedKvBackendBuilder::new(Arc::new(MetaKvBackend::new(meta_client.clone())))
|
||||
.cache_max_capacity(cache_max_capacity)
|
||||
.cache_ttl(cache_ttl)
|
||||
.cache_tti(cache_tti)
|
||||
.build();
|
||||
let cached_meta_backend = Arc::new(cached_meta_backend);
|
||||
|
||||
// Builds cache registry
|
||||
@@ -326,9 +329,7 @@ impl StartCommand {
|
||||
|
||||
let executor = HandlerGroupExecutor::new(vec![
|
||||
Arc::new(ParseMailboxMessageHandler),
|
||||
Arc::new(InvalidateTableCacheHandler::new(
|
||||
layered_cache_registry.clone(),
|
||||
)),
|
||||
Arc::new(InvalidateCacheHandler::new(layered_cache_registry.clone())),
|
||||
]);
|
||||
|
||||
let heartbeat_task = HeartbeatTask::new(
|
||||
@@ -342,6 +343,8 @@ impl StartCommand {
|
||||
// Some queries are expected to take long time.
|
||||
let channel_config = ChannelConfig {
|
||||
timeout: None,
|
||||
tcp_nodelay: opts.datanode.client.tcp_nodelay,
|
||||
connect_timeout: Some(opts.datanode.client.connect_timeout),
|
||||
..Default::default()
|
||||
};
|
||||
let client = NodeClients::new(channel_config);
|
||||
@@ -472,7 +475,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
|
||||
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -15,17 +15,7 @@
|
||||
#![feature(assert_matches, let_chains)]
|
||||
|
||||
use async_trait::async_trait;
|
||||
use catalog::information_schema::InformationExtension;
|
||||
use client::api::v1::meta::ProcedureStatus;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::cluster::{ClusterInfo, NodeInfo};
|
||||
use common_meta::datanode::RegionStat;
|
||||
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
|
||||
use common_meta::rpc::procedure;
|
||||
use common_procedure::{ProcedureInfo, ProcedureState};
|
||||
use common_telemetry::{error, info};
|
||||
use meta_client::MetaClientRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
@@ -43,6 +33,31 @@ lazy_static::lazy_static! {
|
||||
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["version", "short_version", "app"]).unwrap();
|
||||
}
|
||||
|
||||
/// wait for the close signal, for unix platform it's SIGINT or SIGTERM
|
||||
#[cfg(unix)]
|
||||
async fn start_wait_for_close_signal() -> std::io::Result<()> {
|
||||
use tokio::signal::unix::{signal, SignalKind};
|
||||
let mut sigint = signal(SignalKind::interrupt())?;
|
||||
let mut sigterm = signal(SignalKind::terminate())?;
|
||||
|
||||
tokio::select! {
|
||||
_ = sigint.recv() => {
|
||||
info!("Received SIGINT, shutting down");
|
||||
}
|
||||
_ = sigterm.recv() => {
|
||||
info!("Received SIGTERM, shutting down");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// wait for the close signal, for non-unix platform it's ctrl-c
|
||||
#[cfg(not(unix))]
|
||||
async fn start_wait_for_close_signal() -> std::io::Result<()> {
|
||||
tokio::signal::ctrl_c().await
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait App: Send {
|
||||
fn name(&self) -> &str;
|
||||
@@ -69,9 +84,9 @@ pub trait App: Send {
|
||||
self.start().await?;
|
||||
|
||||
if self.wait_signal() {
|
||||
if let Err(e) = tokio::signal::ctrl_c().await {
|
||||
error!(e; "Failed to listen for ctrl-c signal");
|
||||
// It's unusual to fail to listen for ctrl-c signal, maybe there's something unexpected in
|
||||
if let Err(e) = start_wait_for_close_signal().await {
|
||||
error!(e; "Failed to listen for close signal");
|
||||
// It's unusual to fail to listen for close signal, maybe there's something unexpected in
|
||||
// the underlying system. So we stop the app instead of running nonetheless to let people
|
||||
// investigate the issue.
|
||||
}
|
||||
@@ -84,6 +99,7 @@ pub trait App: Send {
|
||||
}
|
||||
|
||||
/// Log the versions of the application, and the arguments passed to the cli.
|
||||
///
|
||||
/// `version` should be the same as the output of cli "--version";
|
||||
/// and the `short_version` is the short version of the codes, often consist of git branch and commit.
|
||||
pub fn log_versions(version: &str, short_version: &str, app: &str) {
|
||||
@@ -104,69 +120,3 @@ fn log_env_flags() {
|
||||
info!("argument: {}", argument);
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DistributedInformationExtension {
|
||||
meta_client: MetaClientRef,
|
||||
}
|
||||
|
||||
impl DistributedInformationExtension {
|
||||
pub fn new(meta_client: MetaClientRef) -> Self {
|
||||
Self { meta_client }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl InformationExtension for DistributedInformationExtension {
|
||||
type Error = catalog::error::Error;
|
||||
|
||||
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_nodes(None)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListNodesSnafu)
|
||||
}
|
||||
|
||||
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
|
||||
let procedures = self
|
||||
.meta_client
|
||||
.list_procedures(&ExecutorContext::default())
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListProceduresSnafu)?
|
||||
.procedures;
|
||||
let mut result = Vec::with_capacity(procedures.len());
|
||||
for procedure in procedures {
|
||||
let pid = match procedure.id {
|
||||
Some(pid) => pid,
|
||||
None => return catalog::error::ProcedureIdNotFoundSnafu {}.fail(),
|
||||
};
|
||||
let pid = procedure::pb_pid_to_pid(&pid)
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ConvertProtoDataSnafu)?;
|
||||
let status = ProcedureStatus::try_from(procedure.status)
|
||||
.map(|v| v.as_str_name())
|
||||
.unwrap_or("Unknown")
|
||||
.to_string();
|
||||
let procedure_info = ProcedureInfo {
|
||||
id: pid,
|
||||
type_name: procedure.type_name,
|
||||
start_time_ms: procedure.start_time_ms,
|
||||
end_time_ms: procedure.end_time_ms,
|
||||
state: ProcedureState::Running,
|
||||
lock_keys: procedure.lock_keys,
|
||||
};
|
||||
result.push((status, procedure_info));
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
|
||||
self.meta_client
|
||||
.list_region_stats()
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(catalog::error::ListRegionStatsSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +48,10 @@ impl Instance {
|
||||
_guard: guard,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_inner(&self) -> &MetasrvInstance {
|
||||
&self.instance
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
@@ -86,6 +90,14 @@ impl Command {
|
||||
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
|
||||
self.subcmd.load_options(global_options)
|
||||
}
|
||||
|
||||
pub fn config_file(&self) -> &Option<String> {
|
||||
self.subcmd.config_file()
|
||||
}
|
||||
|
||||
pub fn env_prefix(&self) -> &String {
|
||||
self.subcmd.env_prefix()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -105,6 +117,18 @@ impl SubCommand {
|
||||
SubCommand::Start(cmd) => cmd.load_options(global_options),
|
||||
}
|
||||
}
|
||||
|
||||
fn config_file(&self) -> &Option<String> {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => &cmd.config_file,
|
||||
}
|
||||
}
|
||||
|
||||
fn env_prefix(&self) -> &String {
|
||||
match self {
|
||||
SubCommand::Start(cmd) => &cmd.env_prefix,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Parser)]
|
||||
@@ -249,9 +273,10 @@ impl StartCommand {
|
||||
info!("Metasrv start command: {:#?}", self);
|
||||
info!("Metasrv options: {:#?}", opts);
|
||||
|
||||
let plugin_opts = opts.plugins;
|
||||
let opts = opts.component;
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_metasrv_plugins(&mut plugins, &opts)
|
||||
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
|
||||
.await
|
||||
.context(StartMetaServerSnafu)?;
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use clap::Parser;
|
||||
use common_config::Configurable;
|
||||
use common_runtime::global::RuntimeOptions;
|
||||
use plugins::PluginOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Parser, Default, Debug, Clone)]
|
||||
@@ -40,6 +41,8 @@ pub struct GlobalOptions {
|
||||
pub struct GreptimeOptions<T> {
|
||||
/// The runtime options.
|
||||
pub runtime: RuntimeOptions,
|
||||
/// The plugin options.
|
||||
pub plugins: Vec<PluginOptions>,
|
||||
|
||||
/// The options of each component (like Datanode or Standalone) of GreptimeDB.
|
||||
#[serde(flatten)]
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::{fs, path};
|
||||
|
||||
@@ -250,6 +251,13 @@ pub struct Instance {
|
||||
_guard: Vec<WorkerGuard>,
|
||||
}
|
||||
|
||||
impl Instance {
|
||||
/// Find the socket addr of a server by its `name`.
|
||||
pub async fn server_addr(&self, name: &str) -> Option<SocketAddr> {
|
||||
self.frontend.server_handlers().addr(name).await
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl App for Instance {
|
||||
fn name(&self) -> &str {
|
||||
@@ -340,7 +348,8 @@ pub struct StartCommand {
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
fn load_options(
|
||||
/// Load the GreptimeDB options from various sources (command line, config file or env).
|
||||
pub fn load_options(
|
||||
&self,
|
||||
global_options: &GlobalOptions,
|
||||
) -> Result<GreptimeOptions<StandaloneOptions>> {
|
||||
@@ -430,7 +439,8 @@ impl StartCommand {
|
||||
#[allow(unreachable_code)]
|
||||
#[allow(unused_variables)]
|
||||
#[allow(clippy::diverging_sub_expression)]
|
||||
async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
|
||||
/// Build GreptimeDB instance with the loaded options.
|
||||
pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
|
||||
common_runtime::init_global_runtimes(&opts.runtime);
|
||||
|
||||
let guard = common_telemetry::init_global_logging(
|
||||
@@ -445,15 +455,16 @@ impl StartCommand {
|
||||
info!("Standalone options: {opts:#?}");
|
||||
|
||||
let mut plugins = Plugins::new();
|
||||
let plugin_opts = opts.plugins;
|
||||
let opts = opts.component;
|
||||
let fe_opts = opts.frontend_options();
|
||||
let dn_opts = opts.datanode_options();
|
||||
|
||||
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
|
||||
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
|
||||
.await
|
||||
.context(StartFrontendSnafu)?;
|
||||
|
||||
plugins::setup_datanode_plugins(&mut plugins, &dn_opts)
|
||||
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
|
||||
@@ -486,6 +497,7 @@ impl StartCommand {
|
||||
|
||||
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
|
||||
.with_kv_backend(kv_backend.clone())
|
||||
.with_cache_registry(layered_cache_registry.clone())
|
||||
.build()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
@@ -653,7 +665,7 @@ impl StartCommand {
|
||||
}
|
||||
}
|
||||
|
||||
struct StandaloneInformationExtension {
|
||||
pub struct StandaloneInformationExtension {
|
||||
region_server: RegionServer,
|
||||
procedure_manager: ProcedureManagerRef,
|
||||
start_time_ms: u64,
|
||||
@@ -725,12 +737,14 @@ impl InformationExtension for StandaloneInformationExtension {
|
||||
id: stat.region_id,
|
||||
rcus: 0,
|
||||
wcus: 0,
|
||||
approximate_bytes: region_stat.estimated_disk_size() as i64,
|
||||
approximate_bytes: region_stat.estimated_disk_size(),
|
||||
engine: stat.engine,
|
||||
role: RegionRole::from(stat.role).into(),
|
||||
num_rows: region_stat.num_rows,
|
||||
memtable_size: region_stat.memtable_size,
|
||||
manifest_size: region_stat.manifest_size,
|
||||
sst_size: region_stat.sst_size,
|
||||
index_size: region_stat.index_size,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
@@ -762,7 +776,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let mut plugins = Plugins::new();
|
||||
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
|
||||
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -20,13 +20,13 @@ use common_config::Configurable;
|
||||
use common_grpc::channel_manager::{
|
||||
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
|
||||
};
|
||||
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
|
||||
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
|
||||
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
|
||||
use common_wal::config::raft_engine::RaftEngineConfig;
|
||||
use common_wal::config::DatanodeWalConfig;
|
||||
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
|
||||
use file_engine::config::EngineConfig;
|
||||
use frontend::frontend::FrontendOptions;
|
||||
use frontend::service_config::datanode::DatanodeClientOptions;
|
||||
use meta_client::MetaClientOptions;
|
||||
use meta_srv::metasrv::MetasrvOptions;
|
||||
use meta_srv::selector::SelectorType;
|
||||
@@ -69,7 +69,6 @@ fn test_load_datanode_example_config() {
|
||||
region_engine: vec![
|
||||
RegionEngineConfig::Mito(MitoConfig {
|
||||
auto_flush_interval: Duration::from_secs(3600),
|
||||
scan_parallelism: 0,
|
||||
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
|
||||
..Default::default()
|
||||
}),
|
||||
@@ -126,10 +125,11 @@ fn test_load_frontend_example_config() {
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
..Default::default()
|
||||
},
|
||||
datanode: frontend::service_config::DatanodeOptions {
|
||||
client: DatanodeClientOptions {
|
||||
datanode: DatanodeClientOptions {
|
||||
client: ClientOptions {
|
||||
connect_timeout: Duration::from_secs(10),
|
||||
tcp_nodelay: true,
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
export_metrics: ExportMetricsOption {
|
||||
@@ -159,8 +159,20 @@ fn test_load_metasrv_example_config() {
|
||||
level: Some("info".to_string()),
|
||||
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
|
||||
tracing_sample_ratio: Some(Default::default()),
|
||||
slow_query: SlowQueryOptions {
|
||||
enable: false,
|
||||
threshold: Some(Duration::from_secs(10)),
|
||||
sample_ratio: Some(1.0),
|
||||
},
|
||||
..Default::default()
|
||||
},
|
||||
datanode: DatanodeClientOptions {
|
||||
client: ClientOptions {
|
||||
timeout: Duration::from_secs(10),
|
||||
connect_timeout: Duration::from_secs(10),
|
||||
tcp_nodelay: true,
|
||||
},
|
||||
},
|
||||
export_metrics: ExportMetricsOption {
|
||||
self_import: Some(Default::default()),
|
||||
remote_write: Some(Default::default()),
|
||||
@@ -192,7 +204,6 @@ fn test_load_standalone_example_config() {
|
||||
RegionEngineConfig::Mito(MitoConfig {
|
||||
auto_flush_interval: Duration::from_secs(3600),
|
||||
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
|
||||
scan_parallelism: 0,
|
||||
..Default::default()
|
||||
}),
|
||||
RegionEngineConfig::File(EngineConfig {}),
|
||||
|
||||
@@ -16,9 +16,12 @@ common-error.workspace = true
|
||||
common-macro.workspace = true
|
||||
futures.workspace = true
|
||||
paste = "1.0"
|
||||
pin-project.workspace = true
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
snafu.workspace = true
|
||||
tokio.workspace = true
|
||||
zeroize = { version = "1.6", default-features = false, features = ["alloc"] }
|
||||
|
||||
[dev-dependencies]
|
||||
common-test-util.workspace = true
|
||||
toml.workspace = true
|
||||
|
||||
@@ -38,6 +38,18 @@ impl Plugins {
|
||||
self.read().get::<T>().cloned()
|
||||
}
|
||||
|
||||
pub fn get_or_insert<T, F>(&self, f: F) -> T
|
||||
where
|
||||
T: 'static + Send + Sync + Clone,
|
||||
F: FnOnce() -> T,
|
||||
{
|
||||
let mut binding = self.write();
|
||||
if !binding.contains::<T>() {
|
||||
binding.insert(f());
|
||||
}
|
||||
binding.get::<T>().cloned().unwrap()
|
||||
}
|
||||
|
||||
pub fn map_mut<T: 'static + Send + Sync, F, R>(&self, mapper: F) -> R
|
||||
where
|
||||
F: FnOnce(Option<&mut T>) -> R,
|
||||
|
||||
@@ -12,12 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
use std::path::Path;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::task::{Context, Poll};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use bytes::{BufMut, Bytes};
|
||||
use futures::{AsyncReadExt, AsyncSeekExt};
|
||||
use futures::AsyncRead;
|
||||
use pin_project::pin_project;
|
||||
use tokio::io::{AsyncReadExt as _, AsyncSeekExt as _};
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
/// `Metadata` contains the metadata of a source.
|
||||
pub struct Metadata {
|
||||
@@ -61,7 +69,7 @@ pub trait RangeReader: Send + Unpin {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
|
||||
impl<R: ?Sized + RangeReader> RangeReader for &mut R {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
(*self).metadata().await
|
||||
}
|
||||
@@ -80,26 +88,212 @@ impl<R: RangeReader + Send + Unpin> RangeReader for &mut R {
|
||||
}
|
||||
}
|
||||
|
||||
/// `RangeReaderAdapter` bridges `RangeReader` and `AsyncRead + AsyncSeek`.
|
||||
pub struct RangeReaderAdapter<R>(pub R);
|
||||
/// `AsyncReadAdapter` adapts a `RangeReader` to an `AsyncRead`.
|
||||
#[pin_project]
|
||||
pub struct AsyncReadAdapter<R> {
|
||||
/// The inner `RangeReader`.
|
||||
/// Use `Mutex` to get rid of the borrow checker issue.
|
||||
inner: Arc<Mutex<R>>,
|
||||
|
||||
/// The current position from the view of the reader.
|
||||
position: u64,
|
||||
|
||||
/// The buffer for the read bytes.
|
||||
buffer: Vec<u8>,
|
||||
|
||||
/// The length of the content.
|
||||
content_length: u64,
|
||||
|
||||
/// The future for reading the next bytes.
|
||||
#[pin]
|
||||
read_fut: Option<Pin<Box<dyn Future<Output = io::Result<Bytes>> + Send>>>,
|
||||
}
|
||||
|
||||
impl<R: RangeReader + 'static> AsyncReadAdapter<R> {
|
||||
pub async fn new(inner: R) -> io::Result<Self> {
|
||||
let mut inner = inner;
|
||||
let metadata = inner.metadata().await?;
|
||||
Ok(AsyncReadAdapter {
|
||||
inner: Arc::new(Mutex::new(inner)),
|
||||
position: 0,
|
||||
buffer: Vec::new(),
|
||||
content_length: metadata.content_length,
|
||||
read_fut: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The maximum size per read for the inner reader in `AsyncReadAdapter`.
|
||||
const MAX_SIZE_PER_READ: usize = 8 * 1024 * 1024; // 8MB
|
||||
|
||||
impl<R: RangeReader + 'static> AsyncRead for AsyncReadAdapter<R> {
|
||||
fn poll_read(
|
||||
mut self: Pin<&mut Self>,
|
||||
cx: &mut Context<'_>,
|
||||
buf: &mut [u8],
|
||||
) -> Poll<io::Result<usize>> {
|
||||
let mut this = self.as_mut().project();
|
||||
|
||||
if *this.position >= *this.content_length {
|
||||
return Poll::Ready(Ok(0));
|
||||
}
|
||||
|
||||
if !this.buffer.is_empty() {
|
||||
let to_read = this.buffer.len().min(buf.len());
|
||||
buf[..to_read].copy_from_slice(&this.buffer[..to_read]);
|
||||
this.buffer.drain(..to_read);
|
||||
*this.position += to_read as u64;
|
||||
return Poll::Ready(Ok(to_read));
|
||||
}
|
||||
|
||||
if this.read_fut.is_none() {
|
||||
let size = (*this.content_length - *this.position).min(MAX_SIZE_PER_READ as u64);
|
||||
let range = *this.position..(*this.position + size);
|
||||
let inner = this.inner.clone();
|
||||
let fut = async move {
|
||||
let mut inner = inner.lock().await;
|
||||
inner.read(range).await
|
||||
};
|
||||
|
||||
*this.read_fut = Some(Box::pin(fut));
|
||||
}
|
||||
|
||||
match this
|
||||
.read_fut
|
||||
.as_mut()
|
||||
.as_pin_mut()
|
||||
.expect("checked above")
|
||||
.poll(cx)
|
||||
{
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Ok(bytes)) => {
|
||||
*this.read_fut = None;
|
||||
|
||||
if !bytes.is_empty() {
|
||||
this.buffer.extend_from_slice(&bytes);
|
||||
self.poll_read(cx, buf)
|
||||
} else {
|
||||
Poll::Ready(Ok(0))
|
||||
}
|
||||
}
|
||||
Poll::Ready(Err(e)) => {
|
||||
*this.read_fut = None;
|
||||
Poll::Ready(Err(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Implements `RangeReader` for a type that implements `AsyncRead + AsyncSeek`.
|
||||
///
|
||||
/// TODO(zhongzc): It's a temporary solution for porting the codebase from `AsyncRead + AsyncSeek` to `RangeReader`.
|
||||
/// Until the codebase is fully ported to `RangeReader`, remove this implementation.
|
||||
#[async_trait]
|
||||
impl<R: futures::AsyncRead + futures::AsyncSeek + Send + Unpin> RangeReader
|
||||
for RangeReaderAdapter<R>
|
||||
{
|
||||
impl RangeReader for Vec<u8> {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
let content_length = self.0.seek(io::SeekFrom::End(0)).await?;
|
||||
Ok(Metadata { content_length })
|
||||
Ok(Metadata {
|
||||
content_length: self.len() as u64,
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, range: Range<u64>) -> io::Result<Bytes> {
|
||||
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
|
||||
range.end = range.end.min(self.len() as u64);
|
||||
|
||||
let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]);
|
||||
Ok(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// `FileReader` is a `RangeReader` for reading a file.
|
||||
pub struct FileReader {
|
||||
content_length: u64,
|
||||
position: u64,
|
||||
file: tokio::fs::File,
|
||||
}
|
||||
|
||||
impl FileReader {
|
||||
/// Creates a new `FileReader` for the file at the given path.
|
||||
pub async fn new(path: impl AsRef<Path>) -> io::Result<Self> {
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let metadata = file.metadata().await?;
|
||||
Ok(FileReader {
|
||||
content_length: metadata.len(),
|
||||
position: 0,
|
||||
file,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl RangeReader for FileReader {
|
||||
async fn metadata(&mut self) -> io::Result<Metadata> {
|
||||
Ok(Metadata {
|
||||
content_length: self.content_length,
|
||||
})
|
||||
}
|
||||
|
||||
async fn read(&mut self, mut range: Range<u64>) -> io::Result<Bytes> {
|
||||
if range.start != self.position {
|
||||
self.file.seek(io::SeekFrom::Start(range.start)).await?;
|
||||
self.position = range.start;
|
||||
}
|
||||
|
||||
range.end = range.end.min(self.content_length);
|
||||
if range.end <= self.position {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::UnexpectedEof,
|
||||
"Start of range is out of bounds",
|
||||
));
|
||||
}
|
||||
|
||||
let mut buf = vec![0; (range.end - range.start) as usize];
|
||||
self.0.seek(io::SeekFrom::Start(range.start)).await?;
|
||||
self.0.read_exact(&mut buf).await?;
|
||||
|
||||
self.file.read_exact(&mut buf).await?;
|
||||
self.position = range.end;
|
||||
|
||||
Ok(Bytes::from(buf))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use common_test_util::temp_dir::create_named_temp_file;
    use futures::io::AsyncReadExt as _;

    use super::*;

    /// A small in-memory payload is returned unchanged by the adapter.
    #[tokio::test]
    async fn test_async_read_adapter() {
        let data = b"hello world";
        let mut adapter = AsyncReadAdapter::new(Vec::from(data)).await.unwrap();

        let mut read_back = Vec::new();
        adapter.read_to_end(&mut read_back).await.unwrap();

        assert_eq!(read_back, data);
    }

    /// A 20 MiB in-memory payload is returned unchanged by the adapter.
    #[tokio::test]
    async fn test_async_read_adapter_large() {
        let data: Vec<u8> = (0..20 * 1024 * 1024).map(|i| i as u8).collect();
        let mut adapter = AsyncReadAdapter::new(data.clone()).await.unwrap();

        let mut read_back = Vec::new();
        adapter.read_to_end(&mut read_back).await.unwrap();

        assert_eq!(read_back, data);
    }

    /// `FileReader` reports the file length and serves a full read followed by
    /// a read that goes back to the start of the file.
    #[tokio::test]
    async fn test_file_reader() {
        let temp_file = create_named_temp_file();
        let path = temp_file.path();
        let data = b"hello world";
        tokio::fs::write(path, data).await.unwrap();

        let mut reader = FileReader::new(path).await.unwrap();
        let metadata = reader.metadata().await.unwrap();
        assert_eq!(metadata.content_length, data.len() as u64);

        let whole = reader.read(0..metadata.content_length).await.unwrap();
        assert_eq!(&*whole, data);

        let prefix = reader.read(0..5).await.unwrap();
        assert_eq!(&*prefix, &data[..5]);
    }
}
|
||||
|
||||
@@ -46,8 +46,9 @@ impl From<String> for SecretString {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper type for values that contains secrets, which attempts to limit
|
||||
/// accidental exposure and ensure secrets are wiped from memory when dropped.
|
||||
/// Wrapper type for values that contain secrets.
|
||||
///
|
||||
/// It attempts to limit accidental exposure and ensure secrets are wiped from memory when dropped.
|
||||
/// (e.g. passwords, cryptographic keys, access tokens or other credentials)
|
||||
///
|
||||
/// Access to the secret inner value occurs through the [`ExposeSecret`]
|
||||
|
||||
@@ -103,14 +103,15 @@ pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
|
||||
/// id for information_schema.region_statistics
|
||||
pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
|
||||
|
||||
/// ----- End of information_schema tables -----
|
||||
// ----- End of information_schema tables -----
|
||||
|
||||
// ----- Begin of pg_catalog tables -----
|
||||
pub const PG_CATALOG_PG_CLASS_TABLE_ID: u32 = 256;
|
||||
pub const PG_CATALOG_PG_TYPE_TABLE_ID: u32 = 257;
|
||||
pub const PG_CATALOG_PG_NAMESPACE_TABLE_ID: u32 = 258;
|
||||
|
||||
/// ----- End of pg_catalog tables -----
|
||||
// ----- End of pg_catalog tables -----
|
||||
|
||||
pub const MITO_ENGINE: &str = "mito";
|
||||
pub const MITO2_ENGINE: &str = "mito2";
|
||||
pub const METRIC_ENGINE: &str = "metric";
|
||||
|
||||
@@ -9,7 +9,7 @@ workspace = true
|
||||
|
||||
[features]
|
||||
default = ["geo"]
|
||||
geo = ["geohash", "h3o"]
|
||||
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
|
||||
|
||||
[dependencies]
|
||||
api.workspace = true
|
||||
@@ -28,13 +28,17 @@ common-version.workspace = true
|
||||
datafusion.workspace = true
|
||||
datatypes.workspace = true
|
||||
derive_more = { version = "1", default-features = false, features = ["display"] }
|
||||
geo = { version = "0.29", optional = true }
|
||||
geo-types = { version = "0.7", optional = true }
|
||||
geohash = { version = "0.13", optional = true }
|
||||
h3o = { version = "0.6", optional = true }
|
||||
jsonb.workspace = true
|
||||
nalgebra = "0.33"
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
once_cell.workspace = true
|
||||
paste = "1.0"
|
||||
s2 = { version = "0.0.12", optional = true }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
session.workspace = true
|
||||
@@ -43,8 +47,10 @@ sql.workspace = true
|
||||
statrs = "0.16"
|
||||
store-api.workspace = true
|
||||
table.workspace = true
|
||||
wkt = { version = "0.11", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
approx = "0.5"
|
||||
ron = "0.7"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::scalars::matches::MatchesFunction;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::numpy::NumpyFunction;
|
||||
use crate::scalars::timestamp::TimestampFunction;
|
||||
use crate::scalars::vector::VectorFunction;
|
||||
use crate::system::SystemFunction;
|
||||
use crate::table::TableFunction;
|
||||
|
||||
@@ -120,6 +121,9 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
// Json related functions
|
||||
JsonFunction::register(&function_registry);
|
||||
|
||||
// Vector related functions
|
||||
VectorFunction::register(&function_registry);
|
||||
|
||||
// Geo functions
|
||||
#[cfg(feature = "geo")]
|
||||
crate::scalars::geo::GeoFunctions::register(&function_registry);
|
||||
|
||||
@@ -21,6 +21,7 @@ pub mod json;
|
||||
pub mod matches;
|
||||
pub mod math;
|
||||
pub mod numpy;
|
||||
pub mod vector;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test;
|
||||
|
||||
@@ -31,7 +31,6 @@ pub use polyval::PolyvalAccumulatorCreator;
|
||||
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
|
||||
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
|
||||
|
||||
use super::geo::encoding::JsonPathEncodeFunctionCreator;
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
/// A function creates `AggregateFunctionCreator`.
|
||||
@@ -93,6 +92,11 @@ impl AggregateFunctions {
|
||||
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
|
||||
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
|
||||
|
||||
register_aggr_func!("json_encode_path", 3, JsonPathEncodeFunctionCreator);
|
||||
#[cfg(feature = "geo")]
|
||||
register_aggr_func!(
|
||||
"json_encode_path",
|
||||
3,
|
||||
super::geo::encoding::JsonPathEncodeFunctionCreator
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,18 +14,19 @@
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::arrow::compute::kernels::numeric;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::ValueRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ensure;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
|
||||
/// A function adds an interval value to Timestamp, Date or DateTime, and return the result.
|
||||
/// A function that adds an interval value to a Timestamp or Date and returns the result.
|
||||
/// The implementation of datetime type is based on Date64 which is incorrect so this function
|
||||
/// doesn't support the datetime type.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct DateAddFunction;
|
||||
|
||||
@@ -44,7 +45,6 @@ impl Function for DateAddFunction {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -69,64 +69,14 @@ impl Function for DateAddFunction {
|
||||
}
|
||||
);
|
||||
|
||||
let left = &columns[0];
|
||||
let right = &columns[1];
|
||||
let left = columns[0].to_arrow_array();
|
||||
let right = columns[1].to_arrow_array();
|
||||
|
||||
let size = left.len();
|
||||
let left_datatype = columns[0].data_type();
|
||||
match left_datatype {
|
||||
ConcreteDataType::Timestamp(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let ts = left.get(i).as_timestamp();
|
||||
let interval = right.get(i).as_interval();
|
||||
|
||||
let new_ts = match (ts, interval) {
|
||||
(Some(ts), Some(interval)) => ts.add_interval(interval),
|
||||
_ => ts,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_ts));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
ConcreteDataType::Date(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let date = left.get(i).as_date();
|
||||
let interval = right.get(i).as_interval();
|
||||
let new_date = match (date, interval) {
|
||||
(Some(date), Some(interval)) => date.add_interval(interval),
|
||||
_ => date,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_date));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
ConcreteDataType::DateTime(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let datetime = left.get(i).as_datetime();
|
||||
let interval = right.get(i).as_interval();
|
||||
let new_datetime = match (datetime, interval) {
|
||||
(Some(datetime), Some(interval)) => datetime.add_interval(interval),
|
||||
_ => datetime,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_datetime));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
let result = numeric::add(&left, &right).context(ArrowComputeSnafu)?;
|
||||
let arrow_type = result.data_type().clone();
|
||||
Helper::try_into_vector(result).context(IntoVectorSnafu {
|
||||
data_type: arrow_type,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,8 +94,7 @@ mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
|
||||
TimestampSecondVector,
|
||||
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
|
||||
};
|
||||
|
||||
use super::{DateAddFunction, *};
|
||||
@@ -168,16 +117,15 @@ mod tests {
|
||||
ConcreteDataType::date_datatype(),
|
||||
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
f.return_type(&[ConcreteDataType::datetime_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
assert!(matches!(f.signature(),
|
||||
assert!(
|
||||
matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(sigs),
|
||||
volatility: Volatility::Immutable
|
||||
} if sigs.len() == 18));
|
||||
} if sigs.len() == 15),
|
||||
"{:?}",
|
||||
f.signature()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -243,36 +191,4 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_date_add() {
|
||||
let f = DateAddFunction;
|
||||
|
||||
let dates = vec![Some(123), None, Some(42), None];
|
||||
// Intervals in months
|
||||
let intervals = vec![1, 2, 3, 1];
|
||||
let results = [Some(2678400123), None, Some(7776000042), None];
|
||||
|
||||
let date_vector = DateTimeVector::from(dates.clone());
|
||||
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
|
||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
|
||||
let vector = f.eval(FunctionContext::default(), &args).unwrap();
|
||||
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, _t) in dates.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
let result = results.get(i).unwrap();
|
||||
|
||||
if result.is_none() {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::DateTime(date) => {
|
||||
assert_eq!(date.val(), result.unwrap());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,18 +14,19 @@
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::arrow::compute::kernels::numeric;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::ValueRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ensure;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
|
||||
/// A function subtracts an interval value to Timestamp, Date or DateTime, and return the result.
|
||||
/// A function that subtracts an interval value from a Timestamp or Date and returns the result.
|
||||
/// The implementation of datetime type is based on Date64 which is incorrect so this function
|
||||
/// doesn't support the datetime type.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct DateSubFunction;
|
||||
|
||||
@@ -44,7 +45,6 @@ impl Function for DateSubFunction {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
@@ -69,65 +69,14 @@ impl Function for DateSubFunction {
|
||||
}
|
||||
);
|
||||
|
||||
let left = &columns[0];
|
||||
let right = &columns[1];
|
||||
let left = columns[0].to_arrow_array();
|
||||
let right = columns[1].to_arrow_array();
|
||||
|
||||
let size = left.len();
|
||||
let left_datatype = columns[0].data_type();
|
||||
|
||||
match left_datatype {
|
||||
ConcreteDataType::Timestamp(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let ts = left.get(i).as_timestamp();
|
||||
let interval = right.get(i).as_interval();
|
||||
|
||||
let new_ts = match (ts, interval) {
|
||||
(Some(ts), Some(interval)) => ts.sub_interval(interval),
|
||||
_ => ts,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_ts));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
ConcreteDataType::Date(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let date = left.get(i).as_date();
|
||||
let interval = right.get(i).as_interval();
|
||||
let new_date = match (date, interval) {
|
||||
(Some(date), Some(interval)) => date.sub_interval(interval),
|
||||
_ => date,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_date));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
ConcreteDataType::DateTime(_) => {
|
||||
let mut result = left_datatype.create_mutable_vector(size);
|
||||
for i in 0..size {
|
||||
let datetime = left.get(i).as_datetime();
|
||||
let interval = right.get(i).as_interval();
|
||||
let new_datetime = match (datetime, interval) {
|
||||
(Some(datetime), Some(interval)) => datetime.sub_interval(interval),
|
||||
_ => datetime,
|
||||
};
|
||||
|
||||
result.push_value_ref(ValueRef::from(new_datetime));
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
let result = numeric::sub(&left, &right).context(ArrowComputeSnafu)?;
|
||||
let arrow_type = result.data_type().clone();
|
||||
Helper::try_into_vector(result).context(IntoVectorSnafu {
|
||||
data_type: arrow_type,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,8 +94,7 @@ mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
|
||||
TimestampSecondVector,
|
||||
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
|
||||
};
|
||||
|
||||
use super::{DateSubFunction, *};
|
||||
@@ -174,11 +122,15 @@ mod tests {
|
||||
f.return_type(&[ConcreteDataType::datetime_datatype()])
|
||||
.unwrap()
|
||||
);
|
||||
assert!(matches!(f.signature(),
|
||||
assert!(
|
||||
matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::OneOf(sigs),
|
||||
volatility: Volatility::Immutable
|
||||
} if sigs.len() == 18));
|
||||
} if sigs.len() == 15),
|
||||
"{:?}",
|
||||
f.signature()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -250,42 +202,4 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datetime_date_sub() {
|
||||
let f = DateSubFunction;
|
||||
let millis_per_month = 3600 * 24 * 30 * 1000;
|
||||
|
||||
let dates = vec![
|
||||
Some(123 * millis_per_month),
|
||||
None,
|
||||
Some(42 * millis_per_month),
|
||||
None,
|
||||
];
|
||||
// Intervals in months
|
||||
let intervals = vec![1, 2, 3, 1];
|
||||
let results = [Some(316137600000), None, Some(100915200000), None];
|
||||
|
||||
let date_vector = DateTimeVector::from(dates.clone());
|
||||
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
|
||||
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
|
||||
let vector = f.eval(FunctionContext::default(), &args).unwrap();
|
||||
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, _t) in dates.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
let result = results.get(i).unwrap();
|
||||
|
||||
if result.is_none() {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::DateTime(date) => {
|
||||
assert_eq!(date.val(), result.unwrap());
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,8 +17,10 @@ pub(crate) mod encoding;
|
||||
mod geohash;
|
||||
mod h3;
|
||||
mod helpers;
|
||||
|
||||
use geohash::{GeohashFunction, GeohashNeighboursFunction};
|
||||
mod measure;
|
||||
mod relation;
|
||||
mod s2;
|
||||
mod wkt;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -27,8 +29,8 @@ pub(crate) struct GeoFunctions;
|
||||
impl GeoFunctions {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
// geohash
|
||||
registry.register(Arc::new(GeohashFunction));
|
||||
registry.register(Arc::new(GeohashNeighboursFunction));
|
||||
registry.register(Arc::new(geohash::GeohashFunction));
|
||||
registry.register(Arc::new(geohash::GeohashNeighboursFunction));
|
||||
|
||||
// h3 index
|
||||
registry.register(Arc::new(h3::H3LatLngToCell));
|
||||
@@ -49,11 +51,35 @@ impl GeoFunctions {
|
||||
registry.register(Arc::new(h3::H3CellToChildrenSize));
|
||||
registry.register(Arc::new(h3::H3CellToChildPos));
|
||||
registry.register(Arc::new(h3::H3ChildPosToCell));
|
||||
registry.register(Arc::new(h3::H3CellContains));
|
||||
|
||||
// h3 grid traversal
|
||||
registry.register(Arc::new(h3::H3GridDisk));
|
||||
registry.register(Arc::new(h3::H3GridDiskDistances));
|
||||
registry.register(Arc::new(h3::H3GridDistance));
|
||||
registry.register(Arc::new(h3::H3GridPathCells));
|
||||
|
||||
// h3 measurement
|
||||
registry.register(Arc::new(h3::H3CellDistanceSphereKm));
|
||||
registry.register(Arc::new(h3::H3CellDistanceEuclideanDegree));
|
||||
|
||||
// s2
|
||||
registry.register(Arc::new(s2::S2LatLngToCell));
|
||||
registry.register(Arc::new(s2::S2CellLevel));
|
||||
registry.register(Arc::new(s2::S2CellToToken));
|
||||
registry.register(Arc::new(s2::S2CellParent));
|
||||
|
||||
// spatial data type
|
||||
registry.register(Arc::new(wkt::LatLngToPointWkt));
|
||||
|
||||
// spatial relation
|
||||
registry.register(Arc::new(relation::STContains));
|
||||
registry.register(Arc::new(relation::STWithin));
|
||||
registry.register(Arc::new(relation::STIntersects));
|
||||
|
||||
// spatial measure
|
||||
registry.register(Arc::new(measure::STDistance));
|
||||
registry.register(Arc::new(measure::STDistanceSphere));
|
||||
registry.register(Arc::new(measure::STArea));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::sync::Arc;
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{self, InvalidFuncArgsSnafu, InvalidInputStateSnafu, Result};
|
||||
use common_query::error::{self, InvalidInputStateSnafu, Result};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::AccumulatorCreatorFunction;
|
||||
|
||||
@@ -16,28 +16,29 @@ use std::str::FromStr;
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::error::{self, InvalidFuncArgsSnafu, Result};
|
||||
use common_query::error::{self, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use datatypes::value::{ListValue, Value};
|
||||
use datatypes::vectors::{
|
||||
BooleanVectorBuilder, Int32VectorBuilder, ListVectorBuilder, MutableVector,
|
||||
StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
|
||||
BooleanVectorBuilder, Float64VectorBuilder, Int32VectorBuilder, ListVectorBuilder,
|
||||
MutableVector, StringVectorBuilder, UInt64VectorBuilder, UInt8VectorBuilder, VectorRef,
|
||||
};
|
||||
use derive_more::Display;
|
||||
use h3o::{CellIndex, LatLng, Resolution};
|
||||
use once_cell::sync::Lazy;
|
||||
use snafu::{ensure, ResultExt};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
use super::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
@@ -382,15 +383,7 @@ impl Function for H3CellResolution {
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect 1, provided : {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
@@ -960,6 +953,181 @@ impl Function for H3GridPathCells {
|
||||
}
|
||||
}
|
||||
|
||||
/// Tests if cells contains given cells
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct H3CellContains;
|
||||
|
||||
impl Function for H3CellContains {
|
||||
fn name(&self) -> &str {
|
||||
"h3_cells_contains"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
let multi_cell_types = vec![
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::int64_datatype()),
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::uint64_datatype()),
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
|
||||
ConcreteDataType::string_datatype(),
|
||||
];
|
||||
|
||||
let mut signatures = Vec::with_capacity(multi_cell_types.len() * CELL_TYPES.len());
|
||||
for multi_cell_type in &multi_cell_types {
|
||||
for cell_type in CELL_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
multi_cell_type.clone(),
|
||||
cell_type.clone(),
|
||||
]));
|
||||
}
|
||||
}
|
||||
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let cells_vec = &columns[0];
|
||||
let cell_this_vec = &columns[1];
|
||||
|
||||
let size = cell_this_vec.len();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let mut result = None;
|
||||
if let (cells, Some(cell_this)) = (
|
||||
cells_from_value(cells_vec.get(i))?,
|
||||
cell_from_value(cell_this_vec.get(i))?,
|
||||
) {
|
||||
result = Some(false);
|
||||
|
||||
for cell_that in cells.iter() {
|
||||
// get cell resolution, and find cell_this's parent at
|
||||
// this solution, test if cell_that equals the parent
|
||||
let resolution = cell_that.resolution();
|
||||
if let Some(cell_this_parent) = cell_this.parent(resolution) {
|
||||
if cell_this_parent == *cell_that {
|
||||
result = Some(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Get WGS84 great circle distance of two cell centroid
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct H3CellDistanceSphereKm;
|
||||
|
||||
impl Function for H3CellDistanceSphereKm {
|
||||
fn name(&self) -> &str {
|
||||
"h3_distance_sphere_km"
|
||||
}
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_double_cells()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let cell_this_vec = &columns[0];
|
||||
let cell_that_vec = &columns[1];
|
||||
let size = cell_this_vec.len();
|
||||
|
||||
let mut results = Float64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let result = match (
|
||||
cell_from_value(cell_this_vec.get(i))?,
|
||||
cell_from_value(cell_that_vec.get(i))?,
|
||||
) {
|
||||
(Some(cell_this), Some(cell_that)) => {
|
||||
let centroid_this = LatLng::from(cell_this);
|
||||
let centroid_that = LatLng::from(cell_that);
|
||||
|
||||
Some(centroid_this.distance_km(centroid_that))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Get Euclidean distance of two cell centroid
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct H3CellDistanceEuclideanDegree;
|
||||
|
||||
impl H3CellDistanceEuclideanDegree {
|
||||
fn distance(centroid_this: LatLng, centroid_that: LatLng) -> f64 {
|
||||
((centroid_this.lat() - centroid_that.lat()).powi(2)
|
||||
+ (centroid_this.lng() - centroid_that.lng()).powi(2))
|
||||
.sqrt()
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for H3CellDistanceEuclideanDegree {
|
||||
fn name(&self) -> &str {
|
||||
"h3_distance_degree"
|
||||
}
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_double_cells()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let cell_this_vec = &columns[0];
|
||||
let cell_that_vec = &columns[1];
|
||||
let size = cell_this_vec.len();
|
||||
|
||||
let mut results = Float64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let result = match (
|
||||
cell_from_value(cell_this_vec.get(i))?,
|
||||
cell_from_value(cell_that_vec.get(i))?,
|
||||
) {
|
||||
(Some(cell_this), Some(cell_that)) => {
|
||||
let centroid_this = LatLng::from(cell_this);
|
||||
let centroid_that = LatLng::from(cell_that);
|
||||
|
||||
let dist = Self::distance(centroid_this, centroid_that);
|
||||
Some(dist)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_resolution(v: Value) -> Result<Resolution> {
|
||||
let r = match v {
|
||||
Value::Int8(v) => v as u8,
|
||||
@@ -982,18 +1150,6 @@ fn value_to_resolution(v: Value) -> Result<Resolution> {
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
|
||||
macro_rules! ensure_and_coerce {
|
||||
($compare:expr, $coerce:expr) => {{
|
||||
ensure!(
|
||||
$compare,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: "Argument was outside of acceptable range "
|
||||
}
|
||||
);
|
||||
Ok($coerce)
|
||||
}};
|
||||
}
|
||||
|
||||
fn value_to_position(v: Value) -> Result<u64> {
|
||||
match v {
|
||||
Value::Int8(v) => ensure_and_coerce!(v >= 0, v as u64),
|
||||
@@ -1093,7 +1249,126 @@ fn cell_from_value(v: Value) -> Result<Option<CellIndex>> {
|
||||
})
|
||||
.context(error::ExecuteSnafu)?,
|
||||
),
|
||||
Value::String(s) => Some(
|
||||
CellIndex::from_str(s.as_utf8())
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("H3 error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)?,
|
||||
),
|
||||
_ => None,
|
||||
};
|
||||
Ok(cell)
|
||||
}
|
||||
|
||||
/// extract cell array from all possible types including:
|
||||
/// - int64 list
|
||||
/// - uint64 list
|
||||
/// - string list
|
||||
/// - comma-separated string
|
||||
fn cells_from_value(v: Value) -> Result<Vec<CellIndex>> {
|
||||
match v {
|
||||
Value::List(list) => match list.datatype() {
|
||||
ConcreteDataType::Int64(_) => list
|
||||
.items()
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::Int64(v) = v {
|
||||
CellIndex::try_from(*v as u64)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("H3 error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)
|
||||
} else {
|
||||
Err(BoxedError::new(PlainError::new(
|
||||
"Invalid data type in array".to_string(),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
)))
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<CellIndex>>>(),
|
||||
ConcreteDataType::UInt64(_) => list
|
||||
.items()
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::UInt64(v) = v {
|
||||
CellIndex::try_from(*v)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("H3 error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)
|
||||
} else {
|
||||
Err(BoxedError::new(PlainError::new(
|
||||
"Invalid data type in array".to_string(),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
)))
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<CellIndex>>>(),
|
||||
ConcreteDataType::String(_) => list
|
||||
.items()
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if let Value::String(v) = v {
|
||||
CellIndex::from_str(v.as_utf8().trim())
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("H3 error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)
|
||||
} else {
|
||||
Err(BoxedError::new(PlainError::new(
|
||||
"Invalid data type in array".to_string(),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
)))
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<CellIndex>>>(),
|
||||
_ => Ok(vec![]),
|
||||
},
|
||||
Value::String(csv) => {
|
||||
let str_seq = csv.as_utf8().split(',');
|
||||
str_seq
|
||||
.map(|v| {
|
||||
CellIndex::from_str(v.trim())
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("H3 error: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)
|
||||
})
|
||||
.collect::<Result<Vec<CellIndex>>>()
|
||||
}
|
||||
_ => Ok(vec![]),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the degree-based euclidean distance helper against a known value
    /// for two nearby coordinates.
    #[test]
    fn test_h3_euclidean_distance() {
        let origin = LatLng::new(42.3521, -72.1235).expect("incorrect lat lng");
        let target = LatLng::new(42.45, -72.1260).expect("incorrect lat lng");

        assert_eq!(
            H3CellDistanceEuclideanDegree::distance(origin, target),
            0.09793191512474639
        );
    }
}
|
||||
|
||||
@@ -14,15 +14,15 @@
|
||||
|
||||
macro_rules! ensure_columns_len {
|
||||
($columns:ident) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
$columns.windows(2).all(|c| c[0].len() == c[1].len()),
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The length of input columns are in different size"
|
||||
}
|
||||
)
|
||||
};
|
||||
($column_a:ident, $column_b:ident, $($column_n:ident),*) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
{
|
||||
let mut result = $column_a.len() == $column_b.len();
|
||||
$(
|
||||
@@ -30,7 +30,7 @@ macro_rules! ensure_columns_len {
|
||||
)*
|
||||
result
|
||||
}
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The length of input columns are in different size"
|
||||
}
|
||||
)
|
||||
@@ -41,9 +41,9 @@ pub(super) use ensure_columns_len;
|
||||
|
||||
macro_rules! ensure_columns_n {
|
||||
($columns:ident, $n:literal) => {
|
||||
ensure!(
|
||||
snafu::ensure!(
|
||||
$columns.len() == $n,
|
||||
InvalidFuncArgsSnafu {
|
||||
common_query::error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of arguments is not correct, expect {}, provided : {}",
|
||||
stringify!($n),
|
||||
@@ -59,3 +59,17 @@ macro_rules! ensure_columns_n {
|
||||
}
|
||||
|
||||
pub(super) use ensure_columns_n;
|
||||
|
||||
/// Evaluates to `Ok($converted)` when `$in_range` holds; otherwise returns
/// early from the enclosing function with an `InvalidFuncArgs` error.
macro_rules! ensure_and_coerce {
    ($in_range:expr, $converted:expr) => {{
        // Open-coded form of `snafu::ensure!`: bail out of the caller on failure.
        if !$in_range {
            return common_query::error::InvalidFuncArgsSnafu {
                err_msg: "Argument was outside of acceptable range ",
            }
            .fail()
            .map_err(::core::convert::Into::into);
        }
        Ok($converted)
    }};
}
|
||||
|
||||
pub(super) use ensure_and_coerce;
|
||||
|
||||
195
src/common/function/src/scalars/geo/measure.rs
Normal file
195
src/common/function/src/scalars/geo/measure.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::error::{self, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{Float64VectorBuilder, MutableVector, VectorRef};
|
||||
use derive_more::Display;
|
||||
use geo::algorithm::line_measures::metric_spaces::Euclidean;
|
||||
use geo::{Area, Distance, Haversine};
|
||||
use geo_types::Geometry;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
use super::wkt::parse_wkt;
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Return WGS84(SRID: 4326) euclidean distance between two geometry object, in degree
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STDistance;
|
||||
|
||||
impl Function for STDistance {
|
||||
fn name(&self) -> &str {
|
||||
"st_distance"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let wkt_this_vec = &columns[0];
|
||||
let wkt_that_vec = &columns[1];
|
||||
|
||||
let size = wkt_this_vec.len();
|
||||
let mut results = Float64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt_this = wkt_this_vec.get(i).as_string();
|
||||
let wkt_that = wkt_that_vec.get(i).as_string();
|
||||
|
||||
let result = match (wkt_this, wkt_that) {
|
||||
(Some(wkt_this), Some(wkt_that)) => {
|
||||
let geom_this = parse_wkt(&wkt_this)?;
|
||||
let geom_that = parse_wkt(&wkt_that)?;
|
||||
|
||||
Some(Euclidean::distance(&geom_this, &geom_that))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return great circle distance between two geometry object, in meters
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STDistanceSphere;
|
||||
|
||||
impl Function for STDistanceSphere {
|
||||
fn name(&self) -> &str {
|
||||
"st_distance_sphere_m"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let wkt_this_vec = &columns[0];
|
||||
let wkt_that_vec = &columns[1];
|
||||
|
||||
let size = wkt_this_vec.len();
|
||||
let mut results = Float64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt_this = wkt_this_vec.get(i).as_string();
|
||||
let wkt_that = wkt_that_vec.get(i).as_string();
|
||||
|
||||
let result = match (wkt_this, wkt_that) {
|
||||
(Some(wkt_this), Some(wkt_that)) => {
|
||||
let geom_this = parse_wkt(&wkt_this)?;
|
||||
let geom_that = parse_wkt(&wkt_that)?;
|
||||
|
||||
match (geom_this, geom_that) {
|
||||
(Geometry::Point(this), Geometry::Point(that)) => {
|
||||
Some(Haversine::distance(this, that))
|
||||
}
|
||||
_ => {
|
||||
Err(BoxedError::new(PlainError::new(
|
||||
"Great circle distance between non-point objects are not supported for now.".to_string(),
|
||||
StatusCode::Unsupported,
|
||||
))).context(error::ExecuteSnafu)?
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return area of given geometry object
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STArea;
|
||||
|
||||
impl Function for STArea {
|
||||
fn name(&self) -> &str {
|
||||
"st_area"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![ConcreteDataType::string_datatype()]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let wkt_vec = &columns[0];
|
||||
|
||||
let size = wkt_vec.len();
|
||||
let mut results = Float64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt = wkt_vec.get(i).as_string();
|
||||
|
||||
let result = if let Some(wkt) = wkt {
|
||||
let geom = parse_wkt(&wkt)?;
|
||||
Some(geom.unsigned_area())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
190
src/common/function/src/scalars/geo/relation.rs
Normal file
190
src/common/function/src/scalars/geo/relation.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector, VectorRef};
|
||||
use derive_more::Display;
|
||||
use geo::algorithm::contains::Contains;
|
||||
use geo::algorithm::intersects::Intersects;
|
||||
use geo::algorithm::within::Within;
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
use super::wkt::parse_wkt;
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Test if spatial relationship: contains
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STContains;
|
||||
|
||||
impl Function for STContains {
|
||||
fn name(&self) -> &str {
|
||||
"st_contains"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let wkt_this_vec = &columns[0];
|
||||
let wkt_that_vec = &columns[1];
|
||||
|
||||
let size = wkt_this_vec.len();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt_this = wkt_this_vec.get(i).as_string();
|
||||
let wkt_that = wkt_that_vec.get(i).as_string();
|
||||
|
||||
let result = match (wkt_this, wkt_that) {
|
||||
(Some(wkt_this), Some(wkt_that)) => {
|
||||
let geom_this = parse_wkt(&wkt_this)?;
|
||||
let geom_that = parse_wkt(&wkt_that)?;
|
||||
|
||||
Some(geom_this.contains(&geom_that))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Test if spatial relationship: within
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STWithin;
|
||||
|
||||
impl Function for STWithin {
|
||||
fn name(&self) -> &str {
|
||||
"st_within"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let wkt_this_vec = &columns[0];
|
||||
let wkt_that_vec = &columns[1];
|
||||
|
||||
let size = wkt_this_vec.len();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt_this = wkt_this_vec.get(i).as_string();
|
||||
let wkt_that = wkt_that_vec.get(i).as_string();
|
||||
|
||||
let result = match (wkt_this, wkt_that) {
|
||||
(Some(wkt_this), Some(wkt_that)) => {
|
||||
let geom_this = parse_wkt(&wkt_this)?;
|
||||
let geom_that = parse_wkt(&wkt_that)?;
|
||||
|
||||
Some(geom_this.is_within(&geom_that))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Test if spatial relationship: within
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct STIntersects;
|
||||
|
||||
impl Function for STIntersects {
|
||||
fn name(&self) -> &str {
|
||||
"st_intersects"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::new(
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
Volatility::Stable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let wkt_this_vec = &columns[0];
|
||||
let wkt_that_vec = &columns[1];
|
||||
|
||||
let size = wkt_this_vec.len();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let wkt_this = wkt_this_vec.get(i).as_string();
|
||||
let wkt_that = wkt_that_vec.get(i).as_string();
|
||||
|
||||
let result = match (wkt_this, wkt_that) {
|
||||
(Some(wkt_this), Some(wkt_that)) => {
|
||||
let geom_this = parse_wkt(&wkt_this)?;
|
||||
let geom_that = parse_wkt(&wkt_that)?;
|
||||
|
||||
Some(geom_this.intersects(&geom_that))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
275
src/common/function/src/scalars/geo/s2.rs
Normal file
275
src/common/function/src/scalars/geo/s2.rs
Normal file
@@ -0,0 +1,275 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, UInt64VectorBuilder, VectorRef};
|
||||
use derive_more::Display;
|
||||
use once_cell::sync::Lazy;
|
||||
use s2::cellid::{CellID, MAX_LEVEL};
|
||||
use s2::latlng::LatLng;
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
|
||||
|
||||
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
static LEVEL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
ConcreteDataType::int16_datatype(),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
ConcreteDataType::uint8_datatype(),
|
||||
ConcreteDataType::uint16_datatype(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
|
||||
///
|
||||
/// [s2]: http://s2geometry.io
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2LatLngToCell;
|
||||
|
||||
impl Function for S2LatLngToCell {
|
||||
fn name(&self) -> &str {
|
||||
"s2_latlng_to_cell"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
let mut signatures = Vec::with_capacity(COORDINATE_TYPES.len());
|
||||
for coord_type in COORDINATE_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
// latitude
|
||||
coord_type.clone(),
|
||||
// longitude
|
||||
coord_type.clone(),
|
||||
]));
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let lat_vec = &columns[0];
|
||||
let lon_vec = &columns[1];
|
||||
|
||||
let size = lat_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let lat = lat_vec.get(i).as_f64_lossy();
|
||||
let lon = lon_vec.get(i).as_f64_lossy();
|
||||
|
||||
let result = match (lat, lon) {
|
||||
(Some(lat), Some(lon)) => {
|
||||
let coord = LatLng::from_degrees(lat, lon);
|
||||
ensure!(
|
||||
coord.is_valid(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: "The input coordinates are invalid",
|
||||
}
|
||||
);
|
||||
let cellid = CellID::from(coord);
|
||||
let encoded: u64 = cellid.0;
|
||||
Some(encoded)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the level of current s2 cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellLevel;
|
||||
|
||||
impl Function for S2CellLevel {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_level"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let res = cell.map(|cell| cell.level());
|
||||
|
||||
results.push(res);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the string presentation of the cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellToToken;
|
||||
|
||||
impl Function for S2CellToToken {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_to_token"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 1);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let size = cell_vec.len();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let res = cell.map(|cell| cell.to_token());
|
||||
|
||||
results.push(res.as_deref());
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return parent at given level of current s2 cell
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct S2CellParent;
|
||||
|
||||
impl Function for S2CellParent {
|
||||
fn name(&self) -> &str {
|
||||
"s2_cell_parent"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
signature_of_cell_and_level()
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let cell_vec = &columns[0];
|
||||
let level_vec = &columns[1];
|
||||
let size = cell_vec.len();
|
||||
let mut results = UInt64VectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let cell = cell_from_value(cell_vec.get(i));
|
||||
let level = value_to_level(level_vec.get(i))?;
|
||||
let result = cell.map(|cell| cell.parent(level).0);
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
fn signature_of_cell() -> Signature {
|
||||
let mut signatures = Vec::with_capacity(CELL_TYPES.len());
|
||||
for cell_type in CELL_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![cell_type.clone()]));
|
||||
}
|
||||
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn signature_of_cell_and_level() -> Signature {
|
||||
let mut signatures = Vec::with_capacity(CELL_TYPES.len() * LEVEL_TYPES.len());
|
||||
for cell_type in CELL_TYPES.as_slice() {
|
||||
for level_type in LEVEL_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
cell_type.clone(),
|
||||
level_type.clone(),
|
||||
]));
|
||||
}
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn cell_from_value(v: Value) -> Option<CellID> {
|
||||
match v {
|
||||
Value::Int64(v) => Some(CellID(v as u64)),
|
||||
Value::UInt64(v) => Some(CellID(v)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn value_to_level(v: Value) -> Result<u64> {
|
||||
match v {
|
||||
Value::Int8(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i8, v as u64),
|
||||
Value::Int16(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i16, v as u64),
|
||||
Value::Int32(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i32, v as u64),
|
||||
Value::Int64(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i64, v as u64),
|
||||
Value::UInt8(v) => ensure_and_coerce!(v <= MAX_LEVEL as u8, v as u64),
|
||||
Value::UInt16(v) => ensure_and_coerce!(v <= MAX_LEVEL as u16, v as u64),
|
||||
Value::UInt32(v) => ensure_and_coerce!(v <= MAX_LEVEL as u32, v as u64),
|
||||
Value::UInt64(v) => ensure_and_coerce!(v <= MAX_LEVEL, v),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
100
src/common/function/src/scalars/geo/wkt.rs
Normal file
100
src/common/function/src/scalars/geo/wkt.rs
Normal file
@@ -0,0 +1,100 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_error::ext::{BoxedError, PlainError};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::error::{self, Result};
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
|
||||
use derive_more::Display;
|
||||
use geo_types::{Geometry, Point};
|
||||
use once_cell::sync::Lazy;
|
||||
use snafu::ResultExt;
|
||||
use wkt::{ToWkt, TryFromWkt};
|
||||
|
||||
use super::helpers::{ensure_columns_len, ensure_columns_n};
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
|
||||
vec![
|
||||
ConcreteDataType::float32_datatype(),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
]
|
||||
});
|
||||
|
||||
/// Return WGS84(SRID: 4326) euclidean distance between two geometry object, in degree
|
||||
#[derive(Clone, Debug, Default, Display)]
|
||||
#[display("{}", self.name())]
|
||||
pub struct LatLngToPointWkt;
|
||||
|
||||
impl Function for LatLngToPointWkt {
|
||||
fn name(&self) -> &str {
|
||||
"wkt_point_from_latlng"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
let mut signatures = Vec::new();
|
||||
for coord_type in COORDINATE_TYPES.as_slice() {
|
||||
signatures.push(TypeSignature::Exact(vec![
|
||||
// latitude
|
||||
coord_type.clone(),
|
||||
// longitude
|
||||
coord_type.clone(),
|
||||
]));
|
||||
}
|
||||
Signature::one_of(signatures, Volatility::Stable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure_columns_n!(columns, 2);
|
||||
|
||||
let lat_vec = &columns[0];
|
||||
let lng_vec = &columns[1];
|
||||
|
||||
let size = lat_vec.len();
|
||||
let mut results = StringVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let lat = lat_vec.get(i).as_f64_lossy();
|
||||
let lng = lng_vec.get(i).as_f64_lossy();
|
||||
|
||||
let result = match (lat, lng) {
|
||||
(Some(lat), Some(lng)) => Some(Point::new(lng, lat).wkt_string()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result.as_deref());
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn parse_wkt(s: &str) -> Result<Geometry> {
|
||||
Geometry::try_from_wkt_str(s)
|
||||
.map_err(|e| {
|
||||
BoxedError::new(PlainError::new(
|
||||
format!("Fail to parse WKT: {}", e),
|
||||
StatusCode::EngineExecuteQuery,
|
||||
))
|
||||
})
|
||||
.context(error::ExecuteSnafu)
|
||||
}
|
||||
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
mod json_get;
|
||||
mod json_is;
|
||||
mod json_path_exists;
|
||||
mod json_path_match;
|
||||
mod json_to_string;
|
||||
mod parse_json;
|
||||
|
||||
@@ -49,5 +50,6 @@ impl JsonFunction {
|
||||
registry.register(Arc::new(JsonIsObject));
|
||||
|
||||
registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
|
||||
registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use common_query::prelude::{Signature, TypeSignature};
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
@@ -41,10 +41,24 @@ impl Function for JsonPathExistsFunction {
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
Signature::one_of(
|
||||
vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -64,25 +78,26 @@ impl Function for JsonPathExistsFunction {
|
||||
let paths = &columns[1];
|
||||
|
||||
let size = jsons.len();
|
||||
let datatype = jsons.data_type();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
match datatype {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
match (jsons.data_type(), paths.data_type()) {
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::String(_)) => {
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
let path = paths.get_ref(i);
|
||||
|
||||
let json = json.as_binary();
|
||||
let path = path.as_string();
|
||||
let result = match (json, path) {
|
||||
let result = match (jsons.get_ref(i).as_binary(), paths.get_ref(i).as_string())
|
||||
{
|
||||
(Ok(Some(json)), Ok(Some(path))) => {
|
||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||
match json_path {
|
||||
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
|
||||
Err(_) => None,
|
||||
}
|
||||
// Get `JsonPath`.
|
||||
let json_path = match jsonb::jsonpath::parse_json_path(path.as_bytes())
|
||||
{
|
||||
Ok(json_path) => json_path,
|
||||
Err(_) => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Illegal json path: {:?}", path),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
};
|
||||
jsonb::path_exists(json, json_path).ok()
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
@@ -90,6 +105,12 @@ impl Function for JsonPathExistsFunction {
|
||||
results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
// Any null args existence causes the result to be NULL.
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::String(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Binary(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
(ConcreteDataType::Null(_), ConcreteDataType::Null(_)) => results.push_nulls(size),
|
||||
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
@@ -114,8 +135,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, StringVector};
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, NullVector, StringVector};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -133,9 +154,27 @@ mod tests {
|
||||
|
||||
assert!(matches!(json_path_exists.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Exact(valid_types),
|
||||
type_signature: TypeSignature::OneOf(valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
|
||||
} if valid_types ==
|
||||
vec![
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
TypeSignature::Exact(vec![
|
||||
ConcreteDataType::null_datatype(),
|
||||
ConcreteDataType::null_datatype(),
|
||||
]),
|
||||
],
|
||||
));
|
||||
|
||||
let json_strings = [
|
||||
@@ -143,9 +182,15 @@ mod tests {
|
||||
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"[1, 2, 3]"#,
|
||||
r#"null"#,
|
||||
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
|
||||
r#"null"#,
|
||||
];
|
||||
let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
|
||||
let results = [false, true, true, false];
|
||||
let paths = vec![
|
||||
"$.a.b.c", "$.b", "$.c.a", ".d", "$[0]", "$.a", "null", "null",
|
||||
];
|
||||
let expected = [false, true, true, false, true, false, false, false];
|
||||
|
||||
let jsonbs = json_strings
|
||||
.iter()
|
||||
@@ -162,11 +207,44 @@ mod tests {
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(4, vector.len());
|
||||
for (i, gt) in results.iter().enumerate() {
|
||||
// Test for non-nulls.
|
||||
assert_eq!(8, vector.len());
|
||||
for (i, real) in expected.iter().enumerate() {
|
||||
let result = vector.get_ref(i);
|
||||
let result = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(*gt, result);
|
||||
assert!(!result.is_null());
|
||||
let val = result.as_boolean().unwrap().unwrap();
|
||||
assert_eq!(val, *real);
|
||||
}
|
||||
|
||||
// Test for path error.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let illegal_path = StringVector::from_vec(vec!["$..a"]);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(illegal_path)];
|
||||
let err = json_path_exists.eval(FunctionContext::default(), &args);
|
||||
assert!(err.is_err());
|
||||
|
||||
// Test for nulls.
|
||||
let json_bytes = jsonb::parse_value("{}".as_bytes()).unwrap().to_vec();
|
||||
let json = BinaryVector::from_vec(vec![json_bytes]);
|
||||
let null_json = NullVector::new(1);
|
||||
|
||||
let path = StringVector::from_vec(vec!["$.a"]);
|
||||
let null_path = NullVector::new(1);
|
||||
|
||||
let args: Vec<VectorRef> = vec![Arc::new(null_json), Arc::new(path)];
|
||||
let result1 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
let args: Vec<VectorRef> = vec![Arc::new(json), Arc::new(null_path)];
|
||||
let result2 = json_path_exists
|
||||
.eval(FunctionContext::default(), &args)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(result1.len(), 1);
|
||||
assert!(result1.get_ref(0).is_null());
|
||||
assert_eq!(result2.len(), 1);
|
||||
assert!(result2.get_ref(0).is_null());
|
||||
}
|
||||
}
|
||||
|
||||
202
src/common/function/src/scalars/json/json_path_match.rs
Normal file
202
src/common/function/src/scalars/json/json_path_match.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::{self, Display};
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::Signature;
|
||||
use datafusion::logical_expr::Volatility;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
/// Check if the given JSON data match the given JSON path's predicate.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct JsonPathMatchFunction;
|
||||
|
||||
const NAME: &str = "json_path_match";
|
||||
|
||||
impl Function for JsonPathMatchFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::boolean_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![
|
||||
ConcreteDataType::json_datatype(),
|
||||
ConcreteDataType::string_datatype(),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let jsons = &columns[0];
|
||||
let paths = &columns[1];
|
||||
|
||||
let size = jsons.len();
|
||||
let mut results = BooleanVectorBuilder::with_capacity(size);
|
||||
|
||||
for i in 0..size {
|
||||
let json = jsons.get_ref(i);
|
||||
let path = paths.get_ref(i);
|
||||
|
||||
match json.data_type() {
|
||||
// JSON data type uses binary vector
|
||||
ConcreteDataType::Binary(_) => {
|
||||
let json = json.as_binary();
|
||||
let path = path.as_string();
|
||||
let result = match (json, path) {
|
||||
(Ok(Some(json)), Ok(Some(path))) => {
|
||||
if !jsonb::is_null(json) {
|
||||
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
|
||||
match json_path {
|
||||
Ok(json_path) => jsonb::path_match(json, json_path).ok(),
|
||||
Err(_) => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
_ => {
|
||||
return UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for JsonPathMatchFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "JSON_PATH_MATCH")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::vectors::{BinaryVector, StringVector};

    use super::*;

    /// Checks the function metadata, then evaluates a batch of (JSON, path) pairs.
    #[test]
    fn test_json_path_match_function() {
        let func = JsonPathMatchFunction;

        // Metadata: name, return type and signature.
        assert_eq!("json_path_match", func.name());
        let return_type = func
            .return_type(&[ConcreteDataType::json_datatype()])
            .unwrap();
        assert_eq!(ConcreteDataType::boolean_datatype(), return_type);

        let expected_args = vec![
            ConcreteDataType::json_datatype(),
            ConcreteDataType::string_datatype(),
        ];
        assert!(matches!(
            func.signature(),
            Signature {
                type_signature: TypeSignature::Exact(valid_types),
                volatility: Volatility::Immutable
            } if valid_types == expected_args
        ));

        // Each case is (JSON text, path predicate, expected outcome); `None` means NULL.
        let cases: [(&str, &str, Option<bool>); 7] = [
            (r#"{"a": {"b": 2}, "b": 2, "c": 3}"#, "$.a.b == 2", Some(true)),
            (r#"{"a": 1, "b": [1,2,3]}"#, "$.b[1 to last] >= 2", Some(true)),
            (r#"{"a": 1 ,"b": [1,2,3]}"#, "$.c > 0", Some(false)),
            (r#"[1,2,3]"#, "$[0 to last] > 0", Some(true)),
            (r#"{"a":1,"b":[1,2,3]}"#, r#"null"#, None),
            (r#"null"#, "$.c > 0", None),
            (r#"null"#, r#"null"#, None),
        ];

        let jsonbs: Vec<Option<Vec<u8>>> = cases
            .iter()
            .map(|(json, _, _)| Some(jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
            .collect();
        let paths: Vec<Option<String>> = cases
            .iter()
            .map(|(_, path, _)| Some(path.to_string()))
            .collect();

        let args: Vec<VectorRef> = vec![
            Arc::new(BinaryVector::from(jsonbs)),
            Arc::new(StringVector::from(paths)),
        ];
        let vector = func.eval(FunctionContext::default(), &args).unwrap();

        assert_eq!(7, vector.len());
        for (i, (_, _, expected)) in cases.iter().enumerate() {
            let actual = vector.get_ref(i);

            if let Some(expected_value) = expected {
                assert!(!actual.is_null());
                let actual_value = actual.as_boolean().unwrap().unwrap();
                assert_eq!(*expected_value, actual_value);
            } else {
                assert!(actual.is_null());
            }
        }
    }
}
|
||||
@@ -22,8 +22,12 @@ use datafusion::arrow::compute::kernels::cmp::gt;
|
||||
use datatypes::arrow::array::AsArray;
|
||||
use datatypes::arrow::compute::cast;
|
||||
use datatypes::arrow::compute::kernels::zip;
|
||||
use datatypes::arrow::datatypes::{DataType as ArrowDataType, Date32Type};
|
||||
use datatypes::arrow::datatypes::{
|
||||
DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
|
||||
};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
@@ -34,13 +38,47 @@ pub struct GreatestFunction;
|
||||
|
||||
const NAME: &str = "greatest";
|
||||
|
||||
/// Picks, row by row, the greater of the first two columns in `$columns`, viewing both as
/// Arrow primitive arrays of type `$ty`.
///
/// Expands to an expression whose value is the result of `Helper::try_into_vector`. The
/// expansion uses `?`, so it has to sit inside a function that can propagate the error.
macro_rules! gt_time_types {
    ($ty: ident, $columns:expr) => {{
        let left_array = $columns[0].to_arrow_array();
        let right_array = $columns[1].to_arrow_array();

        let left = left_array.as_primitive::<$ty>();
        let right = right_array.as_primitive::<$ty>();

        // Mask of the rows where the left value is strictly greater than the right one.
        let left_is_greater = gt(&left, &right).context(ArrowComputeSnafu)?;

        // Take the left value where the mask is set, the right value otherwise.
        let picked = zip::zip(&left_is_greater, &left, &right).context(ArrowComputeSnafu)?;
        Helper::try_into_vector(&picked).context(error::FromArrowArraySnafu)
    }};
}
|
||||
|
||||
impl Function for GreatestFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::date_datatype())
|
||||
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
ensure!(
|
||||
input_types.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
input_types.len()
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
match &input_types[0] {
|
||||
ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
|
||||
ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
|
||||
ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
|
||||
ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: input_types,
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@@ -49,6 +87,11 @@ impl Function for GreatestFunction {
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::datetime_datatype(),
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
@@ -66,27 +109,32 @@ impl Function for GreatestFunction {
|
||||
);
|
||||
match columns[0].data_type() {
|
||||
ConcreteDataType::String(_) => {
|
||||
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date32)
|
||||
// Treats string as `DateTime` type.
|
||||
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column1 = column1.as_primitive::<Date32Type>();
|
||||
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date32)
|
||||
let column1 = column1.as_primitive::<Date64Type>();
|
||||
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
let column2 = column2.as_primitive::<Date32Type>();
|
||||
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
|
||||
let result =
|
||||
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
|
||||
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
|
||||
}
|
||||
ConcreteDataType::Date(_) => {
|
||||
let column1 = columns[0].to_arrow_array();
|
||||
let column1 = column1.as_primitive::<Date32Type>();
|
||||
let column2 = columns[1].to_arrow_array();
|
||||
let column2 = column2.as_primitive::<Date32Type>();
|
||||
let column2 = column2.as_primitive::<Date64Type>();
|
||||
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
|
||||
let result =
|
||||
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
|
||||
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
|
||||
}
|
||||
ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
|
||||
ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
|
||||
ConcreteDataType::Timestamp(ts_type) => match ts_type {
|
||||
TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
|
||||
TimestampType::Millisecond(_) => {
|
||||
gt_time_types!(TimestampMillisecondType, columns)
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
gt_time_types!(TimestampMicrosecondType, columns)
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
gt_time_types!(TimestampNanosecondType, columns)
|
||||
}
|
||||
},
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
@@ -106,19 +154,31 @@ impl fmt::Display for GreatestFunction {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::Date;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::DateType;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use datatypes::types::{
|
||||
DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
|
||||
TimestampNanosecondType, TimestampSecondType,
|
||||
};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{DateVector, StringVector, Vector};
|
||||
use datatypes::vectors::{
|
||||
DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_greatest_takes_string_vector() {
|
||||
let function = GreatestFunction;
|
||||
assert_eq!(
|
||||
function.return_type(&[]).unwrap(),
|
||||
ConcreteDataType::Date(DateType)
|
||||
function
|
||||
.return_type(&[
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::string_datatype()
|
||||
])
|
||||
.unwrap(),
|
||||
ConcreteDataType::DateTime(DateTimeType)
|
||||
);
|
||||
let columns = vec![
|
||||
Arc::new(StringVector::from(vec![
|
||||
@@ -132,15 +192,15 @@ mod tests {
|
||||
];
|
||||
|
||||
let result = function.eval(FunctionContext::default(), &columns).unwrap();
|
||||
let result = result.as_any().downcast_ref::<DateVector>().unwrap();
|
||||
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(
|
||||
result.get(0),
|
||||
Value::Date(Date::from_str_utc("2001-02-01").unwrap())
|
||||
Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
|
||||
);
|
||||
assert_eq!(
|
||||
result.get(1),
|
||||
Value::Date(Date::from_str_utc("2012-12-23").unwrap())
|
||||
Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -148,9 +208,15 @@ mod tests {
|
||||
fn test_greatest_takes_date_vector() {
|
||||
let function = GreatestFunction;
|
||||
assert_eq!(
|
||||
function.return_type(&[]).unwrap(),
|
||||
function
|
||||
.return_type(&[
|
||||
ConcreteDataType::date_datatype(),
|
||||
ConcreteDataType::date_datatype()
|
||||
])
|
||||
.unwrap(),
|
||||
ConcreteDataType::Date(DateType)
|
||||
);
|
||||
|
||||
let columns = vec![
|
||||
Arc::new(DateVector::from_slice(vec![-1, 2])) as _,
|
||||
Arc::new(DateVector::from_slice(vec![0, 1])) as _,
|
||||
@@ -168,4 +234,81 @@ mod tests {
|
||||
Value::Date(Date::from_str_utc("1970-01-03").unwrap())
|
||||
);
|
||||
}
|
||||
|
||||
/// `greatest` over two `DateTime` columns keeps the `DateTime` type and picks the larger value
/// in every row.
#[test]
fn test_greatest_takes_datetime_vector() {
    let greatest = GreatestFunction;
    let arg_types = [
        ConcreteDataType::datetime_datatype(),
        ConcreteDataType::datetime_datatype(),
    ];
    assert_eq!(
        greatest.return_type(&arg_types).unwrap(),
        ConcreteDataType::DateTime(DateTimeType)
    );

    let columns = vec![
        Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
        Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
    ];

    let output = greatest
        .eval(FunctionContext::default(), &columns)
        .unwrap();
    let output = output.as_any().downcast_ref::<DateTimeVector>().unwrap();
    assert_eq!(output.len(), 2);

    // Row 0: max(-1, 0) = 0; row 1: max(2, 1) = 2.
    let expected = ["1970-01-01 00:00:00", "1970-01-01 00:00:00.002"];
    for (row, text) in expected.into_iter().enumerate() {
        assert_eq!(
            output.get(row),
            Value::DateTime(DateTime::from_str(text, None).unwrap())
        );
    }
}
|
||||
|
||||
macro_rules! test_timestamp {
|
||||
($type: expr,$unit: ident) => {
|
||||
paste! {
|
||||
#[test]
|
||||
fn [<test_greatest_takes_ $unit:lower _vector>]() {
|
||||
let function = GreatestFunction;
|
||||
assert_eq!(
|
||||
function.return_type(&[$type, $type]).unwrap(),
|
||||
ConcreteDataType::Timestamp(TimestampType::$unit([<Timestamp $unit Type>]))
|
||||
);
|
||||
|
||||
let columns = vec![
|
||||
Arc::new([<Timestamp $unit Vector>]::from_slice(vec![-1, 2])) as _,
|
||||
Arc::new([<Timestamp $unit Vector>]::from_slice(vec![0, 1])) as _,
|
||||
];
|
||||
|
||||
let result = function.eval(FunctionContext::default(), &columns).unwrap();
|
||||
let result = result.as_any().downcast_ref::<[<Timestamp $unit Vector>]>().unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(
|
||||
result.get(0),
|
||||
Value::Timestamp(Timestamp::new(0, TimeUnit::$unit))
|
||||
);
|
||||
assert_eq!(
|
||||
result.get(1),
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::$unit))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_timestamp!(
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
Nanosecond
|
||||
);
|
||||
test_timestamp!(
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
Microsecond
|
||||
);
|
||||
test_timestamp!(
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
Millisecond
|
||||
);
|
||||
test_timestamp!(ConcreteDataType::timestamp_second_datatype(), Second);
|
||||
}
|
||||
|
||||
36
src/common/function/src/scalars/vector.rs
Normal file
36
src/common/function/src/scalars/vector.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod convert;
|
||||
mod distance;
|
||||
pub(crate) mod impl_conv;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::function_registry::FunctionRegistry;
|
||||
|
||||
pub(crate) struct VectorFunction;
|
||||
|
||||
impl VectorFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
// conversion
|
||||
registry.register(Arc::new(convert::ParseVectorFunction));
|
||||
registry.register(Arc::new(convert::VectorToStringFunction));
|
||||
|
||||
// distance
|
||||
registry.register(Arc::new(distance::CosDistanceFunction));
|
||||
registry.register(Arc::new(distance::DotProductFunction));
|
||||
registry.register(Arc::new(distance::L2SqDistanceFunction));
|
||||
}
|
||||
}
|
||||
@@ -12,27 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Internal metrics of the memtable.
|
||||
mod parse_vector;
|
||||
mod vector_to_string;
|
||||
|
||||
/// Metrics of writing the partition tree.
|
||||
pub struct WriteMetrics {
|
||||
/// Size allocated by keys.
|
||||
pub key_bytes: usize,
|
||||
/// Size allocated by values.
|
||||
pub value_bytes: usize,
|
||||
/// Minimum timestamp.
|
||||
pub min_ts: i64,
|
||||
/// Maximum timestamp
|
||||
pub max_ts: i64,
|
||||
}
|
||||
|
||||
impl Default for WriteMetrics {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
key_bytes: 0,
|
||||
value_bytes: 0,
|
||||
min_ts: i64::MAX,
|
||||
max_ts: i64::MIN,
|
||||
}
|
||||
}
|
||||
}
|
||||
pub use parse_vector::ParseVectorFunction;
|
||||
pub use vector_to_string::VectorToStringFunction;
|
||||
160
src/common/function/src/scalars/vector/convert/parse_vector.rs
Normal file
160
src/common/function/src/scalars/vector/convert/parse_vector.rs
Normal file
@@ -0,0 +1,160 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, InvalidVectorStringSnafu, Result};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::types::parse_string_to_vector_type_value;
|
||||
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
const NAME: &str = "parse_vec";
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ParseVectorFunction;
|
||||
|
||||
impl Function for ParseVectorFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::binary_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::string_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let column = &columns[0];
|
||||
let size = column.len();
|
||||
|
||||
let mut result = BinaryVectorBuilder::with_capacity(size);
|
||||
for i in 0..size {
|
||||
let value = column.get(i).as_string();
|
||||
if let Some(value) = value {
|
||||
let res = parse_string_to_vector_type_value(&value, None)
|
||||
.context(InvalidVectorStringSnafu { vec_str: &value })?;
|
||||
result.push(Some(&res));
|
||||
} else {
|
||||
result.push_null();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ParseVectorFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_base::bytes::Bytes;
    use datatypes::value::Value;
    use datatypes::vectors::StringVector;

    use super::*;

    /// Concatenated little-endian bytes of the given `f32` values.
    fn le_bytes(values: &[f32]) -> Vec<u8> {
        values.iter().flat_map(|e| e.to_le_bytes()).collect::<Vec<u8>>()
    }

    /// Well-formed strings are parsed to bytes and a NULL input stays NULL.
    #[test]
    fn test_parse_vector() {
        let func = ParseVectorFunction;

        let input = Arc::new(StringVector::from(vec![
            Some("[1.0,2.0,3.0]".to_string()),
            Some("[4.0,5.0,6.0]".to_string()),
            None,
        ]));

        let output = func.eval(FunctionContext::default(), &[input]).unwrap();
        let output = output.as_ref();

        assert_eq!(output.len(), 3);
        assert_eq!(
            output.get(0),
            Value::Binary(Bytes::from(le_bytes(&[1.0, 2.0, 3.0])))
        );
        assert_eq!(
            output.get(1),
            Value::Binary(Bytes::from(le_bytes(&[4.0, 5.0, 6.0])))
        );
        assert!(output.get(2).is_null());
    }

    /// A single malformed row makes the whole evaluation fail.
    #[test]
    fn test_parse_vector_error() {
        let func = ParseVectorFunction;

        // Missing closing bracket, missing opening bracket, non-numeric element.
        for malformed in ["[7.0,8.0,9.0", "7.0,8.0,9.0]", "[7.0,hello,9.0]"] {
            let input = Arc::new(StringVector::from(vec![
                Some("[1.0,2.0,3.0]".to_string()),
                Some("[4.0,5.0,6.0]".to_string()),
                Some(malformed.to_string()),
            ]));

            let result = func.eval(FunctionContext::default(), &[input]);
            assert!(result.is_err());
        }
    }
}
|
||||
@@ -0,0 +1,139 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::types::vector_type_value_to_string;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
|
||||
const NAME: &str = "vec_to_string";
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct VectorToStringFunction;
|
||||
|
||||
impl Function for VectorToStringFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::string_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::exact(
|
||||
vec![ConcreteDataType::binary_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 1,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly one, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
let column = &columns[0];
|
||||
let size = column.len();
|
||||
|
||||
let mut result = StringVectorBuilder::with_capacity(size);
|
||||
for i in 0..size {
|
||||
let value = column.get(i);
|
||||
match value {
|
||||
Value::Binary(bytes) => {
|
||||
let len = bytes.len();
|
||||
if len % std::mem::size_of::<f32>() != 0 {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Invalid binary length of vector: {}", len),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
let dim = len / std::mem::size_of::<f32>();
|
||||
// Safety: `dim` is calculated from the length of `bytes` and is guaranteed to be valid
|
||||
let res = vector_type_value_to_string(&bytes, dim as _).unwrap();
|
||||
result.push(Some(&res));
|
||||
}
|
||||
Value::Null => {
|
||||
result.push_null();
|
||||
}
|
||||
_ => {
|
||||
return InvalidFuncArgsSnafu {
|
||||
err_msg: format!("Invalid value type: {:?}", value.data_type()),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result.to_vector())
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for VectorToStringFunction {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", NAME.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use datatypes::value::Value;
    use datatypes::vectors::BinaryVectorBuilder;

    use super::*;

    /// Two encoded vectors are rendered as text and a NULL row stays NULL.
    #[test]
    fn test_vector_to_string() {
        let func = VectorToStringFunction;

        let mut builder = BinaryVectorBuilder::with_capacity(3);
        for values in [[1.0f32, 2.0, 3.0], [4.0f32, 5.0, 6.0]] {
            // Each element is stored as its little-endian `f32` bytes.
            let encoded = values
                .iter()
                .flat_map(|e| e.to_le_bytes())
                .collect::<Vec<_>>();
            builder.push(Some(encoded.as_slice()));
        }
        builder.push_null();
        let vector = builder.to_vector();

        let result = func.eval(FunctionContext::default(), &[vector]).unwrap();

        assert_eq!(result.len(), 3);
        assert_eq!(result.get(0), Value::String("[1,2,3]".to_string().into()));
        assert_eq!(result.get(1), Value::String("[4,5,6]".to_string().into()));
        assert_eq!(result.get(2), Value::Null);
    }
}
|
||||
366
src/common/function/src/scalars/vector/distance.rs
Normal file
366
src/common/function/src/scalars/vector/distance.rs
Normal file
@@ -0,0 +1,366 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod cos;
|
||||
mod dot;
|
||||
mod l2sq;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::fmt::Display;
|
||||
|
||||
use common_query::error::{InvalidFuncArgsSnafu, Result};
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::scalars::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{Float32VectorBuilder, MutableVector, VectorRef};
|
||||
use snafu::ensure;
|
||||
|
||||
use crate::function::{Function, FunctionContext};
|
||||
use crate::helper;
|
||||
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const};
|
||||
|
||||
macro_rules! define_distance_function {
|
||||
($StructName:ident, $display_name:expr, $similarity_method:path) => {
|
||||
|
||||
/// A function calculates the distance between two vectors.
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct $StructName;
|
||||
|
||||
impl Function for $StructName {
|
||||
fn name(&self) -> &str {
|
||||
$display_name
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float32_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
helper::one_of_sigs2(
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
vec![
|
||||
ConcreteDataType::string_datatype(),
|
||||
ConcreteDataType::binary_datatype(),
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
ensure!(
|
||||
columns.len() == 2,
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not correct, expect exactly two, have: {}",
|
||||
columns.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
let arg0 = &columns[0];
|
||||
let arg1 = &columns[1];
|
||||
|
||||
let size = arg0.len();
|
||||
let mut result = Float32VectorBuilder::with_capacity(size);
|
||||
if size == 0 {
|
||||
return Ok(result.to_vector());
|
||||
}
|
||||
|
||||
let arg0_const = as_veclit_if_const(arg0)?;
|
||||
let arg1_const = as_veclit_if_const(arg1)?;
|
||||
|
||||
for i in 0..size {
|
||||
let vec0 = match arg0_const.as_ref() {
|
||||
Some(a) => Some(Cow::Borrowed(a.as_ref())),
|
||||
None => as_veclit(arg0.get_ref(i))?,
|
||||
};
|
||||
let vec1 = match arg1_const.as_ref() {
|
||||
Some(b) => Some(Cow::Borrowed(b.as_ref())),
|
||||
None => as_veclit(arg1.get_ref(i))?,
|
||||
};
|
||||
|
||||
if let (Some(vec0), Some(vec1)) = (vec0, vec1) {
|
||||
ensure!(
|
||||
vec0.len() == vec1.len(),
|
||||
InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the vectors must match to calculate distance, have: {} vs {}",
|
||||
vec0.len(),
|
||||
vec1.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
// Checked if the length of the vectors match
|
||||
let d = $similarity_method(vec0.as_ref(), vec1.as_ref());
|
||||
result.push(Some(d));
|
||||
} else {
|
||||
result.push_null();
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(result.to_vector());
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for $StructName {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", $display_name.to_ascii_uppercase())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
define_distance_function!(CosDistanceFunction, "vec_cos_distance", cos::cos);
|
||||
define_distance_function!(L2SqDistanceFunction, "vec_l2sq_distance", l2sq::l2sq);
|
||||
define_distance_function!(DotProductFunction, "vec_dot_product", dot::dot);
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datatypes::vectors::{BinaryVector, ConstantVector, StringVector};

    use super::*;

    /// Little-endian `f32` bytes of the vector `[0.0, 1.0]`.
    const ZERO_ONE: [u8; 8] = [0, 0, 0, 0, 0, 0, 128, 63];
    /// Little-endian `f32` bytes of the vector `[1.0, 0.0]`.
    const ONE_ZERO: [u8; 8] = [0, 0, 128, 63, 0, 0, 0, 0];

    /// Every distance function generated by `define_distance_function!`.
    fn distance_functions() -> Vec<Box<dyn Function>> {
        vec![
            Box::new(CosDistanceFunction {}) as Box<dyn Function>,
            Box::new(L2SqDistanceFunction {}) as Box<dyn Function>,
            Box::new(DotProductFunction {}) as Box<dyn Function>,
        ]
    }

    fn string_column(items: [Option<&str>; 4]) -> VectorRef {
        Arc::new(StringVector::from(items.to_vec())) as VectorRef
    }

    fn binary_column(items: [Option<[u8; 8]>; 4]) -> VectorRef {
        let items: Vec<Option<Vec<u8>>> = items
            .iter()
            .map(|item| item.as_ref().map(|bytes| bytes.to_vec()))
            .collect();
        Arc::new(BinaryVector::from(items)) as VectorRef
    }

    /// String column with a null in row 2.
    fn strings_null_at_2() -> VectorRef {
        string_column([
            Some("[0.0, 1.0]"),
            Some("[1.0, 0.0]"),
            None,
            Some("[1.0, 0.0]"),
        ])
    }

    /// String column with a null in row 3.
    fn strings_null_at_3() -> VectorRef {
        string_column([
            Some("[0.0, 1.0]"),
            Some("[0.0, 1.0]"),
            Some("[0.0, 1.0]"),
            None,
        ])
    }

    /// Binary column with a null in row 2.
    fn binaries_null_at_2() -> VectorRef {
        binary_column([Some(ZERO_ONE), Some(ONE_ZERO), None, Some(ONE_ZERO)])
    }

    /// Binary column of `[0.0, 1.0]` vectors with a null in row 3.
    fn binaries_null_at_3() -> VectorRef {
        binary_column([Some(ZERO_ONE), Some(ZERO_ONE), Some(ZERO_ONE), None])
    }

    fn eval_pair(func: &dyn Function, lhs: &VectorRef, rhs: &VectorRef) -> VectorRef {
        func.eval(FunctionContext::default(), &[lhs.clone(), rhs.clone()])
            .unwrap()
    }

    /// Asserts, row by row, whether the result is null.
    fn assert_null_rows(result: &VectorRef, expected_null: [bool; 4]) {
        for (row, is_null) in expected_null.iter().enumerate() {
            assert_eq!(result.get(row).is_null(), *is_null, "row {}", row);
        }
    }

    /// Evaluates every distance function on `(lhs, rhs)` and `(rhs, lhs)` and
    /// expects nulls exactly in rows 2 and 3.
    fn check_both_orders(lhs: VectorRef, rhs: VectorRef) {
        for func in distance_functions() {
            for (a, b) in [(&lhs, &rhs), (&rhs, &lhs)] {
                let result = eval_pair(func.as_ref(), a, b);
                assert_null_rows(&result, [false, false, true, true]);
            }
        }
    }

    #[test]
    fn test_distance_string_string() {
        check_both_orders(strings_null_at_2(), strings_null_at_3());
    }

    #[test]
    fn test_distance_binary_binary() {
        check_both_orders(binaries_null_at_2(), binaries_null_at_3());
    }

    #[test]
    fn test_distance_string_binary() {
        check_both_orders(strings_null_at_2(), binaries_null_at_3());
    }

    #[test]
    fn test_distance_const_string() {
        for func in distance_functions() {
            let const_str = Arc::new(ConstantVector::new(
                Arc::new(StringVector::from(vec!["[0.0, 1.0]"])),
                4,
            )) as VectorRef;
            let strings = strings_null_at_2();
            let binaries = binaries_null_at_3();

            // Against the string column only row 2 is null.
            let result = eval_pair(func.as_ref(), &const_str, &strings);
            assert_null_rows(&result, [false, false, true, false]);
            let result = eval_pair(func.as_ref(), &strings, &const_str);
            assert_null_rows(&result, [false, false, true, false]);

            // Against the binary column only row 3 is null.
            let result = eval_pair(func.as_ref(), &const_str, &binaries);
            assert_null_rows(&result, [false, false, false, true]);
            let result = eval_pair(func.as_ref(), &binaries, &const_str);
            assert_null_rows(&result, [false, false, false, true]);
        }
    }

    #[test]
    fn test_invalid_vector_length() {
        for func in distance_functions() {
            // String vectors of dimension 1 vs 2.
            let short = Arc::new(StringVector::from(vec!["[1.0]"])) as VectorRef;
            let long = Arc::new(StringVector::from(vec!["[1.0, 1.0]"])) as VectorRef;
            assert!(func
                .eval(FunctionContext::default(), &[short, long])
                .is_err());

            // Binary vectors of dimension 1 vs 2.
            let short = Arc::new(BinaryVector::from(vec![vec![0, 0, 128, 63]])) as VectorRef;
            let long =
                Arc::new(BinaryVector::from(vec![vec![0, 0, 128, 63, 0, 0, 0, 64]])) as VectorRef;
            assert!(func
                .eval(FunctionContext::default(), &[short, long])
                .is_err());
        }
    }
}
|
||||
87
src/common/function/src/scalars/vector/distance/cos.rs
Normal file
87
src/common/function/src/scalars/vector/distance/cos.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use nalgebra::DVectorView;
|
||||
|
||||
/// Calculates the cos distance between two vectors.
|
||||
///
|
||||
/// **Note:** Must ensure that the length of the two vectors are the same.
|
||||
pub fn cos(lhs: &[f32], rhs: &[f32]) -> f32 {
|
||||
let lhs_vec = DVectorView::from_slice(lhs, lhs.len());
|
||||
let rhs_vec = DVectorView::from_slice(rhs, rhs.len());
|
||||
|
||||
let dot_product = lhs_vec.dot(&rhs_vec);
|
||||
let lhs_norm = lhs_vec.norm();
|
||||
let rhs_norm = rhs_vec.norm();
|
||||
if dot_product.abs() < f32::EPSILON
|
||||
|| lhs_norm.abs() < f32::EPSILON
|
||||
|| rhs_norm.abs() < f32::EPSILON
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
let cos_similar = dot_product / (lhs_norm * rhs_norm);
|
||||
let res = 1.0 - cos_similar;
|
||||
if res.abs() < f32::EPSILON {
|
||||
0.0
|
||||
} else {
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use approx::assert_relative_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_cos_scalar() {
|
||||
let lhs = vec![1.0, 2.0, 3.0];
|
||||
let rhs = vec![1.0, 2.0, 3.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.0, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![1.0, 2.0, 3.0];
|
||||
let rhs = vec![4.0, 5.0, 6.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.025, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![1.0, 2.0, 3.0];
|
||||
let rhs = vec![7.0, 8.0, 9.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.04, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![0.0, 0.0, 0.0];
|
||||
let rhs = vec![1.0, 2.0, 3.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 1.0, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![0.0, 0.0, 0.0];
|
||||
let rhs = vec![4.0, 5.0, 6.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 1.0, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![0.0, 0.0, 0.0];
|
||||
let rhs = vec![7.0, 8.0, 9.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 1.0, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![7.0, 8.0, 9.0];
|
||||
let rhs = vec![1.0, 2.0, 3.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.04, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![7.0, 8.0, 9.0];
|
||||
let rhs = vec![4.0, 5.0, 6.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.0, epsilon = 1e-2);
|
||||
|
||||
let lhs = vec![7.0, 8.0, 9.0];
|
||||
let rhs = vec![7.0, 8.0, 9.0];
|
||||
assert_relative_eq!(cos(&lhs, &rhs), 0.0, epsilon = 1e-2);
|
||||
}
|
||||
}
|
||||
71
src/common/function/src/scalars/vector/distance/dot.rs
Normal file
71
src/common/function/src/scalars/vector/distance/dot.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use nalgebra::DVectorView;
|
||||
|
||||
/// Calculates the dot product between two vectors.
|
||||
///
|
||||
/// **Note:** Must ensure that the length of the two vectors are the same.
|
||||
pub fn dot(lhs: &[f32], rhs: &[f32]) -> f32 {
|
||||
let lhs = DVectorView::from_slice(lhs, lhs.len());
|
||||
let rhs = DVectorView::from_slice(rhs, rhs.len());
|
||||
|
||||
lhs.dot(&rhs)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use approx::assert_relative_eq;

    use super::*;

    #[test]
    fn test_dot_scalar() {
        // (lhs, rhs, expected dot product)
        let cases: [([f32; 3], [f32; 3], f32); 9] = [
            ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0], 14.0),
            ([1.0, 2.0, 3.0], [4.0, 5.0, 6.0], 32.0),
            ([1.0, 2.0, 3.0], [7.0, 8.0, 9.0], 50.0),
            ([0.0, 0.0, 0.0], [1.0, 2.0, 3.0], 0.0),
            ([0.0, 0.0, 0.0], [4.0, 5.0, 6.0], 0.0),
            ([0.0, 0.0, 0.0], [7.0, 8.0, 9.0], 0.0),
            ([7.0, 8.0, 9.0], [1.0, 2.0, 3.0], 50.0),
            ([7.0, 8.0, 9.0], [4.0, 5.0, 6.0], 122.0),
            ([7.0, 8.0, 9.0], [7.0, 8.0, 9.0], 194.0),
        ];

        for (lhs, rhs, expected) in cases.iter() {
            assert_relative_eq!(dot(lhs, rhs), *expected, epsilon = 1e-2);
        }
    }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user