Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2026-01-06 13:22:57 +00:00.

Comparing commits: v0.7.0 ... fix-proto- (38 commits).
Commits:

- 038bc4fe6e
- 6d07c422d8
- 6c14ece23f
- 89c51d9b87
- e4333969b4
- b55905cf66
- fb4da05f25
- 904484b525
- cafb4708ce
- 7c895e2605
- 9afe327bca
- 58bd065c6b
- 9aa8f756ab
- 7639c227ca
- 1255c1fc9e
- 06dcd0f6ed
- 0a4444a43a
- b7ac8d6aa8
- e767f37241
- da098f5568
- aa953dcc34
- aa125a50f9
- d8939eb891
- 0bb949787c
- 8c37c3fc0f
- 21ff3620be
- aeca0d8e8a
- a309cd018a
- 3ee53360ee
- 352bd7b6fd
- 3f3ef2e7af
- a218f12bd9
- c884c56151
- 9ec288cab9
- 1f1491e429
- c52bc613e0
- a9d42f7b87
- 86ce2d8713
.editorconfig (new file, 10 lines)

root = true

[*]
end_of_line = lf
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true

[{Makefile,**.mk}]
indent_style = tab
@@ -21,3 +21,6 @@ GT_GCS_CREDENTIAL_PATH = GCS credential path
 GT_GCS_ENDPOINT = GCS end point
 # Settings for kafka wal test
 GT_KAFKA_ENDPOINTS = localhost:9092
+
+# Setting for fuzz tests
+GT_MYSQL_ADDR = localhost:4002
.github/actions/fuzz-test/action.yaml (new file, 13 lines)

name: Fuzz Test
description: 'Fuzz test given setup and service'
inputs:
  target:
    description: "The fuzz target to test"
runs:
  using: composite
  steps:
    - name: Run Fuzz Test
      shell: bash
      run: cargo fuzz run ${{ inputs.target }} --fuzz-dir tests-fuzz -D -s none -- -max_total_time=120
      env:
        GT_MYSQL_ADDR: 127.0.0.1:4002
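
For reference, a libFuzzer-style target such as `fuzz_create_table` is typically declared with the `libfuzzer-sys` and `arbitrary` crates (both show up in the `Cargo.lock` changes below). A minimal sketch of such a target — the `CreateTableAst` type and `render_sql` helper are hypothetical stand-ins, not the actual `tests-fuzz` code:

```rust
// Hypothetical sketch of a cargo-fuzz target like `fuzz_create_table`.
// Assumes libfuzzer-sys and arbitrary (with its "derive" feature), as added
// to Cargo.lock in this change; everything below the imports is illustrative.
#![no_main]

use arbitrary::Arbitrary;
use libfuzzer_sys::fuzz_target;

// Stand-in for the structured input the real generators produce.
#[derive(Debug, Arbitrary)]
struct CreateTableAst {
    table_name: String,
}

// Stand-in for the real SQL renderer in tests-fuzz.
fn render_sql(ast: &CreateTableAst) -> String {
    format!("CREATE TABLE `{}` (ts TIMESTAMP TIME INDEX)", ast.table_name)
}

fuzz_target!(|ast: CreateTableAst| {
    // The real harness executes this against the server at GT_MYSQL_ADDR.
    let _sql = render_sql(&ast);
});
```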
.github/workflows/develop.yml (42 lines changed)

@@ -102,7 +102,7 @@ jobs:
           shared-key: "build-binaries"
       - name: Build greptime binaries
         shell: bash
-        run: cargo build
+        run: cargo build --bin greptime --bin sqlness-runner
       - name: Pack greptime binaries
         shell: bash
         run: |
@@ -117,6 +117,46 @@ jobs:
           artifacts-dir: bins
           version: current

+  fuzztest:
+    name: Fuzz Test
+    needs: build
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        target: [ "fuzz_create_table", "fuzz_alter_table" ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
+      - name: Rust Cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          shared-key: "fuzz-test-targets"
+      - name: Set Rust Fuzz
+        shell: bash
+        run: |
+          sudo apt update && sudo apt install -y libfuzzer-14-dev
+          cargo install cargo-fuzz
+      - name: Download pre-built binaries
+        uses: actions/download-artifact@v4
+        with:
+          name: bins
+          path: .
+      - name: Unzip binaries
+        run: tar -xvf ./bins.tar.gz
+      - name: Run GreptimeDB
+        run: |
+          ./bins/greptime standalone start&
+      - name: Fuzz Test
+        uses: ./.github/actions/fuzz-test
+        env:
+          CUSTOM_LIBFUZZER_PATH: /usr/lib/llvm-14/lib/libFuzzer.a
+        with:
+          target: ${{ matrix.target }}
+
   sqlness:
     name: Sqlness Test
     needs: build
.gitignore (4 lines changed)

@@ -46,3 +46,7 @@ benchmarks/data
 *.code-workspace

 venv/
+
+# Fuzz tests
+tests-fuzz/artifacts/
+tests-fuzz/corpus/
Cargo.lock (generated, 167 lines changed)

@@ -29,6 +29,17 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"

+[[package]]
+name = "aes"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
+dependencies = [
+ "cfg-if 1.0.0",
+ "cipher",
+ "cpufeatures",
+]
+
 [[package]]
 name = "ahash"
 version = "0.7.7"
@@ -241,6 +252,15 @@ dependencies = [
  "syn 1.0.109",
 ]

+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+dependencies = [
+ "derive_arbitrary",
+]
+
 [[package]]
 name = "arc-swap"
 version = "1.6.0"
@@ -992,6 +1012,15 @@ dependencies = [
  "generic-array",
 ]

+[[package]]
+name = "block-padding"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "borsh"
 version = "1.3.0"
@@ -1266,6 +1295,15 @@ dependencies = [
  "tokio",
 ]

+[[package]]
+name = "cbc"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
+dependencies = [
+ "cipher",
+]
+
 [[package]]
 name = "cc"
 version = "1.0.83"
@@ -1421,6 +1459,16 @@ dependencies = [
  "half 1.8.2",
 ]

+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
 [[package]]
 name = "clang-sys"
 version = "1.6.1"
@@ -2912,6 +2960,17 @@ dependencies = [
  "syn 2.0.43",
 ]

+[[package]]
+name = "derive_arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.43",
+]
+
 [[package]]
 name = "derive_builder"
 version = "0.11.2"
@@ -3415,10 +3474,12 @@ dependencies = [
  "common-telemetry",
  "common-time",
  "datatypes",
+ "enum_dispatch",
  "hydroflow",
  "itertools 0.10.5",
  "num-traits",
  "serde",
+ "serde_json",
  "servers",
  "session",
  "snafu",
@@ -4406,6 +4467,16 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "inout"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
+dependencies = [
+ "block-padding",
+ "generic-array",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.12"
@@ -4746,9 +4817,20 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.151"
+version = "0.2.153"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]

 [[package]]
 name = "libgit2-sys"
@@ -5989,9 +6071,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

 [[package]]
 name = "opendal"
-version = "0.44.2"
+version = "0.45.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4af824652d4d2ffabf606d337a071677ae621b05622adf35df9562f69d9b4498"
+checksum = "52c17c077f23fa2d2c25d9d22af98baa43b8bbe2ef0de80cf66339aa70401467"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -6007,7 +6089,7 @@ dependencies = [
  "md-5",
  "once_cell",
  "percent-encoding",
- "quick-xml 0.30.0",
+ "quick-xml 0.31.0",
  "reqsign",
  "reqwest",
  "serde",
@@ -6500,6 +6582,16 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"

+[[package]]
+name = "pbkdf2"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
+dependencies = [
+ "digest",
+ "hmac",
+]
+
 [[package]]
 name = "peeking_take_while"
 version = "0.1.2"
@@ -6540,6 +6632,12 @@ version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

+[[package]]
+name = "permutation"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
+
 [[package]]
 name = "pest"
 version = "2.7.5"
@@ -6724,6 +6822,21 @@ dependencies = [
  "spki 0.7.3",
 ]

+[[package]]
+name = "pkcs5"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6"
+dependencies = [
+ "aes",
+ "cbc",
+ "der 0.7.8",
+ "pbkdf2",
+ "scrypt",
+ "sha2",
+ "spki 0.7.3",
+]
+
 [[package]]
 name = "pkcs8"
 version = "0.8.0"
@@ -6742,6 +6855,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
 dependencies = [
  "der 0.7.8",
+ "pkcs5",
+ "rand_core",
  "spki 0.7.3",
 ]

@@ -7456,16 +7571,6 @@ dependencies = [
  "memchr",
 ]

-[[package]]
-name = "quick-xml"
-version = "0.30.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
-dependencies = [
- "memchr",
- "serde",
-]
-
 [[package]]
 name = "quick-xml"
 version = "0.31.0"
@@ -7748,9 +7853,9 @@ dependencies = [

 [[package]]
 name = "reqsign"
-version = "0.14.6"
+version = "0.14.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dce87f66ba6c6acef277a729f989a0eca946cb9ce6a15bcc036bda0f72d4b9fd"
+checksum = "43e319d9de9ff4d941abf4ac718897118b0fe04577ea3f8e0f5788971784eef5"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -7775,7 +7880,6 @@ dependencies = [
  "serde_json",
  "sha1",
  "sha2",
- "tokio",
 ]

 [[package]]
@@ -7968,6 +8072,7 @@ dependencies = [
  "pkcs1 0.7.5",
  "pkcs8 0.10.2",
  "rand_core",
+ "sha2",
  "signature",
  "spki 0.7.3",
  "subtle",
@@ -8702,6 +8807,15 @@ dependencies = [
  "bytemuck",
 ]

+[[package]]
+name = "salsa20"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213"
+dependencies = [
+ "cipher",
+]
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -8815,6 +8929,17 @@ dependencies = [
  "tokio-test",
 ]

+[[package]]
+name = "scrypt"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f"
+dependencies = [
+ "pbkdf2",
+ "salsa20",
+ "sha2",
+]
+
 [[package]]
 name = "sct"
 version = "0.7.1"
@@ -9074,6 +9199,7 @@ dependencies = [
  "derive_builder 0.12.0",
  "digest",
  "futures",
+ "hashbrown 0.14.3",
  "headers",
  "hex",
  "hostname",
@@ -9092,6 +9218,7 @@ dependencies = [
  "opensrv-mysql",
  "opentelemetry-proto 0.3.0",
  "parking_lot 0.12.1",
+ "permutation",
  "pgwire",
  "pin-project",
  "postgres-types",
@@ -10092,15 +10219,19 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
 name = "tests-fuzz"
 version = "0.7.0"
 dependencies = [
+ "arbitrary",
  "async-trait",
  "common-error",
  "common-macro",
  "common-query",
+ "common-runtime",
  "common-telemetry",
+ "common-time",
  "datatypes",
  "derive_builder 0.12.0",
  "dotenv",
  "lazy_static",
+ "libfuzzer-sys",
  "partition",
  "rand",
  "rand_chacha",
@@ -134,7 +134,7 @@ reqwest = { version = "0.11", default-features = false, features = [
 rskafka = "0.5"
 rust_decimal = "1.33"
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
+serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.7"
Makefile (5 lines changed)

@@ -3,6 +3,7 @@ CARGO_PROFILE ?=
 FEATURES ?=
 TARGET_DIR ?=
 TARGET ?=
+BUILD_BIN ?= greptime
 CARGO_BUILD_OPTS := --locked
 IMAGE_REGISTRY ?= docker.io
 IMAGE_NAMESPACE ?= greptime
@@ -45,6 +46,10 @@ ifneq ($(strip $(TARGET)),)
 	CARGO_BUILD_OPTS += --target ${TARGET}
 endif

+ifneq ($(strip $(BUILD_BIN)),)
+	CARGO_BUILD_OPTS += --bin ${BUILD_BIN}
+endif
+
 ifneq ($(strip $(RELEASE)),)
 	CARGO_BUILD_OPTS += --release
 endif
@@ -29,7 +29,7 @@ use client::api::v1::column::Values;
 use client::api::v1::{
     Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests, SemanticType,
 };
-use client::{Client, Database, Output, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use futures_util::TryStreamExt;
 use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
@@ -502,9 +502,9 @@ async fn do_query(num_iter: usize, db: &Database, table_name: &str) {
     for i in 0..num_iter {
         let now = Instant::now();
         let res = db.sql(&query).await.unwrap();
-        match res {
-            Output::AffectedRows(_) | Output::RecordBatches(_) => (),
-            Output::Stream(stream, _) => {
+        match res.data {
+            OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => (),
+            OutputData::Stream(stream) => {
                 stream.try_collect::<Vec<_>>().await.unwrap();
             }
         }
@@ -79,7 +79,7 @@ This RFC proposes to add a new expression node `MergeScan` to merge result from
 │               │  │                             │
 └─Frontend──────┘  └─Remote-Sources──────────────┘
 ```
-This merge operation simply chains all the the underlying remote data sources and return `RecordBatch`, just like a coalesce op. And each remote sources is a gRPC query to datanode via the substrait logical plan interface. The plan is transformed and divided from the original query that comes to frontend.
+This merge operation simply chains all the underlying remote data sources and return `RecordBatch`, just like a coalesce op. And each remote sources is a gRPC query to datanode via the substrait logical plan interface. The plan is transformed and divided from the original query that comes to frontend.

 ## Commutativity of MergeScan

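
The coalesce-like behavior described in that RFC excerpt — chain the per-datanode streams one after another, with no reordering — can be sketched in a few lines with `futures`. This illustrates the idea only, with a stub `RecordBatch`; it is not GreptimeDB's actual `MergeScan` implementation:

```rust
// Illustrative coalesce-style merge: concatenate remote sources in order.
use futures::stream::{self, Stream, StreamExt};

// Stub standing in for a real RecordBatch.
struct RecordBatch;

fn merge_scan(
    sources: Vec<impl Stream<Item = RecordBatch>>,
) -> impl Stream<Item = RecordBatch> {
    // `flatten` drains each source to completion before moving to the next,
    // which is exactly the chaining the RFC describes.
    stream::iter(sources).flatten()
}
```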
@@ -307,7 +307,7 @@ impl Database {
                     reason: "Expect 'AffectedRows' Flight messages to be the one and the only!"
                 }
             );
-            Ok(Output::AffectedRows(rows))
+            Ok(Output::new_with_affected_rows(rows))
         }
         FlightMessage::Recordbatch(_) | FlightMessage::Metrics(_) => {
             IllegalFlightMessagesSnafu {
@@ -340,7 +340,7 @@ impl Database {
                 output_ordering: None,
                 metrics: Default::default(),
             };
-            Ok(Output::new_stream(Box::pin(record_batch_stream)))
+            Ok(Output::new_with_stream(Box::pin(record_batch_stream)))
         }
     }
 }
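
The `Output` changes repeated across this diff — `res.data`, `Output::new_with_affected_rows`, `Output::new_with_stream`, and the new `OutputData`/`OutputMeta` re-exports — suggest `Output` became a struct wrapping an `OutputData` payload. A rough sketch of the shape those call sites imply; the field layout and stand-in types are inferred, not the real `common_query` definitions:

```rust
// Shape inferred from call sites in this diff; stand-in types throughout.
struct RecordBatches;
struct SendableRecordBatchStream;
struct OutputMeta; // hypothetical metadata carried alongside the data

enum OutputData {
    AffectedRows(usize),
    RecordBatches(RecordBatches),
    Stream(SendableRecordBatchStream),
}

struct Output {
    data: OutputData,
    meta: OutputMeta,
}

impl Output {
    fn new_with_affected_rows(rows: usize) -> Self {
        Output { data: OutputData::AffectedRows(rows), meta: OutputMeta }
    }

    fn new_with_stream(stream: SendableRecordBatchStream) -> Self {
        Output { data: OutputData::Stream(stream), meta: OutputMeta }
    }
}

fn main() {
    // Consumers now match on the payload, as the updated call sites do.
    match Output::new_with_affected_rows(3).data {
        OutputData::AffectedRows(n) => println!("affected {n} rows"),
        _ => (),
    }
}
```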
@@ -26,7 +26,7 @@ use api::v1::greptime_response::Response;
 use api::v1::{AffectedRows, GreptimeResponse};
 pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_error::status_code::StatusCode;
-pub use common_query::Output;
+pub use common_query::{Output, OutputData, OutputMeta};
 pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
 use snafu::OptionExt;

@@ -62,7 +62,9 @@ pub struct BenchTableMetadataCommand {

 impl BenchTableMetadataCommand {
     pub async fn build(&self) -> Result<Instance> {
-        let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
+        let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr], 128)
+            .await
+            .unwrap();

         let table_metadata_manager = Arc::new(TableMetadataManager::new(etcd_store));

@@ -19,8 +19,7 @@ use async_trait::async_trait;
 use clap::{Parser, ValueEnum};
 use client::api::v1::auth_header::AuthScheme;
 use client::api::v1::Basic;
-use client::{Client, Database, DEFAULT_SCHEMA_NAME};
-use common_query::Output;
+use client::{Client, Database, OutputData, DEFAULT_SCHEMA_NAME};
 use common_recordbatch::util::collect;
 use common_telemetry::{debug, error, info, warn};
 use datatypes::scalars::ScalarVector;
@@ -142,7 +141,7 @@ impl Export {
             .with_context(|_| RequestDatabaseSnafu {
                 sql: "show databases".to_string(),
             })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let record_batch = collect(stream)
@@ -183,7 +182,7 @@ impl Export {
             .sql(&sql)
             .await
             .with_context(|_| RequestDatabaseSnafu { sql })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let Some(record_batch) = collect(stream)
@@ -235,7 +234,7 @@ impl Export {
             .sql(&sql)
             .await
             .with_context(|_| RequestDatabaseSnafu { sql })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let record_batch = collect(stream)
@@ -19,7 +19,7 @@ use std::time::Instant;
 use catalog::kvbackend::{
     CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager,
 };
-use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_base::Plugins;
 use common_error::ext::ErrorExt;
 use common_query::Output;
@@ -184,15 +184,15 @@ impl Repl {
             }
             .context(RequestDatabaseSnafu { sql: &sql })?;

-            let either = match output {
-                Output::Stream(s, _) => {
+            let either = match output.data {
+                OutputData::Stream(s) => {
                     let x = RecordBatches::try_collect(s)
                         .await
                         .context(CollectRecordBatchesSnafu)?;
                     Either::Left(x)
                 }
-                Output::RecordBatches(x) => Either::Left(x),
-                Output::AffectedRows(rows) => Either::Right(rows),
+                OutputData::RecordBatches(x) => Either::Left(x),
+                OutputData::AffectedRows(rows) => Either::Right(rows),
             };

             let end = Instant::now();
@@ -70,7 +70,7 @@ impl UpgradeCommand {
             etcd_addr: &self.etcd_addr,
         })?;
         let tool = MigrateTableMetadata {
-            etcd_store: EtcdStore::with_etcd_client(client),
+            etcd_store: EtcdStore::with_etcd_client(client, 128),
             dryrun: self.dryrun,
             skip_catalog_keys: self.skip_catalog_keys,
             skip_table_global_keys: self.skip_table_global_keys,
@@ -117,10 +117,12 @@ struct StartCommand {
     /// The working home directory of this metasrv instance.
     #[clap(long)]
     data_home: Option<String>,

     /// If it's not empty, the metasrv will store all data with this key prefix.
     #[clap(long, default_value = "")]
     store_key_prefix: String,
+
+    /// The max operations per txn
+    #[clap(long)]
+    max_txn_ops: Option<usize>,
 }

 impl StartCommand {
@@ -181,6 +183,10 @@ impl StartCommand {
             opts.store_key_prefix = self.store_key_prefix.clone()
         }

+        if let Some(max_txn_ops) = self.max_txn_ops {
+            opts.max_txn_ops = max_txn_ops;
+        }
+
         // Disable dashboard in metasrv.
         opts.http.disable_dashboard = true;

@@ -28,12 +28,15 @@ const REGION: &str = "region";
 const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";

 pub fn is_supported_in_s3(key: &str) -> bool {
-    key == ENDPOINT
-        || key == ACCESS_KEY_ID
-        || key == SECRET_ACCESS_KEY
-        || key == SESSION_TOKEN
-        || key == REGION
-        || key == ENABLE_VIRTUAL_HOST_STYLE
+    [
+        ENDPOINT,
+        ACCESS_KEY_ID,
+        SECRET_ACCESS_KEY,
+        SESSION_TOKEN,
+        REGION,
+        ENABLE_VIRTUAL_HOST_STYLE,
+    ]
+    .contains(&key)
 }

 pub fn build_s3_backend(
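
The rewrite trades a chain of `==` comparisons for a single slice lookup, so supporting a new key means adding one array element. A self-contained usage sketch — only `REGION`'s value is confirmed by the hunk header; the other constant value is an assumption:

```rust
// Condensed version of is_supported_in_s3; constants inlined so it runs alone.
const REGION: &str = "region"; // value confirmed by the hunk header
const ENDPOINT: &str = "endpoint"; // assumed value

fn is_supported_in_s3(key: &str) -> bool {
    [ENDPOINT, REGION /* ...remaining keys elided... */].contains(&key)
}

fn main() {
    assert!(is_supported_in_s3("region"));
    assert!(!is_supported_in_s3("bucket"));
}
```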
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod clamp;
 mod modulo;
 mod pow;
 mod rate;
@@ -19,6 +20,7 @@ mod rate;
 use std::fmt;
 use std::sync::Arc;

+pub use clamp::ClampFunction;
 use common_query::error::{GeneralDataFusionSnafu, Result};
 use common_query::prelude::Signature;
 use datafusion::error::DataFusionError;
@@ -40,7 +42,8 @@ impl MathFunction {
         registry.register(Arc::new(ModuloFunction));
         registry.register(Arc::new(PowFunction));
         registry.register(Arc::new(RateFunction));
-        registry.register(Arc::new(RangeFunction))
+        registry.register(Arc::new(RangeFunction));
+        registry.register(Arc::new(ClampFunction));
     }
 }

src/common/function/src/scalars/math/clamp.rs (new file, 403 lines)

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt::{self, Display};
use std::sync::Arc;

use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datafusion::arrow::array::{ArrayIter, PrimitiveArray};
use datafusion::logical_expr::Volatility;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::VectorRef;
use datatypes::types::LogicalPrimitiveType;
use datatypes::value::TryAsPrimitive;
use datatypes::vectors::PrimitiveVector;
use datatypes::with_match_primitive_type_id;
use snafu::{ensure, OptionExt};

use crate::function::Function;

#[derive(Clone, Debug, Default)]
pub struct ClampFunction;

const CLAMP_NAME: &str = "clamp";

impl Function for ClampFunction {
    fn name(&self) -> &str {
        CLAMP_NAME
    }

    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        // Type check is done by `signature`
        Ok(input_types[0].clone())
    }

    fn signature(&self) -> Signature {
        // input, min, max
        Signature::uniform(3, ConcreteDataType::numerics(), Volatility::Immutable)
    }

    fn eval(
        &self,
        _func_ctx: crate::function::FunctionContext,
        columns: &[VectorRef],
    ) -> Result<VectorRef> {
        ensure!(
            columns.len() == 3,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly 3, have: {}",
                    columns.len()
                ),
            }
        );
        ensure!(
            columns[0].data_type().is_numeric(),
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The first arg's type is not numeric, have: {}",
                    columns[0].data_type()
                ),
            }
        );
        ensure!(
            columns[0].data_type() == columns[1].data_type()
                && columns[1].data_type() == columns[2].data_type(),
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "Arguments don't have identical types: {}, {}, {}",
                    columns[0].data_type(),
                    columns[1].data_type(),
                    columns[2].data_type()
                ),
            }
        );
        ensure!(
            columns[1].len() == 1 && columns[2].len() == 1,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The second and third args should be scalar, have: {:?}, {:?}",
                    columns[1], columns[2]
                ),
            }
        );

        with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
            let input_array = columns[0].to_arrow_array();
            let input = input_array
                .as_any()
                .downcast_ref::<PrimitiveArray<<$S as LogicalPrimitiveType>::ArrowPrimitive>>()
                .unwrap();

            let min = TryAsPrimitive::<$S>::try_as_primitive(&columns[1].get(0))
                .with_context(|| {
                    InvalidFuncArgsSnafu {
                        err_msg: "The second arg should not be none",
                    }
                })?;
            let max = TryAsPrimitive::<$S>::try_as_primitive(&columns[2].get(0))
                .with_context(|| {
                    InvalidFuncArgsSnafu {
                        err_msg: "The third arg should not be none",
                    }
                })?;

            // ensure min <= max
            ensure!(
                min <= max,
                InvalidFuncArgsSnafu {
                    err_msg: format!(
                        "The second arg should be less than or equal to the third arg, have: {:?}, {:?}",
                        columns[1], columns[2]
                    ),
                }
            );

            clamp_impl::<$S, true, true>(input, min, max)
        },{
            unreachable!()
        })
    }
}

impl Display for ClampFunction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", CLAMP_NAME.to_ascii_uppercase())
    }
}

fn clamp_impl<T: LogicalPrimitiveType, const CLAMP_MIN: bool, const CLAMP_MAX: bool>(
    input: &PrimitiveArray<T::ArrowPrimitive>,
    min: T::Native,
    max: T::Native,
) -> Result<VectorRef> {
    common_telemetry::info!("[DEBUG] min {min:?}, max {max:?}");

    let iter = ArrayIter::new(input);
    let result = iter.map(|x| {
        x.map(|x| {
            if CLAMP_MIN && x < min {
                min
            } else if CLAMP_MAX && x > max {
                max
            } else {
                x
            }
        })
    });
    let result = PrimitiveArray::<T::ArrowPrimitive>::from_iter(result);
    Ok(Arc::new(PrimitiveVector::<T>::from(result)))
}

#[cfg(test)]
mod test {

    use std::sync::Arc;

    use datatypes::prelude::ScalarVector;
    use datatypes::vectors::{
        ConstantVector, Float64Vector, Int64Vector, StringVector, UInt64Vector,
    };

    use super::*;
    use crate::function::FunctionContext;

    #[test]
    fn clamp_i64() {
        let inputs = [
            (
                vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
                -1,
                10,
                vec![Some(-1), Some(-1), Some(-1), Some(0), Some(1), Some(2)],
            ),
            (
                vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
                0,
                0,
                vec![Some(0), Some(0), Some(0), Some(0), Some(0), Some(0)],
            ),
            (
                vec![Some(-3), None, Some(-1), None, None, Some(2)],
                -2,
                1,
                vec![Some(-2), None, Some(-1), None, None, Some(1)],
            ),
            (
                vec![None, None, None, None, None],
                0,
                1,
                vec![None, None, None, None, None],
            ),
        ];

        let func = ClampFunction;
        for (in_data, min, max, expected) in inputs {
            let args = [
                Arc::new(Int64Vector::from(in_data)) as _,
                Arc::new(Int64Vector::from_vec(vec![min])) as _,
                Arc::new(Int64Vector::from_vec(vec![max])) as _,
            ];
            let result = func
                .eval(FunctionContext::default(), args.as_slice())
                .unwrap();
            let expected: VectorRef = Arc::new(Int64Vector::from(expected));
            assert_eq!(expected, result);
        }
    }

    #[test]
    fn clamp_u64() {
        let inputs = [
            (
                vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
                1,
                3,
                vec![Some(1), Some(1), Some(2), Some(3), Some(3), Some(3)],
            ),
            (
                vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
                0,
                0,
                vec![Some(0), Some(0), Some(0), Some(0), Some(0), Some(0)],
            ),
            (
                vec![Some(0), None, Some(2), None, None, Some(5)],
                1,
                3,
                vec![Some(1), None, Some(2), None, None, Some(3)],
            ),
            (
                vec![None, None, None, None, None],
                0,
                1,
                vec![None, None, None, None, None],
            ),
        ];

        let func = ClampFunction;
        for (in_data, min, max, expected) in inputs {
            let args = [
                Arc::new(UInt64Vector::from(in_data)) as _,
                Arc::new(UInt64Vector::from_vec(vec![min])) as _,
                Arc::new(UInt64Vector::from_vec(vec![max])) as _,
            ];
            let result = func
                .eval(FunctionContext::default(), args.as_slice())
                .unwrap();
            let expected: VectorRef = Arc::new(UInt64Vector::from(expected));
            assert_eq!(expected, result);
        }
    }

    #[test]
    fn clamp_f64() {
        let inputs = [
            (
                vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
                -1.0,
                10.0,
                vec![Some(-1.0), Some(-1.0), Some(-1.0), Some(0.0), Some(1.0)],
            ),
            (
                vec![Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
                0.0,
                0.0,
                vec![Some(0.0), Some(0.0), Some(0.0), Some(0.0)],
            ),
            (
                vec![Some(-3.0), None, Some(-1.0), None, None, Some(2.0)],
                -2.0,
                1.0,
                vec![Some(-2.0), None, Some(-1.0), None, None, Some(1.0)],
            ),
            (
                vec![None, None, None, None, None],
                0.0,
                1.0,
                vec![None, None, None, None, None],
            ),
        ];

        let func = ClampFunction;
        for (in_data, min, max, expected) in inputs {
            let args = [
                Arc::new(Float64Vector::from(in_data)) as _,
                Arc::new(Float64Vector::from_vec(vec![min])) as _,
                Arc::new(Float64Vector::from_vec(vec![max])) as _,
            ];
            let result = func
                .eval(FunctionContext::default(), args.as_slice())
                .unwrap();
            let expected: VectorRef = Arc::new(Float64Vector::from(expected));
            assert_eq!(expected, result);
        }
    }

    #[test]
    fn clamp_const_i32() {
        let input = vec![Some(5)];
        let min = 2;
        let max = 4;

        let func = ClampFunction;
        let args = [
            Arc::new(ConstantVector::new(Arc::new(Int64Vector::from(input)), 1)) as _,
            Arc::new(Int64Vector::from_vec(vec![min])) as _,
            Arc::new(Int64Vector::from_vec(vec![max])) as _,
        ];
        let result = func
            .eval(FunctionContext::default(), args.as_slice())
            .unwrap();
        let expected: VectorRef = Arc::new(Int64Vector::from(vec![Some(4)]));
        assert_eq!(expected, result);
    }

    #[test]
    fn clamp_invalid_min_max() {
        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
        let min = 10.0;
        let max = -1.0;

        let func = ClampFunction;
        let args = [
            Arc::new(Float64Vector::from(input)) as _,
            Arc::new(Float64Vector::from_vec(vec![min])) as _,
            Arc::new(Float64Vector::from_vec(vec![max])) as _,
        ];
        let result = func.eval(FunctionContext::default(), args.as_slice());
        assert!(result.is_err());
    }

    #[test]
    fn clamp_type_not_match() {
        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
        let min = -1;
        let max = 10;

        let func = ClampFunction;
        let args = [
            Arc::new(Float64Vector::from(input)) as _,
            Arc::new(Int64Vector::from_vec(vec![min])) as _,
            Arc::new(UInt64Vector::from_vec(vec![max])) as _,
        ];
        let result = func.eval(FunctionContext::default(), args.as_slice());
        assert!(result.is_err());
    }

    #[test]
    fn clamp_min_is_not_scalar() {
        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
        let min = -10.0;
        let max = 1.0;

        let func = ClampFunction;
        let args = [
            Arc::new(Float64Vector::from(input)) as _,
            Arc::new(Float64Vector::from_vec(vec![min, min])) as _,
            Arc::new(Float64Vector::from_vec(vec![max])) as _,
        ];
        let result = func.eval(FunctionContext::default(), args.as_slice());
        assert!(result.is_err());
    }

    #[test]
    fn clamp_no_max() {
        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
        let min = -10.0;

        let func = ClampFunction;
        let args = [
            Arc::new(Float64Vector::from(input)) as _,
            Arc::new(Float64Vector::from_vec(vec![min])) as _,
        ];
        let result = func.eval(FunctionContext::default(), args.as_slice());
        assert!(result.is_err());
    }

    #[test]
    fn clamp_on_string() {
        let input = vec![Some("foo"), Some("foo"), Some("foo"), Some("foo")];

        let func = ClampFunction;
        let args = [
            Arc::new(StringVector::from(input)) as _,
            Arc::new(StringVector::from_vec(vec!["bar"])) as _,
            Arc::new(StringVector::from_vec(vec!["baz"])) as _,
        ];
        let result = func.eval(FunctionContext::default(), args.as_slice());
        assert!(result.is_err());
    }
}
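
The `CLAMP_MIN`/`CLAMP_MAX` const generics in `clamp_impl` let one kernel serve as clamp, clamp-to-min-only, or clamp-to-max-only, with the disabled branch resolved at compile time. The per-element rule, restated over plain slices as a runnable sketch:

```rust
// The per-element rule from clamp_impl, on plain slices for illustration.
fn clamp_all<const CLAMP_MIN: bool, const CLAMP_MAX: bool>(
    xs: &[i64],
    min: i64,
    max: i64,
) -> Vec<i64> {
    xs.iter()
        .map(|&x| {
            if CLAMP_MIN && x < min {
                min
            } else if CLAMP_MAX && x > max {
                max
            } else {
                x
            }
        })
        .collect()
}

fn main() {
    // Matches the clamp_i64 expectations above.
    assert_eq!(clamp_all::<true, true>(&[-3, 0, 2], -1, 1), vec![-1, 0, 1]);
    // With only the lower bound active, the kernel acts as a clamp-min.
    assert_eq!(clamp_all::<true, false>(&[-3, 0, 2], -1, 1), vec![-1, 0, 2]);
}
```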
@@ -14,9 +14,11 @@

 use std::sync::Arc;
 mod greatest;
+mod to_timezone;
 mod to_unixtime;

 use greatest::GreatestFunction;
+use to_timezone::ToTimezoneFunction;
 use to_unixtime::ToUnixtimeFunction;

 use crate::function_registry::FunctionRegistry;
@@ -25,6 +27,7 @@ pub(crate) struct TimestampFunction;

 impl TimestampFunction {
     pub fn register(registry: &FunctionRegistry) {
+        registry.register(Arc::new(ToTimezoneFunction));
         registry.register(Arc::new(ToUnixtimeFunction));
         registry.register(Arc::new(GreatestFunction));
     }
src/common/function/src/scalars/timestamp/to_timezone.rs (new file, 260 lines)

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;
use std::sync::Arc;

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use common_time::{Timestamp, Timezone};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::{
    StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
    TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use snafu::{ensure, OptionExt};

use crate::function::{Function, FunctionContext};
use crate::helper;

#[derive(Clone, Debug, Default)]
pub struct ToTimezoneFunction;

const NAME: &str = "to_timezone";

fn convert_to_timezone(arg: &str) -> Option<Timezone> {
    Timezone::from_tz_string(arg).ok()
}

fn convert_to_timestamp(arg: &Value) -> Option<Timestamp> {
    match arg {
        Value::Timestamp(ts) => Some(*ts),
        _ => None,
    }
}

impl fmt::Display for ToTimezoneFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "TO_TIMEZONE")
    }
}

impl Function for ToTimezoneFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        // type checked by signature - MUST BE timestamp
        Ok(input_types[0].clone())
    }

    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
                ConcreteDataType::timestamp_nanosecond_datatype(),
            ],
            vec![ConcreteDataType::string_datatype()],
        )
    }

    fn eval(&self, _ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly 2, have: {}",
                    columns.len()
                ),
            }
        );

        // TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477
        let ts = columns[0].data_type().as_timestamp().with_context(|| {
            UnsupportedInputDataTypeSnafu {
                function: NAME,
                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
            }
        })?;
        let array = columns[0].to_arrow_array();
        let times = match ts {
            TimestampType::Second(_) => {
                let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Millisecond(_) => {
                let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Microsecond(_) => {
                let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Nanosecond(_) => {
                let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
        };

        let tzs = {
            let array = columns[1].to_arrow_array();
            let vector = StringVector::try_from_arrow_array(&array)
                .ok()
                .with_context(|| UnsupportedInputDataTypeSnafu {
                    function: NAME,
                    datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                })?;
            (0..vector.len())
                .map(|i| convert_to_timezone(&vector.get(i).to_string()))
                .collect::<Vec<_>>()
        };

        let result = times
            .iter()
            .zip(tzs.iter())
            .map(|(time, tz)| match (time, tz) {
                (Some(time), _) => Some(time.to_timezone_aware_string(tz.as_ref())),
                _ => None,
            })
            .collect::<Vec<Option<String>>>();
        Ok(Arc::new(StringVector::from(result)))
    }
}

#[cfg(test)]
mod tests {

    use datatypes::scalars::ScalarVector;
    use datatypes::timestamp::{
        TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
    };
    use datatypes::vectors::StringVector;

    use super::*;

    #[test]
    fn test_timestamp_to_timezone() {
        let f = ToTimezoneFunction;
        assert_eq!("to_timezone", f.name());

        let results = vec![
            Some("1969-12-31 19:00:01"),
            None,
            Some("1970-01-01 03:00:01"),
            None,
        ];
        let times: Vec<Option<TimestampSecond>> = vec![
            Some(TimestampSecond::new(1)),
            None,
            Some(TimestampSecond::new(1)),
            None,
        ];
        let ts_vector: TimestampSecondVector =
            TimestampSecondVector::from_owned_iterator(times.into_iter());
        let tzs = vec![Some("America/New_York"), None, Some("Europe/Moscow"), None];
        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.001"),
            None,
            Some("1970-01-01 03:00:00.001"),
            None,
        ];
        let times: Vec<Option<TimestampMillisecond>> = vec![
            Some(TimestampMillisecond::new(1)),
            None,
            Some(TimestampMillisecond::new(1)),
            None,
        ];
        let ts_vector: TimestampMillisecondVector =
            TimestampMillisecondVector::from_owned_iterator(times.into_iter());
        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.000001"),
            None,
            Some("1970-01-01 03:00:00.000001"),
            None,
        ];
        let times: Vec<Option<TimestampMicrosecond>> = vec![
            Some(TimestampMicrosecond::new(1)),
            None,
            Some(TimestampMicrosecond::new(1)),
            None,
        ];
        let ts_vector: TimestampMicrosecondVector =
            TimestampMicrosecondVector::from_owned_iterator(times.into_iter());

        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.000000001"),
            None,
            Some("1970-01-01 03:00:00.000000001"),
            None,
        ];
        let times: Vec<Option<TimestampNanosecond>> = vec![
            Some(TimestampNanosecond::new(1)),
            None,
            Some(TimestampNanosecond::new(1)),
            None,
        ];
        let ts_vector: TimestampNanosecondVector =
            TimestampNanosecondVector::from_owned_iterator(times.into_iter());

        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);
    }
}
@@ -32,7 +32,7 @@ macro_rules! ok {
     };
 }
 
-/// Internal util macro to to create an error.
+/// Internal util macro to create an error.
 macro_rules! error {
     ($span:expr, $msg: expr) => {
         Err(syn::Error::new($span, $msg))
@@ -67,6 +67,14 @@ pub enum Error {
         location: Location,
     },
 
+    #[snafu(display("Failed to execute {} txn operations via Etcd", max_operations))]
+    EtcdTxnFailed {
+        max_operations: usize,
+        #[snafu(source)]
+        error: etcd_client::Error,
+        location: Location,
+    },
+
     #[snafu(display("Failed to get sequence: {}", err_msg))]
     NextSequence { err_msg: String, location: Location },
 
@@ -400,6 +408,7 @@ impl ErrorExt for Error {
             IllegalServerState { .. }
             | EtcdTxnOpResponse { .. }
            | EtcdFailed { .. }
+            | EtcdTxnFailed { .. }
            | ConnectEtcd { .. } => StatusCode::Internal,
 
            SerdeJson { .. }
@@ -464,7 +464,7 @@ impl TableMetadataManager {
    pub fn max_logical_tables_per_batch(&self) -> usize {
        // The batch size is max_txn_size / 3 because the size of the `tables_data`
        // is 3 times the size of the `tables_data`.
-        self.kv_backend.max_txn_size() / 3
+        self.kv_backend.max_txn_ops() / 3
    }
 
    /// Creates metadata for multiple logical tables and return an error if different metadata exists.
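To make the batching arithmetic concrete: the surrounding comment suggests each logical table expands to roughly three transaction operations, so the batch size divides the backend's per-transaction operation cap by three. A minimal sketch of that computation, assuming etcd's default `--max-txn-ops` of 128 (the function below is a standalone illustration, not the crate's API):

    // Hypothetical illustration of the batch-size computation above.
    fn max_logical_tables_per_batch(max_txn_ops: usize) -> usize {
        // One logical table contributes ~3 txn operations, so divide the cap by 3.
        max_txn_ops / 3
    }

    fn main() {
        // With etcd's default `--max-txn-ops` of 128, a batch holds 42 tables.
        assert_eq!(42, max_logical_tables_per_batch(128));
    }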
@@ -860,6 +860,7 @@ mod tests {
    use bytes::Bytes;
    use common_time::util::current_time_millis;
    use futures::TryStreamExt;
+    use store_api::storage::RegionId;
    use table::metadata::{RawTableInfo, TableInfo};
 
    use super::datanode_table::DatanodeTableKey;
@@ -1056,6 +1057,36 @@ mod tests {
        );
    }
 
+    #[tokio::test]
+    async fn test_create_many_logical_tables_metadata() {
+        let kv_backend = Arc::new(MemoryKvBackend::default());
+        let table_metadata_manager = TableMetadataManager::new(kv_backend);
+
+        let mut tables_data = vec![];
+        for i in 0..128 {
+            let table_id = i + 1;
+            let region_number = table_id * 3;
+            let region_id = RegionId::new(table_id, region_number);
+            let region_route = new_region_route(region_id.as_u64(), 2);
+            let region_routes = vec![region_route.clone()];
+            let table_info: RawTableInfo = test_utils::new_test_table_info_with_name(
+                table_id,
+                &format!("my_table_{}", table_id),
+                region_routes.iter().map(|r| r.region.id.region_number()),
+            )
+            .into();
+            let table_route_value = TableRouteValue::physical(region_routes.clone());
+
+            tables_data.push((table_info, table_route_value));
+        }
+
+        // creates metadata.
+        table_metadata_manager
+            .create_logical_tables_metadata(tables_data)
+            .await
+            .unwrap();
+    }
+
    #[tokio::test]
    async fn test_delete_table_metadata() {
        let mem_kv = Arc::new(MemoryKvBackend::default());
@@ -19,8 +19,9 @@ use datatypes::schema::{ColumnSchema, SchemaBuilder};
 use store_api::storage::TableId;
 use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};
 
-pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
+pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
    table_id: TableId,
+    table_name: &str,
    region_numbers: I,
 ) -> TableInfo {
    let column_schemas = vec![
@@ -50,8 +51,14 @@ pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
    TableInfoBuilder::default()
        .table_id(table_id)
        .table_version(5)
-        .name("mytable")
+        .name(table_name)
        .meta(meta)
        .build()
        .unwrap()
 }
+
+pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
+    table_id: TableId,
+    region_numbers: I,
+) -> TableInfo {
+    new_test_table_info_with_name(table_id, "mytable", region_numbers)
+}
@@ -45,6 +45,10 @@ impl TxnService for ChrootKvBackend {
        let txn_res = self.inner.txn(txn).await?;
        Ok(self.chroot_txn_response(txn_res))
    }
+
+    fn max_txn_ops(&self) -> usize {
+        self.inner.max_txn_ops()
+    }
 }
 
 #[async_trait::async_trait]
@@ -33,12 +33,6 @@ use crate::rpc::store::{
 };
 use crate::rpc::KeyValue;
 
-// Maximum number of operations permitted in a transaction.
-// The etcd default configuration's `--max-txn-ops` is 128.
-//
-// For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
-const MAX_TXN_SIZE: usize = 128;
-
 fn convert_key_value(kv: etcd_client::KeyValue) -> KeyValue {
    let (key, value) = kv.into_key_value();
    KeyValue { key, value }
@@ -46,10 +40,15 @@ fn convert_key_value(kv: etcd_client::KeyValue) -> KeyValue {
 
 pub struct EtcdStore {
    client: Client,
+    // Maximum number of operations permitted in a transaction.
+    // The etcd default configuration's `--max-txn-ops` is 128.
+    //
+    // For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
+    max_txn_ops: usize,
 }
 
 impl EtcdStore {
-    pub async fn with_endpoints<E, S>(endpoints: S) -> Result<KvBackendRef>
+    pub async fn with_endpoints<E, S>(endpoints: S, max_txn_ops: usize) -> Result<KvBackendRef>
    where
        E: AsRef<str>,
        S: AsRef<[E]>,
@@ -58,16 +57,19 @@ impl EtcdStore {
        .await
        .context(error::ConnectEtcdSnafu)?;
 
-        Ok(Self::with_etcd_client(client))
+        Ok(Self::with_etcd_client(client, max_txn_ops))
    }
 
-    pub fn with_etcd_client(client: Client) -> KvBackendRef {
-        Arc::new(Self { client })
+    pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef {
+        Arc::new(Self {
+            client,
+            max_txn_ops,
+        })
    }
 
    async fn do_multi_txn(&self, txn_ops: Vec<TxnOp>) -> Result<Vec<TxnResponse>> {
-        let max_txn_size = self.max_txn_size();
-        if txn_ops.len() < max_txn_size {
+        let max_txn_ops = self.max_txn_ops();
+        if txn_ops.len() < max_txn_ops {
            // fast path
            let _timer = METRIC_META_TXN_REQUEST
                .with_label_values(&["etcd", "txn"])
@@ -83,7 +85,7 @@ impl EtcdStore {
        }
 
        let txns = txn_ops
-            .chunks(max_txn_size)
+            .chunks(max_txn_ops)
            .map(|part| async move {
                let _timer = METRIC_META_TXN_REQUEST
                    .with_label_values(&["etcd", "txn"])
@@ -311,18 +313,20 @@ impl TxnService for EtcdStore {
            .with_label_values(&["etcd", "txn"])
            .start_timer();
 
+        let max_operations = txn.max_operations();
+
        let etcd_txn: Txn = txn.into();
        let txn_res = self
            .client
            .kv_client()
            .txn(etcd_txn)
            .await
-            .context(error::EtcdFailedSnafu)?;
+            .context(error::EtcdTxnFailedSnafu { max_operations })?;
        txn_res.try_into()
    }
 
-    fn max_txn_size(&self) -> usize {
-        MAX_TXN_SIZE
+    fn max_txn_ops(&self) -> usize {
+        self.max_txn_ops
    }
 }
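The chunked path in `do_multi_txn` above generalizes to any backend with a per-transaction operation cap: split the flat operation list into cap-sized chunks and run one transaction per chunk. A standalone sketch of that shape (the names here are illustrative, not the crate's API):

    // Illustrative only: chunk a flat list of ops by a per-txn operation cap.
    fn split_into_txns<T: Clone>(txn_ops: &[T], max_txn_ops: usize) -> Vec<Vec<T>> {
        txn_ops
            .chunks(max_txn_ops.max(1)) // guard against a zero cap
            .map(|part| part.to_vec())
            .collect()
    }

    fn main() {
        let ops: Vec<u32> = (0..300).collect();
        let txns = split_into_txns(&ops, 128); // etcd's default --max-txn-ops
        let sizes: Vec<usize> = txns.iter().map(|t| t.len()).collect();
        assert_eq!(vec![128, 128, 44], sizes);
    }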
@@ -323,6 +323,10 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
            responses,
        })
    }
+
+    fn max_txn_ops(&self) -> usize {
+        usize::MAX
+    }
 }
 
 impl<T: ErrorExt + Send + Sync + 'static> ResettableKvBackend for MemoryKvBackend<T> {
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::cmp::max;
+
 use common_error::ext::ErrorExt;
 
 use crate::rpc::store::{DeleteRangeResponse, PutResponse, RangeResponse};
@@ -27,8 +29,8 @@ pub trait TxnService: Sync + Send {
    }
 
    /// Maximum number of operations permitted in a transaction.
-    fn max_txn_size(&self) -> usize {
-        usize::MAX
+    fn max_txn_ops(&self) -> usize {
+        unimplemented!("txn is not implemented")
    }
 }
 
@@ -192,6 +194,12 @@ impl Txn {
        self.req.failure = operations.into();
        self
    }
+
+    #[inline]
+    pub fn max_operations(&self) -> usize {
+        let opc = max(self.req.compare.len(), self.req.success.len());
+        max(opc, self.req.failure.len())
+    }
 }
 
 impl From<Txn> for TxnRequest {
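`max_operations` reports the largest branch of the transaction rather than the total, which matches how etcd enforces `--max-txn-ops`: the compare list, the success op list, and the failure op list are each checked against the cap individually (hedged: this is the behavior documented for etcd v3; consult the server's txn validation if in doubt). A small standalone illustration of that rule:

    // Illustrative: the op count etcd's --max-txn-ops checks is the
    // largest of the three branches, not their sum.
    fn max_operations(compare: usize, success: usize, failure: usize) -> usize {
        compare.max(success).max(failure)
    }

    fn main() {
        // 2 compares, 100 ops on success, 1 op on failure -> 100 is the binding count.
        assert_eq!(100, max_operations(2, 100, 1));
    }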
@@ -152,7 +152,7 @@ impl Runner {
            guard.key_guards.push(key_guard);
        }
 
-        // Execute the procedure. We need to release the lock whenever the the execution
+        // Execute the procedure. We need to release the lock whenever the execution
        // is successful or fail.
        self.execute_procedure_in_loop().await;
@@ -30,38 +30,87 @@ pub mod prelude;
 mod signature;
 use sqlparser_derive::{Visit, VisitMut};
 
-// sql output
-pub enum Output {
+/// new Output struct with output data(previously Output) and output meta
+#[derive(Debug)]
+pub struct Output {
+    pub data: OutputData,
+    pub meta: OutputMeta,
+}
+
+/// Original Output struct
+/// carrying result data to response/client/user interface
+pub enum OutputData {
    AffectedRows(usize),
    RecordBatches(RecordBatches),
-    Stream(SendableRecordBatchStream, Option<Arc<dyn PhysicalPlan>>),
+    Stream(SendableRecordBatchStream),
+}
+
+/// OutputMeta stores meta information produced/generated during the execution
+#[derive(Debug, Default)]
+pub struct OutputMeta {
+    /// May exist for query output. One can retrieve execution metrics from this plan.
+    pub plan: Option<Arc<dyn PhysicalPlan>>,
+    pub cost: usize,
 }
 
 impl Output {
-    // helper function to build original `Output::Stream`
-    pub fn new_stream(stream: SendableRecordBatchStream) -> Self {
-        Output::Stream(stream, None)
+    pub fn new_with_affected_rows(affected_rows: usize) -> Self {
+        Self {
+            data: OutputData::AffectedRows(affected_rows),
+            meta: Default::default(),
+        }
+    }
+
+    pub fn new_with_record_batches(recordbatches: RecordBatches) -> Self {
+        Self {
+            data: OutputData::RecordBatches(recordbatches),
+            meta: Default::default(),
+        }
+    }
+
+    pub fn new_with_stream(stream: SendableRecordBatchStream) -> Self {
+        Self {
+            data: OutputData::Stream(stream),
+            meta: Default::default(),
+        }
+    }
+
+    pub fn new(data: OutputData, meta: OutputMeta) -> Self {
+        Self { data, meta }
    }
 }
 
-impl Debug for Output {
+impl Debug for OutputData {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
-            Output::AffectedRows(rows) => write!(f, "Output::AffectedRows({rows})"),
-            Output::RecordBatches(recordbatches) => {
-                write!(f, "Output::RecordBatches({recordbatches:?})")
+            OutputData::AffectedRows(rows) => write!(f, "OutputData::AffectedRows({rows})"),
+            OutputData::RecordBatches(recordbatches) => {
+                write!(f, "OutputData::RecordBatches({recordbatches:?})")
            }
-            Output::Stream(_, df) => {
-                if df.is_some() {
-                    write!(f, "Output::Stream(<stream>, Some<physical_plan>)")
-                } else {
-                    write!(f, "Output::Stream(<stream>)")
-                }
+            OutputData::Stream(_) => {
+                write!(f, "OutputData::Stream(<stream>)")
            }
        }
    }
 }
+
+impl OutputMeta {
+    pub fn new(plan: Option<Arc<dyn PhysicalPlan>>, cost: usize) -> Self {
+        Self { plan, cost }
+    }
+
+    pub fn new_with_plan(plan: Arc<dyn PhysicalPlan>) -> Self {
+        Self {
+            plan: Some(plan),
+            cost: 0,
+        }
+    }
+
+    pub fn new_with_cost(cost: usize) -> Self {
+        Self { plan: None, cost }
+    }
+}
 
 pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;
 
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
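For callers, the practical effect of the split is that pattern matches move from `Output` itself to its `data` field while execution metadata travels alongside. A minimal sketch of the intended call-site shape, using standalone mock types rather than the crate's actual definitions:

    // Standalone mock of the Output/OutputData/OutputMeta split; the real
    // types live in common-query and carry record batches, streams, and plans.
    #[derive(Debug, Default)]
    struct OutputMeta {
        cost: usize,
    }

    #[derive(Debug)]
    enum OutputData {
        AffectedRows(usize),
    }

    #[derive(Debug)]
    struct Output {
        data: OutputData,
        meta: OutputMeta,
    }

    fn main() {
        let output = Output {
            data: OutputData::AffectedRows(3),
            meta: OutputMeta::default(),
        };
        // Callers now match on `output.data` instead of on `Output` directly.
        match output.data {
            OutputData::AffectedRows(n) => {
                println!("{n} rows affected, cost {}", output.meta.cost)
            }
        }
    }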
@@ -32,7 +32,7 @@ use snafu::ResultExt;
 
 use crate::error::{self, Result};
 use crate::{
-    DfRecordBatch, DfSendableRecordBatchStream, RecordBatch, RecordBatchStream,
+    DfRecordBatch, DfSendableRecordBatchStream, OrderOption, RecordBatch, RecordBatchStream,
    SendableRecordBatchStream, Stream,
 };
 
@@ -228,6 +228,10 @@ impl RecordBatchStream for RecordBatchStreamAdapter {
            Metrics::Unavailable | Metrics::Unresolved(_) => None,
        }
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
 }
 
 impl Stream for RecordBatchStreamAdapter {
@@ -316,6 +320,14 @@ impl RecordBatchStream for AsyncRecordBatchStreamAdapter {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for AsyncRecordBatchStreamAdapter {
@@ -375,6 +387,14 @@ mod test {
    fn schema(&self) -> SchemaRef {
        unimplemented!()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for MaybeErrorRecordBatchStream {
@@ -39,13 +39,9 @@ use snafu::{ensure, ResultExt};
 pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
    fn schema(&self) -> SchemaRef;
 
-    fn output_ordering(&self) -> Option<&[OrderOption]> {
-        None
-    }
+    fn output_ordering(&self) -> Option<&[OrderOption]>;
 
-    fn metrics(&self) -> Option<RecordBatchMetrics> {
-        None
-    }
+    fn metrics(&self) -> Option<RecordBatchMetrics>;
 }
 
 pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;
@@ -74,6 +70,14 @@ impl RecordBatchStream for EmptyRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for EmptyRecordBatchStream {
@@ -192,6 +196,14 @@ impl RecordBatchStream for SimpleRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        self.inner.schema()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for SimpleRecordBatchStream {
@@ -41,7 +41,8 @@ mod tests {
    use futures::Stream;
 
    use super::*;
-    use crate::RecordBatchStream;
+    use crate::adapter::RecordBatchMetrics;
+    use crate::{OrderOption, RecordBatchStream};
 
    struct MockRecordBatchStream {
        batch: Option<RecordBatch>,
@@ -52,6 +53,14 @@ mod tests {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for MockRecordBatchStream {
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 use client::Database;
-use common_query::Output;
+use common_query::OutputData;
 use common_recordbatch::util;
 
 pub enum ExpectedOutput<'a> {
@@ -23,22 +23,24 @@ pub enum ExpectedOutput<'a> {
 
 pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {
    let output = db.sql(sql).await.unwrap();
+    let output = output.data;
+
    match (&output, expected) {
-        (Output::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
+        (OutputData::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
            assert_eq!(*x, y, "actual: \n{}", x)
        }
-        (Output::RecordBatches(_), ExpectedOutput::QueryResult(x))
-        | (Output::Stream(_, _), ExpectedOutput::QueryResult(x)) => {
+        (OutputData::RecordBatches(_), ExpectedOutput::QueryResult(x))
+        | (OutputData::Stream(_), ExpectedOutput::QueryResult(x)) => {
            check_output_stream(output, x).await
        }
        _ => panic!(),
    }
 }
 
-pub async fn check_output_stream(output: Output, expected: &str) {
+pub async fn check_output_stream(output: OutputData, expected: &str) {
    let recordbatches = match output {
-        Output::Stream(stream, _) => util::collect_batches(stream).await.unwrap(),
-        Output::RecordBatches(recordbatches) => recordbatches,
+        OutputData::Stream(stream) => util::collect_batches(stream).await.unwrap(),
+        OutputData::RecordBatches(recordbatches) => recordbatches,
        _ => unreachable!(),
    };
    let pretty_print = recordbatches.pretty_print().unwrap();
@@ -36,7 +36,7 @@ use crate::{error, Interval};
 /// - for [TimeUnit::Second]: [-262144-01-01 00:00:00, +262143-12-31 23:59:59]
 /// - for [TimeUnit::Millisecond]: [-262144-01-01 00:00:00.000, +262143-12-31 23:59:59.999]
 /// - for [TimeUnit::Microsecond]: [-262144-01-01 00:00:00.000000, +262143-12-31 23:59:59.999999]
-/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145225, 2262-04-11 23:47:16.854775807]
+/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145224192, 2262-04-11 23:47:16.854775807]
 ///
 /// # Note:
 /// For values out of range, you can still store these timestamps, but while performing arithmetic
@@ -187,28 +187,28 @@ impl Timestamp {
        Self { unit, value }
    }
 
-    pub fn new_second(value: i64) -> Self {
+    pub const fn new_second(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Second,
        }
    }
 
-    pub fn new_millisecond(value: i64) -> Self {
+    pub const fn new_millisecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Millisecond,
        }
    }
 
-    pub fn new_microsecond(value: i64) -> Self {
+    pub const fn new_microsecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Microsecond,
        }
    }
 
-    pub fn new_nanosecond(value: i64) -> Self {
+    pub const fn new_nanosecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Nanosecond,
@@ -281,8 +281,26 @@ impl Timestamp {
            .and_then(|v| v.checked_add(micros as i64))
            .map(Timestamp::new_microsecond)
        } else {
+            // Refer to <https://github.com/chronotope/chrono/issues/1289>
+            //
+            // subsec nanos are always non-negative, however the timestamp itself (both in seconds and in nanos) can be
+            // negative. Now i64::MIN is NOT dividable by 1_000_000_000, so
+            //
+            // (sec * 1_000_000_000) + nsec
+            //
+            // may underflow (even when in theory we COULD represent the datetime as i64) because we add the non-negative
+            // nanos AFTER the multiplication. This is fixed by converting the negative case to
+            //
+            // ((sec + 1) * 1_000_000_000) + (nsec - 1_000_000_000)
+            let mut sec = sec;
+            let mut nsec = nsec as i64;
+            if sec < 0 && nsec > 0 {
+                nsec -= 1_000_000_000;
+                sec += 1;
+            }
+
            sec.checked_mul(1_000_000_000)
-                .and_then(|v| v.checked_add(nsec as i64))
+                .and_then(|v| v.checked_add(nsec))
                .map(Timestamp::new_nanosecond)
        }
    }
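To see why the rebalancing matters, take `i64::MIN = -9_223_372_036_854_775_808` nanoseconds. Split into whole seconds and non-negative subsecond nanos that is `sec = -9_223_372_037`, `nsec = 145_224_192` (the `.145224192` in the doc bound above). Multiplying first gives `-9_223_372_037_000_000_000`, already below `i64::MIN`, so `checked_mul` reports overflow even though the final sum fits. A compact standalone check of that arithmetic, mirroring the shape of the fix:

    // Worked check of the negative-case rebalancing described above.
    fn combine(mut sec: i64, mut nsec: i64) -> Option<i64> {
        // Rebalance so the multiplication cannot underflow before the add.
        if sec < 0 && nsec > 0 {
            nsec -= 1_000_000_000;
            sec += 1;
        }
        sec.checked_mul(1_000_000_000)?.checked_add(nsec)
    }

    fn main() {
        // i64::MIN split into whole seconds and non-negative subsec nanos.
        let (sec, nsec) = (-9_223_372_037_i64, 145_224_192_i64);
        // Naive order underflows: sec * 1e9 is already below i64::MIN...
        assert!(sec.checked_mul(1_000_000_000).is_none());
        // ...while the rebalanced form recovers exactly i64::MIN.
        assert_eq!(Some(i64::MIN), combine(sec, nsec));
    }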
@@ -425,6 +443,20 @@ impl Timestamp {
    }
 }
 
+impl Timestamp {
+    pub const MIN_SECOND: Self = Self::new_second(-8_334_601_228_800);
+    pub const MAX_SECOND: Self = Self::new_second(8_210_266_876_799);
+
+    pub const MIN_MILLISECOND: Self = Self::new_millisecond(-8_334_601_228_800_000);
+    pub const MAX_MILLISECOND: Self = Self::new_millisecond(8_210_266_876_799_999);
+
+    pub const MIN_MICROSECOND: Self = Self::new_microsecond(-8_334_601_228_800_000_000);
+    pub const MAX_MICROSECOND: Self = Self::new_microsecond(8_210_266_876_799_999_999);
+
+    pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
+    pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);
+}
+
 /// Converts the naive datetime (which has no specific timezone) to a
 /// nanosecond epoch timestamp in UTC.
 fn naive_datetime_to_timestamp(
@@ -586,6 +618,7 @@ impl Hash for Timestamp {
 mod tests {
    use std::collections::hash_map::DefaultHasher;
 
+    use chrono_tz::Tz;
    use rand::Rng;
    use serde_json::Value;
 
@@ -1297,7 +1330,7 @@ mod tests {
            "+262142-12-31 23:59:59Z",
            "+262142-12-31 23:59:59.999Z",
            "+262142-12-31 23:59:59.999999Z",
-            "1677-09-21 00:12:43.145225Z",
+            "1677-09-21 00:12:43.145224192Z",
            "2262-04-11 23:47:16.854775807Z",
            "+100000-01-01 00:00:01.5Z",
        ];
@@ -1306,4 +1339,47 @@ mod tests {
            Timestamp::from_str_utc(s).unwrap();
        }
    }
+
+    #[test]
+    fn test_min_nanos_roundtrip() {
+        let (sec, nsec) = Timestamp::MIN_NANOSECOND.split();
+        let ts = Timestamp::from_splits(sec, nsec).unwrap();
+        assert_eq!(Timestamp::MIN_NANOSECOND, ts);
+    }
+
+    #[test]
+    fn test_timestamp_bound_format() {
+        assert_eq!(
+            "1677-09-21 00:12:43.145224192",
+            Timestamp::MIN_NANOSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "2262-04-11 23:47:16.854775807",
+            Timestamp::MAX_NANOSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "-262143-01-01 00:00:00",
+            Timestamp::MIN_MICROSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "+262142-12-31 23:59:59.999999",
+            Timestamp::MAX_MICROSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "-262143-01-01 00:00:00",
+            Timestamp::MIN_MILLISECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "+262142-12-31 23:59:59.999",
+            Timestamp::MAX_MILLISECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "-262143-01-01 00:00:00",
+            Timestamp::MIN_SECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+        assert_eq!(
+            "+262142-12-31 23:59:59",
+            Timestamp::MAX_SECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
+        );
+    }
+}
@@ -27,7 +27,7 @@ use common_error::ext::BoxedError;
 use common_error::status_code::StatusCode;
 use common_query::logical_plan::Expr;
 use common_query::physical_plan::DfPhysicalPlanAdapter;
-use common_query::{DfPhysicalPlan, Output};
+use common_query::{DfPhysicalPlan, OutputData};
 use common_recordbatch::SendableRecordBatchStream;
 use common_runtime::Runtime;
 use common_telemetry::tracing::{self, info_span};
@@ -651,11 +651,11 @@ impl RegionServerInner {
            .await
            .context(ExecuteLogicalPlanSnafu)?;
 
-        match result {
-            Output::AffectedRows(_) | Output::RecordBatches(_) => {
+        match result.data {
+            OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => {
                UnsupportedOutputSnafu { expected: "stream" }.fail()
            }
-            Output::Stream(stream, _) => Ok(stream),
+            OutputData::Stream(stream) => Ok(stream),
        }
    }
 
@@ -370,6 +370,36 @@ impl Value {
        }
    }
 }
 
+pub trait TryAsPrimitive<T: LogicalPrimitiveType> {
+    fn try_as_primitive(&self) -> Option<T::Native>;
+}
+
+macro_rules! impl_try_as_primitive {
+    ($Type: ident, $Variant: ident) => {
+        impl TryAsPrimitive<crate::types::$Type> for Value {
+            fn try_as_primitive(
+                &self,
+            ) -> Option<<crate::types::$Type as crate::types::LogicalPrimitiveType>::Native> {
+                match self {
+                    Value::$Variant(v) => Some((*v).into()),
+                    _ => None,
+                }
+            }
+        }
+    };
+}
+
+impl_try_as_primitive!(Int8Type, Int8);
+impl_try_as_primitive!(Int16Type, Int16);
+impl_try_as_primitive!(Int32Type, Int32);
+impl_try_as_primitive!(Int64Type, Int64);
+impl_try_as_primitive!(UInt8Type, UInt8);
+impl_try_as_primitive!(UInt16Type, UInt16);
+impl_try_as_primitive!(UInt32Type, UInt32);
+impl_try_as_primitive!(UInt64Type, UInt64);
+impl_try_as_primitive!(Float32Type, Float32);
+impl_try_as_primitive!(Float64Type, Float64);
+
 pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValue> {
    Ok(match output_type {
        ConcreteDataType::Null(_) => ScalarValue::Null,
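The macro above stamps out one `TryAsPrimitive` impl per primitive variant, so extracting a native value becomes a typed, checked call instead of a manual `match` at every call site. A self-contained sketch of the pattern with mock types (the real trait is generic over the crate's `LogicalPrimitiveType`; here one impl is written by hand where the crate uses the macro):

    // Mock of the TryAsPrimitive pattern: one impl per enum variant.
    #[derive(Debug)]
    enum Value {
        Int32(i32),
        Float64(f64),
    }

    trait TryAsPrimitive<T> {
        fn try_as_primitive(&self) -> Option<T>;
    }

    impl TryAsPrimitive<i32> for Value {
        fn try_as_primitive(&self) -> Option<i32> {
            match self {
                Value::Int32(v) => Some(*v),
                _ => None,
            }
        }
    }

    fn main() {
        let v = Value::Int32(7);
        let got: Option<i32> = v.try_as_primitive();
        assert_eq!(Some(7), got);
        // A mismatched variant yields None instead of panicking.
        let f = Value::Float64(1.5);
        assert_eq!(None, TryAsPrimitive::<i32>::try_as_primitive(&f));
    }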
@@ -2387,4 +2417,12 @@ mod tests {
        );
        check_value_ref_size_eq(&ValueRef::Decimal128(Decimal128::new(1234, 3, 1)), 32)
    }
+
+    #[test]
+    fn test_incorrect_default_value_issue_3479() {
+        let value = OrderedF64::from(0.047318541668048164);
+        let serialized = serde_json::to_string(&value).unwrap();
+        let deserialized: OrderedF64 = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(value, deserialized);
+    }
 }
@@ -22,8 +22,9 @@ use std::task::{Context, Poll};
 use common_datasource::object_store::build_backend;
 use common_error::ext::BoxedError;
 use common_query::prelude::Expr;
+use common_recordbatch::adapter::RecordBatchMetrics;
 use common_recordbatch::error::{CastVectorSnafu, ExternalSnafu, Result as RecordBatchResult};
-use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
+use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
 use datafusion::logical_expr::utils as df_logical_expr_utils;
 use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
@@ -151,6 +152,14 @@ impl RecordBatchStream for FileToScanRegionStream {
    fn schema(&self) -> SchemaRef {
        self.scan_schema.clone()
    }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for FileToScanRegionStream {
@@ -18,6 +18,7 @@ common-query.workspace = true
 common-telemetry.workspace = true
 common-time.workspace = true
 datatypes.workspace = true
+enum_dispatch = "0.3"
 hydroflow = "0.5.0"
 itertools.workspace = true
 num-traits = "0.2"
@@ -27,3 +28,6 @@ session.workspace = true
 snafu.workspace = true
 tokio.workspace = true
 tonic.workspace = true
+
+[dev-dependencies]
+serde_json = "1.0"
@@ -24,5 +24,6 @@ mod scalar;
 pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
 pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
 pub(crate) use id::{GlobalId, Id, LocalId};
+pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
 pub(crate) use relation::{AggregateExpr, AggregateFunc};
 pub(crate) use scalar::ScalarExpr;
@@ -61,4 +61,7 @@ pub enum EvalError {
 
    #[snafu(display("Unsupported temporal filter: {reason}"))]
    UnsupportedTemporalFilter { reason: String, location: Location },
+
+    #[snafu(display("Overflowed during evaluation"))]
+    Overflow { location: Location },
 }
@@ -45,7 +45,7 @@ use crate::repr::{self, value_to_internal_ts, Diff, Row};
 /// expressions in `self.expressions`, even though this is not something
 /// we can directly evaluate. The plan creation methods will defensively
 /// ensure that the right thing happens.
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
 pub struct MapFilterProject {
    /// A sequence of expressions that should be appended to the row.
    ///
@@ -415,7 +415,7 @@ impl MapFilterProject {
 }
 
 /// A wrapper type which indicates it is safe to simply evaluate all expressions.
-#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
+#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
 pub struct SafeMfpPlan {
    pub(crate) mfp: MapFilterProject,
 }
@@ -800,7 +800,7 @@ mod test {
        .unwrap();
        // only retain sum result
        let mfp = mfp.project(vec![4]).unwrap();
-        // accept only if if the sum is greater than 10
+        // accept only if the sum is greater than 10
        let mfp = mfp
            .filter(vec![ScalarExpr::Column(0).call_binary(
                ScalarExpr::Literal(Value::from(10i32), ConcreteDataType::int32_datatype()),
@@ -21,7 +21,7 @@ mod accum;
 mod func;
 
 /// Describes an aggregation expression.
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
 pub struct AggregateExpr {
    /// Names the aggregation function.
    pub func: AggregateFunc,
@@ -14,7 +14,10 @@
 
 //! Accumulators for aggregate functions that's is accumulatable. i.e. sum/count
 //!
-//! Currently support sum, count, any, all
+//! Accumulator will only be restore from row and being updated every time dataflow need process a new batch of rows.
+//! So the overhead is acceptable.
+//!
+//! Currently support sum, count, any, all and min/max(with one caveat that min/max can't support delete with aggregate).
 
 use std::fmt::Display;
 
@@ -22,13 +25,506 @@ use common_decimal::Decimal128;
 use common_time::{Date, DateTime};
 use datatypes::data_type::ConcreteDataType;
 use datatypes::value::{OrderedF32, OrderedF64, OrderedFloat, Value};
+use enum_dispatch::enum_dispatch;
 use hydroflow::futures::stream::Concat;
 use serde::{Deserialize, Serialize};
+use snafu::ensure;
 
-use crate::expr::error::{InternalSnafu, TryFromValueSnafu, TypeMismatchSnafu};
+use crate::expr::error::{InternalSnafu, OverflowSnafu, TryFromValueSnafu, TypeMismatchSnafu};
+use crate::expr::relation::func::GenericFn;
 use crate::expr::{AggregateFunc, EvalError};
 use crate::repr::Diff;
+
+/// Accumulates values for the various types of accumulable aggregations.
+#[enum_dispatch]
+pub trait Accumulator: Sized {
+    fn into_state(self) -> Vec<Value>;
+
+    fn update(
+        &mut self,
+        aggr_fn: &AggregateFunc,
+        value: Value,
+        diff: Diff,
+    ) -> Result<(), EvalError>;
+
+    fn update_batch<I>(&mut self, aggr_fn: &AggregateFunc, value_diffs: I) -> Result<(), EvalError>
+    where
+        I: IntoIterator<Item = (Value, Diff)>,
+    {
+        for (v, d) in value_diffs {
+            self.update(aggr_fn, v, d)?;
+        }
+        Ok(())
+    }
+
+    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError>;
+}
+
+/// Bool accumulator, used for `Any` `All` `Max/MinBool`
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Bool {
+    /// The number of `true` values observed.
+    trues: Diff,
+    /// The number of `false` values observed.
+    falses: Diff,
+}
+
+impl TryFrom<Vec<Value>> for Bool {
+    type Error = EvalError;
+
+    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
+        ensure!(
+            state.len() == 2,
+            InternalSnafu {
+                reason: "Bool Accumulator state should have 2 values",
+            }
+        );
+
+        let mut iter = state.into_iter();
+
+        Ok(Self {
+            trues: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+            falses: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+        })
+    }
+}
+
+impl Accumulator for Bool {
+    fn into_state(self) -> Vec<Value> {
+        vec![self.trues.into(), self.falses.into()]
+    }
+
+    /// Null values are ignored
+    fn update(
+        &mut self,
+        aggr_fn: &AggregateFunc,
+        value: Value,
+        diff: Diff,
+    ) -> Result<(), EvalError> {
+        ensure!(
+            matches!(
+                aggr_fn,
+                AggregateFunc::Any
+                    | AggregateFunc::All
+                    | AggregateFunc::MaxBool
+                    | AggregateFunc::MinBool
+            ),
+            InternalSnafu {
+                reason: format!(
+                    "Bool Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+        );
+
+        match value {
+            Value::Boolean(true) => self.trues += diff,
+            Value::Boolean(false) => self.falses += diff,
+            Value::Null => (), // ignore nulls
+            x => {
+                return Err(TypeMismatchSnafu {
+                    expected: ConcreteDataType::boolean_datatype(),
+                    actual: x.data_type(),
+                }
+                .build());
+            }
+        };
+        Ok(())
+    }
+
+    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
+        match aggr_fn {
+            AggregateFunc::Any => Ok(Value::from(self.trues > 0)),
+            AggregateFunc::All => Ok(Value::from(self.falses == 0)),
+            AggregateFunc::MaxBool => Ok(Value::from(self.trues > 0)),
+            AggregateFunc::MinBool => Ok(Value::from(self.falses == 0)),
+            _ => Err(InternalSnafu {
+                reason: format!(
+                    "Bool Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+            .build()),
+        }
+    }
+}
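Tracking `trues`/`falses` as signed counts is what lets `Any`/`All` stay correct under retractions (`diff < 0`): `Any` holds while at least one `true` is outstanding, `All` while no `false` is. A standalone sketch of that bookkeeping, separate from the crate's types:

    // Illustrative count-based Any/All over insertions (+1) and retractions (-1).
    #[derive(Default)]
    struct BoolCounts {
        trues: i64,
        falses: i64,
    }

    impl BoolCounts {
        fn update(&mut self, value: bool, diff: i64) {
            if value {
                self.trues += diff;
            } else {
                self.falses += diff;
            }
        }
        fn any(&self) -> bool {
            self.trues > 0
        }
        fn all(&self) -> bool {
            self.falses == 0
        }
    }

    fn main() {
        let mut acc = BoolCounts::default();
        acc.update(true, 1);
        acc.update(false, 1);
        assert!(acc.any() && !acc.all());
        // Retract the lone `false`; `all` flips back to true.
        acc.update(false, -1);
        assert!(acc.any() && acc.all());
    }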
+/// Accumulates simple numeric values for sum over integer.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct SimpleNumber {
+    /// The accumulation of all non-NULL values observed.
+    accum: i128,
+    /// The number of non-NULL values observed.
+    non_nulls: Diff,
+}
+
+impl TryFrom<Vec<Value>> for SimpleNumber {
+    type Error = EvalError;
+
+    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
+        ensure!(
+            state.len() == 2,
+            InternalSnafu {
+                reason: "Number Accumulator state should have 2 values",
+            }
+        );
+        let mut iter = state.into_iter();
+
+        Ok(Self {
+            accum: Decimal128::try_from(iter.next().unwrap())
+                .map_err(err_try_from_val)?
+                .val(),
+            non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+        })
+    }
+}
+
+impl Accumulator for SimpleNumber {
+    fn into_state(self) -> Vec<Value> {
+        vec![
+            Value::Decimal128(Decimal128::new(self.accum, 38, 0)),
+            self.non_nulls.into(),
+        ]
+    }
+
+    fn update(
+        &mut self,
+        aggr_fn: &AggregateFunc,
+        value: Value,
+        diff: Diff,
+    ) -> Result<(), EvalError> {
+        ensure!(
+            matches!(
+                aggr_fn,
+                AggregateFunc::SumInt16
+                    | AggregateFunc::SumInt32
+                    | AggregateFunc::SumInt64
+                    | AggregateFunc::SumUInt16
+                    | AggregateFunc::SumUInt32
+                    | AggregateFunc::SumUInt64
+            ),
+            InternalSnafu {
+                reason: format!(
+                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+        );
+
+        let v = match (aggr_fn, value) {
+            (AggregateFunc::SumInt16, Value::Int16(x)) => i128::from(x),
+            (AggregateFunc::SumInt32, Value::Int32(x)) => i128::from(x),
+            (AggregateFunc::SumInt64, Value::Int64(x)) => i128::from(x),
+            (AggregateFunc::SumUInt16, Value::UInt16(x)) => i128::from(x),
+            (AggregateFunc::SumUInt32, Value::UInt32(x)) => i128::from(x),
+            (AggregateFunc::SumUInt64, Value::UInt64(x)) => i128::from(x),
+            (_f, Value::Null) => return Ok(()), // ignore null
+            (f, v) => {
+                let expected_datatype = f.signature().input;
+                return Err(TypeMismatchSnafu {
+                    expected: expected_datatype,
+                    actual: v.data_type(),
+                }
+                .build())?;
+            }
+        };
+
+        self.accum += v * i128::from(diff);
+
+        self.non_nulls += diff;
+        Ok(())
+    }
+
+    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
+        match aggr_fn {
+            AggregateFunc::SumInt16 | AggregateFunc::SumInt32 | AggregateFunc::SumInt64 => {
+                i64::try_from(self.accum)
+                    .map_err(|_e| OverflowSnafu {}.build())
+                    .map(Value::from)
+            }
+            AggregateFunc::SumUInt16 | AggregateFunc::SumUInt32 | AggregateFunc::SumUInt64 => {
+                u64::try_from(self.accum)
+                    .map_err(|_e| OverflowSnafu {}.build())
+                    .map(Value::from)
+            }
+            _ => Err(InternalSnafu {
+                reason: format!(
+                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+            .build()),
+        }
+    }
+}
+/// Accumulates float values for sum over floating numbers.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct Float {
+    /// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
+    /// accum will be set to zero if `non_nulls` is zero.
+    accum: OrderedF64,
+    /// Counts +inf
+    pos_infs: Diff,
+    /// Counts -inf
+    neg_infs: Diff,
+    /// Counts NaNs
+    nans: Diff,
+    /// Counts non-NULL values
+    non_nulls: Diff,
+}
+
+impl TryFrom<Vec<Value>> for Float {
+    type Error = EvalError;
+
+    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
+        ensure!(
+            state.len() == 5,
+            InternalSnafu {
+                reason: "Float Accumulator state should have 5 values",
+            }
+        );
+
+        let mut iter = state.into_iter();
+
+        let mut ret = Self {
+            accum: OrderedF64::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+            pos_infs: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+            neg_infs: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+            nans: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+            non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+        };
+
+        // This prevent counter-intuitive behavior of summing over no values
+        if ret.non_nulls == 0 {
+            ret.accum = OrderedFloat::from(0.0);
+        }
+
+        Ok(ret)
+    }
+}
+
+impl Accumulator for Float {
+    fn into_state(self) -> Vec<Value> {
+        vec![
+            self.accum.into(),
+            self.pos_infs.into(),
+            self.neg_infs.into(),
+            self.nans.into(),
+            self.non_nulls.into(),
+        ]
+    }
+
+    /// sum ignore null
+    fn update(
+        &mut self,
+        aggr_fn: &AggregateFunc,
+        value: Value,
+        diff: Diff,
+    ) -> Result<(), EvalError> {
+        ensure!(
+            matches!(
+                aggr_fn,
+                AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64
+            ),
+            InternalSnafu {
+                reason: format!(
+                    "Float Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+        );
+
+        let x = match (aggr_fn, value) {
+            (AggregateFunc::SumFloat32, Value::Float32(x)) => OrderedF64::from(*x as f64),
+            (AggregateFunc::SumFloat64, Value::Float64(x)) => OrderedF64::from(x),
+            (_f, Value::Null) => return Ok(()), // ignore null
+            (f, v) => {
+                let expected_datatype = f.signature().input;
+                return Err(TypeMismatchSnafu {
+                    expected: expected_datatype,
+                    actual: v.data_type(),
+                }
+                .build())?;
+            }
+        };
+
+        if x.is_nan() {
+            self.nans += diff;
+        } else if x.is_infinite() {
+            if x.is_sign_positive() {
+                self.pos_infs += diff;
+            } else {
+                self.neg_infs += diff;
+            }
+        } else {
+            self.accum += *(x * OrderedF64::from(diff as f64));
+        }
+
+        self.non_nulls += diff;
+        Ok(())
+    }
+
+    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
+        match aggr_fn {
+            AggregateFunc::SumFloat32 => Ok(Value::Float32(OrderedF32::from(self.accum.0 as f32))),
+            AggregateFunc::SumFloat64 => Ok(Value::Float64(self.accum)),
+            _ => Err(InternalSnafu {
+                reason: format!(
+                    "Float Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+            .build()),
+        }
+    }
+}
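Counting NaNs and infinities out of band instead of folding them into `accum` is what keeps the sum retractable: in IEEE-754 arithmetic `inf - inf` is `NaN`, so once a special value enters the running sum it can never be subtracted back out, while a count can simply return to zero. A small standalone illustration of the hazard and the count-based fix:

    // Why special float values are counted, not summed: IEEE-754 arithmetic
    // cannot undo adding an infinity.
    fn main() {
        // Naive retraction: add +inf, then subtract it again.
        let naive = (0.0_f64 + f64::INFINITY) - f64::INFINITY;
        assert!(naive.is_nan()); // inf - inf is NaN; the sum is poisoned

        // Count-based: track infinities out of band, retract by decrementing.
        let (mut accum, mut pos_infs) = (0.0_f64, 0_i64);
        pos_infs += 1; // "add" +inf
        pos_infs -= 1; // retract it
        // No +inf outstanding, so the finite accumulator is the answer.
        let result = if pos_infs > 0 { f64::INFINITY } else { accum };
        assert_eq!(0.0, result);
        accum += 1.0; // the accumulator keeps working normally
        assert_eq!(1.0, accum);
    }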
+/// Accumulates a single `Ord`ed `Value`, useful for min/max aggregations.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
+pub struct OrdValue {
+    val: Option<Value>,
+    non_nulls: Diff,
+}
+
+impl TryFrom<Vec<Value>> for OrdValue {
+    type Error = EvalError;
+
+    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
+        ensure!(
+            state.len() == 2,
+            InternalSnafu {
+                reason: "OrdValue Accumulator state should have 2 values",
+            }
+        );
+
+        let mut iter = state.into_iter();
+
+        Ok(Self {
+            val: {
+                let v = iter.next().unwrap();
+                if v == Value::Null {
+                    None
+                } else {
+                    Some(v)
+                }
+            },
+            non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
+        })
+    }
+}
+
+impl Accumulator for OrdValue {
+    fn into_state(self) -> Vec<Value> {
+        vec![self.val.unwrap_or(Value::Null), self.non_nulls.into()]
+    }
+
+    /// min/max try to find results in all non-null values, if all values are null, the result is null.
+    /// count(col_name) gives the number of non-null values, count(*) gives the number of rows including nulls.
+    /// TODO(discord9): add count(*) as a aggr function
+    fn update(
+        &mut self,
+        aggr_fn: &AggregateFunc,
+        value: Value,
+        diff: Diff,
+    ) -> Result<(), EvalError> {
+        ensure!(
+            aggr_fn.is_max() || aggr_fn.is_min() || matches!(aggr_fn, AggregateFunc::Count),
+            InternalSnafu {
+                reason: format!(
+                    "OrdValue Accumulator does not support this aggregation function: {:?}",
+                    aggr_fn
+                ),
+            }
+        );
+        if diff <= 0 && (aggr_fn.is_max() || aggr_fn.is_min()) {
+            return Err(InternalSnafu {
+                reason: "OrdValue Accumulator does not support non-monotonic input for min/max aggregation".to_string(),
+            }.build());
+        }
+
+        // if aggr_fn is count, the incoming value type doesn't matter in type checking
+        // otherwise, type need to be the same or value can be null
+        let check_type_aggr_fn_and_arg_value =
+            ty_eq_without_precision(value.data_type(), aggr_fn.signature().input)
+                || matches!(aggr_fn, AggregateFunc::Count)
+                || value.is_null();
+        let check_type_aggr_fn_and_self_val = self
+            .val
+            .as_ref()
+            .map(|zelf| ty_eq_without_precision(zelf.data_type(), aggr_fn.signature().input))
+            .unwrap_or(true)
+            || matches!(aggr_fn, AggregateFunc::Count);
+
+        if !check_type_aggr_fn_and_arg_value {
+            return Err(TypeMismatchSnafu {
+                expected: aggr_fn.signature().input,
+                actual: value.data_type(),
+            }
+            .build());
+        } else if !check_type_aggr_fn_and_self_val {
+            return Err(TypeMismatchSnafu {
+                expected: aggr_fn.signature().input,
+                actual: self
+                    .val
+                    .as_ref()
+                    .map(|v| v.data_type())
+                    .unwrap_or(ConcreteDataType::null_datatype()),
+            }
+            .build());
+        }
+
+        let is_null = value.is_null();
+        if is_null {
+            return Ok(());
+        }
+
+        if !is_null {
+            // compile count(*) to count(true) to include null/non-nulls
+            // And the counts of non-null values are updated here
+            self.non_nulls += diff;
+
+            match aggr_fn.signature().generic_fn {
+                GenericFn::Max => {
+                    self.val = self
+                        .val
+                        .clone()
+                        .map(|v| v.max(value.clone()))
+                        .or_else(|| Some(value))
+                }
+                GenericFn::Min => {
+                    self.val = self
+                        .val
+                        .clone()
+                        .map(|v| v.min(value.clone()))
+                        .or_else(|| Some(value))
+                }
+                GenericFn::Count => (),
+                _ => unreachable!("already checked by ensure!"),
+            }
+        };
+        // min/max ignore nulls
+
+        Ok(())
+    }
|
|
||||||
|
fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
|
||||||
|
if aggr_fn.is_max() || aggr_fn.is_min() {
|
||||||
|
Ok(self.val.clone().unwrap_or(Value::Null))
|
||||||
|
} else if matches!(aggr_fn, AggregateFunc::Count) {
|
||||||
|
Ok(self.non_nulls.into())
|
||||||
|
} else {
|
||||||
|
Err(InternalSnafu {
|
||||||
|
reason: format!(
|
||||||
|
"OrdValue Accumulator does not support this aggregation function: {:?}",
|
||||||
|
aggr_fn
|
||||||
|
),
|
||||||
|
}
|
||||||
|
.build())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
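
The `diff` argument threaded through every `update` above is what makes these accumulators incremental: a positive diff stands for inserted rows and a negative diff for retracted ones, which is why `sum` can subtract on deletion while min/max (which cannot "un-see" a value) reject negative diffs. A minimal self-contained sketch of that idea, using plain `i64` in place of the crate's `Value`/`Diff` types (all names here are illustrative, not part of the codebase):

```rust
/// Toy diff-aware sum: `diff = +n` inserts n copies of `value`,
/// `diff = -n` retracts n copies, mirroring `update(value, diff)` above.
#[derive(Debug, Default)]
struct DiffSum {
    accum: i64,
    non_nulls: i64,
}

impl DiffSum {
    fn update(&mut self, value: Option<i64>, diff: i64) {
        let Some(v) = value else { return }; // nulls are ignored
        self.accum += v * diff;
        self.non_nulls += diff;
    }

    fn eval(&self) -> i64 {
        // Same convention as the Float accumulator: no observed rows sums to zero.
        if self.non_nulls == 0 {
            0
        } else {
            self.accum
        }
    }
}

fn main() {
    let mut sum = DiffSum::default();
    sum.update(Some(3), 1); // insert 3
    sum.update(Some(5), 2); // insert two copies of 5
    sum.update(Some(5), -1); // retract one of them
    sum.update(None, 1); // null, ignored
    assert_eq!(sum.eval(), 8);
}
```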

/// Accumulates values for the various types of accumulable aggregations.
///
/// We assume that there are not more than 2^32 elements for the aggregation.
@@ -38,34 +534,407 @@ use crate::repr::Diff;
/// The float accumulator performs accumulation with tolerance for floating point error.
///
/// TODO(discord9): check for overflowing
+#[enum_dispatch(Accumulator)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Accum {
    /// Accumulates boolean values.
-    Bool {
-        /// The number of `true` values observed.
-        trues: Diff,
-        /// The number of `false` values observed.
-        falses: Diff,
-    },
+    Bool(Bool),
    /// Accumulates simple numeric values.
-    SimpleNumber {
-        /// The accumulation of all non-NULL values observed.
-        accum: i128,
-        /// The number of non-NULL values observed.
-        non_nulls: Diff,
-    },
+    SimpleNumber(SimpleNumber),
    /// Accumulates float values.
-    Float {
-        /// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
-        /// accum will be set to zero if `non_nulls` is zero.
-        accum: OrderedF64,
-        /// Counts +inf
-        pos_infs: Diff,
-        /// Counts -inf
-        neg_infs: Diff,
-        /// Counts NaNs
-        nans: Diff,
-        /// Counts non-NULL values
-        non_nulls: Diff,
-    },
+    Float(Float),
+    /// Accumulates `Value`s that impl `Ord`
+    OrdValue(OrdValue),
}

impl Accum {
    pub fn new_accum(aggr_fn: &AggregateFunc) -> Result<Self, EvalError> {
        Ok(match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Self::from(Bool {
                trues: 0,
                falses: 0,
            }),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => Self::from(SimpleNumber {
                accum: 0,
                non_nulls: 0,
            }),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => Self::from(Float {
                accum: OrderedF64::from(0.0),
                pos_infs: 0,
                neg_infs: 0,
                nans: 0,
                non_nulls: 0,
            }),
            f if f.is_max() || f.is_min() || matches!(f, AggregateFunc::Count) => {
                Self::from(OrdValue {
                    val: None,
                    non_nulls: 0,
                })
            }
            f => {
                return Err(InternalSnafu {
                    reason: format!(
                        "Accumulator does not support this aggregation function: {:?}",
                        f
                    ),
                }
                .build());
            }
        })
    }

    pub fn try_into_accum(aggr_fn: &AggregateFunc, state: Vec<Value>) -> Result<Self, EvalError> {
        match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Ok(Self::from(Bool::try_from(state)?)),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => Ok(Self::from(SimpleNumber::try_from(state)?)),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => {
                Ok(Self::from(Float::try_from(state)?))
            }
            f if f.is_max() || f.is_min() || matches!(f, AggregateFunc::Count) => {
                Ok(Self::from(OrdValue::try_from(state)?))
            }
            f => Err(InternalSnafu {
                reason: format!(
                    "Accumulator does not support this aggregation function: {:?}",
                    f
                ),
            }
            .build()),
        }
    }
}

fn err_try_from_val<T: Display>(reason: T) -> EvalError {
    TryFromValueSnafu {
        msg: reason.to_string(),
    }
    .build()
}

/// Compare types while ignoring their precision, including `Timestamp`, `Time`,
/// `Duration` and `Interval`.
fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> bool {
    left == right
        || matches!(left, ConcreteDataType::Timestamp(..))
            && matches!(right, ConcreteDataType::Timestamp(..))
        || matches!(left, ConcreteDataType::Time(..)) && matches!(right, ConcreteDataType::Time(..))
        || matches!(left, ConcreteDataType::Duration(..))
            && matches!(right, ConcreteDataType::Duration(..))
        || matches!(left, ConcreteDataType::Interval(..))
            && matches!(right, ConcreteDataType::Interval(..))
}

#[cfg(test)]
mod test {
    use super::*;
    #[test]
    fn test_accum() {
        let testcases = vec![
            (
                AggregateFunc::SumInt32,
                vec![(Value::Int32(1), 1), (Value::Null, 1)],
                (
                    Value::Int64(1),
                    vec![Value::Decimal128(Decimal128::new(1, 38, 0)), 1i64.into()],
                ),
            ),
            (
                AggregateFunc::SumFloat32,
                vec![(Value::Float32(OrderedF32::from(1.0)), 1), (Value::Null, 1)],
                (
                    Value::Float32(OrderedF32::from(1.0)),
                    vec![
                        Value::Float64(OrderedF64::from(1.0)),
                        0i64.into(),
                        0i64.into(),
                        0i64.into(),
                        1i64.into(),
                    ],
                ),
            ),
            (
                AggregateFunc::MaxInt32,
                vec![(Value::Int32(1), 1), (Value::Int32(2), 1), (Value::Null, 1)],
                (Value::Int32(2), vec![Value::Int32(2), 2i64.into()]),
            ),
            (
                AggregateFunc::MinInt32,
                vec![(Value::Int32(2), 1), (Value::Int32(1), 1), (Value::Null, 1)],
                (Value::Int32(1), vec![Value::Int32(1), 2i64.into()]),
            ),
            (
                AggregateFunc::MaxFloat32,
                vec![
                    (Value::Float32(OrderedF32::from(1.0)), 1),
                    (Value::Float32(OrderedF32::from(2.0)), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Float32(OrderedF32::from(2.0)),
                    vec![Value::Float32(OrderedF32::from(2.0)), 2i64.into()],
                ),
            ),
            (
                AggregateFunc::MaxDateTime,
                vec![
                    (Value::DateTime(DateTime::from(0)), 1),
                    (Value::DateTime(DateTime::from(1)), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::DateTime(DateTime::from(1)),
                    vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
                ),
            ),
            (
                AggregateFunc::Count,
                vec![
                    (Value::Int32(1), 1),
                    (Value::Int32(2), 1),
                    (Value::Null, 1),
                    (Value::Null, 1),
                ],
                (2i64.into(), vec![Value::Null, 2i64.into()]),
            ),
            (
                AggregateFunc::Any,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(true),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::All,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(false),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::MaxBool,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(true),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::MinBool,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(false),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
        ];

        for (aggr_fn, input, (eval_res, state)) in testcases {
            let create_and_insert = || -> Result<Accum, EvalError> {
                let mut acc = Accum::new_accum(&aggr_fn)?;
                acc.update_batch(&aggr_fn, input.clone())?;
                let row = acc.into_state();
                let acc = Accum::try_into_accum(&aggr_fn, row)?;
                Ok(acc)
            };
            let acc = match create_and_insert() {
                Ok(acc) => acc,
                Err(err) => panic!(
                    "Failed to create accum for {:?} with input {:?} with error: {:?}",
                    aggr_fn, input, err
                ),
            };

            if acc.eval(&aggr_fn).unwrap() != eval_res {
                panic!(
                    "Failed to eval accum for {:?} with input {:?}, expect {:?}, got {:?}",
                    aggr_fn,
                    input,
                    eval_res,
                    acc.eval(&aggr_fn).unwrap()
                );
            }
            let actual_state = acc.into_state();
            if actual_state != state {
                panic!(
                    "Failed to cast into state from accum for {:?} with input {:?}, expect state {:?}, got state {:?}",
                    aggr_fn, input, state, actual_state
                );
            }
        }
    }

    #[test]
    fn test_fail_path_accum() {
        {
            let bool_accum = Bool::try_from(vec![Value::Null]);
            assert!(matches!(bool_accum, Err(EvalError::Internal { .. })));
        }

        {
            let mut bool_accum = Bool::try_from(vec![1i64.into(), 1i64.into()]).unwrap();
            // serde round-trip
            let bool_accum_serde = serde_json::to_string(&bool_accum).unwrap();
            let bool_accum_de = serde_json::from_str::<Bool>(&bool_accum_serde).unwrap();
            assert_eq!(bool_accum, bool_accum_de);
            assert!(matches!(
                bool_accum.update(&AggregateFunc::MaxDate, 1.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                bool_accum.update(&AggregateFunc::Any, 1.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                bool_accum.eval(&AggregateFunc::MaxDate),
                Err(EvalError::Internal { .. })
            ));
        }

        {
            let ret = SimpleNumber::try_from(vec![Value::Null]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum =
                SimpleNumber::try_from(vec![Decimal128::new(0, 38, 0).into(), 0i64.into()])
                    .unwrap();

            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::SumInt64, 0i32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                accum.eval(&AggregateFunc::All),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::SumInt64, 1i64.into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumInt64, i64::MAX.into(), 1)
                .unwrap();
            assert!(matches!(
                accum.eval(&AggregateFunc::SumInt64),
                Err(EvalError::Overflow { .. })
            ));
        }

        {
            let ret = Float::try_from(vec![2f64.into(), 0i64.into(), 0i64.into(), 0i64.into()]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum = Float::try_from(vec![
                2f64.into(),
                0i64.into(),
                0i64.into(),
                0i64.into(),
                1i64.into(),
            ])
            .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, 2f64.into(), -1)
                .unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::SumFloat64, 0.0f32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            // no record, no accum
            assert_eq!(
                accum.eval(&AggregateFunc::SumFloat64).unwrap(),
                0.0f64.into()
            );

            assert!(matches!(
                accum.eval(&AggregateFunc::All),
                Err(EvalError::Internal { .. })
            ));

            accum
                .update(&AggregateFunc::SumFloat64, f64::INFINITY.into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, (-f64::INFINITY).into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, f64::NAN.into(), 1)
                .unwrap();
        }

        {
            let ret = OrdValue::try_from(vec![Value::Null]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::MaxInt16, 1i16.into(), 1)
                .unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt16, 0i32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt16, 0i16.into(), -1),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::MaxInt16, Value::Null, 1)
                .unwrap();
        }

        // inserting uint64 into max_int64 should fail
        {
            let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt64, 0u64.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
        }
    }
}
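
The tests above depend on accumulator state surviving a round trip through `into_state`/`try_into_accum`: state lives as a flat row of values rather than a Rust object, so it can be stored in and restored from a dataflow arrangement. A stripped-down sketch of that round-trip pattern (the `MiniBool` type is made up for illustration, not part of the crate):

```rust
/// A bool accumulator whose state is a flat Vec<i64>, mirroring
/// Bool's `into_state` / `TryFrom<Vec<Value>>` pair above.
#[derive(Debug, PartialEq)]
struct MiniBool {
    trues: i64,
    falses: i64,
}

impl MiniBool {
    fn into_state(self) -> Vec<i64> {
        vec![self.trues, self.falses]
    }

    fn try_from_state(state: Vec<i64>) -> Result<Self, String> {
        // Reject malformed state, like the `ensure!(state.len() == 2)` checks above.
        let [trues, falses]: [i64; 2] = state
            .try_into()
            .map_err(|_| "Bool accumulator state should have 2 values".to_string())?;
        Ok(Self { trues, falses })
    }
}

fn main() {
    let acc = MiniBool { trues: 3, falses: 1 };
    let state = acc.into_state();
    let restored = MiniBool::try_from_state(state).unwrap();
    assert_eq!(restored, MiniBool { trues: 3, falses: 1 });
}
```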
@@ -12,15 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-use std::any::type_name;
-
use common_time::{Date, DateTime};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, Value};
use serde::{Deserialize, Serialize};

use crate::expr::error::{EvalError, TryFromValueSnafu, TypeMismatchSnafu};
-use crate::expr::relation::accum::Accum;
+use crate::expr::relation::accum::{Accum, Accumulator};
use crate::repr::Diff;

/// Aggregate functions that can be applied to a group of rows.
@@ -83,3 +81,280 @@ pub enum AggregateFunc {
    Any,
    All,
}

impl AggregateFunc {
    pub fn is_max(&self) -> bool {
        self.signature().generic_fn == GenericFn::Max
    }

    pub fn is_min(&self) -> bool {
        self.signature().generic_fn == GenericFn::Min
    }

    pub fn is_sum(&self) -> bool {
        self.signature().generic_fn == GenericFn::Sum
    }

    /// Eval a batch of (value, diff) pairs against the accumulator state.
    ///
    /// Expects self to be an accumulable aggregate function, i.e. sum/count.
    ///
    /// TODO(discord9): deal with overflow & better accumulator
    pub fn eval_diff_accumulable<I>(
        &self,
        accum: Vec<Value>,
        value_diffs: I,
    ) -> Result<(Value, Vec<Value>), EvalError>
    where
        I: IntoIterator<Item = (Value, Diff)>,
    {
        let mut accum = if accum.is_empty() {
            Accum::new_accum(self)?
        } else {
            Accum::try_into_accum(self, accum)?
        };
        accum.update_batch(self, value_diffs)?;
        let res = accum.eval(self)?;
        Ok((res, accum.into_state()))
    }
}

pub struct Signature {
    pub input: ConcreteDataType,
    pub output: ConcreteDataType,
    pub generic_fn: GenericFn,
}

#[derive(Debug, PartialEq, Eq)]
pub enum GenericFn {
    Max,
    Min,
    Sum,
    Count,
    Any,
    All,
}

impl AggregateFunc {
    /// All concrete datatypes with precision are returned as the largest possible variant.
    /// As an exception, count has a signature of `null -> i64`, but it's actually `anytype -> i64`.
    pub fn signature(&self) -> Signature {
        match self {
            AggregateFunc::MaxInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxBool => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxString => Signature {
                input: ConcreteDataType::string_datatype(),
                output: ConcreteDataType::string_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDate => Signature {
                input: ConcreteDataType::date_datatype(),
                output: ConcreteDataType::date_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDateTime => Signature {
                input: ConcreteDataType::datetime_datatype(),
                output: ConcreteDataType::datetime_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxTimestamp => Signature {
                input: ConcreteDataType::timestamp_second_datatype(),
                output: ConcreteDataType::timestamp_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxTime => Signature {
                input: ConcreteDataType::time_second_datatype(),
                output: ConcreteDataType::time_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDuration => Signature {
                input: ConcreteDataType::duration_second_datatype(),
                output: ConcreteDataType::duration_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInterval => Signature {
                input: ConcreteDataType::interval_year_month_datatype(),
                output: ConcreteDataType::interval_year_month_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MinInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinBool => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinString => Signature {
                input: ConcreteDataType::string_datatype(),
                output: ConcreteDataType::string_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDate => Signature {
                input: ConcreteDataType::date_datatype(),
                output: ConcreteDataType::date_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDateTime => Signature {
                input: ConcreteDataType::datetime_datatype(),
                output: ConcreteDataType::datetime_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinTimestamp => Signature {
                input: ConcreteDataType::timestamp_second_datatype(),
                output: ConcreteDataType::timestamp_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinTime => Signature {
                input: ConcreteDataType::time_second_datatype(),
                output: ConcreteDataType::time_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDuration => Signature {
                input: ConcreteDataType::duration_second_datatype(),
                output: ConcreteDataType::duration_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInterval => Signature {
                input: ConcreteDataType::interval_year_month_datatype(),
                output: ConcreteDataType::interval_year_month_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::SumInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::Count => Signature {
                input: ConcreteDataType::null_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Count,
            },
            AggregateFunc::Any => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Any,
            },
            AggregateFunc::All => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::All,
            },
        }
    }
}
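
`eval_diff_accumulable` is meant to be called repeatedly: each call folds a batch of `(value, diff)` changes into the saved state row and returns both the new result and the new state. A hedged, self-contained sketch of that driving loop, with a toy count aggregate standing in for `AggregateFunc` (all names here are illustrative):

```rust
/// Toy analog of `eval_diff_accumulable`: fold a batch of (value, diff)
/// pairs into flat state, returning (result, new_state).
fn eval_diff_count(state: Vec<i64>, batch: &[(Option<i64>, i64)]) -> (i64, Vec<i64>) {
    // Empty state means "fresh accumulator", as in `Accum::new_accum`.
    let mut non_nulls = state.first().copied().unwrap_or(0);
    for (value, diff) in batch {
        if value.is_some() {
            non_nulls += diff; // count(col) counts only non-null values
        }
    }
    (non_nulls, vec![non_nulls])
}

fn main() {
    // The first batch starts from empty state; later batches resume from it.
    let (res, state) = eval_diff_count(vec![], &[(Some(1), 1), (None, 1), (Some(2), 1)]);
    assert_eq!(res, 2);
    let (res, _state) = eval_diff_count(state, &[(Some(2), -1)]);
    assert_eq!(res, 1);
}
```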
@@ -17,4 +17,5 @@
// allow unused for now because it should be used later
mod adapter;
mod expr;
+mod plan;
mod repr;
src/flow/src/plan.rs (new file, 98 lines)
@@ -0,0 +1,98 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains basic definitions for a dataflow's plan
//! that can be translated to a hydro dataflow.

mod join;
mod reduce;

use serde::{Deserialize, Serialize};

pub(crate) use self::reduce::{AccumulablePlan, KeyValPlan, ReducePlan};
use crate::expr::{
    AggregateExpr, EvalError, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
};
use crate::plan::join::JoinPlan;
use crate::repr::{DiffRow, RelationType};

#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct TypedPlan {
    /// output type of the relation
    pub typ: RelationType,
    pub plan: Plan,
}

/// TODO(discord9): support `TableFunc` (by defining a FlatMap that maps 1 to n)
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum Plan {
    /// A constant collection of rows.
    Constant { rows: Vec<DiffRow> },
    /// Get CDC data from a source, be it an external reference to an existing source or an
    /// internal reference to a `Let` identifier
    Get { id: Id },
    /// Create a temporary collection from the given `value`, and make this binding available
    /// only in the scope of `body`
    Let {
        id: LocalId,
        value: Box<Plan>,
        body: Box<Plan>,
    },
    /// Map, Filter, and Project operators.
    Mfp {
        /// The input collection.
        input: Box<Plan>,
        /// Linear operator to apply to each record.
        mfp: MapFilterProject,
    },
    /// Reduce operator, aggregation by key assembled from KeyValPlan
    Reduce {
        /// The input collection.
        input: Box<Plan>,
        /// A plan for changing input records into key, value pairs.
        key_val_plan: KeyValPlan,
        /// A plan for performing the reduce.
        ///
        /// The implementation of reduction has several different strategies based
        /// on the properties of the reduction, and the input itself.
        reduce_plan: ReducePlan,
    },
    /// A multiway relational equijoin, with fused map, filter, and projection.
    ///
    /// This stage performs a multiway join among `inputs`, using the equality
    /// constraints expressed in `plan`. The plan also describes the implementation
    /// strategy we will use, and any pushed down per-record work.
    Join {
        /// An ordered list of inputs that will be joined.
        inputs: Vec<Plan>,
        /// Detailed information about the implementation of the join.
        ///
        /// This includes information about the implementation strategy, but also
        /// any map, filter, project work that we might follow the join with, but
        /// potentially pushed down into the implementation of the join.
        plan: JoinPlan,
    },
    /// Adds the contents of the input collections.
    ///
    /// Importantly, this is *multiset* union, so the multiplicities of records will
    /// add. This is in contrast to *set* union, where the multiplicities would be
    /// capped at one. A set union can be formed with `Union` followed by `Reduce`
    /// implementing the "distinct" operator.
    Union {
        /// The input collections
        inputs: Vec<Plan>,
        /// Whether to consolidate the output, e.g., cancel negated records.
        consolidate_output: bool,
    },
}
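
The `Union` doc above draws the multiset/set distinction: multiplicities add, and "distinct" is a later `Reduce` that caps each surviving count at one. A small standalone sketch of that arithmetic, with rows kept in a map of counts (illustrative only, not the dataflow implementation):

```rust
use std::collections::HashMap;

/// Multiset union: per-row counts simply add (negative diffs cancel).
fn union(inputs: &[Vec<(&'static str, i64)>]) -> HashMap<&'static str, i64> {
    let mut counts = HashMap::new();
    for input in inputs {
        for (row, diff) in input {
            *counts.entry(*row).or_insert(0) += diff;
        }
    }
    // consolidate_output: drop records whose counts cancelled to zero
    counts.retain(|_, c| *c != 0);
    counts
}

fn main() {
    let a = vec![("x", 2), ("y", 1)];
    let b = vec![("x", 1), ("y", -1)];
    let merged = union(&[a, b]);
    assert_eq!(merged.get("x"), Some(&3)); // multiplicities added
    assert_eq!(merged.get("y"), None); // +1 and -1 cancelled

    // A set union would follow this with a "distinct" reduce,
    // capping every surviving count at one.
    let distinct: HashMap<_, _> = merged.into_iter().map(|(r, _)| (r, 1)).collect();
    assert_eq!(distinct.get("x"), Some(&1));
}
```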
src/flow/src/plan/join.rs (new file, 78 lines)
@@ -0,0 +1,78 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::ScalarExpr;
use crate::plan::SafeMfpPlan;

/// TODO(discord9): consider impl more join strategies
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub enum JoinPlan {
    Linear(LinearJoinPlan),
}

/// Determines if a given row should stay in the output, and applies a map/filter/project
/// before outputting the row.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct JoinFilter {
    /// Each element in the outer vector checks whether every expr within it evaluates to the
    /// same value; if not, the row is filtered out. Useful for equi-joins (joins based on
    /// equality of some columns).
    pub ready_equivalences: Vec<Vec<ScalarExpr>>,
    /// Apply a map filter project before outputting the row
    pub before: SafeMfpPlan,
}

/// A plan for the execution of a linear join.
///
/// A linear join is a sequence of stages, each of which introduces
/// a new collection. Each stage is represented by a [LinearStagePlan].
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearJoinPlan {
    /// The source relation from which we start the join.
    pub source_relation: usize,
    /// The arrangement to use for the source relation, if any
    pub source_key: Option<Vec<ScalarExpr>>,
    /// An initial closure to apply before any stages.
    ///
    /// Values of `None` indicate the identity closure.
    pub initial_closure: Option<JoinFilter>,
    /// A *sequence* of stages to apply one after the other.
    pub stage_plans: Vec<LinearStagePlan>,
    /// A concluding filter to apply after the last stage.
    ///
    /// Values of `None` indicate the identity closure.
    pub final_closure: Option<JoinFilter>,
}

/// A plan for the execution of one stage of a linear join.
///
/// Each stage is a binary join between the current accumulated
/// join results, and a new collection. The former is referred to
/// as the "stream" and the latter the "lookup".
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearStagePlan {
    /// The index of the relation into which we will look up.
    pub lookup_relation: usize,
    /// The key expressions to use for the stream relation.
    pub stream_key: Vec<ScalarExpr>,
    /// Columns to retain from the stream relation.
    /// These columns are those that are not redundant with `stream_key`,
    /// and cannot be read out of the key component of an arrangement.
    pub stream_thinning: Vec<usize>,
    /// The key expressions to use for the lookup relation.
    pub lookup_key: Vec<ScalarExpr>,
    /// The closure to apply to the concatenation of the key columns,
    /// the stream value columns, and the lookup value columns.
    pub closure: JoinFilter,
}
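
Each `LinearStagePlan` above is one hash-lookup step: the accumulated "stream" rows are keyed, probed against an arrangement of the "lookup" relation, and the matched pairs feed the next stage. A simplified single-stage sketch with string keys (real stages key by `ScalarExpr` evaluation and apply a `JoinFilter` closure; these names are illustrative):

```rust
use std::collections::HashMap;

/// One linear-join stage: probe `stream` rows into an index over `lookup`.
fn join_stage(
    stream: Vec<(String, Vec<i64>)>, // (key, retained stream columns)
    lookup: Vec<(String, Vec<i64>)>, // (key, lookup value columns)
) -> Vec<(String, Vec<i64>)> {
    // Arrange the lookup side by key, as an arrangement would.
    let mut index: HashMap<String, Vec<Vec<i64>>> = HashMap::new();
    for (key, vals) in lookup {
        index.entry(key).or_default().push(vals);
    }

    // For each stream row, emit one output per matching lookup row:
    // key ++ stream columns ++ lookup columns.
    let mut out = Vec::new();
    for (key, stream_vals) in stream {
        for lookup_vals in index.get(&key).into_iter().flatten() {
            let mut row = stream_vals.clone();
            row.extend_from_slice(lookup_vals);
            out.push((key.clone(), row));
        }
    }
    out
}

fn main() {
    let stream = vec![("a".to_string(), vec![1])];
    let lookup = vec![("a".to_string(), vec![10]), ("b".to_string(), vec![20])];
    assert_eq!(join_stage(stream, lookup), vec![("a".to_string(), vec![1, 10])]);
}
```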
src/flow/src/plan/reduce.rs (new file, 50 lines)
@@ -0,0 +1,50 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::{AggregateExpr, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr};

#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct KeyValPlan {
    pub key_plan: SafeMfpPlan,
    pub val_plan: SafeMfpPlan,
}

/// TODO(discord9): def & impl of hierarchical aggregates (for min/max with support for
/// deletion), basic aggregates (for other aggregate functions), and mixed aggregates
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum ReducePlan {
    /// Plan for not computing any aggregations, just determining the set of
    /// distinct keys.
    Distinct,
    /// Plan for computing only accumulable aggregations.
    /// Including simple functions like `sum`, `count`, `min`/`max` (without deletion)
    Accumulable(AccumulablePlan),
}

/// Accumulable plan for the execution of a reduction.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct AccumulablePlan {
    /// All of the aggregations we were asked to compute, stored
    /// in order.
    pub full_aggrs: Vec<AggregateExpr>,
    /// All of the non-distinct accumulable aggregates.
    /// Each element represents:
    /// (index of aggr output, index of value among inputs, aggr expr)
    /// These will all be rendered together in one dataflow fragment.
    pub simple_aggrs: Vec<(usize, usize, AggregateExpr)>,
    /// Same as above but for all of the `DISTINCT` accumulable aggregations.
    pub distinct_aggrs: Vec<(usize, usize, AggregateExpr)>,
}
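
The `(index of aggr output, index of value among inputs, aggr expr)` triples in `AccumulablePlan` let several aggregates run over one pass of the values: each triple picks its input column and writes its own output slot. A toy sketch of that dispatch, with plain functions standing in for `AggregateExpr` (names and types here are made up for illustration):

```rust
/// Runs several aggregates in one pass, AccumulablePlan-style: each
/// (out_idx, in_idx, aggr) triple reads input column in_idx and fills
/// output slot out_idx.
fn run_simple_aggrs(
    rows: &[Vec<i64>],
    simple_aggrs: &[(usize, usize, fn(&[i64]) -> i64)],
) -> Vec<i64> {
    let mut output = vec![0; simple_aggrs.len()];
    for &(out_idx, in_idx, aggr) in simple_aggrs {
        let column: Vec<i64> = rows.iter().map(|row| row[in_idx]).collect();
        output[out_idx] = aggr(&column);
    }
    output
}

fn main() {
    fn sum(col: &[i64]) -> i64 {
        col.iter().sum()
    }
    fn max(col: &[i64]) -> i64 {
        *col.iter().max().unwrap()
    }
    let rows = vec![vec![1, 10], vec![2, 20]];
    // sum of column 0 into output slot 0, max of column 1 into slot 1
    let plan = [(0, 0, sum as fn(&[i64]) -> i64), (1, 1, max)];
    assert_eq!(run_simple_aggrs(&rows, &plan), vec![3, 20]);
}
```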
@@ -33,7 +33,10 @@ use snafu::ResultExt;

use crate::expr::error::{CastValueSnafu, EvalError};

-/// System-wide Record count difference type.
+/// System-wide record count difference type. Useful for capturing data changes:
+///
+/// i.e. +1 means insert one record, -1 means remove one,
+/// and +/-n means insert/remove multiple duplicate records.
pub type Diff = i64;

/// System-wide default timestamp type
@@ -28,6 +28,7 @@ use api::v1::meta::Role;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use catalog::CatalogManagerRef;
+use client::OutputData;
use common_base::Plugins;
use common_config::KvBackendConfig;
use common_error::ext::BoxedError;
@@ -401,13 +402,13 @@ impl SqlQueryHandler for Instance {

/// Attaches a timer to the output and observes it once the output is exhausted.
pub fn attach_timer(output: Output, timer: HistogramTimer) -> Output {
-    match output {
-        Output::AffectedRows(_) | Output::RecordBatches(_) => output,
-        Output::Stream(stream, plan) => {
+    match output.data {
+        OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => output,
+        OutputData::Stream(stream) => {
            let stream = OnDone::new(stream, move || {
                timer.observe_duration();
            });
-            Output::Stream(Box::pin(stream), plan)
+            Output::new(OutputData::Stream(Box::pin(stream)), output.meta)
        }
    }
}
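
`OnDone` works by intercepting the end-of-stream signal: it forwards every `poll_next` and fires its callback exactly once when the inner stream yields `None`, which is how the timer above only observes after the last batch is drained. A rough standalone sketch of that wrapper idea (not the actual `servers` implementation; assumes the `futures` and `tokio` crates):

```rust
use std::pin::Pin;
use std::task::{Context, Poll};

use futures::stream::{Stream, StreamExt};

/// Runs `on_done` once, when the inner stream is exhausted.
struct OnDone<S, F: FnOnce()> {
    inner: Pin<Box<S>>,
    on_done: Option<F>,
}

impl<S: Stream, F: FnOnce() + Unpin> Stream for OnDone<S, F> {
    type Item = S::Item;

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.get_mut();
        match this.inner.as_mut().poll_next(cx) {
            Poll::Ready(None) => {
                // Fire the callback exactly once at end of stream,
                // e.g. timer.observe_duration() in attach_timer above.
                if let Some(f) = this.on_done.take() {
                    f();
                }
                Poll::Ready(None)
            }
            other => other,
        }
    }
}

#[tokio::main]
async fn main() {
    let mut wrapped = OnDone {
        inner: Box::pin(futures::stream::iter(1..=3)),
        on_done: Some(|| println!("stream exhausted, observe timer here")),
    };
    while let Some(item) = wrapped.next().await {
        println!("batch {item}");
    }
}
```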
@@ -113,7 +113,7 @@ impl GrpcQueryHandler for Instance {
                    .statement_executor
                    .create_table_inner(&mut expr, None, &ctx)
                    .await?;
-                Output::AffectedRows(0)
+                Output::new_with_affected_rows(0)
            }
            DdlExpr::Alter(expr) => self.statement_executor.alter_table_inner(expr).await?,
            DdlExpr::CreateDatabase(expr) => {
@@ -47,8 +47,8 @@ impl OpentsdbProtocolHandler for Instance {
            .map_err(BoxedError::new)
            .context(servers::error::ExecuteGrpcQuerySnafu)?;

-        Ok(match output {
-            common_query::Output::AffectedRows(rows) => rows,
+        Ok(match output.data {
+            common_query::OutputData::AffectedRows(rows) => rows,
            _ => unreachable!(),
        })
    }
@@ -19,6 +19,7 @@ use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse, Wri
use api::v1::RowInsertRequests;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
+use client::OutputData;
use common_catalog::format_full_table_name;
use common_error::ext::BoxedError;
use common_query::prelude::GREPTIME_PHYSICAL_TABLE;
@@ -77,7 +78,7 @@ fn negotiate_response_type(accepted_response_types: &[i32]) -> ServerResult<Resp
}

async fn to_query_result(table_name: &str, output: Output) -> ServerResult<QueryResult> {
-    let Output::Stream(stream, _) = output else {
+    let OutputData::Stream(stream) = output.data else {
        unreachable!()
    };
    let recordbatches = RecordBatches::try_collect(stream)
@@ -152,6 +152,10 @@ impl TxnService for RaftEngineBackend {
            responses,
        })
    }
+
+    fn max_txn_ops(&self) -> usize {
+        usize::MAX
+    }
}

#[async_trait::async_trait]
@@ -24,7 +24,9 @@ fn main() {

#[tokio::main]
async fn run() {
-    let kv_backend = EtcdStore::with_endpoints(["127.0.0.1:2380"]).await.unwrap();
+    let kv_backend = EtcdStore::with_endpoints(["127.0.0.1:2380"], 128)
+        .await
+        .unwrap();

    // put
    let put_req = PutRequest {
@@ -193,7 +193,8 @@ pub async fn metasrv_builder(
        (None, false) => {
            let etcd_client = create_etcd_client(opts).await?;
            let kv_backend = {
-                let etcd_backend = EtcdStore::with_etcd_client(etcd_client.clone());
+                let etcd_backend =
+                    EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
                if !opts.store_key_prefix.is_empty() {
                    Arc::new(ChrootKvBackend::new(
                        opts.store_key_prefix.clone().into_bytes(),
@@ -79,6 +79,17 @@ pub struct MetaSrvOptions {
    pub wal: MetaSrvWalConfig,
    pub export_metrics: ExportMetricsOption,
    pub store_key_prefix: String,
+    /// The max operations per txn
+    ///
+    /// This value is usually limited by which store is used for the `KvBackend`.
+    /// For example, if using etcd, this value should ensure that it is less than
+    /// or equal to the `--max-txn-ops` option value of etcd.
+    ///
+    /// TODO(jeremy): Currently, this option only affects the etcd store, but it may
+    /// also affect other stores in the future. In other words, each store needs to
+    /// limit the number of operations in a txn, because an infinitely large txn could
+    /// potentially block other operations.
+    pub max_txn_ops: usize,
}

impl MetaSrvOptions {
@@ -112,6 +123,7 @@ impl Default for MetaSrvOptions {
            wal: MetaSrvWalConfig::default(),
            export_metrics: ExportMetricsOption::default(),
            store_key_prefix: String::new(),
+            max_txn_ops: 128,
        }
    }
}
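
Capping `max_txn_ops` matters because etcd rejects transactions larger than its own `--max-txn-ops` limit (128 by default), so a store that respects the cap has to split big batches into several smaller txns. A hedged sketch of that splitting, with `chunks` doing the work (illustrative types, not the actual `EtcdStore` code):

```rust
/// Split a batch of operations into txn-sized chunks so that no single
/// transaction exceeds the backend's limit (cf. etcd's --max-txn-ops).
fn into_txn_batches<Op: Clone>(ops: &[Op], max_txn_ops: usize) -> Vec<Vec<Op>> {
    ops.chunks(max_txn_ops).map(|chunk| chunk.to_vec()).collect()
}

fn main() {
    let ops: Vec<u32> = (0..300).collect();
    let batches = into_txn_batches(&ops, 128);
    // 300 ops under a 128-op cap need three transactions: 128 + 128 + 44.
    assert_eq!(batches.len(), 3);
    assert_eq!(batches[2].len(), 44);
    assert!(batches.iter().all(|b| b.len() <= 128));
}
```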
@@ -42,7 +42,7 @@ pub async fn mock_with_memstore() -> MockInfo {
}

pub async fn mock_with_etcdstore(addr: &str) -> MockInfo {
-    let kv_backend = EtcdStore::with_endpoints([addr]).await.unwrap();
+    let kv_backend = EtcdStore::with_endpoints([addr], 128).await.unwrap();
    mock(Default::default(), kv_backend, None, None).await
}

@@ -380,6 +380,10 @@ impl TxnService for LeaderCachedKvBackend {

        Ok(res)
    }
+
+    fn max_txn_ops(&self) -> usize {
+        self.store.max_txn_ops()
+    }
}

impl ResettableKvBackend for LeaderCachedKvBackend {
@@ -79,5 +79,6 @@ rand.workspace = true
toml.workspace = true

[[bench]]
-name = "bench_merge_tree"
+name = "memtable_bench"
harness = false
+required-features = ["test"]
@@ -7,3 +7,9 @@ The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-

> "You can't be a true petrolhead until you've owned an Alfa Romeo."
> <div align="right">-- by Jeremy Clarkson</div>
+
+## Benchmarks
+
+Run benchmarks in this crate:
+
+```bash
+cargo bench -p mito2 -F test
+```
src/mito2/benches/memtable_bench.rs (new file, 352 lines)
@@ -0,0 +1,352 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::value::ValueData;
use api::v1::{Row, Rows, SemanticType};
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion_common::Column;
use datafusion_expr::{lit, Expr};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
use mito2::memtable::time_series::TimeSeriesMemtable;
use mito2::memtable::{KeyValues, Memtable};
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
use store_api::metadata::{
    ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
};
use store_api::storage::RegionId;
use table::predicate::Predicate;

/// Writes rows.
fn write_rows(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let timestamps = (0..100).collect::<Vec<_>>();

    // Note that this test only generates one time series.
    let mut group = c.benchmark_group("write");
    group.bench_function("merge_tree", |b| {
        let memtable =
            MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
        let kvs =
            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
        b.iter(|| {
            memtable.write(&kvs).unwrap();
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        let kvs =
            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
        b.iter(|| {
            memtable.write(&kvs).unwrap();
        });
    });
}

/// Scans all rows.
fn full_scan(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    let start_sec = 1710043200;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

    let mut group = c.benchmark_group("full_scan");
    group.sample_size(10);
    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }

        b.iter(|| {
            let iter = memtable.iter(None, None).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }

        b.iter(|| {
            let iter = memtable.iter(None, None).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
}

/// Filters 1 host.
fn filter_1_host(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    let start_sec = 1710043200;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

    let mut group = c.benchmark_group("filter_1_host");
    group.sample_size(10);
    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }
        let predicate = generator.random_host_filter();

        b.iter(|| {
            let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }
        let predicate = generator.random_host_filter();

        b.iter(|| {
            let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
}

struct Host {
    hostname: String,
    region: String,
    datacenter: String,
    rack: String,
    os: String,
    arch: String,
    team: String,
    service: String,
    service_version: String,
    service_environment: String,
}

impl Host {
    fn random_with_id(id: usize) -> Host {
        let mut rng = rand::thread_rng();
        let region = format!("ap-southeast-{}", rng.gen_range(0..10));
        let datacenter = format!(
            "{}{}",
            region,
            ['a', 'b', 'c', 'd', 'e'].choose(&mut rng).unwrap()
        );
        Host {
            hostname: format!("host_{id}"),
            region,
            datacenter,
            rack: rng.gen_range(0..100).to_string(),
            os: "Ubuntu16.04LTS".to_string(),
            arch: "x86".to_string(),
            team: "CHI".to_string(),
            service: rng.gen_range(0..100).to_string(),
            service_version: rng.gen_range(0..10).to_string(),
            service_environment: "test".to_string(),
        }
    }

    fn fill_values(&self, values: &mut Vec<api::v1::Value>) {
        let tags = [
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.hostname.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.region.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.datacenter.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.rack.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.os.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.arch.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.team.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.service.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.service_version.clone())),
|
||||||
|
},
|
||||||
|
api::v1::Value {
|
||||||
|
value_data: Some(ValueData::StringValue(self.service_environment.clone())),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
for tag in tags {
|
||||||
|
values.push(tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CpuDataGenerator {
|
||||||
|
metadata: RegionMetadataRef,
|
||||||
|
column_schemas: Vec<api::v1::ColumnSchema>,
|
||||||
|
hosts: Vec<Host>,
|
||||||
|
start_sec: i64,
|
||||||
|
end_sec: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CpuDataGenerator {
|
||||||
|
fn new(metadata: RegionMetadataRef, num_hosts: usize, start_sec: i64, end_sec: i64) -> Self {
|
||||||
|
let column_schemas = region_metadata_to_row_schema(&metadata);
|
||||||
|
Self {
|
||||||
|
metadata,
|
||||||
|
column_schemas,
|
||||||
|
hosts: Self::generate_hosts(num_hosts),
|
||||||
|
start_sec,
|
||||||
|
end_sec,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iter(&self) -> impl Iterator<Item = KeyValues> + '_ {
|
||||||
|
// point per 10s.
|
||||||
|
(self.start_sec..self.end_sec)
|
||||||
|
.step_by(10)
|
||||||
|
.enumerate()
|
||||||
|
.map(|(seq, ts)| self.build_key_values(seq, ts))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_key_values(&self, seq: usize, current_sec: i64) -> KeyValues {
|
||||||
|
let rows = self
|
||||||
|
.hosts
|
||||||
|
.iter()
|
||||||
|
.map(|host| {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
let mut values = Vec::with_capacity(21);
|
||||||
|
values.push(api::v1::Value {
|
||||||
|
value_data: Some(ValueData::TimestampMillisecondValue(current_sec * 1000)),
|
||||||
|
});
|
||||||
|
host.fill_values(&mut values);
|
||||||
|
for _ in 0..10 {
|
||||||
|
values.push(api::v1::Value {
|
||||||
|
value_data: Some(ValueData::F64Value(Self::random_f64(&mut rng))),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Row { values }
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let mutation = api::v1::Mutation {
|
||||||
|
op_type: api::v1::OpType::Put as i32,
|
||||||
|
sequence: seq as u64,
|
||||||
|
rows: Some(Rows {
|
||||||
|
schema: self.column_schemas.clone(),
|
||||||
|
rows,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
KeyValues::new(&self.metadata, mutation).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn random_host_filter(&self) -> Predicate {
|
||||||
|
let host = self.random_hostname();
|
||||||
|
let expr = Expr::Column(Column::from_name("hostname")).eq(lit(host));
|
||||||
|
Predicate::new(vec![expr.into()])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn random_hostname(&self) -> String {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
self.hosts.choose(&mut rng).unwrap().hostname.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn random_f64(rng: &mut ThreadRng) -> f64 {
|
||||||
|
let base: u32 = rng.gen_range(30..95);
|
||||||
|
base as f64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_hosts(num_hosts: usize) -> Vec<Host> {
|
||||||
|
(0..num_hosts).map(Host::random_with_id).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a metadata for TSBS cpu-like table.
|
||||||
|
fn cpu_metadata() -> RegionMetadata {
|
||||||
|
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
|
||||||
|
builder.push_column_metadata(ColumnMetadata {
|
||||||
|
column_schema: ColumnSchema::new(
|
||||||
|
"ts",
|
||||||
|
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||||
|
false,
|
||||||
|
),
|
||||||
|
semantic_type: SemanticType::Timestamp,
|
||||||
|
column_id: 0,
|
||||||
|
});
|
||||||
|
let mut column_id = 1;
|
||||||
|
let tags = [
|
||||||
|
"hostname",
|
||||||
|
"region",
|
||||||
|
"datacenter",
|
||||||
|
"rack",
|
||||||
|
"os",
|
||||||
|
"arch",
|
||||||
|
"team",
|
||||||
|
"service",
|
||||||
|
"service_version",
|
||||||
|
"service_environment",
|
||||||
|
];
|
||||||
|
for tag in tags {
|
||||||
|
builder.push_column_metadata(ColumnMetadata {
|
||||||
|
column_schema: ColumnSchema::new(tag, ConcreteDataType::string_datatype(), true),
|
||||||
|
semantic_type: SemanticType::Tag,
|
||||||
|
column_id,
|
||||||
|
});
|
||||||
|
column_id += 1;
|
||||||
|
}
|
||||||
|
let fields = [
|
||||||
|
"usage_user",
|
||||||
|
"usage_system",
|
||||||
|
"usage_idle",
|
||||||
|
"usage_nice",
|
||||||
|
"usage_iowait",
|
||||||
|
"usage_irq",
|
||||||
|
"usage_softirq",
|
||||||
|
"usage_steal",
|
||||||
|
"usage_guest",
|
||||||
|
"usage_guest_nice",
|
||||||
|
];
|
||||||
|
for field in fields {
|
||||||
|
builder.push_column_metadata(ColumnMetadata {
|
||||||
|
column_schema: ColumnSchema::new(field, ConcreteDataType::float64_datatype(), true),
|
||||||
|
semantic_type: SemanticType::Field,
|
||||||
|
column_id,
|
||||||
|
});
|
||||||
|
column_id += 1;
|
||||||
|
}
|
||||||
|
builder.primary_key(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
|
||||||
|
builder.build().unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
criterion_group!(benches, write_rows, full_scan, filter_1_host);
|
||||||
|
criterion_main!(benches);
|
||||||
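A note on the new benchmark above: `full_scan` and `filter_1_host` lower the sample size to 10 because each iteration scans the whole memtable, which holds 4000 hosts × 720 points (two hours at one point per 10 s), i.e. 2,880,000 rows. The file follows Criterion's group/function layout; a minimal runnable sketch of the same pattern, with illustrative names (`bench_push` and the `push`/`vec` labels are not part of this diff), assuming the bench target is registered in the crate's Cargo.toml with `harness = false`:

use criterion::{criterion_group, criterion_main, Criterion};

// Minimal Criterion skeleton mirroring memtable_bench.rs: one benchmark
// group, one bench_function per implementation, shared input per group.
fn bench_push(c: &mut Criterion) {
    let input: Vec<i64> = (0..100).collect();

    let mut group = c.benchmark_group("push");
    group.bench_function("vec", |b| {
        b.iter(|| {
            let mut v = Vec::with_capacity(input.len());
            v.extend_from_slice(&input);
            v
        })
    });
    group.finish();
}

criterion_group!(benches, bench_push);
criterion_main!(benches);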
@@ -1,36 +0,0 @@
-// Copyright 2023 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use criterion::{criterion_group, criterion_main, Criterion};
-use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
-use mito2::memtable::Memtable;
-use mito2::test_util::memtable_util;
-
-fn bench_merge_tree_memtable(c: &mut Criterion) {
-    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
-    let timestamps = (0..100).collect::<Vec<_>>();
-
-    let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
-
-    let _ = c.bench_function("MergeTreeMemtable", |b| {
-        let kvs =
-            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
-        b.iter(|| {
-            memtable.write(&kvs).unwrap();
-        });
-    });
-}
-
-criterion_group!(benches, bench_merge_tree_memtable);
-criterion_main!(benches);
@@ -158,7 +158,7 @@ impl CacheManager {
         }
     }
 
-    /// Gets the the write cache.
+    /// Gets the write cache.
     pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
         self.write_cache.as_ref()
    }
@@ -85,7 +85,7 @@ impl Default for MergeTreeConfig {
 
         Self {
             index_max_keys_per_shard: 8192,
-            data_freeze_threshold: 32768,
+            data_freeze_threshold: 131072,
             dedup: true,
             fork_dictionary_bytes,
         }
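The hunk above raises the default `data_freeze_threshold` from 32768 to 131072 rows, so an active data buffer now holds four times as many rows before being frozen into an immutable part. Because `MergeTreeConfig` implements `Default`, a single knob can still be overridden with struct-update syntax; a minimal sketch (the value 32768 here is only illustrative), mirroring the pattern the test later in this diff uses with `index_max_keys_per_shard`:

use mito2::memtable::merge_tree::MergeTreeConfig;

fn main() {
    // Keep all defaults but restore the old, smaller freeze threshold.
    let config = MergeTreeConfig {
        data_freeze_threshold: 32768,
        ..Default::default()
    };
    assert_eq!(32768, config.data_freeze_threshold);
}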
@@ -293,6 +293,8 @@ mod tests {
     use std::collections::BTreeSet;
 
     use common_time::Timestamp;
+    use datafusion_common::{Column, ScalarValue};
+    use datafusion_expr::{BinaryExpr, Expr, Operator};
     use datatypes::scalars::ScalarVector;
     use datatypes::vectors::{Int64Vector, TimestampMillisecondVector};
 
@@ -528,4 +530,55 @@ mod tests {
             .collect::<Vec<_>>();
         assert_eq!(expect, read);
     }
+
+    #[test]
+    fn test_memtable_filter() {
+        let metadata = memtable_util::metadata_with_primary_key(vec![0, 1], false);
+        // Try to build a memtable via the builder.
+        let memtable = MergeTreeMemtableBuilder::new(
+            MergeTreeConfig {
+                index_max_keys_per_shard: 40,
+                ..Default::default()
+            },
+            None,
+        )
+        .build(1, &metadata);
+
+        for i in 0..100 {
+            let timestamps: Vec<_> = (0..10).map(|v| i as i64 * 1000 + v).collect();
+            let kvs =
+                memtable_util::build_key_values(&metadata, "hello".to_string(), i, &timestamps, 1);
+            memtable.write(&kvs).unwrap();
+        }
+
+        for i in 0..100 {
+            let timestamps: Vec<_> = (0..10).map(|v| i as i64 * 1000 + v).collect();
+            let expr = Expr::BinaryExpr(BinaryExpr {
+                left: Box::new(Expr::Column(Column {
+                    relation: None,
+                    name: "k1".to_string(),
+                })),
+                op: Operator::Eq,
+                right: Box::new(Expr::Literal(ScalarValue::UInt32(Some(i)))),
+            });
+            let iter = memtable
+                .iter(None, Some(Predicate::new(vec![expr.into()])))
+                .unwrap();
+            let read = iter
+                .flat_map(|batch| {
+                    batch
+                        .unwrap()
+                        .timestamps()
+                        .as_any()
+                        .downcast_ref::<TimestampMillisecondVector>()
+                        .unwrap()
+                        .iter_data()
+                        .collect::<Vec<_>>()
+                        .into_iter()
+                })
+                .map(|v| v.unwrap().0.value())
+                .collect::<Vec<_>>();
+            assert_eq!(timestamps, read);
+        }
+    }
 }
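The `test_memtable_filter` test above builds its `k1 = i` predicate from raw `Expr::BinaryExpr` parts. For reference, DataFusion's expression helpers express the same tree more compactly; a sketch, assuming `k1` names the tag column as in the test:

use datafusion_expr::{col, lit, Expr};

// col("k1").eq(lit(i)) desugars to the same
// Expr::BinaryExpr { left: Column("k1"), op: Operator::Eq, right: Literal }
// tree that the test constructs field by field.
fn k1_filter(i: u32) -> Expr {
    col("k1").eq(lit(i))
}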
@@ -957,6 +957,18 @@ impl DataParts {
         self.active.write_row(pk_index, kv)
     }
 
+    /// Returns the number of rows in the active buffer.
+    pub fn num_active_rows(&self) -> usize {
+        self.active.num_rows()
+    }
+
+    /// Freezes active buffer and creates a new active buffer.
+    pub fn freeze(&mut self) -> Result<()> {
+        let part = self.active.freeze(None, false)?;
+        self.frozen.push(part);
+        Ok(())
+    }
+
     /// Reads data from all parts including active and frozen parts.
     /// The returned iterator yields a record batch of one primary key at a time.
     /// The order of yielding primary keys is determined by provided weights.
@@ -976,6 +988,11 @@ impl DataParts {
     pub(crate) fn is_empty(&self) -> bool {
         self.active.is_empty() && self.frozen.iter().all(|part| part.is_empty())
     }
+
+    #[cfg(test)]
+    pub(crate) fn frozen_len(&self) -> usize {
+        self.frozen.len()
+    }
 }
 
 pub struct DataPartsReaderBuilder {
@@ -994,9 +1011,11 @@ impl DataPartsReaderBuilder {
         for p in self.parts {
             nodes.push(DataNode::new(DataSource::Part(p)));
         }
+        let num_parts = nodes.len();
         let merger = Merger::try_new(nodes)?;
         Ok(DataPartsReader {
             merger,
+            num_parts,
             elapsed: Default::default(),
         })
     }
@@ -1005,6 +1024,7 @@ impl DataPartsReaderBuilder {
 /// Reader for all parts inside a `DataParts`.
 pub struct DataPartsReader {
     merger: Merger<DataNode>,
+    num_parts: usize,
     elapsed: Duration,
 }
 
@@ -1032,6 +1052,10 @@ impl DataPartsReader {
     pub(crate) fn is_valid(&self) -> bool {
         self.merger.is_valid()
     }
+
+    pub(crate) fn num_parts(&self) -> usize {
+        self.num_parts
+    }
 }
 
 #[cfg(test)]
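The new `num_active_rows`/`freeze` pair above exposes `DataParts`' two-tier layout: writes land in a mutable active buffer, and freezing pushes its contents onto the immutable `frozen` list. A toy sketch of the pattern with plain vectors (all names here are illustrative, not the real types):

// Toy two-tier buffer: writes go to `active`; once it reaches the
// threshold, it is frozen wholesale onto the immutable `frozen` list.
struct Parts {
    active: Vec<i64>,
    frozen: Vec<Vec<i64>>,
}

impl Parts {
    fn write(&mut self, row: i64, threshold: usize) {
        // Freeze before the write once the buffer is full, as
        // Shard::write_with_pk_id does later in this diff.
        if self.active.len() >= threshold {
            self.frozen.push(std::mem::take(&mut self.active));
        }
        self.active.push(row);
    }
}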
@@ -45,7 +45,7 @@ impl<T: DataBatchSource> DataBatchSource for DedupReader<T> {
     }
 
     fn next(&mut self) -> Result<()> {
-        loop {
+        while self.inner.is_valid() {
             match &mut self.prev_batch_last_row {
                 None => {
                     // First shot, fill prev_batch_last_row and current_batch_range with first batch.
@@ -78,7 +78,7 @@ impl Partition {
 
         // Finds key in shards, now we ensure one key only exists in one shard.
         if let Some(pk_id) = inner.find_key_in_shards(primary_key) {
-            inner.write_to_shard(pk_id, &key_value);
+            inner.write_to_shard(pk_id, &key_value)?;
             inner.num_rows += 1;
             return Ok(());
         }
@@ -106,7 +106,7 @@ impl Partition {
     }
 
     /// Writes to the partition without a primary key.
-    pub fn write_no_key(&self, key_value: KeyValue) {
+    pub fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
         let mut inner = self.inner.write().unwrap();
         // If no primary key, always write to the first shard.
         debug_assert!(!inner.shards.is_empty());
@@ -117,12 +117,24 @@ impl Partition {
             shard_id: 0,
             pk_index: 0,
         };
-        inner.shards[0].write_with_pk_id(pk_id, &key_value);
+        inner.shards[0].write_with_pk_id(pk_id, &key_value)?;
         inner.num_rows += 1;
+
+        Ok(())
     }
 
     /// Scans data in the partition.
     pub fn read(&self, mut context: ReadPartitionContext) -> Result<PartitionReader> {
+        let start = Instant::now();
+        let key_filter = if context.need_prune_key {
+            Some(PrimaryKeyFilter::new(
+                context.metadata.clone(),
+                context.filters.clone(),
+                context.row_codec.clone(),
+            ))
+        } else {
+            None
+        };
         let (builder_source, shard_reader_builders) = {
             let inner = self.inner.read().unwrap();
             let mut shard_source = Vec::with_capacity(inner.shards.len() + 1);
@@ -141,14 +153,21 @@ impl Partition {
             (builder_reader, shard_source)
         };
 
+        context.metrics.num_shards += shard_reader_builders.len();
         let mut nodes = shard_reader_builders
             .into_iter()
-            .map(|builder| Ok(ShardNode::new(ShardSource::Shard(builder.build()?))))
+            .map(|builder| {
+                Ok(ShardNode::new(ShardSource::Shard(
+                    builder.build(key_filter.clone())?,
+                )))
+            })
             .collect::<Result<Vec<_>>>()?;
 
         if let Some(builder) = builder_source {
+            context.metrics.num_builder += 1;
             // Move the initialization of ShardBuilderReader out of read lock.
-            let shard_builder_reader = builder.build(Some(&context.pk_weights))?;
+            let shard_builder_reader =
+                builder.build(Some(&context.pk_weights), key_filter.clone())?;
             nodes.push(ShardNode::new(ShardSource::Builder(shard_builder_reader)));
         }
 
@@ -156,8 +175,10 @@ impl Partition {
         let merger = ShardMerger::try_new(nodes)?;
         if self.dedup {
             let source = DedupReader::try_new(merger)?;
+            context.metrics.build_partition_reader += start.elapsed();
             PartitionReader::new(context, Box::new(source))
         } else {
+            context.metrics.build_partition_reader += start.elapsed();
             PartitionReader::new(context, Box::new(merger))
         }
     }
@@ -266,11 +287,11 @@ pub(crate) struct PartitionStats {
 
 #[derive(Default)]
 struct PartitionReaderMetrics {
-    prune_pk: Duration,
+    build_partition_reader: Duration,
     read_source: Duration,
     data_batch_to_batch: Duration,
-    keys_before_pruning: usize,
-    keys_after_pruning: usize,
+    num_builder: usize,
+    num_shards: usize,
 }
 
 /// Reader to scan rows in a partition.
@@ -279,18 +300,11 @@ struct PartitionReaderMetrics {
 pub struct PartitionReader {
     context: ReadPartitionContext,
     source: BoxedDataBatchSource,
-    last_yield_pk_id: Option<PkId>,
 }
 
 impl PartitionReader {
     fn new(context: ReadPartitionContext, source: BoxedDataBatchSource) -> Result<Self> {
-        let mut reader = Self {
-            context,
-            source,
-            last_yield_pk_id: None,
-        };
-        // Find next valid batch.
-        reader.prune_batch_by_key()?;
+        let reader = Self { context, source };
 
         Ok(reader)
     }
@@ -305,8 +319,7 @@ impl PartitionReader {
     /// # Panics
     /// Panics if the reader is invalid.
     pub fn next(&mut self) -> Result<()> {
-        self.advance_source()?;
-        self.prune_batch_by_key()
+        self.advance_source()
     }
 
     /// Converts current data batch into a [Batch].
@@ -336,106 +349,77 @@ impl PartitionReader {
         self.context.metrics.read_source += read_source.elapsed();
         Ok(())
     }
-
-    fn prune_batch_by_key(&mut self) -> Result<()> {
-        if self.context.metadata.primary_key.is_empty() || !self.context.need_prune_key {
-            // Nothing to prune.
-            return Ok(());
-        }
-
-        while self.source.is_valid() {
-            let pk_id = self.source.current_pk_id();
-            if let Some(yield_pk_id) = self.last_yield_pk_id {
-                if pk_id == yield_pk_id {
-                    // If this batch has the same key as last returned batch.
-                    // We can return it without evaluating filters.
-                    break;
-                }
-            }
-            let key = self.source.current_key().unwrap();
-            self.context.metrics.keys_before_pruning += 1;
-            // Prune batch by primary key.
-            if prune_primary_key(
-                &self.context.metadata,
-                &self.context.filters,
-                &self.context.row_codec,
-                key,
-                &mut self.context.metrics,
-            ) {
-                // We need this key.
-                self.last_yield_pk_id = Some(pk_id);
-                self.context.metrics.keys_after_pruning += 1;
-                break;
-            }
-            self.advance_source()?;
-        }
-        Ok(())
-    }
 }
 
-fn prune_primary_key(
-    metadata: &RegionMetadataRef,
-    filters: &[SimpleFilterEvaluator],
-    codec: &McmpRowCodec,
-    pk: &[u8],
-    metrics: &mut PartitionReaderMetrics,
-) -> bool {
-    let start = Instant::now();
-    let res = prune_primary_key_inner(metadata, filters, codec, pk);
-    metrics.prune_pk += start.elapsed();
-    res
-}
-
-// TODO(yingwen): Improve performance of key pruning. Now we need to find index and
-// then decode and convert each value.
-/// Returns true if the `pk` is still needed.
-fn prune_primary_key_inner(
-    metadata: &RegionMetadataRef,
-    filters: &[SimpleFilterEvaluator],
-    codec: &McmpRowCodec,
-    pk: &[u8],
-) -> bool {
-    if filters.is_empty() {
-        return true;
-    }
-
-    // no primary key, we simply return true.
-    if metadata.primary_key.is_empty() {
-        return true;
-    }
-
-    let pk_values = match codec.decode(pk) {
-        Ok(values) => values,
-        Err(e) => {
-            common_telemetry::error!(e; "Failed to decode primary key");
-            return true;
-        }
-    };
-
-    // evaluate filters against primary key values
-    let mut result = true;
-    for filter in filters {
-        if Partition::is_partition_column(filter.column_name()) {
-            continue;
-        }
-        let Some(column) = metadata.column_by_name(filter.column_name()) else {
-            continue;
-        };
-        // ignore filters that are not referencing primary key columns
-        if column.semantic_type != SemanticType::Tag {
-            continue;
-        }
-        // index of the column in primary keys.
-        // Safety: A tag column is always in primary key.
-        let index = metadata.primary_key_index(column.column_id).unwrap();
-        // Safety: arrow schema and datatypes are constructed from the same source.
-        let scalar_value = pk_values[index]
-            .try_to_scalar_value(&column.column_schema.data_type)
-            .unwrap();
-        result &= filter.evaluate_scalar(&scalar_value).unwrap_or(true);
-    }
-
-    result
-}
+#[derive(Clone)]
+pub(crate) struct PrimaryKeyFilter {
+    metadata: RegionMetadataRef,
+    filters: Arc<Vec<SimpleFilterEvaluator>>,
+    codec: Arc<McmpRowCodec>,
+    offsets_buf: Vec<usize>,
+}
+
+impl PrimaryKeyFilter {
+    pub(crate) fn new(
+        metadata: RegionMetadataRef,
+        filters: Arc<Vec<SimpleFilterEvaluator>>,
+        codec: Arc<McmpRowCodec>,
+    ) -> Self {
+        Self {
+            metadata,
+            filters,
+            codec,
+            offsets_buf: Vec::new(),
+        }
+    }
+
+    pub(crate) fn prune_primary_key(&mut self, pk: &[u8]) -> bool {
+        if self.filters.is_empty() {
+            return true;
+        }
+
+        // no primary key, we simply return true.
+        if self.metadata.primary_key.is_empty() {
+            return true;
+        }
+
+        // evaluate filters against primary key values
+        let mut result = true;
+        self.offsets_buf.clear();
+        for filter in &*self.filters {
+            if Partition::is_partition_column(filter.column_name()) {
+                continue;
+            }
+            let Some(column) = self.metadata.column_by_name(filter.column_name()) else {
+                continue;
+            };
+            // ignore filters that are not referencing primary key columns
+            if column.semantic_type != SemanticType::Tag {
+                continue;
+            }
+            // index of the column in primary keys.
+            // Safety: A tag column is always in primary key.
+            let index = self.metadata.primary_key_index(column.column_id).unwrap();
+            let value = match self.codec.decode_value_at(pk, index, &mut self.offsets_buf) {
+                Ok(v) => v,
+                Err(e) => {
+                    common_telemetry::error!(e; "Failed to decode primary key");
+                    return true;
+                }
+            };
+
+            // TODO(yingwen): `evaluate_scalar()` creates temporary arrays to compare scalars. We
+            // can compare the bytes directly without allocation and matching types as we use
+            // comparable encoding.
+            // Safety: arrow schema and datatypes are constructed from the same source.
+            let scalar_value = value
+                .try_to_scalar_value(&column.column_schema.data_type)
+                .unwrap();
+            result &= filter.evaluate_scalar(&scalar_value).unwrap_or(true);
+        }
+
+        result
+    }
+}
 
 /// Structs to reuse across readers to avoid allocating for each reader.
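Two allocation-related moves are visible in the `PrimaryKeyFilter` rewrite above: the filter decodes only the referenced tag via `decode_value_at` with a reusable `offsets_buf`, instead of decoding the whole composite key; and the evaluator list becomes `Arc<Vec<...>>`, so handing a clone of the filter to every shard reader is a pointer bump rather than a deep copy. A sketch of the sharing side, with stand-in types (none of these names are the real ones):

use std::sync::Arc;

#[derive(Clone)]
struct KeyFilter {
    filters: Arc<Vec<String>>, // stand-in for Arc<Vec<SimpleFilterEvaluator>>
    offsets_buf: Vec<usize>,   // per-clone scratch buffer, reused across keys
}

fn main() {
    let template = KeyFilter {
        filters: Arc::new(vec!["hostname = 'host_0'".to_string()]),
        offsets_buf: Vec::new(),
    };
    // One cheap clone per shard reader, as Partition::read does above.
    let per_shard: Vec<KeyFilter> = (0..4).map(|_| template.clone()).collect();
    assert_eq!(4, per_shard.len());
}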
@@ -443,7 +427,7 @@ pub(crate) struct ReadPartitionContext {
     metadata: RegionMetadataRef,
     row_codec: Arc<McmpRowCodec>,
     projection: HashSet<ColumnId>,
-    filters: Vec<SimpleFilterEvaluator>,
+    filters: Arc<Vec<SimpleFilterEvaluator>>,
     /// Buffer to store pk weights.
     pk_weights: Vec<u16>,
     need_prune_key: bool,
@@ -452,10 +436,6 @@ pub(crate) struct ReadPartitionContext {
 
 impl Drop for ReadPartitionContext {
     fn drop(&mut self) {
-        let partition_prune_pk = self.metrics.prune_pk.as_secs_f64();
-        MERGE_TREE_READ_STAGE_ELAPSED
-            .with_label_values(&["partition_prune_pk"])
-            .observe(partition_prune_pk);
         let partition_read_source = self.metrics.read_source.as_secs_f64();
         MERGE_TREE_READ_STAGE_ELAPSED
             .with_label_values(&["partition_read_source"])
@@ -465,16 +445,19 @@ impl Drop for ReadPartitionContext {
             .with_label_values(&["partition_data_batch_to_batch"])
             .observe(partition_data_batch_to_batch);
 
-        if self.metrics.keys_before_pruning != 0 {
-            common_telemetry::debug!(
-                "TreeIter pruning, before: {}, after: {}, partition_read_source: {}s, partition_prune_pk: {}s, partition_data_batch_to_batch: {}s",
-                self.metrics.keys_before_pruning,
-                self.metrics.keys_after_pruning,
-                partition_read_source,
-                partition_prune_pk,
-                partition_data_batch_to_batch,
-            );
-        }
+        common_telemetry::debug!(
+            "TreeIter partitions metrics, \
+            num_builder: {}, \
+            num_shards: {}, \
+            build_partition_reader: {}s, \
+            partition_read_source: {}s, \
+            partition_data_batch_to_batch: {}s",
+            self.metrics.num_builder,
+            self.metrics.num_shards,
+            self.metrics.build_partition_reader.as_secs_f64(),
+            partition_read_source,
+            partition_data_batch_to_batch,
+        );
     }
 }
 
@@ -490,7 +473,7 @@ impl ReadPartitionContext {
             metadata,
             row_codec,
             projection,
-            filters,
+            filters: Arc::new(filters),
             pk_weights: Vec::new(),
             need_prune_key,
             metrics: Default::default(),
@@ -578,7 +561,16 @@ impl Inner {
     fn new(metadata: RegionMetadataRef, config: &MergeTreeConfig) -> Self {
         let (shards, current_shard_id) = if metadata.primary_key.is_empty() {
             let data_parts = DataParts::new(metadata.clone(), DATA_INIT_CAP, config.dedup);
-            (vec![Shard::new(0, None, data_parts, config.dedup)], 1)
+            (
+                vec![Shard::new(
+                    0,
+                    None,
+                    data_parts,
+                    config.dedup,
+                    config.data_freeze_threshold,
+                )],
+                1,
+            )
         } else {
             (Vec::new(), 0)
         };
@@ -598,18 +590,22 @@ impl Inner {
         self.pk_to_pk_id.get(primary_key).copied()
     }
 
-    fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) {
+    fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
         if pk_id.shard_id == self.shard_builder.current_shard_id() {
             self.shard_builder.write_with_pk_id(pk_id, key_value);
-            return;
+            return Ok(());
         }
-        for shard in &mut self.shards {
-            if shard.shard_id == pk_id.shard_id {
-                shard.write_with_pk_id(pk_id, key_value);
-                self.num_rows += 1;
-                return;
-            }
-        }
+
+        // Safety: We find the shard by shard id.
+        let shard = self
+            .shards
+            .iter_mut()
+            .find(|shard| shard.shard_id == pk_id.shard_id)
+            .unwrap();
+        shard.write_with_pk_id(pk_id, key_value)?;
+        self.num_rows += 1;
+
+        Ok(())
     }
 
     fn freeze_active_shard(&mut self) -> Result<()> {
@@ -15,6 +15,7 @@
 //! Shard in a partition.
 
 use std::cmp::Ordering;
+use std::time::{Duration, Instant};
 
 use store_api::metadata::RegionMetadataRef;
 
@@ -25,8 +26,10 @@ use crate::memtable::merge_tree::data::{
 };
 use crate::memtable::merge_tree::dict::KeyDictRef;
 use crate::memtable::merge_tree::merger::{Merger, Node};
+use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
 use crate::memtable::merge_tree::shard_builder::ShardBuilderReader;
-use crate::memtable::merge_tree::{PkId, ShardId};
+use crate::memtable::merge_tree::{PkId, PkIndex, ShardId};
+use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;
 
 /// Shard stores data related to the same key dictionary.
 pub struct Shard {
@@ -36,6 +39,8 @@ pub struct Shard {
     /// Data in the shard.
     data_parts: DataParts,
     dedup: bool,
+    /// Number of rows to freeze a data part.
+    data_freeze_threshold: usize,
 }
 
 impl Shard {
@@ -45,20 +50,29 @@ impl Shard {
         key_dict: Option<KeyDictRef>,
         data_parts: DataParts,
         dedup: bool,
+        data_freeze_threshold: usize,
     ) -> Shard {
         Shard {
             shard_id,
             key_dict,
             data_parts,
             dedup,
+            data_freeze_threshold,
         }
     }
 
     /// Writes a key value into the shard.
-    pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) {
+    ///
+    /// It freezes the active buffer if it is full.
+    pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
         debug_assert_eq!(self.shard_id, pk_id.shard_id);
 
+        if self.data_parts.num_active_rows() >= self.data_freeze_threshold {
+            self.data_parts.freeze()?;
+        }
+
         self.data_parts.write_row(pk_id.pk_index, key_value);
+        Ok(())
     }
 
     /// Scans the shard.
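Since the freeze check above runs before each write, a threshold of T freezes the buffer just before writes T+1, 2T+1, and so on, leaving floor((n-1)/T) frozen parts after n writes. With T = 50 and n = 200, as in the `test_shard_freeze` test later in this diff, that is 3 frozen parts of 50 rows plus 50 rows still active, matching the test's assertion. A one-line check of the arithmetic:

// floor((n - 1) / t) frozen parts after n writes with threshold t.
fn frozen_parts(n: usize, t: usize) -> usize {
    n.saturating_sub(1) / t
}

fn main() {
    assert_eq!(3, frozen_parts(200, 50)); // freezes before writes 51, 101, 151
    assert_eq!(0, frozen_parts(50, 50)); // a merely full buffer never freezes
}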
@@ -80,6 +94,7 @@ impl Shard {
             key_dict: self.key_dict.clone(),
             data_parts: DataParts::new(metadata, DATA_INIT_CAP, self.dedup),
             dedup: self.dedup,
+            data_freeze_threshold: self.data_freeze_threshold,
         }
     }
 
@@ -131,18 +146,15 @@ pub struct ShardReaderBuilder {
 }
 
 impl ShardReaderBuilder {
-    pub(crate) fn build(self) -> Result<ShardReader> {
+    pub(crate) fn build(self, key_filter: Option<PrimaryKeyFilter>) -> Result<ShardReader> {
         let ShardReaderBuilder {
             shard_id,
             key_dict,
             inner,
         } = self;
+        let now = Instant::now();
         let parts_reader = inner.build()?;
-        Ok(ShardReader {
-            shard_id,
-            key_dict,
-            parts_reader,
-        })
+        ShardReader::new(shard_id, key_dict, parts_reader, key_filter, now.elapsed())
     }
 }
 
@@ -151,15 +163,46 @@ pub struct ShardReader {
     shard_id: ShardId,
     key_dict: Option<KeyDictRef>,
     parts_reader: DataPartsReader,
+    key_filter: Option<PrimaryKeyFilter>,
+    last_yield_pk_index: Option<PkIndex>,
+    keys_before_pruning: usize,
+    keys_after_pruning: usize,
+    prune_pk_cost: Duration,
+    data_build_cost: Duration,
 }
 
 impl ShardReader {
+    fn new(
+        shard_id: ShardId,
+        key_dict: Option<KeyDictRef>,
+        parts_reader: DataPartsReader,
+        key_filter: Option<PrimaryKeyFilter>,
+        data_build_cost: Duration,
+    ) -> Result<Self> {
+        let has_pk = key_dict.is_some();
+        let mut reader = Self {
+            shard_id,
+            key_dict,
+            parts_reader,
+            key_filter: if has_pk { key_filter } else { None },
+            last_yield_pk_index: None,
+            keys_before_pruning: 0,
+            keys_after_pruning: 0,
+            prune_pk_cost: Duration::default(),
+            data_build_cost,
+        };
+        reader.prune_batch_by_key()?;
+
+        Ok(reader)
+    }
+
     fn is_valid(&self) -> bool {
         self.parts_reader.is_valid()
     }
 
     fn next(&mut self) -> Result<()> {
-        self.parts_reader.next()
+        self.parts_reader.next()?;
+        self.prune_batch_by_key()
     }
 
     fn current_key(&self) -> Option<&[u8]> {
@@ -180,6 +223,54 @@ impl ShardReader {
     fn current_data_batch(&self) -> DataBatch {
         self.parts_reader.current_data_batch()
     }
+
+    fn prune_batch_by_key(&mut self) -> Result<()> {
+        let Some(key_filter) = &mut self.key_filter else {
+            return Ok(());
+        };
+
+        while self.parts_reader.is_valid() {
+            let pk_index = self.parts_reader.current_data_batch().pk_index();
+            if let Some(yield_pk_index) = self.last_yield_pk_index {
+                if pk_index == yield_pk_index {
+                    break;
+                }
+            }
+            self.keys_before_pruning += 1;
+            // Safety: `key_filter` is some so the shard has primary keys.
+            let key = self.key_dict.as_ref().unwrap().key_by_pk_index(pk_index);
+            let now = Instant::now();
+            if key_filter.prune_primary_key(key) {
+                self.prune_pk_cost += now.elapsed();
+                self.last_yield_pk_index = Some(pk_index);
+                self.keys_after_pruning += 1;
+                break;
+            }
+            self.prune_pk_cost += now.elapsed();
+            self.parts_reader.next()?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Drop for ShardReader {
+    fn drop(&mut self) {
+        let shard_prune_pk = self.prune_pk_cost.as_secs_f64();
+        MERGE_TREE_READ_STAGE_ELAPSED
+            .with_label_values(&["shard_prune_pk"])
+            .observe(shard_prune_pk);
+        if self.keys_before_pruning > 0 {
+            common_telemetry::debug!(
+                "ShardReader metrics, data parts: {}, before pruning: {}, after pruning: {}, prune cost: {}s, build cost: {}s",
+                self.parts_reader.num_parts(),
+                self.keys_before_pruning,
+                self.keys_after_pruning,
+                shard_prune_pk,
+                self.data_build_cost.as_secs_f64(),
+            );
+        }
+    }
 }
 
 /// A merger that merges batches from multiple shards.
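`prune_batch_by_key` above (and its twin in `ShardBuilderReader` below) memoizes `last_yield_pk_index`: batches arrive grouped by primary key, so once a key passes the filter, following batches with the same `pk_index` are yielded without re-running the predicate. A stripped-down sketch of the idea over a key-grouped stream (illustrative types; unlike the real readers, a rejected key is simply re-tested here instead of being skipped past):

// Last-match memoization: the expensive predicate runs once per distinct
// accepted key rather than once per batch.
fn filter_grouped<'a>(
    batches: &'a [(u16, &'a str)],          // (pk_index, payload), grouped by key
    mut pred: impl FnMut(u16) -> bool + 'a, // expensive primary-key predicate
) -> impl Iterator<Item = &'a str> + 'a {
    let mut last_yield: Option<u16> = None;
    batches.iter().filter_map(move |(pk, payload)| {
        if last_yield == Some(*pk) || pred(*pk) {
            last_yield = Some(*pk);
            Some(*payload)
        } else {
            None
        }
    })
}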
@@ -388,6 +479,7 @@ mod tests {
         shard_id: ShardId,
         metadata: RegionMetadataRef,
         input: &[(KeyValues, PkIndex)],
+        data_freeze_threshold: usize,
     ) -> Shard {
         let mut dict_builder = KeyDictBuilder::new(1024);
         let mut metrics = WriteMetrics::default();
@@ -402,27 +494,17 @@ mod tests {
         let dict = dict_builder.finish(&mut BTreeMap::new()).unwrap();
         let data_parts = DataParts::new(metadata, DATA_INIT_CAP, true);
 
-        Shard::new(shard_id, Some(Arc::new(dict)), data_parts, true)
+        Shard::new(
+            shard_id,
+            Some(Arc::new(dict)),
+            data_parts,
+            true,
+            data_freeze_threshold,
+        )
     }
 
-    #[test]
-    fn test_write_read_shard() {
-        let metadata = metadata_for_test();
-        let input = input_with_key(&metadata);
-        let mut shard = new_shard_with_dict(8, metadata, &input);
-        assert!(shard.is_empty());
-        for (key_values, pk_index) in &input {
-            for kv in key_values.iter() {
-                let pk_id = PkId {
-                    shard_id: shard.shard_id,
-                    pk_index: *pk_index,
-                };
-                shard.write_with_pk_id(pk_id, &kv);
-            }
-        }
-        assert!(!shard.is_empty());
-
-        let mut reader = shard.read().unwrap().build().unwrap();
+    fn collect_timestamps(shard: &Shard) -> Vec<i64> {
+        let mut reader = shard.read().unwrap().build(None).unwrap();
         let mut timestamps = Vec::new();
         while reader.is_valid() {
             let rb = reader.current_data_batch().slice_record_batch();
@@ -432,6 +514,64 @@ mod tests {
 
             reader.next().unwrap();
         }
+        timestamps
+    }
+
+    #[test]
+    fn test_write_read_shard() {
+        let metadata = metadata_for_test();
+        let input = input_with_key(&metadata);
+        let mut shard = new_shard_with_dict(8, metadata, &input, 100);
+        assert!(shard.is_empty());
+        for (key_values, pk_index) in &input {
+            for kv in key_values.iter() {
+                let pk_id = PkId {
+                    shard_id: shard.shard_id,
+                    pk_index: *pk_index,
+                };
+                shard.write_with_pk_id(pk_id, &kv).unwrap();
+            }
+        }
+        assert!(!shard.is_empty());
+
+        let timestamps = collect_timestamps(&shard);
         assert_eq!(vec![0, 1, 10, 11, 20, 21], timestamps);
     }
+
+    #[test]
+    fn test_shard_freeze() {
+        let metadata = metadata_for_test();
+        let kvs = build_key_values_with_ts_seq_values(
+            &metadata,
+            "shard".to_string(),
+            0,
+            [0].into_iter(),
+            [Some(0.0)].into_iter(),
+            0,
+        );
+        let mut shard = new_shard_with_dict(8, metadata.clone(), &[(kvs, 0)], 50);
+        let expected: Vec<_> = (0..200).collect();
+        for i in &expected {
+            let kvs = build_key_values_with_ts_seq_values(
+                &metadata,
+                "shard".to_string(),
+                0,
+                [*i].into_iter(),
+                [Some(0.0)].into_iter(),
+                *i as u64,
+            );
+            let pk_id = PkId {
+                shard_id: shard.shard_id,
+                pk_index: *i as PkIndex,
+            };
+            for kv in kvs.iter() {
+                shard.write_with_pk_id(pk_id, &kv).unwrap();
+            }
+        }
+        assert!(!shard.is_empty());
+        assert_eq!(3, shard.data_parts.frozen_len());
+
+        let timestamps = collect_timestamps(&shard);
+        assert_eq!(expected, timestamps);
+    }
 }
@@ -16,6 +16,7 @@
 
 use std::collections::{BTreeMap, HashMap};
 use std::sync::Arc;
+use std::time::{Duration, Instant};
 
 use store_api::metadata::RegionMetadataRef;
 
@@ -26,8 +27,9 @@ use crate::memtable::merge_tree::data::{
 };
 use crate::memtable::merge_tree::dict::{DictBuilderReader, KeyDictBuilder};
 use crate::memtable::merge_tree::metrics::WriteMetrics;
+use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
 use crate::memtable::merge_tree::shard::Shard;
-use crate::memtable::merge_tree::{MergeTreeConfig, PkId, ShardId};
+use crate::memtable::merge_tree::{MergeTreeConfig, PkId, PkIndex, ShardId};
 use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;
 
 /// Builder to write keys and data to a shard that the key dictionary
@@ -136,7 +138,13 @@ impl ShardBuilder {
         let shard_id = self.current_shard_id;
         self.current_shard_id += 1;
 
-        Ok(Some(Shard::new(shard_id, key_dict, data_parts, self.dedup)))
+        Ok(Some(Shard::new(
+            shard_id,
+            key_dict,
+            data_parts,
+            self.dedup,
+            self.data_freeze_threshold,
+        )))
     }
 
     /// Scans the shard builder.
@@ -176,13 +184,20 @@ pub(crate) struct ShardBuilderReaderBuilder {
 }
 
 impl ShardBuilderReaderBuilder {
-    pub(crate) fn build(self, pk_weights: Option<&[u16]>) -> Result<ShardBuilderReader> {
+    pub(crate) fn build(
+        self,
+        pk_weights: Option<&[u16]>,
+        key_filter: Option<PrimaryKeyFilter>,
+    ) -> Result<ShardBuilderReader> {
+        let now = Instant::now();
         let data_reader = self.data_reader.build(pk_weights)?;
-        Ok(ShardBuilderReader {
-            shard_id: self.shard_id,
-            dict_reader: self.dict_reader,
-            data_reader,
-        })
+        ShardBuilderReader::new(
+            self.shard_id,
+            self.dict_reader,
+            data_reader,
+            key_filter,
+            now.elapsed(),
+        )
     }
 }
 
@@ -191,15 +206,45 @@ pub struct ShardBuilderReader {
     shard_id: ShardId,
     dict_reader: DictBuilderReader,
     data_reader: DataBufferReader,
+    key_filter: Option<PrimaryKeyFilter>,
+    last_yield_pk_index: Option<PkIndex>,
+    keys_before_pruning: usize,
+    keys_after_pruning: usize,
+    prune_pk_cost: Duration,
+    data_build_cost: Duration,
 }
 
 impl ShardBuilderReader {
+    fn new(
+        shard_id: ShardId,
+        dict_reader: DictBuilderReader,
+        data_reader: DataBufferReader,
+        key_filter: Option<PrimaryKeyFilter>,
+        data_build_cost: Duration,
+    ) -> Result<Self> {
+        let mut reader = ShardBuilderReader {
+            shard_id,
+            dict_reader,
+            data_reader,
+            key_filter,
+            last_yield_pk_index: None,
+            keys_before_pruning: 0,
+            keys_after_pruning: 0,
+            prune_pk_cost: Duration::default(),
+            data_build_cost,
+        };
+        reader.prune_batch_by_key()?;
+
+        Ok(reader)
+    }
+
     pub fn is_valid(&self) -> bool {
         self.data_reader.is_valid()
     }
 
     pub fn next(&mut self) -> Result<()> {
-        self.data_reader.next()
+        self.data_reader.next()?;
+        self.prune_batch_by_key()
    }
 
     pub fn current_key(&self) -> Option<&[u8]> {
@@ -218,6 +263,52 @@ impl ShardBuilderReader {
     pub fn current_data_batch(&self) -> DataBatch {
         self.data_reader.current_data_batch()
     }
+
+    fn prune_batch_by_key(&mut self) -> Result<()> {
+        let Some(key_filter) = &mut self.key_filter else {
+            return Ok(());
+        };
+
+        while self.data_reader.is_valid() {
+            let pk_index = self.data_reader.current_data_batch().pk_index();
+            if let Some(yield_pk_index) = self.last_yield_pk_index {
+                if pk_index == yield_pk_index {
+                    break;
+                }
+            }
+            self.keys_before_pruning += 1;
+            let key = self.dict_reader.key_by_pk_index(pk_index);
+            let now = Instant::now();
+            if key_filter.prune_primary_key(key) {
+                self.prune_pk_cost += now.elapsed();
+                self.last_yield_pk_index = Some(pk_index);
+                self.keys_after_pruning += 1;
+                break;
+            }
+            self.prune_pk_cost += now.elapsed();
+            self.data_reader.next()?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Drop for ShardBuilderReader {
+    fn drop(&mut self) {
+        let shard_builder_prune_pk = self.prune_pk_cost.as_secs_f64();
+        MERGE_TREE_READ_STAGE_ELAPSED
+            .with_label_values(&["shard_builder_prune_pk"])
+            .observe(shard_builder_prune_pk);
+        if self.keys_before_pruning > 0 {
+            common_telemetry::debug!(
+                "ShardBuilderReader metrics, before pruning: {}, after pruning: {}, prune cost: {}s, build cost: {}s",
+                self.keys_before_pruning,
+                self.keys_after_pruning,
+                shard_builder_prune_pk,
+                self.data_build_cost.as_secs_f64(),
+            );
+        }
+    }
 }
 
 #[cfg(test)]
@@ -306,7 +397,7 @@ mod tests {
         let mut reader = shard_builder
             .read(&mut pk_weights)
             .unwrap()
-            .build(Some(&pk_weights))
+            .build(Some(&pk_weights), None)
             .unwrap();
         let mut timestamps = Vec::new();
         while reader.is_valid() {
@@ -124,7 +124,7 @@ impl MergeTree {
 
         if !has_pk {
             // No primary key.
-            self.write_no_key(kv);
+            self.write_no_key(kv)?;
             continue;
         }
 
@@ -299,7 +299,7 @@ impl MergeTree {
         )
     }
 
-    fn write_no_key(&self, key_value: KeyValue) {
+    fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
         let partition_key = Partition::get_partition_key(&key_value, self.is_partitioned);
         let partition = self.get_or_create_partition(partition_key);
 
@@ -171,6 +171,8 @@ impl RegionOpener {
         // Initial memtable id is 0.
         let mutable = self.memtable_builder.build(0, &metadata);
 
+        debug!("Create region {} with options: {:?}", region_id, options);
+
         let version = VersionBuilder::new(metadata, mutable)
             .options(options)
             .build();
@@ -249,6 +251,9 @@ impl RegionOpener {
 
         let region_id = self.region_id;
         let object_store = self.object_store(&region_options.storage)?.clone();
+
+        debug!("Open region {} with options: {:?}", region_id, self.options);
+
         let access_layer = Arc::new(AccessLayer::new(
             self.region_dir.clone(),
             object_store,
@@ -13,6 +13,8 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
//! Options for a region.
|
//! Options for a region.
|
||||||
|
//!
|
||||||
|
//! If we add options in this mod, we also need to modify [store_api::mito_engine_options].
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -358,6 +360,7 @@ mod tests {
|
|||||||
("compaction.type", "twcs"),
|
("compaction.type", "twcs"),
|
||||||
("storage", "S3"),
|
("storage", "S3"),
|
||||||
("index.inverted_index.ignore_column_ids", "1,2,3"),
|
("index.inverted_index.ignore_column_ids", "1,2,3"),
|
||||||
|
("index.inverted_index.segment_row_count", "512"),
|
||||||
(
|
(
|
||||||
WAL_OPTIONS_KEY,
|
WAL_OPTIONS_KEY,
|
||||||
&serde_json::to_string(&wal_options).unwrap(),
|
&serde_json::to_string(&wal_options).unwrap(),
|
||||||
@@ -376,7 +379,7 @@ mod tests {
|
|||||||
index_options: IndexOptions {
|
index_options: IndexOptions {
|
||||||
inverted_index: InvertedIndexOptions {
|
inverted_index: InvertedIndexOptions {
|
||||||
ignore_column_ids: vec![1, 2, 3],
|
ignore_column_ids: vec![1, 2, 3],
|
||||||
segment_row_count: 1024,
|
segment_row_count: 512,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -215,6 +215,61 @@ impl SortField {
             Decimal128, Decimal128
         )
     }
+
+    /// Skip deserializing this field, returns the length of it.
+    fn skip_deserialize(
+        &self,
+        bytes: &[u8],
+        deserializer: &mut Deserializer<&[u8]>,
+    ) -> Result<usize> {
+        let pos = deserializer.position();
+        if bytes[pos] == 0 {
+            deserializer.advance(1);
+            return Ok(1);
+        }
+
+        let to_skip = match &self.data_type {
+            ConcreteDataType::Boolean(_) => 2,
+            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => 2,
+            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => 3,
+            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => 5,
+            ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
+            ConcreteDataType::Float32(_) => 5,
+            ConcreteDataType::Float64(_) => 9,
+            ConcreteDataType::Binary(_) => {
+                // Now the encoder encode binary as a list of bytes so we can't use
+                // skip bytes.
+                let pos_before = deserializer.position();
+                let mut current = pos_before + 1;
+                while bytes[current] == 1 {
+                    current += 2;
+                }
+                let to_skip = current - pos_before + 1;
+                deserializer.advance(to_skip);
+                return Ok(to_skip);
+            }
+            ConcreteDataType::String(_) => {
+                let pos_before = deserializer.position();
+                deserializer.advance(1);
+                deserializer
+                    .skip_bytes()
+                    .context(error::DeserializeFieldSnafu)?;
+                return Ok(deserializer.position() - pos_before);
+            }
+            ConcreteDataType::Date(_) => 5,
+            ConcreteDataType::DateTime(_) => 9,
+            ConcreteDataType::Timestamp(_) => 9, // We treat timestamp as Option<i64>
+            ConcreteDataType::Time(_) => 10,     // i64 and 1 byte time unit
+            ConcreteDataType::Duration(_) => 10,
+            ConcreteDataType::Interval(_) => 18,
+            ConcreteDataType::Decimal128(_) => 19,
+            ConcreteDataType::Null(_)
+            | ConcreteDataType::List(_)
+            | ConcreteDataType::Dictionary(_) => 0,
+        };
+        deserializer.advance(to_skip);
+        Ok(to_skip)
+    }
 }
 
 /// A memory-comparable row [Value] encoder/decoder.
@@ -236,6 +291,52 @@ impl McmpRowCodec {
     pub fn estimated_size(&self) -> usize {
         self.fields.iter().map(|f| f.estimated_size()).sum()
     }
+
+    /// Decode value at `pos` in `bytes`.
+    ///
+    /// The i-th element in offsets buffer is how many bytes to skip in order to read value at `pos`.
+    pub fn decode_value_at(
+        &self,
+        bytes: &[u8],
+        pos: usize,
+        offsets_buf: &mut Vec<usize>,
+    ) -> Result<Value> {
+        let mut deserializer = Deserializer::new(bytes);
+        if pos < offsets_buf.len() {
+            // We computed the offset before.
+            let to_skip = offsets_buf[pos];
+            deserializer.advance(to_skip);
+            return self.fields[pos].deserialize(&mut deserializer);
+        }
+
+        if offsets_buf.is_empty() {
+            let mut offset = 0;
+            // Skip values before `pos`.
+            for i in 0..pos {
+                // Offset to skip before reading value i.
+                offsets_buf.push(offset);
+                let skip = self.fields[i].skip_deserialize(bytes, &mut deserializer)?;
+                offset += skip;
+            }
+            // Offset to skip before reading this value.
+            offsets_buf.push(offset);
+        } else {
+            // Offsets are not enough.
+            let value_start = offsets_buf.len() - 1;
+            // Advances to decode value at `value_start`.
+            let mut offset = offsets_buf[value_start];
+            deserializer.advance(offset);
+            for i in value_start..pos {
+                // Skip value i.
+                let skip = self.fields[i].skip_deserialize(bytes, &mut deserializer)?;
+                // Offset for the value at i + 1.
+                offset += skip;
+                offsets_buf.push(offset);
+            }
+        }
+
+        self.fields[pos].deserialize(&mut deserializer)
+    }
 }
 
 impl RowCodec for McmpRowCodec {
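`decode_value_at` above amortizes repeated point reads of one encoded row by caching per-field byte offsets, so each prefix field is scanned at most once. A self-contained sketch of the same idea over a toy length-prefixed encoding (nothing here is the real `McmpRowCodec` format):

/// Toy row format: each field is one length byte followed by that many
/// payload bytes. `offsets[i]` caches how many bytes precede field `i`.
fn decode_field_at<'a>(bytes: &'a [u8], pos: usize, offsets: &mut Vec<usize>) -> &'a [u8] {
    if offsets.is_empty() {
        offsets.push(0);
    }
    // Cursor sits at the start of the last field whose offset is cached.
    let mut cursor = *offsets.last().unwrap();
    while offsets.len() <= pos {
        let len = bytes[cursor] as usize;
        cursor += 1 + len; // skip length byte + payload
        offsets.push(cursor);
    }
    let start = offsets[pos];
    let len = bytes[start] as usize;
    &bytes[start + 1..start + 1 + len]
}

fn main() {
    let row = [2, b'h', b'i', 3, b'f', b'o', b'o', 1, b'!'];
    let mut offsets = Vec::new();
    assert_eq!(decode_field_at(&row, 2, &mut offsets), b"!");
    // The second lookup reuses the cached offsets instead of rescanning.
    assert_eq!(decode_field_at(&row, 1, &mut offsets), b"foo");
}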
@@ -274,7 +375,7 @@ impl RowCodec for McmpRowCodec {
 #[cfg(test)]
 mod tests {
     use common_base::bytes::StringBytes;
-    use common_time::Timestamp;
+    use common_time::{DateTime, Timestamp};
     use datatypes::value::Value;
 
     use super::*;

@@ -292,6 +393,18 @@ mod tests {
         let result = encoder.encode(value_ref.iter().cloned()).unwrap();
         let decoded = encoder.decode(&result).unwrap();
         assert_eq!(decoded, row);
+        let mut decoded = Vec::new();
+        let mut offsets = Vec::new();
+        // Iter two times to test offsets buffer.
+        for _ in 0..2 {
+            decoded.clear();
+            for i in 0..data_types.len() {
+                let value = encoder.decode_value_at(&result, i, &mut offsets).unwrap();
+                decoded.push(value);
+            }
+            assert_eq!(data_types.len(), offsets.len(), "offsets: {:?}", offsets);
+            assert_eq!(decoded, row);
+        }
     }
 
     #[test]

@@ -416,5 +529,53 @@ mod tests {
             ],
             vec![Value::Null, Value::Int64(43), Value::Boolean(true)],
         );
+
+        // All types.
+        check_encode_and_decode(
+            &[
+                ConcreteDataType::boolean_datatype(),
+                ConcreteDataType::int8_datatype(),
+                ConcreteDataType::uint8_datatype(),
+                ConcreteDataType::int16_datatype(),
+                ConcreteDataType::uint16_datatype(),
+                ConcreteDataType::int32_datatype(),
+                ConcreteDataType::uint32_datatype(),
+                ConcreteDataType::int64_datatype(),
+                ConcreteDataType::uint64_datatype(),
+                ConcreteDataType::float32_datatype(),
+                ConcreteDataType::float64_datatype(),
+                ConcreteDataType::binary_datatype(),
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::date_datatype(),
+                ConcreteDataType::datetime_datatype(),
+                ConcreteDataType::timestamp_millisecond_datatype(),
+                ConcreteDataType::time_millisecond_datatype(),
+                ConcreteDataType::duration_millisecond_datatype(),
+                ConcreteDataType::interval_month_day_nano_datatype(),
+                ConcreteDataType::decimal128_default_datatype(),
+            ],
+            vec![
+                Value::Boolean(true),
+                Value::Int8(8),
+                Value::UInt8(8),
+                Value::Int16(16),
+                Value::UInt16(16),
+                Value::Int32(32),
+                Value::UInt32(32),
+                Value::Int64(64),
+                Value::UInt64(64),
+                Value::Float32(1.0.into()),
+                Value::Float64(1.0.into()),
+                Value::Binary(b"hello"[..].into()),
+                Value::String("world".into()),
+                Value::Date(Date::new(10)),
+                Value::DateTime(DateTime::new(11)),
+                Value::Timestamp(Timestamp::new_millisecond(12)),
+                Value::Time(Time::new_millisecond(13)),
+                Value::Duration(Duration::new_millisecond(14)),
+                Value::Interval(Interval::from_month_day_nano(1, 1, 15)),
+                Value::Decimal128(Decimal128::from(16)),
+            ],
+        );
     }
 }
@@ -219,25 +219,14 @@ pub(crate) fn extract_data_batch(batch: &DataBatch) -> (u16, Vec<(i64, u64)>) {
 
 /// Builds key values with timestamps (ms) and sequences for test.
 pub(crate) fn build_key_values_with_ts_seq_values(
-    schema: &RegionMetadataRef,
+    metadata: &RegionMetadataRef,
     k0: String,
     k1: u32,
     timestamps: impl Iterator<Item = i64>,
    values: impl Iterator<Item = Option<f64>>,
     sequence: SequenceNumber,
 ) -> KeyValues {
-    let column_schema = schema
-        .column_metadatas
-        .iter()
-        .map(|c| api::v1::ColumnSchema {
-            column_name: c.column_schema.name.clone(),
-            datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
-                .unwrap()
-                .datatype() as i32,
-            semantic_type: c.semantic_type as i32,
-            ..Default::default()
-        })
-        .collect();
+    let column_schema = region_metadata_to_row_schema(metadata);
 
     let rows = timestamps
         .zip(values)

@@ -269,7 +258,23 @@ pub(crate) fn build_key_values_with_ts_seq_values(
             rows,
         }),
     };
-    KeyValues::new(schema.as_ref(), mutation).unwrap()
+    KeyValues::new(metadata.as_ref(), mutation).unwrap()
+}
+
+/// Converts the region metadata to column schemas for a row.
+pub fn region_metadata_to_row_schema(metadata: &RegionMetadataRef) -> Vec<api::v1::ColumnSchema> {
+    metadata
+        .column_metadatas
+        .iter()
+        .map(|c| api::v1::ColumnSchema {
+            column_name: c.column_schema.name.clone(),
+            datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
+                .unwrap()
+                .datatype() as i32,
+            semantic_type: c.semantic_type as i32,
+            ..Default::default()
+        })
+        .collect()
 }
 
 /// Encode keys.
@@ -18,7 +18,7 @@ futures.workspace = true
 lazy_static.workspace = true
 md5 = "0.7"
 moka = { workspace = true, features = ["future"] }
-opendal = { version = "0.44", features = [
+opendal = { version = "0.45", features = [
     "layers-tracing",
 ] }
 prometheus.workspace = true
@@ -91,7 +91,8 @@ impl Deleter {
             .await?;
 
         let affected_rows = self.do_request(deletes, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 
     pub async fn handle_table_delete(

@@ -111,7 +111,7 @@ impl Inserter {
             .await?;
 
         let affected_rows = self.do_request(inserts, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 
     /// Handle row inserts request with metric engine.

@@ -149,7 +149,7 @@ impl Inserter {
             .await?;
 
         let affected_rows = self.do_request(inserts, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 
     pub async fn handle_table_insert(

@@ -185,7 +185,7 @@ impl Inserter {
             .await?;
 
         let affected_rows = self.do_request(inserts, ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 }
 
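A long run of hunks here mechanically replaces `Output::AffectedRows(n)` with `Output::new_with_affected_rows(n)`, and later hunks match on `output.data` against `OutputData::*` variants. Together those call sites imply `Output` changed from an enum into a struct carrying data plus metadata. A minimal sketch of the implied shape; the real definition lives in the `common_query`/`client` crates and may differ in detail:

pub enum OutputData {
    AffectedRows(usize),
    // RecordBatches(..) and Stream(..) variants elided.
}

/// Metadata carried alongside the data, e.g. the physical plan.
#[derive(Default)]
pub struct OutputMeta;

pub struct Output {
    pub data: OutputData,
    pub meta: OutputMeta,
}

impl Output {
    pub fn new(data: OutputData, meta: OutputMeta) -> Self {
        Output { data, meta }
    }

    pub fn new_with_affected_rows(affected_rows: usize) -> Self {
        Self::new(OutputData::AffectedRows(affected_rows), OutputMeta)
    }
}

fn main() {
    let out = Output::new_with_affected_rows(3);
    match out.data {
        OutputData::AffectedRows(n) => assert_eq!(n, 3),
    }
}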
@@ -468,8 +468,6 @@ impl Inserter {
             &req.table_name,
         );
 
-        info!("Logical table `{table_ref}` does not exist, try creating table");
-
         let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
         let mut create_table_expr = build_create_table_expr(&table_ref, request_schema)?;
 

@@ -40,12 +40,13 @@ use query::plan::LogicalPlan;
 use query::QueryEngineRef;
 use session::context::QueryContextRef;
 use session::table_name::table_idents_to_full_name;
-use snafu::{OptionExt, ResultExt};
+use snafu::{ensure, OptionExt, ResultExt};
 use sql::statements::copy::{CopyDatabase, CopyDatabaseArgument, CopyTable, CopyTableArgument};
+use sql::statements::set_variables::SetVariables;
 use sql::statements::statement::Statement;
 use sql::statements::OptionMap;
 use sql::util::format_raw_object_name;
-use sqlparser::ast::{Expr, ObjectName, Value};
+use sqlparser::ast::{Expr, Ident, ObjectName, Value};
 use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
 use table::table_reference::TableReference;
 use table::TableRef;
@@ -122,11 +123,11 @@ impl StatementExecutor {
             CopyDirection::Export => self
                 .copy_table_to(req, query_ctx)
                 .await
-                .map(Output::AffectedRows),
+                .map(Output::new_with_affected_rows),
             CopyDirection::Import => self
                 .copy_table_from(req, query_ctx)
                 .await
-                .map(Output::AffectedRows),
+                .map(Output::new_with_affected_rows),
         }
     }
 

@@ -151,15 +152,15 @@ impl StatementExecutor {
 
             Statement::CreateTable(stmt) => {
                 let _ = self.create_table(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::CreateTableLike(stmt) => {
                 let _ = self.create_table_like(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::CreateExternalTable(stmt) => {
                 let _ = self.create_external_table(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::Alter(alter_table) => self.alter_table(alter_table, query_ctx).await,
             Statement::DropTable(stmt) => {

@@ -207,6 +208,22 @@ impl StatementExecutor {
                 let var_name = set_var.variable.to_string().to_uppercase();
                 match var_name.as_str() {
                     "TIMEZONE" | "TIME_ZONE" => set_timezone(set_var.value, query_ctx)?,
+
+                    // Some postgresql client app may submit a "SET bytea_output" stmt upon connection.
+                    // However, currently we lack the support for it (tracked in https://github.com/GreptimeTeam/greptimedb/issues/3438),
+                    // so we just ignore it here instead of returning an error to break the connection.
+                    // Since the "bytea_output" only determines the output format of binary values,
+                    // it won't cause much trouble if we do so.
+                    // TODO(#3438): Remove this temporary workaround after the feature is implemented.
+                    "BYTEA_OUTPUT" => (),
+
+                    // Same as "bytea_output", we just ignore it here.
+                    // Not harmful since it only relates to how date is viewed in client app's output.
+                    // The tracked issue is https://github.com/GreptimeTeam/greptimedb/issues/3442.
+                    // TODO(#3442): Remove this temporary workaround after the feature is implemented.
+                    "DATESTYLE" => (),
+
+                    "CLIENT_ENCODING" => validate_client_encoding(set_var)?,
                     _ => {
                         return NotSupportedSnafu {
                             feat: format!("Unsupported set variable {}", var_name),

@@ -214,7 +231,7 @@ impl StatementExecutor {
                         .fail()
                     }
                 }
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::ShowVariables(show_variable) => self.show_variable(show_variable, query_ctx),
         }
@@ -257,6 +274,39 @@ impl StatementExecutor {
     }
 }
 
+fn validate_client_encoding(set: SetVariables) -> Result<()> {
+    let Some((encoding, [])) = set.value.split_first() else {
+        return InvalidSqlSnafu {
+            err_msg: "must provide one and only one client encoding value",
+        }
+        .fail();
+    };
+    let encoding = match encoding {
+        Expr::Value(Value::SingleQuotedString(x))
+        | Expr::Identifier(Ident {
+            value: x,
+            quote_style: _,
+        }) => x.to_uppercase(),
+        _ => {
+            return InvalidSqlSnafu {
+                err_msg: format!("client encoding must be a string, actual: {:?}", encoding),
+            }
+            .fail();
+        }
+    };
+    // For the sake of simplicity, we only support "UTF8" ("UNICODE" is the alias for it,
+    // see https://www.postgresql.org/docs/current/multibyte.html#MULTIBYTE-CHARSET-SUPPORTED).
+    // "UTF8" is universal and sufficient for almost all cases.
+    // GreptimeDB itself is always using "UTF8" as the internal encoding.
+    ensure!(
+        encoding == "UTF8" || encoding == "UNICODE",
+        NotSupportedSnafu {
+            feat: format!("client encoding of '{}'", encoding)
+        }
+    );
+    Ok(())
+}
+
 fn set_timezone(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
     let tz_expr = exprs.first().context(NotSupportedSnafu {
         feat: "No timezone find in set variable statement",
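The validation above uses `split_first()` with an empty-rest slice pattern to require exactly one value in a single match. A standalone sketch of just that pattern:

fn exactly_one<T>(values: &[T]) -> Option<&T> {
    // `(first, [])` only matches when the remainder after the first
    // element is empty, i.e. the slice has exactly one element.
    let Some((first, [])) = values.split_first() else {
        return None; // zero elements, or more than one
    };
    Some(first)
}

fn main() {
    assert!(exactly_one::<i32>(&[]).is_none());
    assert_eq!(exactly_one(&[42]), Some(&42));
    assert!(exactly_one(&[1, 2]).is_none());
}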
@@ -15,10 +15,10 @@
 use std::path::Path;
 use std::str::FromStr;
 
+use client::Output;
 use common_datasource::file_format::Format;
 use common_datasource::lister::{Lister, Source};
 use common_datasource::object_store::build_backend;
-use common_query::Output;
 use common_telemetry::{debug, error, info, tracing};
 use object_store::Entry;
 use regex::Regex;

@@ -96,7 +96,7 @@ impl StatementExecutor {
                 .await?;
             exported_rows += exported;
         }
-        Ok(Output::AffectedRows(exported_rows))
+        Ok(Output::new_with_affected_rows(exported_rows))
     }
 
     /// Imports data to database from a given location and returns total rows imported.

@@ -169,7 +169,7 @@ impl StatementExecutor {
                 }
             }
         }
-        Ok(Output::AffectedRows(rows_inserted))
+        Ok(Output::new_with_affected_rows(rows_inserted))
     }
 }
 
@@ -14,6 +14,7 @@
 
 use std::sync::Arc;
 
+use client::OutputData;
 use common_base::readable_size::ReadableSize;
 use common_datasource::file_format::csv::stream_to_csv;
 use common_datasource::file_format::json::stream_to_json;

@@ -21,7 +22,6 @@ use common_datasource::file_format::parquet::stream_to_parquet;
 use common_datasource::file_format::Format;
 use common_datasource::object_store::{build_backend, parse_url};
 use common_datasource::util::find_dir_and_filename;
-use common_query::Output;
 use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::{debug, tracing};
@@ -134,9 +134,9 @@ impl StatementExecutor {
             .execute(LogicalPlan::DfPlan(plan), query_ctx)
             .await
             .context(ExecLogicalPlanSnafu)?;
-        let stream = match output {
-            Output::Stream(stream, _) => stream,
-            Output::RecordBatches(record_batches) => record_batches.as_stream(),
+        let stream = match output.data {
+            OutputData::Stream(stream) => stream,
+            OutputData::RecordBatches(record_batches) => record_batches.as_stream(),
             _ => unreachable!(),
         };
 

@@ -338,10 +338,10 @@ impl StatementExecutor {
                 .await
                 .context(error::InvalidateTableCacheSnafu)?;
 
-            Ok(Output::AffectedRows(0))
+            Ok(Output::new_with_affected_rows(0))
         } else if drop_if_exists {
             // DROP TABLE IF EXISTS meets table not found - ignored
-            Ok(Output::AffectedRows(0))
+            Ok(Output::new_with_affected_rows(0))
         } else {
             Err(TableNotFoundSnafu {
                 table_name: table_name.to_string(),

@@ -367,7 +367,7 @@ impl StatementExecutor {
         let table_id = table.table_info().table_id();
         self.truncate_table_procedure(&table_name, table_id).await?;
 
-        Ok(Output::AffectedRows(0))
+        Ok(Output::new_with_affected_rows(0))
     }
 
     fn verify_alter(

@@ -471,7 +471,7 @@ impl StatementExecutor {
             .await
             .context(error::InvalidateTableCacheSnafu)?;
 
-        Ok(Output::AffectedRows(0))
+        Ok(Output::new_with_affected_rows(0))
     }
 
     async fn create_table_procedure(

@@ -580,7 +580,7 @@ impl StatementExecutor {
 
         if exists {
             return if create_if_not_exists {
-                Ok(Output::AffectedRows(1))
+                Ok(Output::new_with_affected_rows(1))
             } else {
                 error::SchemaExistsSnafu { name: database }.fail()
             };

@@ -592,7 +592,7 @@ impl StatementExecutor {
             .await
             .context(TableMetadataManagerSnafu)?;
 
-        Ok(Output::AffectedRows(1))
+        Ok(Output::new_with_affected_rows(1))
     }
 }
 
@@ -429,7 +429,7 @@ mod test {
             ts_range,
             value_range,
             timestamps,
-            // that two `2.0` is because `duration_to_start` are shrunk to to
+            // that two `2.0` is because `duration_to_start` are shrunk to
             // `duration_to_zero`, and causes `duration_to_zero` less than
             // `extrapolation_threshold`.
             vec![2.0, 1.5, 1.5, 1.5, 2.0, 1.5, 1.5, 1.5],
@@ -28,7 +28,7 @@ use common_function::function::FunctionRef;
 use common_function::scalars::aggregate::AggregateFunctionMetaRef;
 use common_query::physical_plan::{DfPhysicalPlanAdapter, PhysicalPlan, PhysicalPlanAdapter};
 use common_query::prelude::ScalarUdf;
-use common_query::Output;
+use common_query::{Output, OutputData, OutputMeta};
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{EmptyRecordBatchStream, SendableRecordBatchStream};
 use common_telemetry::tracing;

@@ -90,9 +90,9 @@ impl DatafusionQueryEngine {
             optimized_physical_plan
         };
 
-        Ok(Output::Stream(
-            self.execute_stream(&ctx, &physical_plan)?,
-            Some(physical_plan),
+        Ok(Output::new(
+            OutputData::Stream(self.execute_stream(&ctx, &physical_plan)?),
+            OutputMeta::new_with_plan(physical_plan),
         ))
     }
 

@@ -121,9 +121,9 @@ impl DatafusionQueryEngine {
         let output = self
             .exec_query_plan(LogicalPlan::DfPlan((*dml.input).clone()), query_ctx.clone())
             .await?;
-        let mut stream = match output {
-            Output::RecordBatches(batches) => batches.as_stream(),
-            Output::Stream(stream, _) => stream,
+        let mut stream = match output.data {
+            OutputData::RecordBatches(batches) => batches.as_stream(),
+            OutputData::Stream(stream) => stream,
             _ => unreachable!(),
         };
 

@@ -148,7 +148,7 @@ impl DatafusionQueryEngine {
             };
             affected_rows += rows;
         }
-        Ok(Output::AffectedRows(affected_rows))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 
     #[tracing::instrument(skip_all)]

@@ -471,7 +471,6 @@ mod tests {
 
     use catalog::RegisterTableRequest;
     use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
-    use common_query::Output;
    use common_recordbatch::util;
     use datafusion::prelude::{col, lit};
     use datatypes::prelude::ConcreteDataType;

@@ -534,8 +533,8 @@ mod tests {
 
         let output = engine.execute(plan, QueryContext::arc()).await.unwrap();
 
-        match output {
-            Output::Stream(recordbatch, _) => {
+        match output.data {
+            OutputData::Stream(recordbatch) => {
                 let numbers = util::collect(recordbatch).await.unwrap();
                 assert_eq!(1, numbers.len());
                 assert_eq!(numbers[0].num_columns(), 1);
@@ -15,7 +15,8 @@
 use std::pin::Pin;
 use std::task::{Context, Poll};
 
-use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
+use common_recordbatch::adapter::RecordBatchMetrics;
+use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
 use datatypes::schema::SchemaRef;
 use futures::Stream;
 use futures_util::ready;

@@ -78,6 +79,14 @@ impl<F: FnOnce() + Unpin> RecordBatchStream for OnDone<F> {
     fn schema(&self) -> SchemaRef {
         self.stream.schema()
     }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        self.stream.output_ordering()
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        self.stream.metrics()
+    }
 }
 
 impl<F: FnOnce() + Unpin> Stream for OnDone<F> {
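The `OnDone` hunk matters because a wrapper stream that implements only the required trait methods silently discards the inner stream's ordering and metrics through the default implementations. A tiny sketch of the delegation pattern; the trait and method here are illustrative, not the `RecordBatchStream` API:

trait MetaStream {
    fn ordering(&self) -> Option<&str> {
        None // default: no ordering reported
    }
}

struct Inner;
impl MetaStream for Inner {
    fn ordering(&self) -> Option<&str> {
        Some("ts ASC")
    }
}

struct Wrapper<S>(S);
impl<S: MetaStream> MetaStream for Wrapper<S> {
    // Without this explicit delegation, the default `None` would hide
    // the inner ordering from whoever consumes the wrapped stream.
    fn ordering(&self) -> Option<&str> {
        self.0.ordering()
    }
}

fn main() {
    assert_eq!(Wrapper(Inner).ordering(), Some("ts ASC"));
}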
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::hash_map::Entry;
-use std::collections::HashMap;
+use std::any::Any;
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::{BTreeMap, HashMap};
 use std::fmt::Display;
 use std::pin::Pin;
 use std::sync::Arc;

@@ -21,8 +23,8 @@ use std::task::{Context, Poll};
 use std::time::Duration;
 
 use ahash::RandomState;
-use arrow::compute::{self, cast_with_options, CastOptions};
-use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use arrow::compute::{self, cast_with_options, CastOptions, SortColumn};
+use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions, TimeUnit};
 use common_query::DfPhysicalPlan;
 use common_recordbatch::DfSendableRecordBatchStream;
 use datafusion::common::{Result as DataFusionResult, Statistics};

@@ -35,10 +37,14 @@ use datafusion::physical_plan::{
     SendableRecordBatchStream,
 };
 use datafusion::physical_planner::create_physical_sort_expr;
-use datafusion_common::utils::get_arrayref_at_indices;
+use datafusion_common::utils::{get_arrayref_at_indices, get_row_at_idx};
 use datafusion_common::{DFField, DFSchema, DFSchemaRef, DataFusionError, ScalarValue};
-use datafusion_expr::utils::exprlist_to_fields;
-use datafusion_expr::{Accumulator, Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_expr::utils::{exprlist_to_fields, COUNT_STAR_EXPANSION};
+use datafusion_expr::{
+    lit, Accumulator, AggregateFunction, Expr, ExprSchemable, LogicalPlan,
+    UserDefinedLogicalNodeCore,
+};
+use datafusion_physical_expr::aggregate::utils::down_cast_any_ref;
 use datafusion_physical_expr::expressions::create_aggregate_expr as create_aggr_expr;
 use datafusion_physical_expr::hash_utils::create_hashes;
 use datafusion_physical_expr::{
@@ -58,6 +64,140 @@ use crate::error::{DataFusionSnafu, RangeQuerySnafu, Result};
 
 type Millisecond = <TimestampMillisecondType as ArrowPrimitiveType>::Native;
 
+/// Implementation of `first_value`/`last_value`
+/// aggregate function adapted to range query
+#[derive(Debug)]
+struct RangeFirstListValue {
+    /// calculate expr
+    expr: Arc<dyn PhysicalExpr>,
+    order_bys: Vec<PhysicalSortExpr>,
+}
+
+impl RangeFirstListValue {
+    pub fn new_aggregate_expr(
+        expr: Arc<dyn PhysicalExpr>,
+        order_bys: Vec<PhysicalSortExpr>,
+    ) -> Arc<dyn AggregateExpr> {
+        Arc::new(Self { expr, order_bys })
+    }
+}
+
+impl PartialEq<dyn Any> for RangeFirstListValue {
+    fn eq(&self, other: &dyn Any) -> bool {
+        down_cast_any_ref(other)
+            .downcast_ref::<Self>()
+            .map(|x| self.expr.eq(&x.expr) && self.order_bys.iter().eq(x.order_bys.iter()))
+            .unwrap_or(false)
+    }
+}
+
+impl AggregateExpr for RangeFirstListValue {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn create_accumulator(&self) -> DataFusionResult<Box<dyn Accumulator>> {
+        Ok(Box::new(RangeFirstListValueAcc::new(
+            self.order_bys.iter().map(|order| order.options).collect(),
+        )))
+    }
+
+    fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
+        let mut exprs: Vec<_> = self
+            .order_bys
+            .iter()
+            .map(|order| order.expr.clone())
+            .collect();
+        exprs.push(self.expr.clone());
+        exprs
+    }
+
+    fn field(&self) -> DataFusionResult<Field> {
+        unreachable!("AggregateExpr::field will not be used in range query")
+    }
+
+    fn state_fields(&self) -> DataFusionResult<Vec<Field>> {
+        unreachable!("AggregateExpr::state_fields will not be used in range query")
+    }
+}
+
+#[derive(Debug)]
+pub struct RangeFirstListValueAcc {
+    pub sort_options: Vec<SortOptions>,
+    pub sort_columns: Vec<ScalarValue>,
+    pub data: Option<ScalarValue>,
+}
+
+impl RangeFirstListValueAcc {
+    pub fn new(sort_options: Vec<SortOptions>) -> Self {
+        Self {
+            sort_options,
+            sort_columns: vec![],
+            data: None,
+        }
+    }
+}
+
+impl Accumulator for RangeFirstListValueAcc {
+    fn update_batch(&mut self, values: &[ArrayRef]) -> DataFusionResult<()> {
+        let columns: Vec<_> = values
+            .iter()
+            .zip(self.sort_options.iter())
+            .map(|(v, s)| SortColumn {
+                values: v.clone(),
+                options: Some(*s),
+            })
+            .collect();
+        // finding the Top1 problem with complexity O(n)
+        let idx = compute::lexsort_to_indices(&columns, Some(1))?.value(0);
+        let vs = get_row_at_idx(values, idx as usize)?;
+        let need_update = self.data.is_none()
+            || vs
+                .iter()
+                .zip(self.sort_columns.iter())
+                .zip(self.sort_options.iter())
+                .find_map(|((new_value, old_value), sort_option)| {
+                    if new_value.is_null() && old_value.is_null() {
+                        None
+                    } else if sort_option.nulls_first
+                        && (new_value.is_null() || old_value.is_null())
+                    {
+                        Some(new_value.is_null())
+                    } else {
+                        new_value.partial_cmp(old_value).map(|x| {
+                            (x == Ordering::Greater && sort_option.descending)
+                                || (x == Ordering::Less && !sort_option.descending)
+                        })
+                    }
+                })
+                .unwrap_or(false);
+        if need_update {
+            self.sort_columns = vs;
+            self.data = Some(ScalarValue::try_from_array(
+                &values[self.sort_options.len()],
+                idx as usize,
+            )?);
+        }
+        Ok(())
+    }
+
+    fn evaluate(&self) -> DataFusionResult<ScalarValue> {
+        Ok(self.data.clone().unwrap_or(ScalarValue::Null))
+    }
+
+    fn size(&self) -> usize {
+        std::mem::size_of_val(self)
+    }
+
+    fn state(&self) -> DataFusionResult<Vec<ScalarValue>> {
+        unreachable!("Accumulator::state will not be used in range query")
+    }
+
+    fn merge_batch(&mut self, _states: &[ArrayRef]) -> DataFusionResult<()> {
+        unreachable!("Accumulator::merge_batch will not be used in range query")
+    }
+}
+
 #[derive(PartialEq, Eq, Debug, Hash, Clone)]
 pub enum Fill {
     Null,
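`RangeFirstListValueAcc::update_batch` above finds the first/last row by a lexicographic sort with `limit = Some(1)`, which arrow can compute as an O(n) top-1 scan instead of a full sort. A standalone example of that call using the arrow-rs `lexsort_to_indices` API; verify the exact signature against the arrow version pinned by the workspace:

use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::compute::{lexsort_to_indices, SortColumn};
use arrow_schema::SortOptions;

fn main() -> Result<(), arrow::error::ArrowError> {
    // Sort key: timestamps ascending; the payload rides along by row index.
    let ts: ArrayRef = Arc::new(Int64Array::from(vec![30, 10, 20]));
    let payload = StringArray::from(vec!["c", "a", "b"]);

    let columns = vec![SortColumn {
        values: ts,
        options: Some(SortOptions {
            descending: false,
            nulls_first: true,
        }),
    }];
    // `Some(1)` asks only for the first row of the sorted order: a top-1
    // selection rather than an O(n log n) sort of the whole batch.
    let idx = lexsort_to_indices(&columns, Some(1))?.value(0);
    assert_eq!(payload.value(idx as usize), "a");
    Ok(())
}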
@@ -78,14 +218,15 @@ impl Display for Fill {
 }
 
 impl Fill {
-    pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Self> {
+    pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Option<Self>> {
         let s = value.to_uppercase();
         match s.as_str() {
-            "NULL" | "" => Ok(Self::Null),
-            "PREV" => Ok(Self::Prev),
+            "" => Ok(None),
+            "NULL" => Ok(Some(Self::Null)),
+            "PREV" => Ok(Some(Self::Prev)),
             "LINEAR" => {
                 if datatype.is_numeric() {
-                    Ok(Self::Linear)
+                    Ok(Some(Self::Linear))
                 } else {
                     Err(DataFusionError::Plan(format!(
                         "Use FILL LINEAR on Non-numeric DataType {}",

@@ -100,13 +241,17 @@ impl Fill {
                         s, err
                     ))
                 })
-                .map(Fill::Const),
+                .map(|x| Some(Fill::Const(x))),
         }
     }
 
     /// The input `data` contains data on a complete time series.
     /// If the filling strategy is `PREV` or `LINEAR`, caller must be ensured that the incoming `ts`&`data` is ascending time order.
     pub fn apply_fill_strategy(&self, ts: &[i64], data: &mut [ScalarValue]) -> DfResult<()> {
+        // No calculation need in `Fill::Null`
+        if matches!(self, Fill::Null) {
+            return Ok(());
+        }
         let len = data.len();
         if *self == Fill::Linear {
             return Self::fill_linear(ts, data);
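With `try_from_str` now returning `DfResult<Option<Self>>`, an absent FILL clause (the empty string) becomes `None` and is distinguishable from an explicit `FILL NULL`. A toy restatement of just that mapping; it deliberately omits the `FILL <const>` arm and real error handling:

#[derive(Debug, PartialEq)]
enum Fill {
    Null,
    Prev,
    Linear,
}

fn try_from_str(value: &str) -> Option<Fill> {
    match value.to_uppercase().as_str() {
        "" => None, // no FILL clause given
        "NULL" => Some(Fill::Null),
        "PREV" => Some(Fill::Prev),
        "LINEAR" => Some(Fill::Linear),
        _ => None, // the real code parses a typed constant here
    }
}

fn main() {
    assert_eq!(try_from_str(""), None);
    assert_eq!(try_from_str("null"), Some(Fill::Null));
}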
@@ -114,7 +259,6 @@ impl Fill {
         for i in 0..len {
             if data[i].is_null() {
                 match self {
-                    Fill::Null => continue,
                     Fill::Prev => {
                         if i != 0 {
                             data[i] = data[i - 1].clone()

@@ -122,7 +266,8 @@ impl Fill {
                     }
                     // The calculation of linear interpolation is relatively complicated.
                     // `Self::fill_linear` is used to dispose `Fill::Linear`.
-                    Fill::Linear => unreachable!(),
+                    // No calculation need in `Fill::Null`
+                    Fill::Linear | Fill::Null => unreachable!(),
                     Fill::Const(v) => data[i] = v.clone(),
                 }
             }
@@ -219,12 +364,12 @@ fn linear_interpolation(
 
 #[derive(Eq, Clone, Debug)]
 pub struct RangeFn {
-    /// with format like `max(a) RANGE 300s FILL NULL`
+    /// with format like `max(a) RANGE 300s [FILL NULL]`
     pub name: String,
     pub data_type: DataType,
     pub expr: Expr,
     pub range: Duration,
-    pub fill: Fill,
+    pub fill: Option<Fill>,
     /// If the `FIll` strategy is `Linear` and the output is an integer,
     /// it is possible to calculate a floating point number.
     /// So for `FILL==LINEAR`, the entire data will be implicitly converted to Float type
@@ -271,6 +416,7 @@ pub struct RangeSelect {
     pub align: Duration,
     pub align_to: i64,
     pub time_index: String,
+    pub time_expr: Expr,
     pub by: Vec<Expr>,
     pub schema: DFSchemaRef,
     pub by_schema: DFSchemaRef,

@@ -324,7 +470,7 @@ impl RangeSelect {
                     name,
                     data_type.clone(),
                     // Only when data fill with Const option, the data can't be null
-                    !matches!(fill, Fill::Const(..)),
+                    !matches!(fill, Some(Fill::Const(..))),
                 ))
             },
         )

@@ -382,6 +528,7 @@ impl RangeSelect {
             align,
             align_to,
             time_index: time_index_name,
+            time_expr: time_index,
             schema,
             by_schema,
             by,

@@ -440,6 +587,7 @@ impl UserDefinedLogicalNodeCore for RangeSelect {
             range_expr: self.range_expr.clone(),
             input: Arc::new(inputs[0].clone()),
             time_index: self.time_index.clone(),
+            time_expr: self.time_expr.clone(),
             schema: self.schema.clone(),
             by: self.by.clone(),
             by_schema: self.by_schema.clone(),

@@ -452,6 +600,7 @@ impl RangeSelect {
 impl RangeSelect {
     fn create_physical_expr_list(
         &self,
+        is_count_aggr: bool,
         exprs: &[Expr],
         df_schema: &Arc<DFSchema>,
         schema: &Schema,
@@ -459,7 +608,20 @@ impl RangeSelect {
     ) -> DfResult<Vec<Arc<dyn PhysicalExpr>>> {
         exprs
             .iter()
-            .map(|by| create_physical_expr(by, df_schema, schema, session_state.execution_props()))
+            .map(|e| match e {
+                // `count(*)` will be rewritten by `CountWildcardRule` into `count(1)` when optimizing logical plan.
+                // The modification occurs after range plan rewrite.
+                // At this time, aggregate plan has been replaced by a custom range plan,
+                // so `CountWildcardRule` has not been applied.
+                // We manually modify it when creating the physical plan.
+                Expr::Wildcard if is_count_aggr => create_physical_expr(
+                    &lit(COUNT_STAR_EXPANSION),
+                    df_schema,
+                    schema,
+                    session_state.execution_props(),
+                ),
+                _ => create_physical_expr(e, df_schema, schema, session_state.execution_props()),
+            })
             .collect::<DfResult<Vec<_>>>()
     }
 
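The wildcard arm above substitutes `lit(COUNT_STAR_EXPANSION)` for `count(*)`; in the DataFusion versions I know, that expansion constant is the literal `1`, which is sound because counting a never-null constant per row equals counting rows. A toy illustration of the equivalence:

fn count_star(rows: usize) -> usize {
    // COUNT(*) counts rows...
    rows
}

fn count_const(rows: &[i64]) -> usize {
    // ...and COUNT(1) counts one non-null literal per row: same result.
    rows.iter().map(|_| 1).sum()
}

fn main() {
    let rows = vec![10, 20, 30];
    assert_eq!(count_star(rows.len()), count_const(&rows));
}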
@@ -488,6 +650,72 @@ impl RangeSelect {
             .iter()
             .map(|range_fn| {
                 let expr = match &range_fn.expr {
+                    Expr::AggregateFunction(aggr)
+                        if aggr.fun == AggregateFunction::FirstValue
+                            || aggr.fun == AggregateFunction::LastValue =>
+                    {
+                        // Because we only need to find the first_value/last_value,
+                        // the complexity of sorting the entire batch is O(nlogn).
+                        // We can sort the batch with limit 1.
+                        // In this case, the algorithm degenerates into finding the Top1 problem with complexity O(n).
+                        // We need reverse the sort order of last_value to correctly apply limit 1 when sorting.
+                        let order_by = if let Some(exprs) = &aggr.order_by {
+                            exprs
+                                .iter()
+                                .map(|x| {
+                                    create_physical_sort_expr(
+                                        x,
+                                        input_dfschema,
+                                        &input_schema,
+                                        session_state.execution_props(),
+                                    )
+                                    .map(|expr| {
+                                        // reverse the last_value sort
+                                        if aggr.fun == AggregateFunction::LastValue {
+                                            PhysicalSortExpr {
+                                                expr: expr.expr,
+                                                options: SortOptions {
+                                                    descending: !expr.options.descending,
+                                                    nulls_first: !expr.options.nulls_first,
+                                                },
+                                            }
+                                        } else {
+                                            expr
+                                        }
+                                    })
+                                })
+                                .collect::<DfResult<Vec<_>>>()?
+                        } else {
+                            // if user not assign order by, time index is needed as default ordering
+                            let time_index = create_physical_expr(
+                                &self.time_expr,
+                                input_dfschema,
+                                &input_schema,
+                                session_state.execution_props(),
+                            )?;
+                            vec![PhysicalSortExpr {
+                                expr: time_index,
+                                options: SortOptions {
+                                    descending: aggr.fun == AggregateFunction::LastValue,
+                                    nulls_first: false,
+                                },
+                            }]
+                        };
+                        let arg = self.create_physical_expr_list(
+                            false,
+                            &aggr.args,
+                            input_dfschema,
+                            &input_schema,
+                            session_state,
+                        )?;
+                        // first_value/last_value has only one param.
+                        // The param have been checked by datafusion in logical plan stage.
+                        // We can safely assume that there is only one element here.
+                        Ok(RangeFirstListValue::new_aggregate_expr(
+                            arg[0].clone(),
+                            order_by,
+                        ))
+                    }
                     Expr::AggregateFunction(aggr) => {
                         let order_by = if let Some(exprs) = &aggr.order_by {
                             exprs
@@ -508,6 +736,7 @@ impl RangeSelect {
                             &aggr.fun,
                             false,
                             &self.create_physical_expr_list(
+                                aggr.fun == AggregateFunction::Count,
                                 &aggr.args,
                                 input_dfschema,
                                 &input_schema,

@@ -523,6 +752,7 @@ impl RangeSelect {
                         let expr = create_aggr_udf_expr(
                             &aggr_udf.fun,
                             &self.create_physical_expr_list(
+                                false,
                                 &aggr_udf.args,
                                 input_dfschema,
                                 &input_schema,

@@ -564,6 +794,7 @@ impl RangeSelect {
             align: self.align.as_millis() as Millisecond,
             align_to: self.align_to,
             by: self.create_physical_expr_list(
+                false,
                 &self.by,
                 input_dfschema,
                 &input_schema,

@@ -584,10 +815,26 @@ struct RangeFnExec {
     pub expr: Arc<dyn AggregateExpr>,
     pub args: Vec<Arc<dyn PhysicalExpr>>,
     pub range: Millisecond,
-    pub fill: Fill,
+    pub fill: Option<Fill>,
     pub need_cast: Option<DataType>,
 }
 
+impl Display for RangeFnExec {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(fill) = &self.fill {
+            write!(
+                f,
+                "{} RANGE {}s FILL {}",
+                self.expr.name(),
+                self.range / 1000,
+                fill
+            )
+        } else {
+            write!(f, "{} RANGE {}s", self.expr.name(), self.range / 1000)
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct RangeSelectExec {
     input: Arc<dyn ExecutionPlan>,
@@ -608,18 +855,8 @@ impl DisplayAs for RangeSelectExec {
         match t {
             DisplayFormatType::Default | DisplayFormatType::Verbose => {
                 write!(f, "RangeSelectExec: ")?;
-                let range_expr_strs: Vec<String> = self
-                    .range_exec
-                    .iter()
-                    .map(|e| {
-                        format!(
-                            "{} RANGE {}s FILL {}",
-                            e.expr.name(),
-                            e.range / 1000,
-                            e.fill
-                        )
-                    })
-                    .collect();
+                let range_expr_strs: Vec<String> =
+                    self.range_exec.iter().map(RangeFnExec::to_string).collect();
                 let by: Vec<String> = self.by.iter().map(|e| e.to_string()).collect();
                 write!(
                     f,
@@ -713,7 +950,7 @@ impl ExecutionPlan for RangeSelectExec {
             by: self.by.clone(),
             series_map: HashMap::new(),
             exec_state: ExecutionState::ReadingInput,
-            output_num_rows: 0,
+            num_not_null_rows: 0,
             row_converter,
             modify_map: HashMap::new(),
             metric: baseline_metric,

@@ -753,8 +990,8 @@ struct RangeSelectStream {
     /// value: `[row_ids]`
     /// It is used to record the data that needs to be aggregated in each time slot during the data update process
     modify_map: HashMap<(u64, Millisecond), Vec<u32>>,
-    /// The number of rows of the final output
-    output_num_rows: usize,
+    /// The number of rows of not null rows in the final output
+    num_not_null_rows: usize,
     metric: BaselineMetrics,
     schema_project: Option<Vec<usize>>,
     schema_before_project: SchemaRef,

@@ -766,7 +1003,7 @@ struct SeriesState {
     row: OwnedRow,
     /// key: align_ts
     /// value: a vector, each element is a range_fn follow the order of `range_exec`
-    align_ts_accumulator: HashMap<Millisecond, Vec<Box<dyn Accumulator>>>,
+    align_ts_accumulator: BTreeMap<Millisecond, Vec<Box<dyn Accumulator>>>,
 }
 
 /// Use `align_to` as time origin.

@@ -882,7 +1119,7 @@ impl RangeSelectStream {
                 let accumulators_map =
                     self.series_map.entry(*hash).or_insert_with(|| SeriesState {
                         row: by_rows.row(*row as usize).owned(),
-                        align_ts_accumulator: HashMap::new(),
+                        align_ts_accumulator: BTreeMap::new(),
                     });
                 match accumulators_map.align_ts_accumulator.entry(*ts) {
                     Entry::Occupied(mut e) => {

@@ -890,7 +1127,7 @@ impl RangeSelectStream {
                         accumulators[i].update_batch(&sliced_arrays)
                     }
                     Entry::Vacant(e) => {
-                        self.output_num_rows += 1;
+                        self.num_not_null_rows += 1;
                        let mut accumulators = self
                             .range_exec
                             .iter()
@@ -915,29 +1152,47 @@ impl RangeSelectStream {
|
|||||||
// 1 for time index column
|
// 1 for time index column
|
||||||
let mut columns: Vec<Arc<dyn Array>> =
|
let mut columns: Vec<Arc<dyn Array>> =
|
||||||
Vec::with_capacity(1 + self.range_exec.len() + self.by.len());
|
Vec::with_capacity(1 + self.range_exec.len() + self.by.len());
|
||||||
let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.output_num_rows);
|
let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.num_not_null_rows);
|
||||||
let mut all_scalar = vec![Vec::with_capacity(self.output_num_rows); self.range_exec.len()];
|
let mut all_scalar =
|
||||||
let mut by_rows = Vec::with_capacity(self.output_num_rows);
|
vec![Vec::with_capacity(self.num_not_null_rows); self.range_exec.len()];
|
||||||
|
let mut by_rows = Vec::with_capacity(self.num_not_null_rows);
|
||||||
let mut start_index = 0;
|
let mut start_index = 0;
|
||||||
// RangePlan is calculated on a row basis. If a column uses the PREV or LINEAR filling strategy,
|
// If any range expr need fill, we need fill both the missing align_ts and null value.
|
||||||
// we must arrange the data in the entire data row to determine the NULL filling value.
|
let need_fill_output = self.range_exec.iter().any(|range| range.fill.is_some());
|
||||||
let need_sort_output = self
|
// The padding value for each accumulator
|
||||||
|
let padding_values = self
|
||||||
.range_exec
|
.range_exec
|
||||||
.iter()
|
.iter()
|
||||||
.any(|range| range.fill == Fill::Linear || range.fill == Fill::Prev);
|
.map(|e| e.expr.create_accumulator()?.evaluate())
|
||||||
|
.collect::<DfResult<Vec<_>>>()?;
|
||||||
for SeriesState {
|
for SeriesState {
|
||||||
row,
|
row,
|
||||||
align_ts_accumulator,
|
align_ts_accumulator,
|
||||||
} in self.series_map.values()
|
} in self.series_map.values()
|
||||||
{
|
{
|
||||||
// collect data on time series
|
// skip empty time series
|
||||||
let mut align_ts = align_ts_accumulator.keys().copied().collect::<Vec<_>>();
|
if align_ts_accumulator.is_empty() {
|
||||||
if need_sort_output {
|
continue;
|
||||||
align_ts.sort();
|
|
||||||
}
|
}
|
||||||
|
// find the first and last align_ts
|
||||||
|
let begin_ts = *align_ts_accumulator.first_key_value().unwrap().0;
|
||||||
|
let end_ts = *align_ts_accumulator.last_key_value().unwrap().0;
|
||||||
|
let align_ts = if need_fill_output {
|
||||||
|
// we need to fill empty align_ts which not data in that solt
|
||||||
|
(begin_ts..=end_ts).step_by(self.align as usize).collect()
|
||||||
|
} else {
|
||||||
|
align_ts_accumulator.keys().copied().collect::<Vec<_>>()
|
||||||
|
};
|
||||||
for ts in &align_ts {
|
for ts in &align_ts {
|
||||||
for (i, accumulator) in align_ts_accumulator.get(ts).unwrap().iter().enumerate() {
|
if let Some(slot) = align_ts_accumulator.get(ts) {
|
||||||
all_scalar[i].push(accumulator.evaluate()?);
|
for (column, acc) in all_scalar.iter_mut().zip(slot.iter()) {
|
||||||
|
column.push(acc.evaluate()?);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// fill null in empty time solt
|
||||||
|
for (column, padding) in all_scalar.iter_mut().zip(padding_values.iter()) {
|
||||||
|
column.push(padding.clone())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts_builder.append_slice(&align_ts);
|
ts_builder.append_slice(&align_ts);
|
||||||
@@ -950,14 +1205,16 @@ impl RangeSelectStream {
|
|||||||
) in self.range_exec.iter().enumerate()
|
) in self.range_exec.iter().enumerate()
|
||||||
{
|
{
|
||||||
let time_series_data =
|
let time_series_data =
|
||||||
&mut all_scalar[i][start_index..start_index + align_ts_accumulator.len()];
|
&mut all_scalar[i][start_index..start_index + align_ts.len()];
|
||||||
if let Some(data_type) = need_cast {
|
if let Some(data_type) = need_cast {
|
||||||
cast_scalar_values(time_series_data, data_type)?;
|
cast_scalar_values(time_series_data, data_type)?;
|
||||||
}
|
}
|
||||||
fill.apply_fill_strategy(&align_ts, time_series_data)?;
|
if let Some(fill) = fill {
|
||||||
|
fill.apply_fill_strategy(&align_ts, time_series_data)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
by_rows.resize(by_rows.len() + align_ts_accumulator.len(), row.row());
|
by_rows.resize(by_rows.len() + align_ts.len(), row.row());
|
||||||
start_index += align_ts_accumulator.len();
|
start_index += align_ts.len();
|
||||||
}
|
}
|
||||||
for column_scalar in all_scalar {
|
for column_scalar in all_scalar {
|
||||||
columns.push(ScalarValue::iter_to_array(column_scalar)?);
|
columns.push(ScalarValue::iter_to_array(column_scalar)?);
|
||||||
@@ -1078,7 +1335,7 @@ mod test {
|
|||||||
|
|
||||||
const TIME_INDEX_COLUMN: &str = "timestamp";
|
const TIME_INDEX_COLUMN: &str = "timestamp";
|
||||||
|
|
||||||
fn prepare_test_data(is_float: bool) -> MemoryExec {
|
fn prepare_test_data(is_float: bool, is_gap: bool) -> MemoryExec {
|
||||||
let schema = Arc::new(Schema::new(vec![
|
let schema = Arc::new(Schema::new(vec![
|
||||||
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
|
Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
|
||||||
Field::new(
|
Field::new(
|
||||||
@@ -1092,16 +1349,23 @@ mod test {
|
|||||||
),
|
),
|
||||||
Field::new("host", DataType::Utf8, true),
|
Field::new("host", DataType::Utf8, true),
|
||||||
]));
|
]));
|
||||||
let timestamp_column: Arc<dyn Array> = Arc::new(TimestampMillisecondArray::from(vec![
|
let timestamp_column: Arc<dyn Array> = if !is_gap {
|
||||||
0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
|
Arc::new(TimestampMillisecondArray::from(vec![
|
||||||
0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
|
0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
|
||||||
])) as _;
|
0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
|
||||||
let mut host = vec!["host1"; 5];
|
])) as _
|
||||||
host.extend(vec!["host2"; 5]);
|
} else {
|
||||||
let value_column: Arc<dyn Array> = if is_float {
|
Arc::new(TimestampMillisecondArray::from(vec![
|
||||||
Arc::new(nullable_array!(Float64;
|
0, 15_000, // host 1 every 5s, missing data on 5_000, 10_000
|
||||||
0.0, null, 1.0, null, 2.0, // data for host 1
|
0, 15_000, // host 2 every 5s, missing data on 5_000, 10_000
|
||||||
3.0, null, 4.0, null, 5.0 // data for host 2
|
])) as _
|
||||||
|
};
|
||||||
|
let mut host = vec!["host1"; timestamp_column.len() / 2];
|
||||||
|
host.extend(vec!["host2"; timestamp_column.len() / 2]);
|
||||||
|
let mut value_column: Arc<dyn Array> = if is_gap {
|
||||||
|
Arc::new(nullable_array!(Int64;
|
||||||
|
0, 6, // data for host 1
|
||||||
|
6, 12 // data for host 2
|
||||||
)) as _
|
)) as _
|
||||||
} else {
|
} else {
|
||||||
Arc::new(nullable_array!(Int64;
|
Arc::new(nullable_array!(Int64;
|
||||||
@@ -1109,6 +1373,11 @@ mod test {
|
|||||||
3, null, 4, null, 5 // data for host 2
|
3, null, 4, null, 5 // data for host 2
|
||||||
)) as _
|
)) as _
|
||||||
};
|
};
|
||||||
|
if is_float {
|
||||||
|
value_column =
|
||||||
|
cast_with_options(&value_column, &DataType::Float64, &CastOptions::default())
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
let host_column: Arc<dyn Array> = Arc::new(StringArray::from(host)) as _;
|
let host_column: Arc<dyn Array> = Arc::new(StringArray::from(host)) as _;
|
||||||
let data = RecordBatch::try_new(
|
let data = RecordBatch::try_new(
|
||||||
schema.clone(),
|
schema.clone(),
|
||||||
@@ -1123,8 +1392,9 @@ mod test {
|
|||||||
range1: Millisecond,
|
range1: Millisecond,
|
||||||
range2: Millisecond,
|
range2: Millisecond,
|
||||||
align: Millisecond,
|
align: Millisecond,
|
||||||
fill: Fill,
|
fill: Option<Fill>,
|
||||||
is_float: bool,
|
is_float: bool,
|
||||||
|
is_gap: bool,
|
||||||
expected: String,
|
expected: String,
|
||||||
) {
|
) {
|
||||||
let data_type = if is_float {
|
let data_type = if is_float {
|
||||||
@@ -1132,13 +1402,13 @@ mod test {
|
|||||||
} else {
|
} else {
|
||||||
DataType::Int64
|
DataType::Int64
|
||||||
};
|
};
|
||||||
let (need_cast, schema_data_type) = if !is_float && fill == Fill::Linear {
|
let (need_cast, schema_data_type) = if !is_float && matches!(fill, Some(Fill::Linear)) {
|
||||||
// data_type = DataType::Float64;
|
// data_type = DataType::Float64;
|
||||||
(Some(DataType::Float64), DataType::Float64)
|
(Some(DataType::Float64), DataType::Float64)
|
||||||
} else {
|
} else {
|
||||||
(None, data_type.clone())
|
(None, data_type.clone())
|
||||||
};
|
};
|
||||||
let memory_exec = Arc::new(prepare_test_data(is_float));
|
let memory_exec = Arc::new(prepare_test_data(is_float, is_gap));
|
||||||
let schema = Arc::new(Schema::new(vec![
|
let schema = Arc::new(Schema::new(vec![
|
||||||
Field::new("MIN(value)", schema_data_type.clone(), true),
|
Field::new("MIN(value)", schema_data_type.clone(), true),
|
||||||
Field::new("MAX(value)", schema_data_type, true),
|
Field::new("MAX(value)", schema_data_type, true),
|
||||||
@@ -1223,7 +1493,16 @@ mod test {
|
|||||||
\n| 3.0 | 3.0 | 1970-01-01T00:00:00 | host2 |\
|
\n| 3.0 | 3.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
\n+------------+------------+---------------------+-------+",
|
\n+------------+------------+---------------------+-------+",
|
||||||
);
|
);
|
||||||
do_range_select_test(10_000, 10_000, 1_000_000, Fill::Null, true, expected).await;
|
do_range_select_test(
|
||||||
|
10_000,
|
||||||
|
10_000,
|
||||||
|
1_000_000,
|
||||||
|
Some(Fill::Null),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -1246,7 +1525,16 @@ mod test {
|
|||||||
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
||||||
\n+------------+------------+---------------------+-------+",
|
\n+------------+------------+---------------------+-------+",
|
||||||
);
|
);
|
||||||
do_range_select_test(10_000, 5_000, 5_000, Fill::Null, true, expected).await;
|
do_range_select_test(
|
||||||
|
10_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Null),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -1269,7 +1557,16 @@ mod test {
|
|||||||
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
||||||
\n+------------+------------+---------------------+-------+",
|
\n+------------+------------+---------------------+-------+",
|
||||||
);
|
);
|
||||||
do_range_select_test(10_000, 5_000, 5_000, Fill::Prev, true, expected).await;
|
do_range_select_test(
|
||||||
|
10_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Prev),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -1292,7 +1589,16 @@ mod test {
|
|||||||
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
||||||
\n+------------+------------+---------------------+-------+",
|
\n+------------+------------+---------------------+-------+",
|
||||||
);
|
);
|
||||||
do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, true, expected).await;
|
do_range_select_test(
|
||||||
|
10_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Linear),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -1315,7 +1621,16 @@ mod test {
|
|||||||
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
\n| 5.0 | 5.0 | 1970-01-01T00:00:20 | host2 |\
|
||||||
\n+------------+------------+---------------------+-------+",
|
\n+------------+------------+---------------------+-------+",
|
||||||
);
|
);
|
||||||
do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, false, expected).await;
|
do_range_select_test(
|
||||||
|
10_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Linear),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -1342,7 +1657,101 @@ mod test {
|
|||||||
10_000,
|
10_000,
|
||||||
5_000,
|
5_000,
|
||||||
5_000,
|
5_000,
|
||||||
Fill::Const(ScalarValue::Float64(Some(6.6))),
|
Some(Fill::Const(ScalarValue::Float64(Some(6.6)))),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn range_fill_gap() {
|
||||||
|
let expected = String::from(
|
||||||
|
"+------------+------------+---------------------+-------+\
|
||||||
|
\n| MIN(value) | MAX(value) | timestamp | host |\
|
||||||
|
\n+------------+------------+---------------------+-------+\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
|
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
|
||||||
|
\n+------------+------------+---------------------+-------+",
|
||||||
|
);
|
||||||
|
do_range_select_test(5_000, 5_000, 5_000, None, true, true, expected).await;
|
||||||
|
let expected = String::from(
|
||||||
|
"+------------+------------+---------------------+-------+\
|
||||||
|
\n| MIN(value) | MAX(value) | timestamp | host |\
|
||||||
|
\n+------------+------------+---------------------+-------+\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
|
||||||
|
\n| | | 1970-01-01T00:00:05 | host1 |\
|
||||||
|
\n| | | 1970-01-01T00:00:10 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
|
\n| | | 1970-01-01T00:00:05 | host2 |\
|
||||||
|
\n| | | 1970-01-01T00:00:10 | host2 |\
|
||||||
|
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
|
||||||
|
\n+------------+------------+---------------------+-------+",
|
||||||
|
);
|
||||||
|
do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Null), true, true, expected).await;
|
||||||
|
let expected = String::from(
|
||||||
|
"+------------+------------+---------------------+-------+\
|
||||||
|
\n| MIN(value) | MAX(value) | timestamp | host |\
|
||||||
|
\n+------------+------------+---------------------+-------+\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:05 | host1 |\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:10 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host2 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host2 |\
|
||||||
|
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
|
||||||
|
\n+------------+------------+---------------------+-------+",
|
||||||
|
);
|
||||||
|
do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Prev), true, true, expected).await;
|
||||||
|
let expected = String::from(
|
||||||
|
"+------------+------------+---------------------+-------+\
|
||||||
|
\n| MIN(value) | MAX(value) | timestamp | host |\
|
||||||
|
\n+------------+------------+---------------------+-------+\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
|
||||||
|
\n| 2.0 | 2.0 | 1970-01-01T00:00:05 | host1 |\
|
||||||
|
\n| 4.0 | 4.0 | 1970-01-01T00:00:10 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
|
\n| 8.0 | 8.0 | 1970-01-01T00:00:05 | host2 |\
|
||||||
|
\n| 10.0 | 10.0 | 1970-01-01T00:00:10 | host2 |\
|
||||||
|
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
|
||||||
|
\n+------------+------------+---------------------+-------+",
|
||||||
|
);
|
||||||
|
do_range_select_test(
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Linear),
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
expected,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let expected = String::from(
|
||||||
|
"+------------+------------+---------------------+-------+\
|
||||||
|
\n| MIN(value) | MAX(value) | timestamp | host |\
|
||||||
|
\n+------------+------------+---------------------+-------+\
|
||||||
|
\n| 0.0 | 0.0 | 1970-01-01T00:00:00 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:15 | host1 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:00 | host2 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:05 | host2 |\
|
||||||
|
\n| 6.0 | 6.0 | 1970-01-01T00:00:10 | host2 |\
|
||||||
|
\n| 12.0 | 12.0 | 1970-01-01T00:00:15 | host2 |\
|
||||||
|
\n+------------+------------+---------------------+-------+",
|
||||||
|
);
|
||||||
|
do_range_select_test(
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
5_000,
|
||||||
|
Some(Fill::Const(ScalarValue::Float64(Some(6.0)))),
|
||||||
|
true,
|
||||||
true,
|
true,
|
||||||
expected,
|
expected,
|
||||||
)
|
)
|
||||||
@@ -1351,7 +1760,8 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn fill_test() {
|
fn fill_test() {
|
||||||
assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Fill::Linear);
|
assert!(Fill::try_from_str("", &DataType::UInt8).unwrap().is_none());
|
||||||
|
assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Some(Fill::Linear));
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
Fill::try_from_str("Linear", &DataType::Boolean)
|
Fill::try_from_str("Linear", &DataType::Boolean)
|
||||||
.unwrap_err()
|
.unwrap_err()
|
||||||
@@ -1372,7 +1782,7 @@ mod test {
|
|||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
Fill::try_from_str("8", &DataType::UInt8).unwrap()
|
Fill::try_from_str("8", &DataType::UInt8).unwrap()
|
||||||
== Fill::Const(ScalarValue::UInt8(Some(8)))
|
== Some(Fill::Const(ScalarValue::UInt8(Some(8))))
|
||||||
);
|
);
|
||||||
let mut test1 = vec![
|
let mut test1 = vec![
|
||||||
ScalarValue::UInt8(Some(8)),
|
ScalarValue::UInt8(Some(8)),
|
||||||
@@ -1447,4 +1857,44 @@ mod test {
|
|||||||
Fill::Linear.apply_fill_strategy(&ts, &mut test1).unwrap();
|
Fill::Linear.apply_fill_strategy(&ts, &mut test1).unwrap();
|
||||||
assert_eq!(test, test1);
|
assert_eq!(test, test1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fist_last_accumulator() {
|
||||||
|
let mut acc = RangeFirstListValueAcc::new(vec![
|
||||||
|
SortOptions {
|
||||||
|
descending: true,
|
||||||
|
nulls_first: false,
|
||||||
|
},
|
||||||
|
SortOptions {
|
||||||
|
descending: false,
|
||||||
|
nulls_first: true,
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
let batch1: Vec<Arc<dyn Array>> = vec![
|
||||||
|
Arc::new(nullable_array!(Float64;
|
||||||
|
0.0, null, 0.0, null, 1.0
|
||||||
|
)),
|
||||||
|
Arc::new(nullable_array!(Float64;
|
||||||
|
5.0, null, 4.0, null, 3.0
|
||||||
|
)),
|
||||||
|
Arc::new(nullable_array!(Int64;
|
||||||
|
1, 2, 3, 4, 5
|
||||||
|
)),
|
||||||
|
];
|
||||||
|
let batch2: Vec<Arc<dyn Array>> = vec![
|
||||||
|
Arc::new(nullable_array!(Float64;
|
||||||
|
3.0, 3.0, 3.0, 3.0, 3.0
|
||||||
|
)),
|
||||||
|
Arc::new(nullable_array!(Float64;
|
||||||
|
null,3.0, 3.0, 3.0, 3.0
|
||||||
|
)),
|
||||||
|
Arc::new(nullable_array!(Int64;
|
||||||
|
6, 7, 8, 9, 10
|
||||||
|
)),
|
||||||
|
];
|
||||||
|
acc.update_batch(&batch1).unwrap();
|
||||||
|
assert_eq!(acc.evaluate().unwrap(), ScalarValue::Int64(Some(5)));
|
||||||
|
acc.update_batch(&batch2).unwrap();
|
||||||
|
assert_eq!(acc.evaluate().unwrap(), ScalarValue::Int64(Some(6)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
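The heart of this diff is the switch from `HashMap` to `BTreeMap` for `align_ts_accumulator`: an ordered map makes the first and last aligned timestamps cheap to find, so the empty slots between them can be enumerated and padded instead of sorting keys per series. A minimal self-contained sketch of that idea (standalone Rust, not the GreptimeDB types; `aligned_timestamps` is a made-up name for illustration):

use std::collections::BTreeMap;

// Millisecond timestamps keyed by their aligned slot; the Vec<f64> stands in
// for the per-slot accumulators.
fn aligned_timestamps(
    accumulator: &BTreeMap<i64, Vec<f64>>,
    align: i64,
    need_fill: bool,
) -> Vec<i64> {
    if accumulator.is_empty() {
        return Vec::new(); // skip empty time series
    }
    // The ordered map hands us the first and last slot directly.
    let begin = *accumulator.first_key_value().unwrap().0;
    let end = *accumulator.last_key_value().unwrap().0;
    if need_fill {
        // Enumerate every slot, including the empty ones that must be padded.
        (begin..=end).step_by(align as usize).collect()
    } else {
        // No FILL clause: only slots that actually hold data are emitted.
        accumulator.keys().copied().collect()
    }
}

fn main() {
    let mut acc = BTreeMap::new();
    acc.insert(0, vec![0.0]);
    acc.insert(15_000, vec![6.0]);
    assert_eq!(aligned_timestamps(&acc, 5_000, true), vec![0, 5_000, 10_000, 15_000]);
    assert_eq!(aligned_timestamps(&acc, 5_000, false), vec![0, 15_000]);
}

With `need_fill` set, the two sparse samples expand to every 5-second slot, which is exactly what the new `range_fill_gap` test above asserts at the SQL level.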
@@ -216,7 +216,7 @@ impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> {
             let mut data_type = range_expr.get_type(self.input_plan.schema())?;
             let mut need_cast = false;
             let fill = Fill::try_from_str(parse_str_expr(&func.args, 2)?, &data_type)?;
-            if matches!(fill, Fill::Linear) && data_type.is_integer() {
+            if matches!(fill, Some(Fill::Linear)) && data_type.is_integer() {
                 data_type = DataType::Float64;
                 need_cast = true;
             }
@@ -224,12 +224,20 @@ impl<'a> TreeNodeRewriter for RangeExprRewriter<'a> {
             inconsistent_check!(self.align, self.align != Duration::default());
             inconsistent_check!(self.align_to, self.align_to != 0);
             let range_fn = RangeFn {
-                name: format!(
-                    "{} RANGE {} FILL {}",
-                    range_expr.display_name()?,
-                    parse_expr_to_string(&func.args, 1)?,
-                    fill
-                ),
+                name: if let Some(fill) = &fill {
+                    format!(
+                        "{} RANGE {} FILL {}",
+                        range_expr.display_name()?,
+                        parse_expr_to_string(&func.args, 1)?,
+                        fill
+                    )
+                } else {
+                    format!(
+                        "{} RANGE {}",
+                        range_expr.display_name()?,
+                        parse_expr_to_string(&func.args, 1)?,
+                    )
+                },
                 data_type,
                 expr: range_expr,
                 range,
@@ -551,7 +559,7 @@ mod test {
     async fn range_no_project() {
         let query = r#"SELECT timestamp, tag_0, tag_1, avg(field_0 + field_1) RANGE '5m' FROM test ALIGN '1h' by (tag_0,tag_1);"#;
         let expected = String::from(
-            "RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N]\
+            "RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N]\
             \n  TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
         );
         query_plan_compare(query, expected).await;
@@ -561,8 +569,8 @@ mod test {
     async fn range_expr_calculation() {
         let query = r#"SELECT (avg(field_0 + field_1)/4) RANGE '5m' FROM test ALIGN '1h' by (tag_0,tag_1);"#;
         let expected = String::from(
-            "Projection: AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL / Int64(4) [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL / Int64(4):Float64;N]\
-            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
+            "Projection: AVG(test.field_0 + test.field_1) RANGE 5m / Int64(4) [AVG(test.field_0 + test.field_1) RANGE 5m / Int64(4):Float64;N]\
+            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
             \n    TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
         );
         query_plan_compare(query, expected).await;
@@ -573,8 +581,8 @@ mod test {
         let query =
             r#"SELECT (covar(field_0 + field_1, field_1)/4) RANGE '5m' FROM test ALIGN '1h';"#;
         let expected = String::from(
-            "Projection: COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL / Int64(4) [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL / Int64(4):Float64;N]\
-            \n  RangeSelect: range_exprs=[COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\
+            "Projection: COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4) [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m / Int64(4):Float64;N]\
+            \n  RangeSelect: range_exprs=[COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1, test.tag_2, test.tag_3, test.tag_4], time_index=timestamp [COVARIANCE(test.field_0 + test.field_1,test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8]\
             \n    TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
         );
         query_plan_compare(query, expected).await;
@@ -621,8 +629,8 @@ mod test {
     async fn range_in_expr() {
         let query = r#"SELECT sin(avg(field_0 + field_1) RANGE '5m' + 1) FROM test ALIGN '1h' by (tag_0,tag_1);"#;
         let expected = String::from(
-            "Projection: sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1)) [sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1)):Float64;N]\
-            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
+            "Projection: sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1)) [sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1)):Float64;N]\
+            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
             \n    TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
         );
         query_plan_compare(query, expected).await;
@@ -643,8 +651,8 @@ mod test {
     async fn deep_nest_range_expr() {
         let query = r#"SELECT round(sin(avg(field_0 + field_1) RANGE '5m' + 1)) FROM test ALIGN '1h' by (tag_0,tag_1);"#;
         let expected = String::from(
-            "Projection: round(sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1))) [round(sin(AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL + Int64(1))):Float64;N]\
-            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m FILL NULL:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
+            "Projection: round(sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1))) [round(sin(AVG(test.field_0 + test.field_1) RANGE 5m + Int64(1))):Float64;N]\
+            \n  RangeSelect: range_exprs=[AVG(test.field_0 + test.field_1) RANGE 5m], align=3600000ms, align_to=0ms, align_by=[test.tag_0, test.tag_1], time_index=timestamp [AVG(test.field_0 + test.field_1) RANGE 5m:Float64;N, timestamp:Timestamp(Millisecond, None), tag_0:Utf8, tag_1:Utf8]\
             \n    TableScan: test [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, tag_4:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N, field_1:Float64;N, field_2:Float64;N, field_3:Float64;N, field_4:Float64;N]"
         );
         query_plan_compare(query, expected).await;
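The planner-side counterpart makes the FILL clause optional: `Fill::try_from_str` now yields `Option<Fill>`, and the display name only carries a `FILL` suffix when a strategy was requested, which is why the expected plan strings above lose their `FILL NULL`. A rough standalone sketch of the parse-and-name behavior (the `Fill` enum and both helpers here are simplified stand-ins, not the real types):

// Simplified stand-in for the real Fill enum.
#[derive(Debug, PartialEq)]
enum Fill {
    Null,
    Prev,
    Linear,
}

// An empty FILL argument now means "no fill" instead of defaulting to NULL.
fn try_from_str(s: &str) -> Result<Option<Fill>, String> {
    match s.to_uppercase().as_str() {
        "" => Ok(None),
        "NULL" => Ok(Some(Fill::Null)),
        "PREV" => Ok(Some(Fill::Prev)),
        "LINEAR" => Ok(Some(Fill::Linear)),
        other => Err(format!("{other} is not a valid fill option")),
    }
}

// The FILL suffix only appears when a strategy exists, matching the updated
// expected plans in the tests above.
fn display_name(expr: &str, range: &str, fill: &Option<Fill>) -> String {
    match fill {
        Some(f) => format!("{expr} RANGE {range} FILL {f:?}"),
        None => format!("{expr} RANGE {range}"),
    }
}

fn main() {
    assert_eq!(try_from_str("").unwrap(), None);
    assert_eq!(try_from_str("Linear").unwrap(), Some(Fill::Linear));
    let name = display_name("AVG(value)", "5m", &try_from_str("").unwrap());
    assert_eq!(name, "AVG(value) RANGE 5m");
}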
@@ -237,10 +237,9 @@ async fn query_from_information_schema_table(
         .await
         .context(error::DataFusionSnafu)?;
 
-    Ok(Output::Stream(
-        Box::pin(RecordBatchStreamAdapter::try_new(stream).context(error::CreateRecordBatchSnafu)?),
-        None,
-    ))
+    Ok(Output::new_with_stream(Box::pin(
+        RecordBatchStreamAdapter::try_new(stream).context(error::CreateRecordBatchSnafu)?,
+    )))
 }
 
 pub async fn show_tables(
@@ -303,7 +302,7 @@ pub fn show_variable(stmt: ShowVariables, query_ctx: QueryContextRef) -> Result<
         vec![Arc::new(StringVector::from(vec![value])) as _],
     )
     .context(error::CreateRecordBatchSnafu)?;
-    Ok(Output::RecordBatches(records))
+    Ok(Output::new_with_record_batches(records))
 }
 
 pub fn show_create_table(
@@ -329,7 +328,7 @@ pub fn show_create_table(
     let records = RecordBatches::try_from_columns(SHOW_CREATE_TABLE_OUTPUT_SCHEMA.clone(), columns)
         .context(error::CreateRecordBatchSnafu)?;
 
-    Ok(Output::RecordBatches(records))
+    Ok(Output::new_with_record_batches(records))
 }
 
 pub fn describe_table(table: TableRef) -> Result<Output> {
@@ -345,7 +344,7 @@ pub fn describe_table(table: TableRef) -> Result<Output> {
     ];
     let records = RecordBatches::try_from_columns(DESCRIBE_TABLE_OUTPUT_SCHEMA.clone(), columns)
         .context(error::CreateRecordBatchSnafu)?;
-    Ok(Output::RecordBatches(records))
+    Ok(Output::new_with_record_batches(records))
 }
 
 fn describe_column_names(columns_schemas: &[ColumnSchema]) -> VectorRef {
@@ -572,7 +571,7 @@ fn parse_file_table_format(options: &HashMap<String, String>) -> Result<Box<dyn
 mod test {
     use std::sync::Arc;
 
-    use common_query::Output;
+    use common_query::{Output, OutputData};
     use common_recordbatch::{RecordBatch, RecordBatches};
     use common_time::timestamp::TimeUnit;
     use common_time::Timezone;
@@ -642,7 +641,7 @@ mod test {
         RecordBatches::try_from_columns(DESCRIBE_TABLE_OUTPUT_SCHEMA.clone(), expected_columns)
             .context(error::CreateRecordBatchSnafu)?;
 
-        if let Output::RecordBatches(res) = describe_table(table)? {
+        if let OutputData::RecordBatches(res) = describe_table(table)?.data {
             assert_eq!(res.take(), expected.take());
         } else {
             panic!("describe table must return record batch");
@@ -690,7 +689,10 @@ mod test {
             .timezone(Arc::new(Timezone::from_tz_string(tz).unwrap()))
             .build();
         match show_variable(stmt, ctx) {
-            Ok(Output::RecordBatches(record)) => {
+            Ok(Output {
+                data: OutputData::RecordBatches(record),
+                ..
+            }) => {
                 let record = record.take().first().cloned().unwrap();
                 let data = record.column(0);
                 Ok(data.get(0).to_string())
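Most of the remaining hunks are mechanical fallout of one refactor in `common_query`: the old `Output` enum becomes a struct whose payload lives in an `OutputData` field, so constructors like `Output::new_with_record_batches` replace direct enum variants and call sites match on `output.data`. A compact sketch of the shape this implies (any field besides `data` is hypothetical, and `Vec<String>` stands in for real record batches):

// Stand-in payload: the real OutputData carries affected rows, record
// batches, or a record-batch stream.
enum OutputData {
    AffectedRows(usize),
    RecordBatches(Vec<String>),
}

// The enum-turned-struct: the payload moves into `data` so the struct can
// grow extra fields later (the `meta` field here is purely hypothetical).
struct Output {
    data: OutputData,
    meta: Option<String>,
}

impl Output {
    fn new_with_affected_rows(n: usize) -> Self {
        Output { data: OutputData::AffectedRows(n), meta: None }
    }
    fn new_with_record_batches(batches: Vec<String>) -> Self {
        Output { data: OutputData::RecordBatches(batches), meta: None }
    }
}

fn main() {
    // Call sites now destructure `output.data` instead of `output` itself.
    let output = Output::new_with_affected_rows(1);
    match output.data {
        OutputData::AffectedRows(n) => println!("{n} rows affected"),
        OutputData::RecordBatches(b) => println!("{} batches", b.len()),
    }
    let _ = Output::new_with_record_batches(vec!["batch".to_string()]).meta;
}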
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 use catalog::memory::MemoryCatalogManager;
-use common_query::Output;
+use common_query::OutputData;
 use common_recordbatch::{util, RecordBatch};
 use session::context::QueryContext;
 use table::TableRef;
@@ -43,7 +43,7 @@ async fn exec_selection(engine: QueryEngineRef, sql: &str) -> Vec<RecordBatch> {
         .plan(stmt, query_ctx.clone())
         .await
         .unwrap();
-    let Output::Stream(stream, _) = engine.execute(plan, query_ctx).await.unwrap() else {
+    let OutputData::Stream(stream) = engine.execute(plan, query_ctx).await.unwrap().data else {
         unreachable!()
     };
     util::collect(stream).await.unwrap()
@@ -20,7 +20,7 @@ use common_base::Plugins;
 use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
 use common_error::ext::BoxedError;
 use common_query::prelude::{create_udf, make_scalar_function, Volatility};
-use common_query::Output;
+use common_query::OutputData;
 use common_recordbatch::{util, RecordBatch};
 use datafusion::datasource::DefaultTableSource;
 use datafusion_expr::logical_plan::builder::LogicalPlanBuilder;
@@ -79,8 +79,8 @@ async fn test_datafusion_query_engine() -> Result<()> {
 
     let output = engine.execute(plan, QueryContext::arc()).await?;
 
-    let recordbatch = match output {
-        Output::Stream(recordbatch, _) => recordbatch,
+    let recordbatch = match output.data {
+        OutputData::Stream(recordbatch) => recordbatch,
         _ => unreachable!(),
     };
 
@@ -17,7 +17,7 @@ use std::sync::Arc;
 
 use catalog::memory::MemoryCatalogManager;
 use common_catalog::consts::NUMBERS_TABLE_ID;
-use common_query::Output;
+use common_query::OutputData;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use futures::Future;
 use once_cell::sync::{Lazy, OnceCell};
@@ -69,9 +69,9 @@ async fn run_compiled(script: &PyScript) {
         .execute(HashMap::default(), EvalContext::default())
         .await
         .unwrap();
-    let _res = match output {
-        Output::Stream(s, _) => common_recordbatch::util::collect_batches(s).await.unwrap(),
-        Output::RecordBatches(rbs) => rbs,
+    let _res = match output.data {
+        OutputData::Stream(s) => common_recordbatch::util::collect_batches(s).await.unwrap(),
+        OutputData::RecordBatches(rbs) => rbs,
         _ => unreachable!(),
     };
 }
@@ -211,6 +211,8 @@ impl<E: ErrorExt + Send + Sync + 'static> ScriptManager<E> {
 
 #[cfg(test)]
 mod tests {
+    use common_query::OutputData;
+
     use super::*;
     use crate::test::setup_scripts_manager;
 
@@ -261,8 +263,8 @@ def test() -> vector[str]:
         .await
         .unwrap();
 
-    match output {
-        Output::RecordBatches(batches) => {
+    match output.data {
+        OutputData::RecordBatches(batches) => {
             let expected = "\
+-------+
| n     |
@@ -25,10 +25,11 @@ use common_function::function::Function;
 use common_function::function_registry::FUNCTION_REGISTRY;
 use common_query::error::{PyUdfSnafu, UdfTempRecordBatchSnafu};
 use common_query::prelude::Signature;
-use common_query::Output;
+use common_query::{Output, OutputData};
+use common_recordbatch::adapter::RecordBatchMetrics;
 use common_recordbatch::error::{ExternalSnafu, Result as RecordBatchResult};
 use common_recordbatch::{
-    RecordBatch, RecordBatchStream, RecordBatches, SendableRecordBatchStream,
+    OrderOption, RecordBatch, RecordBatchStream, RecordBatches, SendableRecordBatchStream,
 };
 use datafusion_expr::Volatility;
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
@@ -255,6 +256,14 @@ impl RecordBatchStream for CoprStream {
         // FIXME(discord9): use copr returns for schema
         self.ret_schema.clone()
     }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        None
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        None
+    }
 }
 
 impl Stream for CoprStream {
@@ -311,10 +320,10 @@ impl Script for PyScript {
                 .await
                 .context(DatabaseQuerySnafu)?;
             let copr = self.copr.clone();
-            match res {
-                Output::Stream(stream, _) => Ok(Output::new_stream(Box::pin(CoprStream::try_new(
-                    stream, copr, params, ctx,
-                )?))),
+            match res.data {
+                OutputData::Stream(stream) => Ok(Output::new_with_stream(Box::pin(
+                    CoprStream::try_new(stream, copr, params, ctx)?,
+                ))),
                 _ => unreachable!(),
             }
         } else {
@@ -324,7 +333,7 @@ impl Script for PyScript {
                 .await
                 .context(TokioJoinSnafu)??;
             let batches = RecordBatches::try_new(batch.schema.clone(), vec![batch]).unwrap();
-            Ok(Output::RecordBatches(batches))
+            Ok(Output::new_with_record_batches(batches))
         }
     }
 }
@@ -410,8 +419,8 @@ def test(number) -> vector[u32]:
        .execute(HashMap::default(), EvalContext::default())
        .await
        .unwrap();
-    let res = common_recordbatch::util::collect_batches(match output {
-        Output::Stream(s, _) => s,
+    let res = common_recordbatch::util::collect_batches(match output.data {
+        OutputData::Stream(s) => s,
         _ => unreachable!(),
     })
     .await
@@ -441,8 +450,8 @@ def test(**params) -> vector[i64]:
        .execute(params, EvalContext::default())
        .await
        .unwrap();
-    let res = match _output {
-        Output::RecordBatches(s) => s,
+    let res = match _output.data {
+        OutputData::RecordBatches(s) => s,
         _ => todo!(),
     };
     let rb = res.iter().next().expect("One and only one recordbatch");
@@ -471,8 +480,8 @@ def test(number) -> vector[u32]:
        .execute(HashMap::new(), EvalContext::default())
        .await
        .unwrap();
-    let res = common_recordbatch::util::collect_batches(match _output {
-        Output::Stream(s, _) => s,
+    let res = common_recordbatch::util::collect_batches(match _output.data {
+        OutputData::Stream(s) => s,
         _ => todo!(),
     })
     .await
@@ -503,8 +512,8 @@ def test(a, b, c) -> vector[f64]:
        .execute(HashMap::new(), EvalContext::default())
        .await
        .unwrap();
-    match output {
-        Output::Stream(stream, _) => {
+    match output.data {
+        OutputData::Stream(stream) => {
             let numbers = util::collect(stream).await.unwrap();
 
             assert_eq!(1, numbers.len());
@@ -541,8 +550,8 @@ def test(a) -> vector[i64]:
        .execute(HashMap::new(), EvalContext::default())
        .await
        .unwrap();
-    match output {
-        Output::Stream(stream, _) => {
+    match output.data {
+        OutputData::Stream(stream) => {
             let numbers = util::collect(stream).await.unwrap();
 
             assert_eq!(1, numbers.len());
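Besides the `OutputData` switch, the `CoprStream` hunk implements two stream-trait methods, `output_ordering` and `metrics`, both answering `None` because the coprocessor output promises no ordering and collects no per-stream metrics. A tiny sketch of that pattern with a stand-in trait (not the real `RecordBatchStream` signature):

// Stand-ins for the real descriptor types.
struct OrderOption;
struct RecordBatchMetrics;

// A stand-in for the stream trait that grew the two methods.
trait BatchStream {
    fn output_ordering(&self) -> Option<&[OrderOption]>;
    fn metrics(&self) -> Option<RecordBatchMetrics>;
}

struct CoprStreamSketch;

impl BatchStream for CoprStreamSketch {
    // Coprocessor output makes no ordering promise...
    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }
    // ...and collects no per-stream metrics.
    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

fn main() {
    let s = CoprStreamSketch;
    assert!(s.output_ordering().is_none());
    assert!(s.metrics().is_none());
}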
@@ -19,6 +19,7 @@ use std::collections::HashMap;
 use std::result::Result as StdResult;
 use std::sync::{Arc, Weak};
 
+use common_query::OutputData;
 use common_recordbatch::{RecordBatch, RecordBatches};
 use datatypes::arrow::compute;
 use datatypes::data_type::{ConcreteDataType, DataType};
@@ -399,13 +400,14 @@ impl PyQueryEngine {
                 .await
                 .map_err(|e| e.to_string());
             match res {
-                Ok(common_query::Output::AffectedRows(cnt)) => {
-                    Ok(Either::AffectedRows(cnt))
-                }
-                Ok(common_query::Output::RecordBatches(rbs)) => Ok(Either::Rb(rbs)),
-                Ok(common_query::Output::Stream(s, _)) => Ok(Either::Rb(
-                    common_recordbatch::util::collect_batches(s).await.unwrap(),
-                )),
+                Ok(o) => match o.data {
+                    OutputData::AffectedRows(cnt) => Ok(Either::AffectedRows(cnt)),
+                    OutputData::RecordBatches(rbs) => Ok(Either::Rb(rbs)),
+                    OutputData::Stream(s) => Ok(Either::Rb(
+                        common_recordbatch::util::collect_batches(s).await.unwrap(),
+                    )),
+                },
                 Err(e) => Err(e),
             }
         })?;
@@ -18,7 +18,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 
 use arrow::compute::kernels::numeric;
-use common_query::Output;
+use common_query::OutputData;
 use common_recordbatch::RecordBatch;
 use datafusion::arrow::array::Float64Array;
 use datafusion::arrow::compute;
@@ -87,9 +87,9 @@ async fn integrated_py_copr_test() {
         .execute(HashMap::default(), EvalContext::default())
         .await
         .unwrap();
-    let res = match output {
-        Output::Stream(s, _) => common_recordbatch::util::collect_batches(s).await.unwrap(),
-        Output::RecordBatches(rbs) => rbs,
+    let res = match output.data {
+        OutputData::Stream(s) => common_recordbatch::util::collect_batches(s).await.unwrap(),
+        OutputData::RecordBatches(rbs) => rbs,
         _ => unreachable!(),
     };
     let rb = res.iter().next().expect("One and only one recordbatch");
@@ -24,7 +24,7 @@ use api::v1::{
 };
 use catalog::error::CompileScriptInternalSnafu;
 use common_error::ext::{BoxedError, ErrorExt};
-use common_query::Output;
+use common_query::OutputData;
 use common_recordbatch::{util as record_util, RecordBatch, SendableRecordBatchStream};
 use common_telemetry::logging;
 use common_time::util;
@@ -230,9 +230,9 @@ impl<E: ErrorExt + Send + Sync + 'static> ScriptsTable<E> {
             .execute(LogicalPlan::DfPlan(plan), query_ctx(&table_info))
             .await
             .context(ExecuteInternalStatementSnafu)?;
-        let stream = match output {
-            Output::Stream(stream, _) => stream,
-            Output::RecordBatches(record_batches) => record_batches.as_stream(),
+        let stream = match output.data {
+            OutputData::Stream(stream) => stream,
+            OutputData::RecordBatches(record_batches) => record_batches.as_stream(),
             _ => unreachable!(),
         };
 
@@ -285,9 +285,9 @@ impl<E: ErrorExt + Send + Sync + 'static> ScriptsTable<E> {
             .execute(LogicalPlan::DfPlan(plan), query_ctx(&table_info))
             .await
             .context(ExecuteInternalStatementSnafu)?;
-        let stream = match output {
-            Output::Stream(stream, _) => stream,
-            Output::RecordBatches(record_batches) => record_batches.as_stream(),
+        let stream = match output.data {
+            OutputData::Stream(stream) => stream,
+            OutputData::RecordBatches(record_batches) => record_batches.as_stream(),
             _ => unreachable!(),
         };
         Ok(stream)
@@ -73,6 +73,6 @@ impl GrpcQueryHandler for MockGrpcQueryHandler {
     type Error = Error;
 
     async fn do_query(&self, _query: Request, _ctx: QueryContextRef) -> Result<Output> {
-        Ok(Output::AffectedRows(1))
+        Ok(Output::new_with_affected_rows(1))
    }
 }
Some files were not shown because too many files have changed in this diff.