Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2025-12-27 08:29:59 +00:00)

Compare commits: 43 commits
| SHA1 |
| --- |
| 8ca9e01455 |
| 3a326775ee |
| 5ad3b7984e |
| 4fc27bdc75 |
| e3c82568e5 |
| 61f0703af8 |
| b85d7bb575 |
| d334d74986 |
| 5ca8521e87 |
| e4333969b4 |
| b55905cf66 |
| fb4da05f25 |
| 904484b525 |
| cafb4708ce |
| 7c895e2605 |
| 9afe327bca |
| 58bd065c6b |
| 9aa8f756ab |
| 7639c227ca |
| 1255c1fc9e |
| 06dcd0f6ed |
| 0a4444a43a |
| b7ac8d6aa8 |
| e767f37241 |
| da098f5568 |
| aa953dcc34 |
| aa125a50f9 |
| d8939eb891 |
| 0bb949787c |
| 8c37c3fc0f |
| 21ff3620be |
| aeca0d8e8a |
| a309cd018a |
| 3ee53360ee |
| 352bd7b6fd |
| 3f3ef2e7af |
| a218f12bd9 |
| c884c56151 |
| 9ec288cab9 |
| 1f1491e429 |
| c52bc613e0 |
| a9d42f7b87 |
| 86ce2d8713 |
.editorconfig (new file, 10 lines)
@@ -0,0 +1,10 @@
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[{Makefile,**.mk}]
+indent_style = tab
@@ -21,3 +21,6 @@ GT_GCS_CREDENTIAL_PATH = GCS credential path
 GT_GCS_ENDPOINT = GCS end point
 # Settings for kafka wal test
 GT_KAFKA_ENDPOINTS = localhost:9092
+
+# Setting for fuzz tests
+GT_MYSQL_ADDR = localhost:4002
@@ -70,7 +70,7 @@ runs:

     - name: Build greptime binary
      shell: pwsh
-      run: cargo build --profile ${{ inputs.cargo-profile }} --features ${{ inputs.features }} --target ${{ inputs.arch }}
+      run: cargo build --profile ${{ inputs.cargo-profile }} --features ${{ inputs.features }} --target ${{ inputs.arch }} --bin greptime

    - name: Upload artifacts
      uses: ./.github/actions/upload-artifacts
.github/actions/fuzz-test/action.yaml (vendored, new file, 13 lines)
@@ -0,0 +1,13 @@
+name: Fuzz Test
+description: 'Fuzz test given setup and service'
+inputs:
+  target:
+    description: "The fuzz target to test"
+runs:
+  using: composite
+  steps:
+    - name: Run Fuzz Test
+      shell: bash
+      run: cargo fuzz run ${{ inputs.target }} --fuzz-dir tests-fuzz -D -s none -- -max_total_time=120
+      env:
+        GT_MYSQL_ADDR: 127.0.0.1:4002
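For orientation, a `cargo fuzz` target such as `fuzz_create_table` or `fuzz_alter_table` is a small binary built on `libfuzzer-sys` and `arbitrary` (both appear in the Cargo.lock changes below). A minimal sketch of the shape such a target takes; the `CreateTableInput` struct here is hypothetical, not the actual tests-fuzz code:

```rust
#![no_main]

use arbitrary::Arbitrary;
use libfuzzer_sys::fuzz_target;

// Hypothetical structured input; the real targets derive `Arbitrary` for
// their own table/column generators.
#[derive(Debug, Arbitrary)]
struct CreateTableInput {
    table_name: String,
    column_count: u8,
}

fuzz_target!(|input: CreateTableInput| {
    // A real target renders `input` into SQL and executes it against the
    // server listening on GT_MYSQL_ADDR, panicking on unexpected errors.
    let _sql = format!(
        "CREATE TABLE `{}` /* {} columns */",
        input.table_name, input.column_count
    );
});
```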
.github/workflows/develop.yml (vendored, 46 lines changed)
@@ -102,7 +102,7 @@ jobs:
          shared-key: "build-binaries"
      - name: Build greptime binaries
        shell: bash
-        run: cargo build
+        run: cargo build --bin greptime --bin sqlness-runner
      - name: Pack greptime binaries
        shell: bash
        run: |
@@ -117,6 +117,46 @@ jobs:
          artifacts-dir: bins
          version: current

+  fuzztest:
+    name: Fuzz Test
+    needs: build
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        target: [ "fuzz_create_table", "fuzz_alter_table" ]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
+      - name: Rust Cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          shared-key: "fuzz-test-targets"
+      - name: Set Rust Fuzz
+        shell: bash
+        run: |
+          sudo apt update && sudo apt install -y libfuzzer-14-dev
+          cargo install cargo-fuzz
+      - name: Download pre-built binaries
+        uses: actions/download-artifact@v4
+        with:
+          name: bins
+          path: .
+      - name: Unzip binaries
+        run: tar -xvf ./bins.tar.gz
+      - name: Run GreptimeDB
+        run: |
+          ./bins/greptime standalone start&
+      - name: Fuzz Test
+        uses: ./.github/actions/fuzz-test
+        env:
+          CUSTOM_LIBFUZZER_PATH: /usr/lib/llvm-14/lib/libFuzzer.a
+        with:
+          target: ${{ matrix.target }}
+
   sqlness:
    name: Sqlness Test
    needs: build
@@ -239,6 +279,10 @@ jobs:
        with:
          # Shares cross multiple jobs
          shared-key: "coverage-test"
+      - name: Docker Cache
+        uses: ScribeMD/docker-cache@0.3.7
+        with:
+          key: docker-${{ runner.os }}-coverage
      - name: Install latest nextest release
        uses: taiki-e/install-action@nextest
      - name: Install cargo-llvm-cov
.gitignore (vendored, 4 lines changed)
@@ -46,3 +46,7 @@ benchmarks/data
 *.code-workspace

 venv/
+
+# Fuzz tests
+tests-fuzz/artifacts/
+tests-fuzz/corpus/
Cargo.lock (generated, 316 lines changed)
@@ -29,6 +29,17 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"

+[[package]]
+name = "aes"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
+dependencies = [
+ "cfg-if 1.0.0",
+ "cipher",
+ "cpufeatures",
+]
+
 [[package]]
 name = "ahash"
 version = "0.7.7"
@@ -196,7 +207,7 @@ checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72"

 [[package]]
 name = "api"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "common-base",
  "common-decimal",
@@ -241,6 +252,15 @@ dependencies = [
  "syn 1.0.109",
 ]

+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
+dependencies = [
+ "derive_arbitrary",
+]
+
 [[package]]
 name = "arc-swap"
 version = "1.6.0"
@@ -550,7 +570,6 @@ version = "0.3.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
 dependencies = [
- "brotli",
  "bzip2",
  "flate2",
  "futures-core",
@@ -569,6 +588,7 @@ version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
 dependencies = [
+ "brotli",
  "bzip2",
  "flate2",
  "futures-core",
@@ -675,7 +695,7 @@ dependencies = [

 [[package]]
 name = "auth"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -861,7 +881,7 @@ dependencies = [

 [[package]]
 name = "benchmarks"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arrow",
  "chrono",
@@ -992,6 +1012,15 @@ dependencies = [
  "generic-array",
 ]

+[[package]]
+name = "block-padding"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "borsh"
 version = "1.3.0"
@@ -1219,7 +1248,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

 [[package]]
 name = "catalog"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -1266,6 +1295,15 @@ dependencies = [
  "tokio",
 ]

+[[package]]
+name = "cbc"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6"
+dependencies = [
+ "cipher",
+]
+
 [[package]]
 name = "cc"
 version = "1.0.83"
@@ -1421,6 +1459,16 @@ dependencies = [
  "half 1.8.2",
 ]

+[[package]]
+name = "cipher"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
+dependencies = [
+ "crypto-common",
+ "inout",
+]
+
 [[package]]
 name = "clang-sys"
 version = "1.6.1"
@@ -1510,7 +1558,7 @@ checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"

 [[package]]
 name = "client"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -1546,7 +1594,7 @@ dependencies = [
  "session",
  "snafu",
  "substrait 0.17.1",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "tokio",
  "tokio-stream",
  "tonic 0.10.2",
@@ -1576,7 +1624,7 @@ dependencies = [

 [[package]]
 name = "cmd"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anymap",
  "async-trait",
@@ -1629,7 +1677,7 @@ dependencies = [
  "session",
  "snafu",
  "store-api",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "temp-env",
  "tikv-jemallocator",
@@ -1672,7 +1720,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"

 [[package]]
 name = "common-base"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anymap",
  "bitvec",
@@ -1687,7 +1735,7 @@ dependencies = [

 [[package]]
 name = "common-catalog"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "chrono",
  "common-error",
@@ -1698,7 +1746,7 @@ dependencies = [

 [[package]]
 name = "common-config"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "common-base",
  "humantime-serde",
@@ -1709,7 +1757,7 @@ dependencies = [

 [[package]]
 name = "common-datasource"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -1741,7 +1789,7 @@ dependencies = [

 [[package]]
 name = "common-decimal"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arrow",
  "bigdecimal",
@@ -1755,7 +1803,7 @@ dependencies = [

 [[package]]
 name = "common-error"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "snafu",
  "strum 0.25.0",
@@ -1763,7 +1811,7 @@ dependencies = [

 [[package]]
 name = "common-function"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -1798,7 +1846,7 @@ dependencies = [

 [[package]]
 name = "common-greptimedb-telemetry"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "common-error",
@@ -1817,7 +1865,7 @@ dependencies = [

 [[package]]
 name = "common-grpc"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arrow-flight",
@@ -1847,7 +1895,7 @@ dependencies = [

 [[package]]
 name = "common-grpc-expr"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -1866,7 +1914,7 @@ dependencies = [

 [[package]]
 name = "common-macro"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arc-swap",
  "common-query",
@@ -1881,7 +1929,7 @@ dependencies = [

 [[package]]
 name = "common-mem-prof"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "common-error",
  "common-macro",
@@ -1894,7 +1942,7 @@ dependencies = [

 [[package]]
 name = "common-meta"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-recursion",
@@ -1944,11 +1992,11 @@ dependencies = [

 [[package]]
 name = "common-plugins"
-version = "0.7.0"
+version = "0.7.1"

 [[package]]
 name = "common-procedure"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -1972,7 +2020,7 @@ dependencies = [

 [[package]]
 name = "common-procedure-test"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "common-procedure",
@@ -1980,7 +2028,7 @@ dependencies = [

 [[package]]
 name = "common-query"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -2003,7 +2051,7 @@ dependencies = [

 [[package]]
 name = "common-recordbatch"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arc-swap",
  "common-base",
@@ -2023,7 +2071,7 @@ dependencies = [

 [[package]]
 name = "common-runtime"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "common-error",
@@ -2043,7 +2091,7 @@ dependencies = [

 [[package]]
 name = "common-telemetry"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "atty",
  "backtrace",
@@ -2071,7 +2119,7 @@ dependencies = [

 [[package]]
 name = "common-test-util"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "client",
  "common-query",
@@ -2083,7 +2131,7 @@ dependencies = [

 [[package]]
 name = "common-time"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arrow",
  "chrono",
@@ -2099,14 +2147,14 @@ dependencies = [

 [[package]]
 name = "common-version"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "build-data",
 ]

 [[package]]
 name = "common-wal"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "common-base",
  "common-error",
@@ -2754,7 +2802,7 @@ dependencies = [

 [[package]]
 name = "datanode"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arrow-flight",
@@ -2812,7 +2860,7 @@ dependencies = [
  "snafu",
  "sql",
  "store-api",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "tokio",
  "tokio-stream",
@@ -2826,7 +2874,7 @@ dependencies = [

 [[package]]
 name = "datatypes"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -2912,6 +2960,17 @@ dependencies = [
  "syn 2.0.43",
 ]

+[[package]]
+name = "derive_arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.43",
+]
+
 [[package]]
 name = "derive_builder"
 version = "0.11.2"
@@ -3302,7 +3361,7 @@ dependencies = [

 [[package]]
 name = "file-engine"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -3403,7 +3462,7 @@ dependencies = [

 [[package]]
 name = "flow"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "bimap",
@@ -3415,10 +3474,12 @@ dependencies = [
  "common-telemetry",
  "common-time",
  "datatypes",
+ "enum_dispatch",
  "hydroflow",
  "itertools 0.10.5",
  "num-traits",
  "serde",
  "serde_json",
+ "servers",
  "session",
  "snafu",
@@ -3458,7 +3519,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"

 [[package]]
 name = "frontend"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -3522,7 +3583,7 @@ dependencies = [
  "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
  "store-api",
  "strfmt",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "tokio",
  "toml 0.8.8",
@@ -4291,7 +4352,7 @@ dependencies = [

 [[package]]
 name = "index"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "asynchronous-codec",
@@ -4406,6 +4467,16 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "inout"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
+dependencies = [
+ "block-padding",
+ "generic-array",
+]
+
 [[package]]
 name = "instant"
 version = "0.1.12"
@@ -4455,11 +4526,12 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"

 [[package]]
 name = "iri-string"
-version = "0.4.1"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f0f7638c1e223529f1bfdc48c8b133b9e0b434094d1d28473161ee48b235f78"
+checksum = "21859b667d66a4c1dacd9df0863b3efb65785474255face87f5bca39dd8407c0"
 dependencies = [
- "nom",
+ "memchr",
+ "serde",
 ]

 [[package]]
@@ -4746,9 +4818,20 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.151"
+version = "0.2.153"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"

+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
+
 [[package]]
 name = "libgit2-sys"
@@ -4848,7 +4931,7 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"

 [[package]]
 name = "log-store"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -5137,7 +5220,7 @@ dependencies = [

 [[package]]
 name = "meta-client"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -5167,7 +5250,7 @@ dependencies = [

 [[package]]
 name = "meta-srv"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anymap",
  "api",
@@ -5247,7 +5330,7 @@ dependencies = [

 [[package]]
 name = "metric-engine"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "aquamarine",
@@ -5319,7 +5402,7 @@ dependencies = [

 [[package]]
 name = "mito2"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anymap",
  "api",
@@ -5933,7 +6016,7 @@ dependencies = [

 [[package]]
 name = "object-store"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -5989,9 +6072,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

 [[package]]
 name = "opendal"
-version = "0.44.2"
+version = "0.45.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4af824652d4d2ffabf606d337a071677ae621b05622adf35df9562f69d9b4498"
+checksum = "52c17c077f23fa2d2c25d9d22af98baa43b8bbe2ef0de80cf66339aa70401467"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -6007,7 +6090,7 @@ dependencies = [
  "md-5",
  "once_cell",
  "percent-encoding",
- "quick-xml 0.30.0",
+ "quick-xml 0.31.0",
  "reqsign",
  "reqwest",
  "serde",
@@ -6176,7 +6259,7 @@ dependencies = [

 [[package]]
 name = "operator"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -6223,7 +6306,7 @@ dependencies = [
  "sql",
  "sqlparser 0.38.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=6a93567ae38d42be5c8d08b13c8ff4dde26502ef)",
  "store-api",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "tokio",
  "tonic 0.10.2",
@@ -6454,7 +6537,7 @@ dependencies = [

 [[package]]
 name = "partition"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "async-trait",
@@ -6500,6 +6583,16 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd"

+[[package]]
+name = "pbkdf2"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
+dependencies = [
+ "digest",
+ "hmac",
+]
+
 [[package]]
 name = "peeking_take_while"
 version = "0.1.2"
@@ -6540,6 +6633,12 @@ version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

+[[package]]
+name = "permutation"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7"
+
 [[package]]
 name = "pest"
 version = "2.7.5"
@@ -6724,6 +6823,21 @@ dependencies = [
  "spki 0.7.3",
 ]

+[[package]]
+name = "pkcs5"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6"
+dependencies = [
+ "aes",
+ "cbc",
+ "der 0.7.8",
+ "pbkdf2",
+ "scrypt",
+ "sha2",
+ "spki 0.7.3",
+]
+
 [[package]]
 name = "pkcs8"
 version = "0.8.0"
@@ -6742,6 +6856,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
 dependencies = [
  "der 0.7.8",
+ "pkcs5",
+ "rand_core",
  "spki 0.7.3",
 ]
@@ -6781,7 +6897,7 @@ dependencies = [

 [[package]]
 name = "plugins"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "auth",
  "common-base",
@@ -7048,7 +7164,7 @@ dependencies = [

 [[package]]
 name = "promql"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "ahash 0.8.6",
  "async-recursion",
@@ -7259,7 +7375,7 @@ dependencies = [

 [[package]]
 name = "puffin"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "bitflags 2.4.1",
@@ -7380,7 +7496,7 @@ dependencies = [

 [[package]]
 name = "query"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "ahash 0.8.6",
  "api",
@@ -7441,7 +7557,7 @@ dependencies = [
  "stats-cli",
  "store-api",
  "streaming-stats",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "tokio",
  "tokio-stream",
@@ -7456,16 +7572,6 @@ dependencies = [
  "memchr",
 ]

-[[package]]
-name = "quick-xml"
-version = "0.30.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
-dependencies = [
- "memchr",
- "serde",
-]
-
 [[package]]
 name = "quick-xml"
 version = "0.31.0"
@@ -7748,9 +7854,9 @@ dependencies = [

 [[package]]
 name = "reqsign"
-version = "0.14.6"
+version = "0.14.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dce87f66ba6c6acef277a729f989a0eca946cb9ce6a15bcc036bda0f72d4b9fd"
+checksum = "43e319d9de9ff4d941abf4ac718897118b0fe04577ea3f8e0f5788971784eef5"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -7775,7 +7881,6 @@ dependencies = [
  "serde_json",
  "sha1",
  "sha2",
- "tokio",
 ]

 [[package]]
@@ -7968,6 +8073,7 @@ dependencies = [
  "pkcs1 0.7.5",
  "pkcs8 0.10.2",
  "rand_core",
  "sha2",
  "signature",
  "spki 0.7.3",
  "subtle",
@@ -8702,6 +8808,15 @@ dependencies = [
  "bytemuck",
 ]

+[[package]]
+name = "salsa20"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213"
+dependencies = [
+ "cipher",
+]
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -8759,7 +8874,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

 [[package]]
 name = "script"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -8815,6 +8930,17 @@ dependencies = [
  "tokio-test",
 ]

+[[package]]
+name = "scrypt"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f"
+dependencies = [
+ "pbkdf2",
+ "salsa20",
+ "sha2",
+]
+
 [[package]]
 name = "sct"
 version = "0.7.1"
@@ -9032,7 +9158,7 @@ dependencies = [

 [[package]]
 name = "servers"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "aide",
  "api",
@@ -9074,6 +9200,7 @@ dependencies = [
  "derive_builder 0.12.0",
  "digest",
  "futures",
+ "hashbrown 0.14.3",
  "headers",
  "hex",
  "hostname",
@@ -9092,6 +9219,7 @@ dependencies = [
  "opensrv-mysql",
  "opentelemetry-proto 0.3.0",
  "parking_lot 0.12.1",
+ "permutation",
  "pgwire",
  "pin-project",
  "postgres-types",
@@ -9136,7 +9264,7 @@ dependencies = [

 [[package]]
 name = "session"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arc-swap",
@@ -9406,7 +9534,7 @@ dependencies = [

 [[package]]
 name = "sql"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "common-base",
@@ -9458,7 +9586,7 @@ dependencies = [

 [[package]]
 name = "sqlness-runner"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-trait",
  "clap 4.4.11",
@@ -9665,7 +9793,7 @@ dependencies = [

 [[package]]
 name = "store-api"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "aquamarine",
@@ -9805,7 +9933,7 @@ dependencies = [

 [[package]]
 name = "substrait"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "async-recursion",
  "async-trait",
@@ -9978,7 +10106,7 @@ dependencies = [

 [[package]]
 name = "table"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "anymap",
  "async-trait",
@@ -10090,17 +10218,21 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"

 [[package]]
 name = "tests-fuzz"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
+ "arbitrary",
  "async-trait",
  "common-error",
  "common-macro",
  "common-query",
  "common-runtime",
  "common-telemetry",
  "common-time",
  "datatypes",
  "derive_builder 0.12.0",
  "dotenv",
  "lazy_static",
+ "libfuzzer-sys",
  "partition",
  "rand",
  "rand_chacha",
@@ -10115,7 +10247,7 @@ dependencies = [

 [[package]]
 name = "tests-integration"
-version = "0.7.0"
+version = "0.7.1"
 dependencies = [
  "api",
  "arrow-flight",
@@ -10172,7 +10304,7 @@ dependencies = [
  "sql",
  "sqlx",
  "store-api",
- "substrait 0.7.0",
+ "substrait 0.7.1",
  "table",
  "tempfile",
  "time",
@@ -10731,13 +10863,13 @@ dependencies = [

 [[package]]
 name = "tower-http"
-version = "0.3.5"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858"
+checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140"
 dependencies = [
- "async-compression 0.3.15",
- "base64 0.13.1",
- "bitflags 1.3.2",
+ "async-compression 0.4.5",
+ "base64 0.21.5",
+ "bitflags 2.4.1",
  "bytes",
  "futures-core",
  "futures-util",
@@ -62,7 +62,7 @@ members = [
 resolver = "2"

 [workspace.package]
-version = "0.7.0"
+version = "0.7.1"
 edition = "2021"
 license = "Apache-2.0"
@@ -134,7 +134,7 @@ reqwest = { version = "0.11", default-features = false, features = [
 rskafka = "0.5"
 rust_decimal = "1.33"
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
+serde_json = { version = "1.0", features = ["float_roundtrip"] }
 serde_with = "3"
 smallvec = { version = "1", features = ["serde"] }
 snafu = "0.7"
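The `float_roundtrip` feature enabled above makes serde_json parse floats with enough precision that an `f64` survives a serialize-then-parse round trip exactly, at roughly 2x float-parsing cost. A minimal sketch of the guarantee it buys:

```rust
fn main() {
    let x: f64 = 0.1 + 0.2; // 0.30000000000000004
    let json = serde_json::to_string(&x).unwrap();
    let y: f64 = serde_json::from_str(&json).unwrap();
    // With `float_roundtrip`, y is bit-for-bit equal to x; the default
    // best-effort parser may land on a neighboring f64 in rare cases.
    assert_eq!(x.to_bits(), y.to_bits());
}
```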
Makefile (5 lines changed)
@@ -3,6 +3,7 @@ CARGO_PROFILE ?=
 FEATURES ?=
 TARGET_DIR ?=
 TARGET ?=
+BUILD_BIN ?= greptime
 CARGO_BUILD_OPTS := --locked
 IMAGE_REGISTRY ?= docker.io
 IMAGE_NAMESPACE ?= greptime
@@ -45,6 +46,10 @@ ifneq ($(strip $(TARGET)),)
 	CARGO_BUILD_OPTS += --target ${TARGET}
 endif

+ifneq ($(strip $(BUILD_BIN)),)
+	CARGO_BUILD_OPTS += --bin ${BUILD_BIN}
+endif
+
 ifneq ($(strip $(RELEASE)),)
 	CARGO_BUILD_OPTS += --release
 endif
@@ -29,7 +29,7 @@ use client::api::v1::column::Values;
 use client::api::v1::{
     Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests, SemanticType,
 };
-use client::{Client, Database, Output, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use futures_util::TryStreamExt;
 use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
 use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
@@ -502,9 +502,9 @@ async fn do_query(num_iter: usize, db: &Database, table_name: &str) {
     for i in 0..num_iter {
         let now = Instant::now();
         let res = db.sql(&query).await.unwrap();
-        match res {
-            Output::AffectedRows(_) | Output::RecordBatches(_) => (),
-            Output::Stream(stream, _) => {
+        match res.data {
+            OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => (),
+            OutputData::Stream(stream) => {
                 stream.try_collect::<Vec<_>>().await.unwrap();
             }
         }
docs/benchmarks/tsbs/v0.7.0.md (new file, 50 lines)
@@ -0,0 +1,50 @@
+# TSBS benchmark - v0.7.0
+
+## Environment
+
+### Local
+
+|        |                                    |
+| ------ | ---------------------------------- |
+| CPU    | AMD Ryzen 7 7735HS (8 core 3.2GHz) |
+| Memory | 32GB                               |
+| Disk   | SOLIDIGM SSDPFKNU010TZ             |
+| OS     | Ubuntu 22.04.2 LTS                 |
+
+### Amazon EC2
+
+|         |                |
+| ------- | -------------- |
+| Machine | c5d.2xlarge    |
+| CPU     | 8 core         |
+| Memory  | 16GB           |
+| Disk    | 50GB (GP3)     |
+| OS      | Ubuntu 22.04.1 |
+
+## Write performance
+
+| Environment     | Ingest rate (rows/s) |
+| --------------- | -------------------- |
+| Local           | 3695814.64           |
+| EC2 c5d.2xlarge | 2987166.64           |
+
+## Query performance
+
+| Query type            | Local (ms) | EC2 c5d.2xlarge (ms) |
+| --------------------- | ---------- | -------------------- |
+| cpu-max-all-1         | 30.56      | 54.74                |
+| cpu-max-all-8         | 52.69      | 70.50                |
+| double-groupby-1      | 664.30     | 1366.63              |
+| double-groupby-5      | 1391.26    | 2141.71              |
+| double-groupby-all    | 2828.94    | 3389.59              |
+| groupby-orderby-limit | 718.92     | 1213.90              |
+| high-cpu-1            | 29.21      | 52.98                |
+| high-cpu-all          | 5514.12    | 7194.91              |
+| lastpoint             | 7571.40    | 9423.41              |
+| single-groupby-1-1-1  | 19.09      | 7.77                 |
+| single-groupby-1-1-12 | 27.28      | 51.64                |
+| single-groupby-1-8-1  | 31.85      | 11.64                |
+| single-groupby-5-1-1  | 16.14      | 9.67                 |
+| single-groupby-5-1-12 | 27.21      | 53.62                |
+| single-groupby-5-8-1  | 39.62      | 14.96                |
@@ -79,7 +79,7 @@ This RFC proposes to add a new expression node `MergeScan` to merge result from
 │ │                  │                        │
 └─Frontend──────┘ └─Remote-Sources──────────────┘
 ```
-This merge operation simply chains all the the underlying remote data sources and return `RecordBatch`, just like a coalesce op. And each remote sources is a gRPC query to datanode via the substrait logical plan interface. The plan is transformed and divided from the original query that comes to frontend.
+This merge operation simply chains all the underlying remote data sources and return `RecordBatch`, just like a coalesce op. And each remote sources is a gRPC query to datanode via the substrait logical plan interface. The plan is transformed and divided from the original query that comes to frontend.

 ## Commutativity of MergeScan
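The chaining that the RFC paragraph above describes is ordinary stream concatenation. A minimal sketch of the idea with the `futures` crate (illustrative only, not the actual MergeScan implementation; integers stand in for `RecordBatch`es):

```rust
use futures::stream::{self, StreamExt};

#[tokio::main]
async fn main() {
    // Each remote datanode source yields a sequence of batches.
    let datanode_a = stream::iter(vec![1, 2]);
    let datanode_b = stream::iter(vec![3, 4]);

    // Chain the sources back to back, coalesce-style: no reordering,
    // no key merging, just one stream after another.
    let merged: Vec<i32> = datanode_a.chain(datanode_b).collect().await;
    assert_eq!(merged, vec![1, 2, 3, 4]);
}
```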
@@ -307,7 +307,7 @@ impl Database {
                     reason: "Expect 'AffectedRows' Flight messages to be the one and the only!"
                 }
             );
-            Ok(Output::AffectedRows(rows))
+            Ok(Output::new_with_affected_rows(rows))
         }
         FlightMessage::Recordbatch(_) | FlightMessage::Metrics(_) => {
             IllegalFlightMessagesSnafu {
@@ -340,7 +340,7 @@ impl Database {
                     output_ordering: None,
                     metrics: Default::default(),
                 };
-                Ok(Output::new_stream(Box::pin(record_batch_stream)))
+                Ok(Output::new_with_stream(Box::pin(record_batch_stream)))
             }
         }
     }
@@ -26,7 +26,7 @@ use api::v1::greptime_response::Response;
 use api::v1::{AffectedRows, GreptimeResponse};
 pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_error::status_code::StatusCode;
-pub use common_query::Output;
+pub use common_query::{Output, OutputData, OutputMeta};
 pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
 use snafu::OptionExt;
@@ -62,7 +62,9 @@ pub struct BenchTableMetadataCommand {

 impl BenchTableMetadataCommand {
     pub async fn build(&self) -> Result<Instance> {
-        let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
+        let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr], 128)
+            .await
+            .unwrap();

         let table_metadata_manager = Arc::new(TableMetadataManager::new(etcd_store));
@@ -19,8 +19,7 @@ use async_trait::async_trait;
 use clap::{Parser, ValueEnum};
 use client::api::v1::auth_header::AuthScheme;
 use client::api::v1::Basic;
-use client::{Client, Database, DEFAULT_SCHEMA_NAME};
-use common_query::Output;
+use client::{Client, Database, OutputData, DEFAULT_SCHEMA_NAME};
 use common_recordbatch::util::collect;
 use common_telemetry::{debug, error, info, warn};
 use datatypes::scalars::ScalarVector;
@@ -142,7 +141,7 @@ impl Export {
             .with_context(|_| RequestDatabaseSnafu {
                 sql: "show databases".to_string(),
             })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let record_batch = collect(stream)
@@ -183,7 +182,7 @@ impl Export {
             .sql(&sql)
             .await
             .with_context(|_| RequestDatabaseSnafu { sql })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let Some(record_batch) = collect(stream)
@@ -235,7 +234,7 @@ impl Export {
             .sql(&sql)
             .await
             .with_context(|_| RequestDatabaseSnafu { sql })?;
-        let Output::Stream(stream, _) = result else {
+        let OutputData::Stream(stream) = result.data else {
             NotDataFromOutputSnafu.fail()?
         };
         let record_batch = collect(stream)
@@ -19,7 +19,7 @@ use std::time::Instant;
 use catalog::kvbackend::{
     CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager,
 };
-use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
+use client::{Client, Database, OutputData, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_base::Plugins;
 use common_error::ext::ErrorExt;
 use common_query::Output;
@@ -184,15 +184,15 @@ impl Repl {
             }
             .context(RequestDatabaseSnafu { sql: &sql })?;

-            let either = match output {
-                Output::Stream(s, _) => {
+            let either = match output.data {
+                OutputData::Stream(s) => {
                     let x = RecordBatches::try_collect(s)
                         .await
                         .context(CollectRecordBatchesSnafu)?;
                     Either::Left(x)
                 }
-                Output::RecordBatches(x) => Either::Left(x),
-                Output::AffectedRows(rows) => Either::Right(rows),
+                OutputData::RecordBatches(x) => Either::Left(x),
+                OutputData::AffectedRows(rows) => Either::Right(rows),
             };

             let end = Instant::now();
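Every call site in these hunks follows the same mechanical migration: `Output` stopped being the matched enum itself, and its payload moved into a `data` field of type `OutputData` (with metadata in `OutputMeta`). A simplified sketch of the before/after shape; the types are stand-ins, not the real `common_query` definitions:

```rust
// Stand-in for common_query::OutputData after the refactor.
enum OutputData {
    AffectedRows(usize),
    RecordBatches(Vec<String>),
    Stream(String),
}

// Stand-in for common_query::Output: payload plus (elided) metadata.
struct Output {
    data: OutputData,
}

fn describe(output: Output) -> String {
    // Old code matched on `output` directly; new code matches `output.data`.
    match output.data {
        OutputData::AffectedRows(n) => format!("{n} rows"),
        OutputData::RecordBatches(batches) => format!("{} batches", batches.len()),
        OutputData::Stream(_) => "stream".to_string(),
    }
}

fn main() {
    let out = Output { data: OutputData::AffectedRows(3) };
    assert_eq!(describe(out), "3 rows");
}
```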
@@ -70,7 +70,7 @@ impl UpgradeCommand {
             etcd_addr: &self.etcd_addr,
         })?;
         let tool = MigrateTableMetadata {
-            etcd_store: EtcdStore::with_etcd_client(client),
+            etcd_store: EtcdStore::with_etcd_client(client, 128),
             dryrun: self.dryrun,
             skip_catalog_keys: self.skip_catalog_keys,
             skip_table_global_keys: self.skip_table_global_keys,
@@ -117,10 +117,12 @@ struct StartCommand {
     /// The working home directory of this metasrv instance.
     #[clap(long)]
     data_home: Option<String>,

     /// If it's not empty, the metasrv will store all data with this key prefix.
     #[clap(long, default_value = "")]
     store_key_prefix: String,
+    /// The max operations per txn
+    #[clap(long)]
+    max_txn_ops: Option<usize>,
 }

 impl StartCommand {
@@ -181,6 +183,10 @@ impl StartCommand {
             opts.store_key_prefix = self.store_key_prefix.clone()
         }

+        if let Some(max_txn_ops) = self.max_txn_ops {
+            opts.max_txn_ops = max_txn_ops;
+        }
+
         // Disable dashboard in metasrv.
         opts.http.disable_dashboard = true;
@@ -28,12 +28,15 @@ const REGION: &str = "region";
 const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style";

 pub fn is_supported_in_s3(key: &str) -> bool {
-    key == ENDPOINT
-        || key == ACCESS_KEY_ID
-        || key == SECRET_ACCESS_KEY
-        || key == SESSION_TOKEN
-        || key == REGION
-        || key == ENABLE_VIRTUAL_HOST_STYLE
+    [
+        ENDPOINT,
+        ACCESS_KEY_ID,
+        SECRET_ACCESS_KEY,
+        SESSION_TOKEN,
+        REGION,
+        ENABLE_VIRTUAL_HOST_STYLE,
+    ]
+    .contains(&key)
 }

 pub fn build_s3_backend(
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod clamp;
 mod modulo;
 mod pow;
 mod rate;
@@ -19,6 +20,7 @@ mod rate;
 use std::fmt;
 use std::sync::Arc;

+pub use clamp::ClampFunction;
 use common_query::error::{GeneralDataFusionSnafu, Result};
 use common_query::prelude::Signature;
 use datafusion::error::DataFusionError;
@@ -40,7 +42,8 @@ impl MathFunction {
         registry.register(Arc::new(ModuloFunction));
         registry.register(Arc::new(PowFunction));
         registry.register(Arc::new(RateFunction));
-        registry.register(Arc::new(RangeFunction))
+        registry.register(Arc::new(RangeFunction));
+        registry.register(Arc::new(ClampFunction));
     }
 }
src/common/function/src/scalars/math/clamp.rs (new file, 403 lines)
@@ -0,0 +1,403 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt::{self, Display};
+use std::sync::Arc;
+
+use common_query::error::{InvalidFuncArgsSnafu, Result};
+use common_query::prelude::Signature;
+use datafusion::arrow::array::{ArrayIter, PrimitiveArray};
+use datafusion::logical_expr::Volatility;
+use datatypes::data_type::{ConcreteDataType, DataType};
+use datatypes::prelude::VectorRef;
+use datatypes::types::LogicalPrimitiveType;
+use datatypes::value::TryAsPrimitive;
+use datatypes::vectors::PrimitiveVector;
+use datatypes::with_match_primitive_type_id;
+use snafu::{ensure, OptionExt};
+
+use crate::function::Function;
+
+#[derive(Clone, Debug, Default)]
+pub struct ClampFunction;
+
+const CLAMP_NAME: &str = "clamp";
+
+impl Function for ClampFunction {
+    fn name(&self) -> &str {
+        CLAMP_NAME
+    }
+
+    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        // Type check is done by `signature`
+        Ok(input_types[0].clone())
+    }
+
+    fn signature(&self) -> Signature {
+        // input, min, max
+        Signature::uniform(3, ConcreteDataType::numerics(), Volatility::Immutable)
+    }
+
+    fn eval(
+        &self,
+        _func_ctx: crate::function::FunctionContext,
+        columns: &[VectorRef],
+    ) -> Result<VectorRef> {
+        ensure!(
+            columns.len() == 3,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect exactly 3, have: {}",
+                    columns.len()
+                ),
+            }
+        );
+        ensure!(
+            columns[0].data_type().is_numeric(),
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The first arg's type is not numeric, have: {}",
+                    columns[0].data_type()
+                ),
+            }
+        );
+        ensure!(
+            columns[0].data_type() == columns[1].data_type()
+                && columns[1].data_type() == columns[2].data_type(),
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "Arguments don't have identical types: {}, {}, {}",
+                    columns[0].data_type(),
+                    columns[1].data_type(),
+                    columns[2].data_type()
+                ),
+            }
+        );
+        ensure!(
+            columns[1].len() == 1 && columns[2].len() == 1,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The second and third args should be scalar, have: {:?}, {:?}",
+                    columns[1], columns[2]
+                ),
+            }
+        );
+
+        with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
+            let input_array = columns[0].to_arrow_array();
+            let input = input_array
+                .as_any()
+                .downcast_ref::<PrimitiveArray<<$S as LogicalPrimitiveType>::ArrowPrimitive>>()
+                .unwrap();
+
+            let min = TryAsPrimitive::<$S>::try_as_primitive(&columns[1].get(0))
+                .with_context(|| {
+                    InvalidFuncArgsSnafu {
+                        err_msg: "The second arg should not be none",
+                    }
+                })?;
+            let max = TryAsPrimitive::<$S>::try_as_primitive(&columns[2].get(0))
+                .with_context(|| {
+                    InvalidFuncArgsSnafu {
+                        err_msg: "The third arg should not be none",
+                    }
+                })?;
+
+            // ensure min <= max
+            ensure!(
+                min <= max,
+                InvalidFuncArgsSnafu {
+                    err_msg: format!(
+                        "The second arg should be less than or equal to the third arg, have: {:?}, {:?}",
+                        columns[1], columns[2]
+                    ),
+                }
+            );
+
+            clamp_impl::<$S, true, true>(input, min, max)
+        },{
+            unreachable!()
+        })
+    }
+}
+
+impl Display for ClampFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", CLAMP_NAME.to_ascii_uppercase())
+    }
+}
+
+fn clamp_impl<T: LogicalPrimitiveType, const CLAMP_MIN: bool, const CLAMP_MAX: bool>(
+    input: &PrimitiveArray<T::ArrowPrimitive>,
+    min: T::Native,
+    max: T::Native,
+) -> Result<VectorRef> {
+    common_telemetry::info!("[DEBUG] min {min:?}, max {max:?}");
+
+    let iter = ArrayIter::new(input);
+    let result = iter.map(|x| {
+        x.map(|x| {
+            if CLAMP_MIN && x < min {
+                min
+            } else if CLAMP_MAX && x > max {
+                max
+            } else {
+                x
+            }
+        })
+    });
+    let result = PrimitiveArray::<T::ArrowPrimitive>::from_iter(result);
+    Ok(Arc::new(PrimitiveVector::<T>::from(result)))
+}
+
+#[cfg(test)]
+mod test {
+
+    use std::sync::Arc;
+
+    use datatypes::prelude::ScalarVector;
+    use datatypes::vectors::{
+        ConstantVector, Float64Vector, Int64Vector, StringVector, UInt64Vector,
+    };
+
+    use super::*;
+    use crate::function::FunctionContext;
+
+    #[test]
+    fn clamp_i64() {
+        let inputs = [
+            (
+                vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
+                -1,
+                10,
+                vec![Some(-1), Some(-1), Some(-1), Some(0), Some(1), Some(2)],
+            ),
+            (
+                vec![Some(-3), Some(-2), Some(-1), Some(0), Some(1), Some(2)],
+                0,
+                0,
+                vec![Some(0), Some(0), Some(0), Some(0), Some(0), Some(0)],
+            ),
+            (
+                vec![Some(-3), None, Some(-1), None, None, Some(2)],
+                -2,
+                1,
+                vec![Some(-2), None, Some(-1), None, None, Some(1)],
+            ),
+            (
+                vec![None, None, None, None, None],
+                0,
+                1,
+                vec![None, None, None, None, None],
+            ),
+        ];
+
+        let func = ClampFunction;
+        for (in_data, min, max, expected) in inputs {
+            let args = [
+                Arc::new(Int64Vector::from(in_data)) as _,
+                Arc::new(Int64Vector::from_vec(vec![min])) as _,
+                Arc::new(Int64Vector::from_vec(vec![max])) as _,
+            ];
+            let result = func
+                .eval(FunctionContext::default(), args.as_slice())
+                .unwrap();
+            let expected: VectorRef = Arc::new(Int64Vector::from(expected));
+            assert_eq!(expected, result);
+        }
+    }
+
+    #[test]
+    fn clamp_u64() {
+        let inputs = [
+            (
+                vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
+                1,
+                3,
+                vec![Some(1), Some(1), Some(2), Some(3), Some(3), Some(3)],
+            ),
+            (
+                vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
+                0,
+                0,
+                vec![Some(0), Some(0), Some(0), Some(0), Some(0), Some(0)],
+            ),
+            (
+                vec![Some(0), None, Some(2), None, None, Some(5)],
+                1,
+                3,
+                vec![Some(1), None, Some(2), None, None, Some(3)],
+            ),
+            (
+                vec![None, None, None, None, None],
+                0,
+                1,
+                vec![None, None, None, None, None],
+            ),
+        ];
+
+        let func = ClampFunction;
+        for (in_data, min, max, expected) in inputs {
+            let args = [
+                Arc::new(UInt64Vector::from(in_data)) as _,
+                Arc::new(UInt64Vector::from_vec(vec![min])) as _,
+                Arc::new(UInt64Vector::from_vec(vec![max])) as _,
+            ];
+            let result = func
+                .eval(FunctionContext::default(), args.as_slice())
+                .unwrap();
+            let expected: VectorRef = Arc::new(UInt64Vector::from(expected));
+            assert_eq!(expected, result);
+        }
+    }
+
+    #[test]
+    fn clamp_f64() {
+        let inputs = [
+            (
+                vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
+                -1.0,
+                10.0,
+                vec![Some(-1.0), Some(-1.0), Some(-1.0), Some(0.0), Some(1.0)],
+            ),
+            (
+                vec![Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)],
+                0.0,
+                0.0,
+                vec![Some(0.0), Some(0.0), Some(0.0), Some(0.0)],
+            ),
+            (
+                vec![Some(-3.0), None, Some(-1.0), None, None, Some(2.0)],
+                -2.0,
+                1.0,
+                vec![Some(-2.0), None, Some(-1.0), None, None, Some(1.0)],
+            ),
+            (
+                vec![None, None, None, None, None],
+                0.0,
+                1.0,
+                vec![None, None, None, None, None],
+            ),
+        ];
+
+        let func = ClampFunction;
+        for (in_data, min, max, expected) in inputs {
+            let args = [
+                Arc::new(Float64Vector::from(in_data)) as _,
+                Arc::new(Float64Vector::from_vec(vec![min])) as _,
+                Arc::new(Float64Vector::from_vec(vec![max])) as _,
+            ];
+            let result = func
+                .eval(FunctionContext::default(), args.as_slice())
+                .unwrap();
+            let expected: VectorRef = Arc::new(Float64Vector::from(expected));
+            assert_eq!(expected, result);
+        }
+    }
+
+    #[test]
+    fn clamp_const_i32() {
+        let input = vec![Some(5)];
+        let min = 2;
+        let max = 4;
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(ConstantVector::new(Arc::new(Int64Vector::from(input)), 1)) as _,
+            Arc::new(Int64Vector::from_vec(vec![min])) as _,
+            Arc::new(Int64Vector::from_vec(vec![max])) as _,
+        ];
+        let result = func
+            .eval(FunctionContext::default(), args.as_slice())
+            .unwrap();
+        let expected: VectorRef = Arc::new(Int64Vector::from(vec![Some(4)]));
+        assert_eq!(expected, result);
+    }
+
+    #[test]
+    fn clamp_invalid_min_max() {
+        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
+        let min = 10.0;
+        let max = -1.0;
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(Float64Vector::from(input)) as _,
+            Arc::new(Float64Vector::from_vec(vec![min])) as _,
+            Arc::new(Float64Vector::from_vec(vec![max])) as _,
+        ];
+        let result = func.eval(FunctionContext::default(), args.as_slice());
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn clamp_type_not_match() {
+        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
+        let min = -1;
+        let max = 10;
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(Float64Vector::from(input)) as _,
+            Arc::new(Int64Vector::from_vec(vec![min])) as _,
+            Arc::new(UInt64Vector::from_vec(vec![max])) as _,
+        ];
+        let result = func.eval(FunctionContext::default(), args.as_slice());
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn clamp_min_is_not_scalar() {
+        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
+        let min = -10.0;
+        let max = 1.0;
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(Float64Vector::from(input)) as _,
+            Arc::new(Float64Vector::from_vec(vec![min, min])) as _,
+            Arc::new(Float64Vector::from_vec(vec![max])) as _,
+        ];
+        let result = func.eval(FunctionContext::default(), args.as_slice());
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn clamp_no_max() {
+        let input = vec![Some(-3.0), Some(-2.0), Some(-1.0), Some(0.0), Some(1.0)];
+        let min = -10.0;
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(Float64Vector::from(input)) as _,
+            Arc::new(Float64Vector::from_vec(vec![min])) as _,
+        ];
+        let result = func.eval(FunctionContext::default(), args.as_slice());
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn clamp_on_string() {
+        let input = vec![Some("foo"), Some("foo"), Some("foo"), Some("foo")];
+
+        let func = ClampFunction;
+        let args = [
+            Arc::new(StringVector::from(input)) as _,
+            Arc::new(StringVector::from_vec(vec!["bar"])) as _,
+            Arc::new(StringVector::from_vec(vec!["baz"])) as _,
+        ];
+        let result = func.eval(FunctionContext::default(), args.as_slice());
+        assert!(result.is_err());
+    }
+}
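Stripped of the vector downcasting and the type-dispatch macro, the kernel in `clamp_impl` is an element-wise clamp over nullable values. A standalone sketch of just that semantics, matching the `clamp_i64` test data above:

```rust
// Element-wise clamp over nullable values: v < min becomes min, v > max
// becomes max, and None passes through untouched.
fn clamp_all(xs: &[Option<i64>], min: i64, max: i64) -> Vec<Option<i64>> {
    xs.iter().map(|x| x.map(|v| v.clamp(min, max))).collect()
}

fn main() {
    assert_eq!(
        clamp_all(&[Some(-3), None, Some(-1), None, None, Some(2)], -2, 1),
        vec![Some(-2), None, Some(-1), None, None, Some(1)]
    );
}
```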
@@ -14,9 +14,11 @@

 use std::sync::Arc;

 mod greatest;
+mod to_timezone;
 mod to_unixtime;

 use greatest::GreatestFunction;
+use to_timezone::ToTimezoneFunction;
 use to_unixtime::ToUnixtimeFunction;

 use crate::function_registry::FunctionRegistry;
@@ -25,6 +27,7 @@ pub(crate) struct TimestampFunction;

 impl TimestampFunction {
     pub fn register(registry: &FunctionRegistry) {
+        registry.register(Arc::new(ToTimezoneFunction));
         registry.register(Arc::new(ToUnixtimeFunction));
         registry.register(Arc::new(GreatestFunction));
     }
260
src/common/function/src/scalars/timestamp/to_timezone.rs
Normal file
260
src/common/function/src/scalars/timestamp/to_timezone.rs
Normal file
@@ -0,0 +1,260 @@
|
||||
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;
use std::sync::Arc;

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use common_time::{Timestamp, Timezone};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::{
    StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
    TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use snafu::{ensure, OptionExt};

use crate::function::{Function, FunctionContext};
use crate::helper;

#[derive(Clone, Debug, Default)]
pub struct ToTimezoneFunction;

const NAME: &str = "to_timezone";

fn convert_to_timezone(arg: &str) -> Option<Timezone> {
    Timezone::from_tz_string(arg).ok()
}

fn convert_to_timestamp(arg: &Value) -> Option<Timestamp> {
    match arg {
        Value::Timestamp(ts) => Some(*ts),
        _ => None,
    }
}

impl fmt::Display for ToTimezoneFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "TO_TIMEZONE")
    }
}

impl Function for ToTimezoneFunction {
    fn name(&self) -> &str {
        NAME
    }

    fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        // type checked by signature - MUST BE timestamp
        Ok(input_types[0].clone())
    }

    fn signature(&self) -> Signature {
        helper::one_of_sigs2(
            vec![
                ConcreteDataType::timestamp_second_datatype(),
                ConcreteDataType::timestamp_millisecond_datatype(),
                ConcreteDataType::timestamp_microsecond_datatype(),
                ConcreteDataType::timestamp_nanosecond_datatype(),
            ],
            vec![ConcreteDataType::string_datatype()],
        )
    }

    fn eval(&self, _ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
        ensure!(
            columns.len() == 2,
            InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not correct, expect exactly 2, have: {}",
                    columns.len()
                ),
            }
        );

        // TODO: maybe support epoch timestamp? https://github.com/GreptimeTeam/greptimedb/issues/3477
        let ts = columns[0].data_type().as_timestamp().with_context(|| {
            UnsupportedInputDataTypeSnafu {
                function: NAME,
                datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
            }
        })?;
        let array = columns[0].to_arrow_array();
        let times = match ts {
            TimestampType::Second(_) => {
                let vector = TimestampSecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Millisecond(_) => {
                let vector = TimestampMillisecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Microsecond(_) => {
                let vector = TimestampMicrosecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
            TimestampType::Nanosecond(_) => {
                let vector = TimestampNanosecondVector::try_from_arrow_array(array).unwrap();
                (0..vector.len())
                    .map(|i| convert_to_timestamp(&vector.get(i)))
                    .collect::<Vec<_>>()
            }
        };

        let tzs = {
            let array = columns[1].to_arrow_array();
            let vector = StringVector::try_from_arrow_array(&array)
                .ok()
                .with_context(|| UnsupportedInputDataTypeSnafu {
                    function: NAME,
                    datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
                })?;
            (0..vector.len())
                .map(|i| convert_to_timezone(&vector.get(i).to_string()))
                .collect::<Vec<_>>()
        };

        let result = times
            .iter()
            .zip(tzs.iter())
            .map(|(time, tz)| match (time, tz) {
                (Some(time), _) => Some(time.to_timezone_aware_string(tz.as_ref())),
                _ => None,
            })
            .collect::<Vec<Option<String>>>();
        Ok(Arc::new(StringVector::from(result)))
    }
}

#[cfg(test)]
mod tests {

    use datatypes::scalars::ScalarVector;
    use datatypes::timestamp::{
        TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
    };
    use datatypes::vectors::StringVector;

    use super::*;

    #[test]
    fn test_timestamp_to_timezone() {
        let f = ToTimezoneFunction;
        assert_eq!("to_timezone", f.name());

        let results = vec![
            Some("1969-12-31 19:00:01"),
            None,
            Some("1970-01-01 03:00:01"),
            None,
        ];
        let times: Vec<Option<TimestampSecond>> = vec![
            Some(TimestampSecond::new(1)),
            None,
            Some(TimestampSecond::new(1)),
            None,
        ];
        let ts_vector: TimestampSecondVector =
            TimestampSecondVector::from_owned_iterator(times.into_iter());
        let tzs = vec![Some("America/New_York"), None, Some("Europe/Moscow"), None];
        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.001"),
            None,
            Some("1970-01-01 03:00:00.001"),
            None,
        ];
        let times: Vec<Option<TimestampMillisecond>> = vec![
            Some(TimestampMillisecond::new(1)),
            None,
            Some(TimestampMillisecond::new(1)),
            None,
        ];
        let ts_vector: TimestampMillisecondVector =
            TimestampMillisecondVector::from_owned_iterator(times.into_iter());
        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.000001"),
            None,
            Some("1970-01-01 03:00:00.000001"),
            None,
        ];
        let times: Vec<Option<TimestampMicrosecond>> = vec![
            Some(TimestampMicrosecond::new(1)),
            None,
            Some(TimestampMicrosecond::new(1)),
            None,
        ];
        let ts_vector: TimestampMicrosecondVector =
            TimestampMicrosecondVector::from_owned_iterator(times.into_iter());

        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);

        let results = vec![
            Some("1969-12-31 19:00:00.000000001"),
            None,
            Some("1970-01-01 03:00:00.000000001"),
            None,
        ];
        let times: Vec<Option<TimestampNanosecond>> = vec![
            Some(TimestampNanosecond::new(1)),
            None,
            Some(TimestampNanosecond::new(1)),
            None,
        ];
        let ts_vector: TimestampNanosecondVector =
            TimestampNanosecondVector::from_owned_iterator(times.into_iter());

        let args: Vec<VectorRef> = vec![
            Arc::new(ts_vector),
            Arc::new(StringVector::from(tzs.clone())),
        ];
        let vector = f.eval(FunctionContext::default(), &args).unwrap();
        assert_eq!(4, vector.len());
        let expect_times: VectorRef = Arc::new(StringVector::from(results));
        assert_eq!(expect_times, vector);
    }
}
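At its core, the eval above delegates to Timestamp::to_timezone_aware_string with an optional Timezone. A minimal single-value sketch (not part of this diff) that mirrors the first test expectation above:

let tz = convert_to_timezone("America/New_York");
// 1 second after the epoch, rendered in US Eastern time (UTC-5).
let s = Timestamp::new_second(1).to_timezone_aware_string(tz.as_ref());
assert_eq!("1969-12-31 19:00:01", s);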
@@ -32,7 +32,7 @@ macro_rules! ok {
    };
}

/// Internal util macro to to create an error.
/// Internal util macro to create an error.
macro_rules! error {
    ($span:expr, $msg: expr) => {
        Err(syn::Error::new($span, $msg))
@@ -67,6 +67,14 @@ pub enum Error {
        location: Location,
    },

    #[snafu(display("Failed to execute {} txn operations via Etcd", max_operations))]
    EtcdTxnFailed {
        max_operations: usize,
        #[snafu(source)]
        error: etcd_client::Error,
        location: Location,
    },

    #[snafu(display("Failed to get sequence: {}", err_msg))]
    NextSequence { err_msg: String, location: Location },

@@ -400,6 +408,7 @@ impl ErrorExt for Error {
            IllegalServerState { .. }
            | EtcdTxnOpResponse { .. }
            | EtcdFailed { .. }
            | EtcdTxnFailed { .. }
            | ConnectEtcd { .. } => StatusCode::Internal,

            SerdeJson { .. }
@@ -464,7 +464,7 @@ impl TableMetadataManager {
    pub fn max_logical_tables_per_batch(&self) -> usize {
        // The batch size is max_txn_ops / 3 because the number of txn operations
        // is 3 times the number of `tables_data` entries.
        self.kv_backend.max_txn_size() / 3
        self.kv_backend.max_txn_ops() / 3
    }

    /// Creates metadata for multiple logical tables and returns an error if different metadata exists.
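A quick sanity check of the division above (a sketch, not part of this diff): if creating one logical table's metadata costs 3 txn operations, then under etcd's default limit of 128 operations a batch holds at most 42 tables, so the 128-table test below needs 4 batches.

// Assumed: 3 txn operations per logical table, etcd default --max-txn-ops = 128.
let max_txn_ops = 128;
let max_logical_tables_per_batch = max_txn_ops / 3;
assert_eq!(max_logical_tables_per_batch, 42);
assert_eq!(128usize.div_ceil(max_logical_tables_per_batch), 4);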
@@ -860,6 +860,7 @@ mod tests {
    use bytes::Bytes;
    use common_time::util::current_time_millis;
    use futures::TryStreamExt;
    use store_api::storage::RegionId;
    use table::metadata::{RawTableInfo, TableInfo};

    use super::datanode_table::DatanodeTableKey;
@@ -1056,6 +1057,36 @@
        );
    }

    #[tokio::test]
    async fn test_create_many_logical_tables_metadata() {
        let kv_backend = Arc::new(MemoryKvBackend::default());
        let table_metadata_manager = TableMetadataManager::new(kv_backend);

        let mut tables_data = vec![];
        for i in 0..128 {
            let table_id = i + 1;
            let region_number = table_id * 3;
            let region_id = RegionId::new(table_id, region_number);
            let region_route = new_region_route(region_id.as_u64(), 2);
            let region_routes = vec![region_route.clone()];
            let table_info: RawTableInfo = test_utils::new_test_table_info_with_name(
                table_id,
                &format!("my_table_{}", table_id),
                region_routes.iter().map(|r| r.region.id.region_number()),
            )
            .into();
            let table_route_value = TableRouteValue::physical(region_routes.clone());

            tables_data.push((table_info, table_route_value));
        }

        // creates metadata.
        table_metadata_manager
            .create_logical_tables_metadata(tables_data)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn test_delete_table_metadata() {
        let mem_kv = Arc::new(MemoryKvBackend::default());
@@ -19,8 +19,9 @@ use datatypes::schema::{ColumnSchema, SchemaBuilder};
use store_api::storage::TableId;
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder};

pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
    table_id: TableId,
    table_name: &str,
    region_numbers: I,
) -> TableInfo {
    let column_schemas = vec![
@@ -50,8 +51,14 @@ pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
    TableInfoBuilder::default()
        .table_id(table_id)
        .table_version(5)
        .name("mytable")
        .name(table_name)
        .meta(meta)
        .build()
        .unwrap()
}
pub fn new_test_table_info<I: IntoIterator<Item = u32>>(
    table_id: TableId,
    region_numbers: I,
) -> TableInfo {
    new_test_table_info_with_name(table_id, "mytable", region_numbers)
}
@@ -45,6 +45,10 @@ impl TxnService for ChrootKvBackend {
        let txn_res = self.inner.txn(txn).await?;
        Ok(self.chroot_txn_response(txn_res))
    }

    fn max_txn_ops(&self) -> usize {
        self.inner.max_txn_ops()
    }
}

#[async_trait::async_trait]
@@ -33,12 +33,6 @@ use crate::rpc::store::{
};
use crate::rpc::KeyValue;

// Maximum number of operations permitted in a transaction.
// The etcd default configuration's `--max-txn-ops` is 128.
//
// For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
const MAX_TXN_SIZE: usize = 128;

fn convert_key_value(kv: etcd_client::KeyValue) -> KeyValue {
    let (key, value) = kv.into_key_value();
    KeyValue { key, value }
@@ -46,10 +40,15 @@ fn convert_key_value(kv: etcd_client::KeyValue) -> KeyValue {

pub struct EtcdStore {
    client: Client,
    // Maximum number of operations permitted in a transaction.
    // The etcd default configuration's `--max-txn-ops` is 128.
    //
    // For more detail, see: https://etcd.io/docs/v3.5/op-guide/configuration/
    max_txn_ops: usize,
}

impl EtcdStore {
    pub async fn with_endpoints<E, S>(endpoints: S) -> Result<KvBackendRef>
    pub async fn with_endpoints<E, S>(endpoints: S, max_txn_ops: usize) -> Result<KvBackendRef>
    where
        E: AsRef<str>,
        S: AsRef<[E]>,
@@ -58,16 +57,19 @@ impl EtcdStore {
            .await
            .context(error::ConnectEtcdSnafu)?;

        Ok(Self::with_etcd_client(client))
        Ok(Self::with_etcd_client(client, max_txn_ops))
    }

    pub fn with_etcd_client(client: Client) -> KvBackendRef {
        Arc::new(Self { client })
    pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef {
        Arc::new(Self {
            client,
            max_txn_ops,
        })
    }

    async fn do_multi_txn(&self, txn_ops: Vec<TxnOp>) -> Result<Vec<TxnResponse>> {
        let max_txn_size = self.max_txn_size();
        if txn_ops.len() < max_txn_size {
        let max_txn_ops = self.max_txn_ops();
        if txn_ops.len() < max_txn_ops {
            // fast path
            let _timer = METRIC_META_TXN_REQUEST
                .with_label_values(&["etcd", "txn"])
@@ -83,7 +85,7 @@ impl EtcdStore {
        }

        let txns = txn_ops
            .chunks(max_txn_size)
            .chunks(max_txn_ops)
            .map(|part| async move {
                let _timer = METRIC_META_TXN_REQUEST
                    .with_label_values(&["etcd", "txn"])
@@ -311,18 +313,20 @@ impl TxnService for EtcdStore {
            .with_label_values(&["etcd", "txn"])
            .start_timer();

        let max_operations = txn.max_operations();

        let etcd_txn: Txn = txn.into();
        let txn_res = self
            .client
            .kv_client()
            .txn(etcd_txn)
            .await
            .context(error::EtcdFailedSnafu)?;
            .context(error::EtcdTxnFailedSnafu { max_operations })?;
        txn_res.try_into()
    }

    fn max_txn_size(&self) -> usize {
        MAX_TXN_SIZE
    fn max_txn_ops(&self) -> usize {
        self.max_txn_ops
    }
}
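do_multi_txn above falls back to splitting an oversized operation list with slice::chunks. A standalone sketch (not part of this diff) of that chunking arithmetic:

let max_txn_ops = 128;
let txn_ops: Vec<u32> = (0..300).collect();
let sizes: Vec<usize> = txn_ops.chunks(max_txn_ops).map(|part| part.len()).collect();
// 300 operations become three transactions: two full chunks plus a remainder.
assert_eq!(sizes, vec![128, 128, 44]);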
@@ -323,6 +323,10 @@ impl<T: ErrorExt + Send + Sync> TxnService for MemoryKvBackend<T> {
            responses,
        })
    }

    fn max_txn_ops(&self) -> usize {
        usize::MAX
    }
}

impl<T: ErrorExt + Send + Sync + 'static> ResettableKvBackend for MemoryKvBackend<T> {
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::max;

use common_error::ext::ErrorExt;

use crate::rpc::store::{DeleteRangeResponse, PutResponse, RangeResponse};
@@ -27,8 +29,8 @@ pub trait TxnService: Sync + Send {
    }

    /// Maximum number of operations permitted in a transaction.
    fn max_txn_size(&self) -> usize {
        usize::MAX
    fn max_txn_ops(&self) -> usize {
        unimplemented!("txn is not implemented")
    }
}

@@ -192,6 +194,12 @@ impl Txn {
        self.req.failure = operations.into();
        self
    }

    #[inline]
    pub fn max_operations(&self) -> usize {
        let opc = max(self.req.compare.len(), self.req.success.len());
        max(opc, self.req.failure.len())
    }
}

impl From<Txn> for TxnRequest {
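max_operations above reports the largest of the three operation lists, which is the figure the new EtcdTxnFailed error carries. A tiny numeric check (a sketch, not part of this diff), assuming 10 compares, 100 success ops, and 50 failure ops:

use std::cmp::max;

let (compare, success, failure) = (10usize, 100, 50);
let opc = max(compare, success);
assert_eq!(max(opc, failure), 100);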
@@ -34,10 +34,14 @@ pub struct SequenceBuilder {
    max: u64,
}

fn seq_name(name: impl AsRef<str>) -> String {
    format!("{}-{}", SEQ_PREFIX, name.as_ref())
}

impl SequenceBuilder {
    pub fn new(name: impl AsRef<str>, generator: KvBackendRef) -> Self {
        Self {
            name: format!("{}-{}", SEQ_PREFIX, name.as_ref()),
            name: seq_name(name),
            initial: 0,
            step: 1,
            generator,
@@ -138,13 +142,14 @@ impl Inner {
    pub async fn next_range(&self) -> Result<Range<u64>> {
        let key = self.name.as_bytes();
        let mut start = self.next;
        for _ in 0..self.force_quit {
            let expect = if start == self.initial {
                vec![]
            } else {
                u64::to_le_bytes(start).to_vec()
            };

        let mut expect = if start == self.initial {
            vec![]
        } else {
            u64::to_le_bytes(start).to_vec()
        };

        for _ in 0..self.force_quit {
            let step = self.step.min(self.max - start);

            ensure!(
@@ -167,15 +172,24 @@ impl Inner {

            if !res.success {
                if let Some(kv) = res.prev_kv {
                    let value = kv.value;
                    ensure!(
                        value.len() == std::mem::size_of::<u64>(),
                        error::UnexpectedSequenceValueSnafu {
                            err_msg: format!("key={}, unexpected value={:?}", self.name, value)
                    expect = kv.value.clone();

                    let v: [u8; 8] = match kv.value.try_into() {
                        Ok(a) => a,
                        Err(v) => {
                            return error::UnexpectedSequenceValueSnafu {
                                err_msg: format!("Not a valid u64 for '{}': {v:?}", self.name),
                            }
                            .fail()
                        }
                    );
                    start = u64::from_le_bytes(value.try_into().unwrap());
                    };
                    let v = u64::from_le_bytes(v);

                    // If the existed value is smaller than the initial, we should start from the initial.
                    start = v.max(self.initial);
                } else {
                    expect = vec![];

                    start = self.initial;
                }
                continue;
@@ -197,8 +211,12 @@ impl Inner {
#[cfg(test)]
mod tests {
    use std::any::Any;
    use std::collections::HashSet;
    use std::sync::Arc;

    use itertools::{Itertools, MinMaxResult};
    use tokio::sync::mpsc;

    use super::*;
    use crate::error::Error;
    use crate::kv_backend::memory::MemoryKvBackend;
@@ -209,6 +227,76 @@ mod tests {
        DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
    };

    #[tokio::test]
    async fn test_sequence_with_existed_value() {
        async fn test(exist: u64, expected: Vec<u64>) {
            let kv_backend = Arc::new(MemoryKvBackend::default());

            let exist = u64::to_le_bytes(exist);
            kv_backend
                .put(PutRequest::new().with_key(seq_name("s")).with_value(exist))
                .await
                .unwrap();

            let initial = 100;
            let seq = SequenceBuilder::new("s", kv_backend)
                .initial(initial)
                .build();

            let mut actual = Vec::with_capacity(expected.len());
            for _ in 0..expected.len() {
                actual.push(seq.next().await.unwrap());
            }
            assert_eq!(actual, expected);
        }

        // put a value not greater than the "initial", the sequence should start from "initial"
        test(1, vec![100, 101, 102]).await;
        test(100, vec![100, 101, 102]).await;

        // put a value greater than the "initial", the sequence should start from the put value
        test(200, vec![200, 201, 202]).await;
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_sequence_with_contention() {
        let seq = Arc::new(
            SequenceBuilder::new("s", Arc::new(MemoryKvBackend::default()))
                .initial(1024)
                .build(),
        );

        let (tx, mut rx) = mpsc::unbounded_channel();
        // Spawn 10 tasks to concurrently get the next sequence. Each task will get 100 sequences.
        for _ in 0..10 {
            tokio::spawn({
                let seq = seq.clone();
                let tx = tx.clone();
                async move {
                    for _ in 0..100 {
                        tx.send(seq.next().await.unwrap()).unwrap()
                    }
                }
            });
        }

        // Test that we get 1000 unique sequences, and start from 1024 to 2023.
        let mut nums = HashSet::new();
        let mut c = 0;
        while c < 1000
            && let Some(x) = rx.recv().await
        {
            nums.insert(x);
            c += 1;
        }
        assert_eq!(nums.len(), 1000);
        let MinMaxResult::MinMax(min, max) = nums.iter().minmax() else {
            unreachable!("nums has more than one element");
        };
        assert_eq!(*min, 1024);
        assert_eq!(*max, 2023);
    }

    #[tokio::test]
    async fn test_sequence() {
        let kv_backend = Arc::new(MemoryKvBackend::default());
@@ -152,7 +152,7 @@ impl Runner {
        guard.key_guards.push(key_guard);
    }

    // Execute the procedure. We need to release the lock whenever the the execution
    // Execute the procedure. We need to release the lock whenever the execution
    // is successful or fails.
    self.execute_procedure_in_loop().await;
@@ -30,38 +30,87 @@ pub mod prelude;
mod signature;
use sqlparser_derive::{Visit, VisitMut};

// sql output
pub enum Output {
/// New Output struct with output data (previously `Output`) and output meta.
#[derive(Debug)]
pub struct Output {
    pub data: OutputData,
    pub meta: OutputMeta,
}

/// Original Output struct,
/// carrying result data to response/client/user interface.
pub enum OutputData {
    AffectedRows(usize),
    RecordBatches(RecordBatches),
    Stream(SendableRecordBatchStream, Option<Arc<dyn PhysicalPlan>>),
    Stream(SendableRecordBatchStream),
}

/// OutputMeta stores meta information produced/generated during the execution.
#[derive(Debug, Default)]
pub struct OutputMeta {
    /// May exist for query output. One can retrieve execution metrics from this plan.
    pub plan: Option<Arc<dyn PhysicalPlan>>,
    pub cost: usize,
}

impl Output {
    // helper function to build original `Output::Stream`
    pub fn new_stream(stream: SendableRecordBatchStream) -> Self {
        Output::Stream(stream, None)
    pub fn new_with_affected_rows(affected_rows: usize) -> Self {
        Self {
            data: OutputData::AffectedRows(affected_rows),
            meta: Default::default(),
        }
    }

    pub fn new_with_record_batches(recordbatches: RecordBatches) -> Self {
        Self {
            data: OutputData::RecordBatches(recordbatches),
            meta: Default::default(),
        }
    }

    pub fn new_with_stream(stream: SendableRecordBatchStream) -> Self {
        Self {
            data: OutputData::Stream(stream),
            meta: Default::default(),
        }
    }

    pub fn new(data: OutputData, meta: OutputMeta) -> Self {
        Self { data, meta }
    }
}

impl Debug for Output {
impl Debug for OutputData {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Output::AffectedRows(rows) => write!(f, "Output::AffectedRows({rows})"),
            Output::RecordBatches(recordbatches) => {
                write!(f, "Output::RecordBatches({recordbatches:?})")
            OutputData::AffectedRows(rows) => write!(f, "OutputData::AffectedRows({rows})"),
            OutputData::RecordBatches(recordbatches) => {
                write!(f, "OutputData::RecordBatches({recordbatches:?})")
            }
            Output::Stream(_, df) => {
                if df.is_some() {
                    write!(f, "Output::Stream(<stream>, Some<physical_plan>)")
                } else {
                    write!(f, "Output::Stream(<stream>)")
                }
            OutputData::Stream(_) => {
                write!(f, "OutputData::Stream(<stream>)")
            }
        }
    }
}

impl OutputMeta {
    pub fn new(plan: Option<Arc<dyn PhysicalPlan>>, cost: usize) -> Self {
        Self { plan, cost }
    }

    pub fn new_with_plan(plan: Arc<dyn PhysicalPlan>) -> Self {
        Self {
            plan: Some(plan),
            cost: 0,
        }
    }

    pub fn new_with_cost(cost: usize) -> Self {
        Self { plan: None, cost }
    }
}
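A hedged sketch (not part of this diff) of how a caller consumes the reshaped type: build an Output through one of the helpers above, then match on its data field:

fn describe(output: Output) -> String {
    match output.data {
        OutputData::AffectedRows(n) => format!("{n} rows affected"),
        OutputData::RecordBatches(_) => "in-memory record batches".to_string(),
        OutputData::Stream(_) => "streaming result".to_string(),
    }
}

assert_eq!(describe(Output::new_with_affected_rows(3)), "3 rows affected");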
pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
@@ -32,7 +32,7 @@ use snafu::ResultExt;

use crate::error::{self, Result};
use crate::{
    DfRecordBatch, DfSendableRecordBatchStream, RecordBatch, RecordBatchStream,
    DfRecordBatch, DfSendableRecordBatchStream, OrderOption, RecordBatch, RecordBatchStream,
    SendableRecordBatchStream, Stream,
};

@@ -228,6 +228,10 @@ impl RecordBatchStream for RecordBatchStreamAdapter {
            Metrics::Unavailable | Metrics::Unresolved(_) => None,
        }
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }
}

impl Stream for RecordBatchStreamAdapter {
@@ -316,6 +320,14 @@ impl RecordBatchStream for AsyncRecordBatchStreamAdapter {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

impl Stream for AsyncRecordBatchStreamAdapter {
@@ -375,6 +387,14 @@ mod test {
    fn schema(&self) -> SchemaRef {
        unimplemented!()
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

impl Stream for MaybeErrorRecordBatchStream {
@@ -39,13 +39,9 @@ use snafu::{ensure, ResultExt};
pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
    fn schema(&self) -> SchemaRef;

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }
    fn output_ordering(&self) -> Option<&[OrderOption]>;

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
    fn metrics(&self) -> Option<RecordBatchMetrics>;
}

pub type SendableRecordBatchStream = Pin<Box<dyn RecordBatchStream + Send>>;
@@ -74,6 +70,14 @@ impl RecordBatchStream for EmptyRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        self.schema.clone()
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

impl Stream for EmptyRecordBatchStream {
@@ -192,6 +196,14 @@ impl RecordBatchStream for SimpleRecordBatchStream {
    fn schema(&self) -> SchemaRef {
        self.inner.schema()
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

impl Stream for SimpleRecordBatchStream {
@@ -41,7 +41,8 @@ mod tests {
    use futures::Stream;

    use super::*;
    use crate::RecordBatchStream;
    use crate::adapter::RecordBatchMetrics;
    use crate::{OrderOption, RecordBatchStream};

    struct MockRecordBatchStream {
        batch: Option<RecordBatch>,
@@ -52,6 +53,14 @@
        fn schema(&self) -> SchemaRef {
            self.schema.clone()
        }

        fn output_ordering(&self) -> Option<&[OrderOption]> {
            None
        }

        fn metrics(&self) -> Option<RecordBatchMetrics> {
            None
        }
    }

    impl Stream for MockRecordBatchStream {
@@ -13,7 +13,7 @@
// limitations under the License.

use client::Database;
use common_query::Output;
use common_query::OutputData;
use common_recordbatch::util;

pub enum ExpectedOutput<'a> {
@@ -23,22 +23,24 @@ pub enum ExpectedOutput<'a> {

pub async fn execute_and_check_output(db: &Database, sql: &str, expected: ExpectedOutput<'_>) {
    let output = db.sql(sql).await.unwrap();
    let output = output.data;

    match (&output, expected) {
        (Output::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
        (OutputData::AffectedRows(x), ExpectedOutput::AffectedRows(y)) => {
            assert_eq!(*x, y, "actual: \n{}", x)
        }
        (Output::RecordBatches(_), ExpectedOutput::QueryResult(x))
        | (Output::Stream(_, _), ExpectedOutput::QueryResult(x)) => {
        (OutputData::RecordBatches(_), ExpectedOutput::QueryResult(x))
        | (OutputData::Stream(_), ExpectedOutput::QueryResult(x)) => {
            check_output_stream(output, x).await
        }
        _ => panic!(),
    }
}

pub async fn check_output_stream(output: Output, expected: &str) {
pub async fn check_output_stream(output: OutputData, expected: &str) {
    let recordbatches = match output {
        Output::Stream(stream, _) => util::collect_batches(stream).await.unwrap(),
        Output::RecordBatches(recordbatches) => recordbatches,
        OutputData::Stream(stream) => util::collect_batches(stream).await.unwrap(),
        OutputData::RecordBatches(recordbatches) => recordbatches,
        _ => unreachable!(),
    };
    let pretty_print = recordbatches.pretty_print().unwrap();
@@ -36,7 +36,7 @@ use crate::{error, Interval};
/// - for [TimeUnit::Second]: [-262144-01-01 00:00:00, +262143-12-31 23:59:59]
/// - for [TimeUnit::Millisecond]: [-262144-01-01 00:00:00.000, +262143-12-31 23:59:59.999]
/// - for [TimeUnit::Microsecond]: [-262144-01-01 00:00:00.000000, +262143-12-31 23:59:59.999999]
/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145225, 2262-04-11 23:47:16.854775807]
/// - for [TimeUnit::Nanosecond]: [1677-09-21 00:12:43.145224192, 2262-04-11 23:47:16.854775807]
///
/// # Note:
/// For values out of range, you can still store these timestamps, but while performing arithmetic
@@ -187,28 +187,28 @@ impl Timestamp {
        Self { unit, value }
    }

    pub fn new_second(value: i64) -> Self {
    pub const fn new_second(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Second,
        }
    }

    pub fn new_millisecond(value: i64) -> Self {
    pub const fn new_millisecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Millisecond,
        }
    }

    pub fn new_microsecond(value: i64) -> Self {
    pub const fn new_microsecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Microsecond,
        }
    }

    pub fn new_nanosecond(value: i64) -> Self {
    pub const fn new_nanosecond(value: i64) -> Self {
        Self {
            value,
            unit: TimeUnit::Nanosecond,
@@ -281,8 +281,26 @@ impl Timestamp {
            .and_then(|v| v.checked_add(micros as i64))
            .map(Timestamp::new_microsecond)
        } else {
            // Refer to <https://github.com/chronotope/chrono/issues/1289>
            //
            // subsec nanos are always non-negative, however the timestamp itself (both in seconds and in nanos) can be
            // negative. Now i64::MIN is NOT dividable by 1_000_000_000, so
            //
            // (sec * 1_000_000_000) + nsec
            //
            // may underflow (even when in theory we COULD represent the datetime as i64) because we add the non-negative
            // nanos AFTER the multiplication. This is fixed by converting the negative case to
            //
            // ((sec + 1) * 1_000_000_000) + (nsec - 1_000_000_000)
            let mut sec = sec;
            let mut nsec = nsec as i64;
            if sec < 0 && nsec > 0 {
                nsec -= 1_000_000_000;
                sec += 1;
            }

            sec.checked_mul(1_000_000_000)
                .and_then(|v| v.checked_add(nsec as i64))
                .and_then(|v| v.checked_add(nsec))
                .map(Timestamp::new_nanosecond)
        }
    }
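Walking the comment above through the extreme case (a sketch, not part of this diff): Timestamp::MIN_NANOSECOND, added below, splits into sec = -9_223_372_037 and nsec = 145_224_192. The naive order overflows while the rewritten order lands exactly on i64::MIN:

let (sec, nsec): (i64, i64) = (-9_223_372_037, 145_224_192);
// Naive order: sec * 1_000_000_000 is already below i64::MIN.
assert!(sec.checked_mul(1_000_000_000).is_none());
// Rewritten order: ((sec + 1) * 1_000_000_000) + (nsec - 1_000_000_000).
let total = (sec + 1)
    .checked_mul(1_000_000_000)
    .and_then(|v| v.checked_add(nsec - 1_000_000_000));
assert_eq!(total, Some(i64::MIN));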
@@ -425,6 +443,20 @@ impl Timestamp {
    }
}

impl Timestamp {
    pub const MIN_SECOND: Self = Self::new_second(-8_334_601_228_800);
    pub const MAX_SECOND: Self = Self::new_second(8_210_266_876_799);

    pub const MIN_MILLISECOND: Self = Self::new_millisecond(-8_334_601_228_800_000);
    pub const MAX_MILLISECOND: Self = Self::new_millisecond(8_210_266_876_799_999);

    pub const MIN_MICROSECOND: Self = Self::new_microsecond(-8_334_601_228_800_000_000);
    pub const MAX_MICROSECOND: Self = Self::new_microsecond(8_210_266_876_799_999_999);

    pub const MIN_NANOSECOND: Self = Self::new_nanosecond(i64::MIN);
    pub const MAX_NANOSECOND: Self = Self::new_nanosecond(i64::MAX);
}

/// Converts the naive datetime (which has no specific timezone) to a
/// nanosecond epoch timestamp in UTC.
fn naive_datetime_to_timestamp(
@@ -586,6 +618,7 @@ impl Hash for Timestamp {
mod tests {
    use std::collections::hash_map::DefaultHasher;

    use chrono_tz::Tz;
    use rand::Rng;
    use serde_json::Value;

@@ -1297,7 +1330,7 @@ mod tests {
            "+262142-12-31 23:59:59Z",
            "+262142-12-31 23:59:59.999Z",
            "+262142-12-31 23:59:59.999999Z",
            "1677-09-21 00:12:43.145225Z",
            "1677-09-21 00:12:43.145224192Z",
            "2262-04-11 23:47:16.854775807Z",
            "+100000-01-01 00:00:01.5Z",
        ];
@@ -1306,4 +1339,47 @@ mod tests {
            Timestamp::from_str_utc(s).unwrap();
        }
    }

    #[test]
    fn test_min_nanos_roundtrip() {
        let (sec, nsec) = Timestamp::MIN_NANOSECOND.split();
        let ts = Timestamp::from_splits(sec, nsec).unwrap();
        assert_eq!(Timestamp::MIN_NANOSECOND, ts);
    }

    #[test]
    fn test_timestamp_bound_format() {
        assert_eq!(
            "1677-09-21 00:12:43.145224192",
            Timestamp::MIN_NANOSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "2262-04-11 23:47:16.854775807",
            Timestamp::MAX_NANOSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "-262143-01-01 00:00:00",
            Timestamp::MIN_MICROSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "+262142-12-31 23:59:59.999999",
            Timestamp::MAX_MICROSECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "-262143-01-01 00:00:00",
            Timestamp::MIN_MILLISECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "+262142-12-31 23:59:59.999",
            Timestamp::MAX_MILLISECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "-262143-01-01 00:00:00",
            Timestamp::MIN_SECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
        assert_eq!(
            "+262142-12-31 23:59:59",
            Timestamp::MAX_SECOND.to_timezone_aware_string(Some(&Timezone::Named(Tz::UTC)))
        );
    }
}
@@ -73,7 +73,7 @@ tokio-stream = { workspace = true, features = ["net"] }
toml.workspace = true
tonic.workspace = true
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
tower-http = { version = "0.4", features = ["full"] }
url = "2.3.1"
uuid.workspace = true
@@ -27,7 +27,7 @@ use common_error::ext::BoxedError;
use common_error::status_code::StatusCode;
use common_query::logical_plan::Expr;
use common_query::physical_plan::DfPhysicalPlanAdapter;
use common_query::{DfPhysicalPlan, Output};
use common_query::{DfPhysicalPlan, OutputData};
use common_recordbatch::SendableRecordBatchStream;
use common_runtime::Runtime;
use common_telemetry::tracing::{self, info_span};
@@ -651,11 +651,11 @@ impl RegionServerInner {
            .await
            .context(ExecuteLogicalPlanSnafu)?;

        match result {
            Output::AffectedRows(_) | Output::RecordBatches(_) => {
        match result.data {
            OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => {
                UnsupportedOutputSnafu { expected: "stream" }.fail()
            }
            Output::Stream(stream, _) => Ok(stream),
            OutputData::Stream(stream) => Ok(stream),
        }
    }
@@ -370,6 +370,36 @@ impl Value {
    }
}

pub trait TryAsPrimitive<T: LogicalPrimitiveType> {
    fn try_as_primitive(&self) -> Option<T::Native>;
}

macro_rules! impl_try_as_primitive {
    ($Type: ident, $Variant: ident) => {
        impl TryAsPrimitive<crate::types::$Type> for Value {
            fn try_as_primitive(
                &self,
            ) -> Option<<crate::types::$Type as crate::types::LogicalPrimitiveType>::Native> {
                match self {
                    Value::$Variant(v) => Some((*v).into()),
                    _ => None,
                }
            }
        }
    };
}

impl_try_as_primitive!(Int8Type, Int8);
impl_try_as_primitive!(Int16Type, Int16);
impl_try_as_primitive!(Int32Type, Int32);
impl_try_as_primitive!(Int64Type, Int64);
impl_try_as_primitive!(UInt8Type, UInt8);
impl_try_as_primitive!(UInt16Type, UInt16);
impl_try_as_primitive!(UInt32Type, UInt32);
impl_try_as_primitive!(UInt64Type, UInt64);
impl_try_as_primitive!(Float32Type, Float32);
impl_try_as_primitive!(Float64Type, Float64);
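A small usage sketch for the new trait (not part of this diff); for Int32Type the associated Native type is i32, and a mismatched variant yields None instead of panicking:

// Assumes crate::types::Int32Type is in scope as Int32Type.
let v = Value::Int32(7);
let as_i32: Option<i32> = TryAsPrimitive::<Int32Type>::try_as_primitive(&v);
assert_eq!(as_i32, Some(7));

let f = Value::Float64(OrderedF64::from(7.0));
assert_eq!(TryAsPrimitive::<Int32Type>::try_as_primitive(&f), None::<i32>);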
pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValue> {
    Ok(match output_type {
        ConcreteDataType::Null(_) => ScalarValue::Null,
@@ -2387,4 +2417,12 @@ mod tests {
        );
        check_value_ref_size_eq(&ValueRef::Decimal128(Decimal128::new(1234, 3, 1)), 32)
    }

    #[test]
    fn test_incorrect_default_value_issue_3479() {
        let value = OrderedF64::from(0.047318541668048164);
        let serialized = serde_json::to_string(&value).unwrap();
        let deserialized: OrderedF64 = serde_json::from_str(&serialized).unwrap();
        assert_eq!(value, deserialized);
    }
}
@@ -22,8 +22,9 @@ use std::task::{Context, Poll};
use common_datasource::object_store::build_backend;
use common_error::ext::BoxedError;
use common_query::prelude::Expr;
use common_recordbatch::adapter::RecordBatchMetrics;
use common_recordbatch::error::{CastVectorSnafu, ExternalSnafu, Result as RecordBatchResult};
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use datafusion::logical_expr::utils as df_logical_expr_utils;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
@@ -151,6 +152,14 @@ impl RecordBatchStream for FileToScanRegionStream {
    fn schema(&self) -> SchemaRef {
        self.scan_schema.clone()
    }

    fn output_ordering(&self) -> Option<&[OrderOption]> {
        None
    }

    fn metrics(&self) -> Option<RecordBatchMetrics> {
        None
    }
}

impl Stream for FileToScanRegionStream {
@@ -18,6 +18,7 @@ common-query.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
datatypes.workspace = true
enum_dispatch = "0.3"
hydroflow = "0.5.0"
itertools.workspace = true
num-traits = "0.2"
@@ -27,3 +28,6 @@ session.workspace = true
snafu.workspace = true
tokio.workspace = true
tonic.workspace = true

[dev-dependencies]
serde_json = "1.0"
@@ -24,5 +24,6 @@ mod scalar;
pub(crate) use error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu};
pub(crate) use func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc};
pub(crate) use id::{GlobalId, Id, LocalId};
pub(crate) use linear::{MapFilterProject, MfpPlan, SafeMfpPlan};
pub(crate) use relation::{AggregateExpr, AggregateFunc};
pub(crate) use scalar::ScalarExpr;
@@ -61,4 +61,7 @@ pub enum EvalError {

    #[snafu(display("Unsupported temporal filter: {reason}"))]
    UnsupportedTemporalFilter { reason: String, location: Location },

    #[snafu(display("Overflowed during evaluation"))]
    Overflow { location: Location },
}
@@ -45,7 +45,7 @@ use crate::repr::{self, value_to_internal_ts, Diff, Row};
/// expressions in `self.expressions`, even though this is not something
/// we can directly evaluate. The plan creation methods will defensively
/// ensure that the right thing happens.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct MapFilterProject {
    /// A sequence of expressions that should be appended to the row.
    ///
@@ -415,7 +415,7 @@ impl MapFilterProject {
}

/// A wrapper type which indicates it is safe to simply evaluate all expressions.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct SafeMfpPlan {
    pub(crate) mfp: MapFilterProject,
}
@@ -800,7 +800,7 @@ mod test {
            .unwrap();
        // only retain sum result
        let mfp = mfp.project(vec![4]).unwrap();
        // accept only if if the sum is greater than 10
        // accept only if the sum is greater than 10
        let mfp = mfp
            .filter(vec![ScalarExpr::Column(0).call_binary(
                ScalarExpr::Literal(Value::from(10i32), ConcreteDataType::int32_datatype()),
@@ -21,7 +21,7 @@ mod accum;
mod func;

/// Describes an aggregation expression.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct AggregateExpr {
    /// Names the aggregation function.
    pub func: AggregateFunc,
@@ -14,7 +14,10 @@

//! Accumulators for aggregate functions that are accumulatable, i.e. sum/count.
//!
//! Currently support sum, count, any, all
//! An accumulator is only restored from a row and updated every time the dataflow needs to process a new batch of rows,
//! so the overhead is acceptable.
//!
//! Currently support sum, count, any, all and min/max (with one caveat: min/max can't support delete with aggregate).

use std::fmt::Display;

@@ -22,13 +25,506 @@ use common_decimal::Decimal128;
use common_time::{Date, DateTime};
use datatypes::data_type::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, OrderedFloat, Value};
use enum_dispatch::enum_dispatch;
use hydroflow::futures::stream::Concat;
use serde::{Deserialize, Serialize};
use snafu::ensure;

use crate::expr::error::{InternalSnafu, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::error::{InternalSnafu, OverflowSnafu, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::relation::func::GenericFn;
use crate::expr::{AggregateFunc, EvalError};
use crate::repr::Diff;

/// Accumulates values for the various types of accumulable aggregations.
#[enum_dispatch]
pub trait Accumulator: Sized {
    fn into_state(self) -> Vec<Value>;
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError>;

    fn update_batch<I>(&mut self, aggr_fn: &AggregateFunc, value_diffs: I) -> Result<(), EvalError>
    where
        I: IntoIterator<Item = (Value, Diff)>,
    {
        for (v, d) in value_diffs {
            self.update(aggr_fn, v, d)?;
        }
        Ok(())
    }

    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError>;
}

/// Bool accumulator, used for `Any` `All` `Max/MinBool`
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Bool {
    /// The number of `true` values observed.
    trues: Diff,
    /// The number of `false` values observed.
    falses: Diff,
}

impl TryFrom<Vec<Value>> for Bool {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 2,
            InternalSnafu {
                reason: "Bool Accumulator state should have 2 values",
            }
        );

        let mut iter = state.into_iter();

        Ok(Self {
            trues: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
            falses: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
        })
    }
}

impl Accumulator for Bool {
    fn into_state(self) -> Vec<Value> {
        vec![self.trues.into(), self.falses.into()]
    }

    /// Null values are ignored
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        ensure!(
            matches!(
                aggr_fn,
                AggregateFunc::Any
                    | AggregateFunc::All
                    | AggregateFunc::MaxBool
                    | AggregateFunc::MinBool
            ),
            InternalSnafu {
                reason: format!(
                    "Bool Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        match value {
            Value::Boolean(true) => self.trues += diff,
            Value::Boolean(false) => self.falses += diff,
            Value::Null => (), // ignore nulls
            x => {
                return Err(TypeMismatchSnafu {
                    expected: ConcreteDataType::boolean_datatype(),
                    actual: x.data_type(),
                }
                .build());
            }
        };
        Ok(())
    }

    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        match aggr_fn {
            AggregateFunc::Any => Ok(Value::from(self.trues > 0)),
            AggregateFunc::All => Ok(Value::from(self.falses == 0)),
            AggregateFunc::MaxBool => Ok(Value::from(self.trues > 0)),
            AggregateFunc::MinBool => Ok(Value::from(self.falses == 0)),
            _ => Err(InternalSnafu {
                reason: format!(
                    "Bool Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
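A hedged usage sketch of the Bool accumulator through the trait above (not part of this diff), assuming Diff is a plain signed integer and state values round-trip via TryFrom/into_state:

// Restore an empty state (0 trues, 0 falses), feed two rows, then evaluate.
let mut acc = Bool::try_from(vec![Value::from(0i64), Value::from(0i64)]).unwrap();
acc.update(&AggregateFunc::Any, Value::from(true), 1).unwrap();
acc.update(&AggregateFunc::Any, Value::from(false), 1).unwrap();
assert_eq!(acc.eval(&AggregateFunc::Any).unwrap(), Value::from(true));
assert_eq!(acc.eval(&AggregateFunc::All).unwrap(), Value::from(false));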
/// Accumulates simple numeric values for sum over integer.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct SimpleNumber {
    /// The accumulation of all non-NULL values observed.
    accum: i128,
    /// The number of non-NULL values observed.
    non_nulls: Diff,
}

impl TryFrom<Vec<Value>> for SimpleNumber {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 2,
            InternalSnafu {
                reason: "Number Accumulator state should have 2 values",
            }
        );
        let mut iter = state.into_iter();

        Ok(Self {
            accum: Decimal128::try_from(iter.next().unwrap())
                .map_err(err_try_from_val)?
                .val(),
            non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
        })
    }
}

impl Accumulator for SimpleNumber {
    fn into_state(self) -> Vec<Value> {
        vec![
            Value::Decimal128(Decimal128::new(self.accum, 38, 0)),
            self.non_nulls.into(),
        ]
    }

    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        ensure!(
            matches!(
                aggr_fn,
                AggregateFunc::SumInt16
                    | AggregateFunc::SumInt32
                    | AggregateFunc::SumInt64
                    | AggregateFunc::SumUInt16
                    | AggregateFunc::SumUInt32
                    | AggregateFunc::SumUInt64
            ),
            InternalSnafu {
                reason: format!(
                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        let v = match (aggr_fn, value) {
            (AggregateFunc::SumInt16, Value::Int16(x)) => i128::from(x),
            (AggregateFunc::SumInt32, Value::Int32(x)) => i128::from(x),
            (AggregateFunc::SumInt64, Value::Int64(x)) => i128::from(x),
            (AggregateFunc::SumUInt16, Value::UInt16(x)) => i128::from(x),
            (AggregateFunc::SumUInt32, Value::UInt32(x)) => i128::from(x),
            (AggregateFunc::SumUInt64, Value::UInt64(x)) => i128::from(x),
            (_f, Value::Null) => return Ok(()), // ignore null
            (f, v) => {
                let expected_datatype = f.signature().input;
                return Err(TypeMismatchSnafu {
                    expected: expected_datatype,
                    actual: v.data_type(),
                }
                .build())?;
            }
        };

        self.accum += v * i128::from(diff);

        self.non_nulls += diff;
        Ok(())
    }

    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        match aggr_fn {
            AggregateFunc::SumInt16 | AggregateFunc::SumInt32 | AggregateFunc::SumInt64 => {
                i64::try_from(self.accum)
                    .map_err(|_e| OverflowSnafu {}.build())
                    .map(Value::from)
            }
            AggregateFunc::SumUInt16 | AggregateFunc::SumUInt32 | AggregateFunc::SumUInt64 => {
                u64::try_from(self.accum)
                    .map_err(|_e| OverflowSnafu {}.build())
                    .map(Value::from)
            }
            _ => Err(InternalSnafu {
                reason: format!(
                    "SimpleNumber Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
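SimpleNumber supports retraction through negative diffs: an update with (v, -1) subtracts v. A hedged sketch (not part of this diff), restoring an empty state as in TryFrom above, where the i128 accumulator rides in a scale-0 Decimal128:

let zero = Value::Decimal128(Decimal128::new(0, 38, 0));
let mut acc = SimpleNumber::try_from(vec![zero, Value::from(0i64)]).unwrap();
acc.update_batch(
    &AggregateFunc::SumInt32,
    [
        (Value::Int32(5), 1),
        (Value::Int32(3), 1),
        (Value::Int32(5), -1), // retraction: take the earlier 5 back out
    ],
)
.unwrap();
// Signed sums evaluate to a 64-bit value.
assert_eq!(acc.eval(&AggregateFunc::SumInt32).unwrap(), Value::from(3i64));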
/// Accumulates float values for sum over floating numbers.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Float {
    /// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
    /// accum will be set to zero if `non_nulls` is zero.
    accum: OrderedF64,
    /// Counts +inf
    pos_infs: Diff,
    /// Counts -inf
    neg_infs: Diff,
    /// Counts NaNs
    nans: Diff,
    /// Counts non-NULL values
    non_nulls: Diff,
}

impl TryFrom<Vec<Value>> for Float {
    type Error = EvalError;

    fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
        ensure!(
            state.len() == 5,
            InternalSnafu {
                reason: "Float Accumulator state should have 5 values",
            }
        );

        let mut iter = state.into_iter();

        let mut ret = Self {
            accum: OrderedF64::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
            pos_infs: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
            neg_infs: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
            nans: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
            non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
        };

        // This prevents counter-intuitive behavior when summing over no values
        if ret.non_nulls == 0 {
            ret.accum = OrderedFloat::from(0.0);
        }

        Ok(ret)
    }
}

impl Accumulator for Float {
    fn into_state(self) -> Vec<Value> {
        vec![
            self.accum.into(),
            self.pos_infs.into(),
            self.neg_infs.into(),
            self.nans.into(),
            self.non_nulls.into(),
        ]
    }

    /// Sum ignores null values
    fn update(
        &mut self,
        aggr_fn: &AggregateFunc,
        value: Value,
        diff: Diff,
    ) -> Result<(), EvalError> {
        ensure!(
            matches!(
                aggr_fn,
                AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64
            ),
            InternalSnafu {
                reason: format!(
                    "Float Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
        );

        let x = match (aggr_fn, value) {
            (AggregateFunc::SumFloat32, Value::Float32(x)) => OrderedF64::from(*x as f64),
            (AggregateFunc::SumFloat64, Value::Float64(x)) => OrderedF64::from(x),
            (_f, Value::Null) => return Ok(()), // ignore null
            (f, v) => {
                let expected_datatype = f.signature().input;
                return Err(TypeMismatchSnafu {
                    expected: expected_datatype,
                    actual: v.data_type(),
                }
                .build())?;
            }
        };

        if x.is_nan() {
            self.nans += diff;
        } else if x.is_infinite() {
            if x.is_sign_positive() {
                self.pos_infs += diff;
            } else {
                self.neg_infs += diff;
            }
        } else {
            self.accum += *(x * OrderedF64::from(diff as f64));
        }

        self.non_nulls += diff;
        Ok(())
    }

    fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
        match aggr_fn {
            AggregateFunc::SumFloat32 => Ok(Value::Float32(OrderedF32::from(self.accum.0 as f32))),
            AggregateFunc::SumFloat64 => Ok(Value::Float64(self.accum)),
            _ => Err(InternalSnafu {
                reason: format!(
                    "Float Accumulator does not support this aggregation function: {:?}",
                    aggr_fn
                ),
            }
            .build()),
        }
    }
}
/// Accumulates a single `Ord`ed `Value`, useful for min/max aggregations.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct OrdValue {
|
||||
val: Option<Value>,
|
||||
non_nulls: Diff,
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<Value>> for OrdValue {
|
||||
type Error = EvalError;
|
||||
|
||||
fn try_from(state: Vec<Value>) -> Result<Self, Self::Error> {
|
||||
ensure!(
|
||||
state.len() == 2,
|
||||
InternalSnafu {
|
||||
reason: "OrdValue Accumulator state should have 2 values",
|
||||
}
|
||||
);
|
||||
|
||||
let mut iter = state.into_iter();
|
||||
|
||||
Ok(Self {
|
||||
val: {
|
||||
let v = iter.next().unwrap();
|
||||
if v == Value::Null {
|
||||
None
|
||||
} else {
|
||||
Some(v)
|
||||
}
|
||||
},
|
||||
non_nulls: Diff::try_from(iter.next().unwrap()).map_err(err_try_from_val)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for OrdValue {
|
||||
fn into_state(self) -> Vec<Value> {
|
||||
vec![self.val.unwrap_or(Value::Null), self.non_nulls.into()]
|
||||
}
|
||||
|
||||
/// min/max try to find results in all non-null values, if all values are null, the result is null.
|
||||
/// count(col_name) gives the number of non-null values, count(*) gives the number of rows including nulls.
|
||||
/// TODO(discord9): add count(*) as a aggr function
|
||||
fn update(
|
||||
&mut self,
|
||||
aggr_fn: &AggregateFunc,
|
||||
value: Value,
|
||||
diff: Diff,
|
||||
) -> Result<(), EvalError> {
|
||||
ensure!(
|
||||
aggr_fn.is_max() || aggr_fn.is_min() || matches!(aggr_fn, AggregateFunc::Count),
|
||||
InternalSnafu {
|
||||
reason: format!(
|
||||
"OrdValue Accumulator does not support this aggregation function: {:?}",
|
||||
aggr_fn
|
||||
),
|
||||
}
|
||||
);
|
||||
if diff <= 0 && (aggr_fn.is_max() || aggr_fn.is_min()) {
|
||||
return Err(InternalSnafu {
|
||||
reason: "OrdValue Accumulator does not support non-monotonic input for min/max aggregation".to_string(),
|
||||
}.build());
|
||||
}
|
||||
|
||||
// if aggr_fn is count, the incoming value type doesn't matter in type checking
|
||||
// otherwise, type need to be the same or value can be null
|
||||
let check_type_aggr_fn_and_arg_value =
|
||||
ty_eq_without_precision(value.data_type(), aggr_fn.signature().input)
|
||||
|| matches!(aggr_fn, AggregateFunc::Count)
|
||||
|| value.is_null();
|
||||
let check_type_aggr_fn_and_self_val = self
|
||||
.val
|
||||
.as_ref()
|
||||
.map(|zelf| ty_eq_without_precision(zelf.data_type(), aggr_fn.signature().input))
|
||||
.unwrap_or(true)
|
||||
|| matches!(aggr_fn, AggregateFunc::Count);
|
||||
|
||||
if !check_type_aggr_fn_and_arg_value {
|
||||
return Err(TypeMismatchSnafu {
|
||||
expected: aggr_fn.signature().input,
|
||||
actual: value.data_type(),
|
||||
}
|
||||
.build());
|
||||
} else if !check_type_aggr_fn_and_self_val {
|
||||
return Err(TypeMismatchSnafu {
|
||||
expected: aggr_fn.signature().input,
|
||||
actual: self
|
||||
.val
|
||||
.as_ref()
|
||||
.map(|v| v.data_type())
|
||||
.unwrap_or(ConcreteDataType::null_datatype()),
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
let is_null = value.is_null();
|
||||
if is_null {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !is_null {
|
||||
// compile count(*) to count(true) to include null/non-nulls
|
||||
// And the counts of non-null values are updated here
|
||||
self.non_nulls += diff;
|
||||
|
||||
match aggr_fn.signature().generic_fn {
|
||||
GenericFn::Max => {
|
||||
self.val = self
|
||||
.val
|
||||
.clone()
|
||||
.map(|v| v.max(value.clone()))
|
||||
.or_else(|| Some(value))
|
||||
}
|
||||
GenericFn::Min => {
|
||||
self.val = self
|
||||
.val
|
||||
.clone()
|
||||
.map(|v| v.min(value.clone()))
|
||||
.or_else(|| Some(value))
|
||||
}
|
||||
|
||||
GenericFn::Count => (),
|
||||
_ => unreachable!("already checked by ensure!"),
|
||||
}
|
||||
};
|
||||
// min/max ignore nulls
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn eval(&self, aggr_fn: &AggregateFunc) -> Result<Value, EvalError> {
|
||||
if aggr_fn.is_max() || aggr_fn.is_min() {
|
||||
Ok(self.val.clone().unwrap_or(Value::Null))
|
||||
} else if matches!(aggr_fn, AggregateFunc::Count) {
|
||||
Ok(self.non_nulls.into())
|
||||
} else {
|
||||
Err(InternalSnafu {
|
||||
reason: format!(
|
||||
"OrdValue Accumulator does not support this aggregation function: {:?}",
|
||||
aggr_fn
|
||||
),
|
||||
}
|
||||
.build())
|
||||
}
|
||||
}
|
||||
}
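`OrdValue` rejects `diff <= 0` for min/max because a single retained extreme cannot be retracted: removing the current maximum would require knowing the runner-up, which this accumulator does not keep (hierarchical aggregation is the usual fix, per the TODOs elsewhere in this change). A tiny sketch of the problem:

```rust
fn main() {
    // Fold a max the same way OrdValue does: keep only the current extreme.
    let mut max: Option<i64> = None;
    for v in [3, 7, 5] {
        max = Some(max.map_or(v, |m| m.max(v)));
    }
    assert_eq!(max, Some(7));
    // Retracting 7 now is impossible: with only `Some(7)` retained, the
    // accumulator cannot know whether 5 or 3 becomes the new maximum.
}
```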

/// Accumulates values for the various types of accumulable aggregations.
///
/// We assume that there are not more than 2^32 elements for the aggregation.
@@ -38,34 +534,407 @@ use crate::repr::Diff;
/// The float accumulator performs accumulation with tolerance for floating point error.
///
/// TODO(discord9): check for overflowing
#[enum_dispatch(Accumulator)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Accum {
    /// Accumulates boolean values.
    Bool {
        /// The number of `true` values observed.
        trues: Diff,
        /// The number of `false` values observed.
        falses: Diff,
    },
    Bool(Bool),
    /// Accumulates simple numeric values.
    SimpleNumber {
        /// The accumulation of all non-NULL values observed.
        accum: i128,
        /// The number of non-NULL values observed.
        non_nulls: Diff,
    },
    SimpleNumber(SimpleNumber),
    /// Accumulates float values.
    Float {
        /// Accumulates non-special float values, i.e. not NaN, +inf, -inf.
        /// accum will be set to zero if `non_nulls` is zero.
        accum: OrderedF64,
        /// Counts +inf
        pos_infs: Diff,
        /// Counts -inf
        neg_infs: Diff,
        /// Counts NaNs
        nans: Diff,
        /// Counts non-NULL values
        non_nulls: Diff,
    },
    Float(Float),
    /// Accumulates `Value`s that impl `Ord`
    OrdValue(OrdValue),
}
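For context on `#[enum_dispatch(Accumulator)]`: the macro generates `From` impls for each single-field variant (which is what `Self::from(Bool { .. })` below relies on) and forwards trait methods through a `match`, avoiding dynamic dispatch. A minimal sketch with a made-up trait; all names here are illustrative only:

```rust
use enum_dispatch::enum_dispatch;

#[enum_dispatch]
trait Total {
    fn total(&self) -> i64;
}

struct Counter {
    n: i64,
}
impl Total for Counter {
    fn total(&self) -> i64 {
        self.n
    }
}

struct Summer {
    sum: i64,
}
impl Total for Summer {
    fn total(&self) -> i64 {
        self.sum
    }
}

// Each variant wraps exactly one implementing type; the macro generates
// `From<Counter>`/`From<Summer>` and a forwarding `impl Total for AnyAccum`.
#[enum_dispatch(Total)]
enum AnyAccum {
    Counter(Counter),
    Summer(Summer),
}

fn main() {
    let a = AnyAccum::from(Counter { n: 3 });
    let b = AnyAccum::from(Summer { sum: 7 });
    assert_eq!(a.total() + b.total(), 10);
}
```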

impl Accum {
    pub fn new_accum(aggr_fn: &AggregateFunc) -> Result<Self, EvalError> {
        Ok(match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Self::from(Bool {
                trues: 0,
                falses: 0,
            }),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => Self::from(SimpleNumber {
                accum: 0,
                non_nulls: 0,
            }),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => Self::from(Float {
                accum: OrderedF64::from(0.0),
                pos_infs: 0,
                neg_infs: 0,
                nans: 0,
                non_nulls: 0,
            }),
            f if f.is_max() || f.is_min() || matches!(f, AggregateFunc::Count) => {
                Self::from(OrdValue {
                    val: None,
                    non_nulls: 0,
                })
            }
            f => {
                return Err(InternalSnafu {
                    reason: format!(
                        "Accumulator does not support this aggregation function: {:?}",
                        f
                    ),
                }
                .build());
            }
        })
    }

    pub fn try_into_accum(aggr_fn: &AggregateFunc, state: Vec<Value>) -> Result<Self, EvalError> {
        match aggr_fn {
            AggregateFunc::Any
            | AggregateFunc::All
            | AggregateFunc::MaxBool
            | AggregateFunc::MinBool => Ok(Self::from(Bool::try_from(state)?)),
            AggregateFunc::SumInt16
            | AggregateFunc::SumInt32
            | AggregateFunc::SumInt64
            | AggregateFunc::SumUInt16
            | AggregateFunc::SumUInt32
            | AggregateFunc::SumUInt64 => Ok(Self::from(SimpleNumber::try_from(state)?)),
            AggregateFunc::SumFloat32 | AggregateFunc::SumFloat64 => {
                Ok(Self::from(Float::try_from(state)?))
            }
            f if f.is_max() || f.is_min() || matches!(f, AggregateFunc::Count) => {
                Ok(Self::from(OrdValue::try_from(state)?))
            }
            f => Err(InternalSnafu {
                reason: format!(
                    "Accumulator does not support this aggregation function: {:?}",
                    f
                ),
            }
            .build()),
        }
    }
}

fn err_try_from_val<T: Display>(reason: T) -> EvalError {
    TryFromValueSnafu {
        msg: reason.to_string(),
    }
    .build()
}

/// Compares types while ignoring their precision, including `TimeStamp`, `Time`,
/// `Duration`, `Interval`
fn ty_eq_without_precision(left: ConcreteDataType, right: ConcreteDataType) -> bool {
    left == right
        || matches!(left, ConcreteDataType::Timestamp(..))
            && matches!(right, ConcreteDataType::Timestamp(..))
        || matches!(left, ConcreteDataType::Time(..)) && matches!(right, ConcreteDataType::Time(..))
        || matches!(left, ConcreteDataType::Duration(..))
            && matches!(right, ConcreteDataType::Duration(..))
        || matches!(left, ConcreteDataType::Interval(..))
            && matches!(right, ConcreteDataType::Interval(..))
}
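A related note: when the precision lives entirely in the variant payload, `std::mem::discriminant` expresses the same "same variant, any precision" check without the chain of `matches!` arms — though it ignores every payload, which is broader than the function above, so it only fits types whose payload is purely precision. A self-contained sketch:

```rust
use std::mem::discriminant;

// Toy stand-in for a datatype enum; the payload models precision.
enum Ty {
    Timestamp(u8),
    Time(u8),
    Int64,
}

fn ty_eq_ignore_precision(left: &Ty, right: &Ty) -> bool {
    // Compares only the variant, ignoring the payload entirely.
    discriminant(left) == discriminant(right)
}

fn main() {
    assert!(ty_eq_ignore_precision(&Ty::Timestamp(3), &Ty::Timestamp(6)));
    assert!(!ty_eq_ignore_precision(&Ty::Timestamp(3), &Ty::Time(3)));
    assert!(ty_eq_ignore_precision(&Ty::Int64, &Ty::Int64));
}
```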

#[cfg(test)]
mod test {
    use super::*;
    #[test]
    fn test_accum() {
        let testcases = vec![
            (
                AggregateFunc::SumInt32,
                vec![(Value::Int32(1), 1), (Value::Null, 1)],
                (
                    Value::Int64(1),
                    vec![Value::Decimal128(Decimal128::new(1, 38, 0)), 1i64.into()],
                ),
            ),
            (
                AggregateFunc::SumFloat32,
                vec![(Value::Float32(OrderedF32::from(1.0)), 1), (Value::Null, 1)],
                (
                    Value::Float32(OrderedF32::from(1.0)),
                    vec![
                        Value::Float64(OrderedF64::from(1.0)),
                        0i64.into(),
                        0i64.into(),
                        0i64.into(),
                        1i64.into(),
                    ],
                ),
            ),
            (
                AggregateFunc::MaxInt32,
                vec![(Value::Int32(1), 1), (Value::Int32(2), 1), (Value::Null, 1)],
                (Value::Int32(2), vec![Value::Int32(2), 2i64.into()]),
            ),
            (
                AggregateFunc::MinInt32,
                vec![(Value::Int32(2), 1), (Value::Int32(1), 1), (Value::Null, 1)],
                (Value::Int32(1), vec![Value::Int32(1), 2i64.into()]),
            ),
            (
                AggregateFunc::MaxFloat32,
                vec![
                    (Value::Float32(OrderedF32::from(1.0)), 1),
                    (Value::Float32(OrderedF32::from(2.0)), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Float32(OrderedF32::from(2.0)),
                    vec![Value::Float32(OrderedF32::from(2.0)), 2i64.into()],
                ),
            ),
            (
                AggregateFunc::MaxDateTime,
                vec![
                    (Value::DateTime(DateTime::from(0)), 1),
                    (Value::DateTime(DateTime::from(1)), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::DateTime(DateTime::from(1)),
                    vec![Value::DateTime(DateTime::from(1)), 2i64.into()],
                ),
            ),
            (
                AggregateFunc::Count,
                vec![
                    (Value::Int32(1), 1),
                    (Value::Int32(2), 1),
                    (Value::Null, 1),
                    (Value::Null, 1),
                ],
                (2i64.into(), vec![Value::Null, 2i64.into()]),
            ),
            (
                AggregateFunc::Any,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(true),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::All,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(false),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::MaxBool,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(true),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
            (
                AggregateFunc::MinBool,
                vec![
                    (Value::Boolean(false), 1),
                    (Value::Boolean(false), 1),
                    (Value::Boolean(true), 1),
                    (Value::Null, 1),
                ],
                (
                    Value::Boolean(false),
                    vec![Value::from(1i64), Value::from(2i64)],
                ),
            ),
        ];

        for (aggr_fn, input, (eval_res, state)) in testcases {
            let create_and_insert = || -> Result<Accum, EvalError> {
                let mut acc = Accum::new_accum(&aggr_fn)?;
                acc.update_batch(&aggr_fn, input.clone())?;
                let row = acc.into_state();
                let acc = Accum::try_into_accum(&aggr_fn, row)?;
                Ok(acc)
            };
            let acc = match create_and_insert() {
                Ok(acc) => acc,
                Err(err) => panic!(
                    "Failed to create accum for {:?} with input {:?} with error: {:?}",
                    aggr_fn, input, err
                ),
            };

            if acc.eval(&aggr_fn).unwrap() != eval_res {
                panic!(
                    "Failed to eval accum for {:?} with input {:?}, expect {:?}, got {:?}",
                    aggr_fn,
                    input,
                    eval_res,
                    acc.eval(&aggr_fn).unwrap()
                );
            }
            let actual_state = acc.into_state();
            if actual_state != state {
                panic!(
                    "Failed to cast into state from accum for {:?} with input {:?}, expect state {:?}, got state {:?}",
                    aggr_fn,
                    input,
                    state,
                    actual_state
                );
            }
        }
    }

    #[test]
    fn test_fail_path_accum() {
        {
            let bool_accum = Bool::try_from(vec![Value::Null]);
            assert!(matches!(bool_accum, Err(EvalError::Internal { .. })));
        }

        {
            let mut bool_accum = Bool::try_from(vec![1i64.into(), 1i64.into()]).unwrap();
            // serde
            let bool_accum_serde = serde_json::to_string(&bool_accum).unwrap();
            let bool_accum_de = serde_json::from_str::<Bool>(&bool_accum_serde).unwrap();
            assert_eq!(bool_accum, bool_accum_de);
            assert!(matches!(
                bool_accum.update(&AggregateFunc::MaxDate, 1.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                bool_accum.update(&AggregateFunc::Any, 1.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                bool_accum.eval(&AggregateFunc::MaxDate),
                Err(EvalError::Internal { .. })
            ));
        }

        {
            let ret = SimpleNumber::try_from(vec![Value::Null]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum =
                SimpleNumber::try_from(vec![Decimal128::new(0, 38, 0).into(), 0i64.into()])
                    .unwrap();

            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::SumInt64, 0i32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                accum.eval(&AggregateFunc::All),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::SumInt64, 1i64.into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumInt64, i64::MAX.into(), 1)
                .unwrap();
            assert!(matches!(
                accum.eval(&AggregateFunc::SumInt64),
                Err(EvalError::Overflow { .. })
            ));
        }

        {
            let ret = Float::try_from(vec![2f64.into(), 0i64.into(), 0i64.into(), 0i64.into()]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum = Float::try_from(vec![
                2f64.into(),
                0i64.into(),
                0i64.into(),
                0i64.into(),
                1i64.into(),
            ])
            .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, 2f64.into(), -1)
                .unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::SumFloat64, 0.0f32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            // no record, no accum
            assert_eq!(
                accum.eval(&AggregateFunc::SumFloat64).unwrap(),
                0.0f64.into()
            );

            assert!(matches!(
                accum.eval(&AggregateFunc::All),
                Err(EvalError::Internal { .. })
            ));

            accum
                .update(&AggregateFunc::SumFloat64, f64::INFINITY.into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, (-f64::INFINITY).into(), 1)
                .unwrap();
            accum
                .update(&AggregateFunc::SumFloat64, f64::NAN.into(), 1)
                .unwrap();
        }

        {
            let ret = OrdValue::try_from(vec![Value::Null]);
            assert!(matches!(ret, Err(EvalError::Internal { .. })));
            let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::All, 0.into(), 1),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::MaxInt16, 1i16.into(), 1)
                .unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt16, 0i32.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt16, 0i16.into(), -1),
                Err(EvalError::Internal { .. })
            ));
            accum
                .update(&AggregateFunc::MaxInt16, Value::Null, 1)
                .unwrap();
        }

        // insert uint64 into max_int64 should fail
        {
            let mut accum = OrdValue::try_from(vec![Value::Null, 0i64.into()]).unwrap();
            assert!(matches!(
                accum.update(&AggregateFunc::MaxInt64, 0u64.into(), 1),
                Err(EvalError::TypeMismatch { .. })
            ));
        }
    }
}
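The `SumInt64` overflow assertion in `test_fail_path_accum` passes because `SimpleNumber` accumulates in `i128`: `1 + i64::MAX` is representable during updates, and only the final downcast to the output type can fail. A standalone sketch of that split:

```rust
// Accumulate wide, downcast narrow: overflow is detected at eval time only.
fn eval_sum_i64(accum: i128) -> Result<i64, String> {
    i64::try_from(accum).map_err(|_| "sum overflows i64".to_string())
}

fn main() {
    let accum: i128 = 1 + i64::MAX as i128; // fine as an i128 running sum
    assert!(eval_sum_i64(accum).is_err()); // fails only when narrowed
    assert_eq!(eval_sum_i64(42).unwrap(), 42);
}
```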

@@ -12,15 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::any::type_name;

use common_time::{Date, DateTime};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{OrderedF32, OrderedF64, Value};
use serde::{Deserialize, Serialize};

use crate::expr::error::{EvalError, TryFromValueSnafu, TypeMismatchSnafu};
use crate::expr::relation::accum::Accum;
use crate::expr::relation::accum::{Accum, Accumulator};
use crate::repr::Diff;

/// Aggregate functions that can be applied to a group of rows.
@@ -83,3 +81,280 @@ pub enum AggregateFunc {
    Any,
    All,
}

impl AggregateFunc {
    pub fn is_max(&self) -> bool {
        self.signature().generic_fn == GenericFn::Max
    }

    pub fn is_min(&self) -> bool {
        self.signature().generic_fn == GenericFn::Min
    }

    pub fn is_sum(&self) -> bool {
        self.signature().generic_fn == GenericFn::Sum
    }

    /// Evaluates (value, diff) pairs with an accumulator.
    ///
    /// Expects self to be an accumulable aggregate function, i.e. sum/count
    ///
    /// TODO(discord9): deal with overflow & a better accumulator
    pub fn eval_diff_accumulable<I>(
        &self,
        accum: Vec<Value>,
        value_diffs: I,
    ) -> Result<(Value, Vec<Value>), EvalError>
    where
        I: IntoIterator<Item = (Value, Diff)>,
    {
        let mut accum = if accum.is_empty() {
            Accum::new_accum(self)?
        } else {
            Accum::try_into_accum(self, accum)?
        };
        accum.update_batch(self, value_diffs)?;
        let res = accum.eval(self)?;
        Ok((res, accum.into_state()))
    }
}
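`eval_diff_accumulable` is the accumulate-eval-persist cycle in one call: restore (or create) an accumulator from `accum`, fold in the `(value, diff)` pairs, evaluate the current aggregate, and return the state for the next batch. A self-contained mimic under assumed simplifications — `SumState` below is invented for the example and is not part of this crate:

```rust
#[derive(Default)]
struct SumState {
    accum: i64,
    non_nulls: i64,
}

impl SumState {
    fn from_state(state: &[i64]) -> Self {
        Self { accum: state[0], non_nulls: state[1] }
    }

    fn update_batch<I: IntoIterator<Item = (Option<i64>, i64)>>(&mut self, batch: I) {
        for (value, diff) in batch {
            if let Some(v) = value {
                self.accum += v * diff; // diff scales inserts and retractions
                self.non_nulls += diff;
            }
        }
    }

    fn eval(&self) -> i64 {
        self.accum
    }

    fn into_state(self) -> Vec<i64> {
        vec![self.accum, self.non_nulls]
    }
}

fn main() {
    // First batch: two inserts of 3, one null (ignored).
    let mut acc = SumState::default();
    acc.update_batch([(Some(3), 1), (Some(3), 1), (None, 1)]);
    assert_eq!(acc.eval(), 6);
    let state = acc.into_state();

    // A later batch resumes from the persisted state and retracts one 3.
    let mut acc = SumState::from_state(&state);
    acc.update_batch([(Some(3), -1)]);
    assert_eq!(acc.eval(), 3);
}
```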

pub struct Signature {
    pub input: ConcreteDataType,
    pub output: ConcreteDataType,
    pub generic_fn: GenericFn,
}

#[derive(Debug, PartialEq, Eq)]
pub enum GenericFn {
    Max,
    Min,
    Sum,
    Count,
    Any,
    All,
}

impl AggregateFunc {
    /// All concrete datatypes with precision are returned as their largest possible variant;
    /// as an exception, count has a signature of `null -> i64`, but it's actually `anytype -> i64`
    pub fn signature(&self) -> Signature {
        match self {
            AggregateFunc::MaxInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxBool => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxString => Signature {
                input: ConcreteDataType::string_datatype(),
                output: ConcreteDataType::string_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDate => Signature {
                input: ConcreteDataType::date_datatype(),
                output: ConcreteDataType::date_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDateTime => Signature {
                input: ConcreteDataType::datetime_datatype(),
                output: ConcreteDataType::datetime_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxTimestamp => Signature {
                input: ConcreteDataType::timestamp_second_datatype(),
                output: ConcreteDataType::timestamp_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxTime => Signature {
                input: ConcreteDataType::time_second_datatype(),
                output: ConcreteDataType::time_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxDuration => Signature {
                input: ConcreteDataType::duration_second_datatype(),
                output: ConcreteDataType::duration_second_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MaxInterval => Signature {
                input: ConcreteDataType::interval_year_month_datatype(),
                output: ConcreteDataType::interval_year_month_datatype(),
                generic_fn: GenericFn::Max,
            },
            AggregateFunc::MinInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinBool => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinString => Signature {
                input: ConcreteDataType::string_datatype(),
                output: ConcreteDataType::string_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDate => Signature {
                input: ConcreteDataType::date_datatype(),
                output: ConcreteDataType::date_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDateTime => Signature {
                input: ConcreteDataType::datetime_datatype(),
                output: ConcreteDataType::datetime_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinTimestamp => Signature {
                input: ConcreteDataType::timestamp_second_datatype(),
                output: ConcreteDataType::timestamp_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinTime => Signature {
                input: ConcreteDataType::time_second_datatype(),
                output: ConcreteDataType::time_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinDuration => Signature {
                input: ConcreteDataType::duration_second_datatype(),
                output: ConcreteDataType::duration_second_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::MinInterval => Signature {
                input: ConcreteDataType::interval_year_month_datatype(),
                output: ConcreteDataType::interval_year_month_datatype(),
                generic_fn: GenericFn::Min,
            },
            AggregateFunc::SumInt16 => Signature {
                input: ConcreteDataType::int16_datatype(),
                output: ConcreteDataType::int16_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumInt32 => Signature {
                input: ConcreteDataType::int32_datatype(),
                output: ConcreteDataType::int32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumInt64 => Signature {
                input: ConcreteDataType::int64_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt16 => Signature {
                input: ConcreteDataType::uint16_datatype(),
                output: ConcreteDataType::uint16_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt32 => Signature {
                input: ConcreteDataType::uint32_datatype(),
                output: ConcreteDataType::uint32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumUInt64 => Signature {
                input: ConcreteDataType::uint64_datatype(),
                output: ConcreteDataType::uint64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumFloat32 => Signature {
                input: ConcreteDataType::float32_datatype(),
                output: ConcreteDataType::float32_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::SumFloat64 => Signature {
                input: ConcreteDataType::float64_datatype(),
                output: ConcreteDataType::float64_datatype(),
                generic_fn: GenericFn::Sum,
            },
            AggregateFunc::Count => Signature {
                input: ConcreteDataType::null_datatype(),
                output: ConcreteDataType::int64_datatype(),
                generic_fn: GenericFn::Count,
            },
            AggregateFunc::Any => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::Any,
            },
            AggregateFunc::All => Signature {
                input: ConcreteDataType::boolean_datatype(),
                output: ConcreteDataType::boolean_datatype(),
                generic_fn: GenericFn::All,
            },
        }
    }
}

@@ -17,4 +17,5 @@
// allow unused for now because it should be used later
mod adapter;
mod expr;
mod plan;
mod repr;

98
src/flow/src/plan.rs
Normal file
@@ -0,0 +1,98 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains basic definitions for a dataflow plan
//! that can be translated to a hydro dataflow

mod join;
mod reduce;

use serde::{Deserialize, Serialize};

pub(crate) use self::reduce::{AccumulablePlan, KeyValPlan, ReducePlan};
use crate::expr::{
    AggregateExpr, EvalError, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr,
};
use crate::plan::join::JoinPlan;
use crate::repr::{DiffRow, RelationType};

#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct TypedPlan {
    /// output type of the relation
    pub typ: RelationType,
    pub plan: Plan,
}

/// TODO(discord9): support `TableFunc` (by defining a FlatMap that maps 1 to n)
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum Plan {
    /// A constant collection of rows.
    Constant { rows: Vec<DiffRow> },
    /// Get CDC data from a source, be it an external reference to an existing source or an
    /// internal reference to a `Let` identifier
    Get { id: Id },
    /// Create a temporary collection from the given `value`, and make this binding only
    /// available in the scope of `body`
    Let {
        id: LocalId,
        value: Box<Plan>,
        body: Box<Plan>,
    },
    /// Map, Filter, and Project operators.
    Mfp {
        /// The input collection.
        input: Box<Plan>,
        /// Linear operator to apply to each record.
        mfp: MapFilterProject,
    },
    /// Reduce operator, aggregation by key assembled from KeyValPlan
    Reduce {
        /// The input collection.
        input: Box<Plan>,
        /// A plan for changing input records into key, value pairs.
        key_val_plan: KeyValPlan,
        /// A plan for performing the reduce.
        ///
        /// The implementation of reduction has several different strategies based
        /// on the properties of the reduction, and the input itself.
        reduce_plan: ReducePlan,
    },
    /// A multiway relational equijoin, with fused map, filter, and projection.
    ///
    /// This stage performs a multiway join among `inputs`, using the equality
    /// constraints expressed in `plan`. The plan also describes the implementation
    /// strategy we will use, and any pushed down per-record work.
    Join {
        /// An ordered list of inputs that will be joined.
        inputs: Vec<Plan>,
        /// Detailed information about the implementation of the join.
        ///
        /// This includes information about the implementation strategy, but also
        /// any map, filter, project work that we might follow the join with, but
        /// potentially pushed down into the implementation of the join.
        plan: JoinPlan,
    },
    /// Adds the contents of the input collections.
    ///
    /// Importantly, this is *multiset* union, so the multiplicities of records will
    /// add. This is in contrast to *set* union, where the multiplicities would be
    /// capped at one. A set union can be formed with `Union` followed by `Reduce`
    /// implementing the "distinct" operator.
    Union {
        /// The input collections
        inputs: Vec<Plan>,
        /// Whether to consolidate the output, e.g., cancel negated records.
        consolidate_output: bool,
    },
}
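To make the variants concrete, here is an illustrative-only miniature of such a plan, interpreted directly over `(row, diff)` vectors; the real `Plan` is translated to a hydro dataflow rather than interpreted, and `MiniPlan` below is invented for the example:

```rust
type Row = Vec<i64>;
type DiffRow = (Row, i64);

enum MiniPlan {
    /// A constant collection of rows, like `Plan::Constant`.
    Constant(Vec<DiffRow>),
    /// A filter stage, standing in for the filter part of `Plan::Mfp`.
    Filter { input: Box<MiniPlan>, col: usize, min: i64 },
    /// Multiset union, like `Plan::Union` without consolidation.
    Union(Vec<MiniPlan>),
}

fn eval(plan: &MiniPlan) -> Vec<DiffRow> {
    match plan {
        MiniPlan::Constant(rows) => rows.clone(),
        MiniPlan::Filter { input, col, min } => eval(input)
            .into_iter()
            .filter(|(row, _)| row[*col] >= *min)
            .collect(),
        MiniPlan::Union(inputs) => inputs.iter().flat_map(eval).collect(),
    }
}

fn main() {
    let plan = MiniPlan::Union(vec![
        MiniPlan::Filter {
            input: Box::new(MiniPlan::Constant(vec![(vec![1], 1), (vec![5], 1)])),
            col: 0,
            min: 3,
        },
        MiniPlan::Constant(vec![(vec![9], 1)]),
    ]);
    assert_eq!(eval(&plan), vec![(vec![5], 1), (vec![9], 1)]);
}
```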

78
src/flow/src/plan/join.rs
Normal file
@@ -0,0 +1,78 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::ScalarExpr;
use crate::plan::SafeMfpPlan;

/// TODO(discord9): consider impl more join strategies
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub enum JoinPlan {
    Linear(LinearJoinPlan),
}

/// Determines whether a given row should stay in the output, and applies a map filter project
/// before outputting the row
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct JoinFilter {
    /// Each element in the outer vector checks whether every expr within it evaluates to the
    /// same value; if not, the row is filtered out. Useful for equi-join (join based on
    /// equality of some columns)
    pub ready_equivalences: Vec<Vec<ScalarExpr>>,
    /// Apply a map filter project before outputting the row
    pub before: SafeMfpPlan,
}

/// A plan for the execution of a linear join.
///
/// A linear join is a sequence of stages, each of which introduces
/// a new collection. Each stage is represented by a [LinearStagePlan].
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearJoinPlan {
    /// The source relation from which we start the join.
    pub source_relation: usize,
    /// The arrangement to use for the source relation, if any
    pub source_key: Option<Vec<ScalarExpr>>,
    /// An initial closure to apply before any stages.
    ///
    /// Values of `None` indicate the identity closure.
    pub initial_closure: Option<JoinFilter>,
    /// A *sequence* of stages to apply one after the other.
    pub stage_plans: Vec<LinearStagePlan>,
    /// A concluding filter to apply after the last stage.
    ///
    /// Values of `None` indicate the identity closure.
    pub final_closure: Option<JoinFilter>,
}

/// A plan for the execution of one stage of a linear join.
///
/// Each stage is a binary join between the current accumulated
/// join results, and a new collection. The former is referred to
/// as the "stream" and the latter the "lookup".
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Ord, PartialOrd)]
pub struct LinearStagePlan {
    /// The index of the relation into which we will look up.
    pub lookup_relation: usize,
    /// The key expressions to use for the stream relation.
    pub stream_key: Vec<ScalarExpr>,
    /// Columns to retain from the stream relation.
    /// These columns are those that are not redundant with `stream_key`,
    /// and cannot be read out of the key component of an arrangement.
    pub stream_thinning: Vec<usize>,
    /// The key expressions to use for the lookup relation.
    pub lookup_key: Vec<ScalarExpr>,
    /// The closure to apply to the concatenation of the key columns,
    /// the stream value columns, and the lookup value columns.
    pub closure: JoinFilter,
}
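Each `LinearStagePlan` is, conceptually, a hash build over the "lookup" relation keyed by `lookup_key`, probed by the accumulated "stream" rows keyed by `stream_key`, with a closure applied to the concatenation. A hedged sketch of one such stage — the row layout and single-column keys are invented for the example and are not the crate's execution code:

```rust
use std::collections::HashMap;

type Row = Vec<i64>;

fn join_stage(stream: &[Row], lookup: &[Row], stream_key: usize, lookup_key: usize) -> Vec<Row> {
    // Build: index the lookup relation by its key column.
    let mut index: HashMap<i64, Vec<&Row>> = HashMap::new();
    for row in lookup {
        index.entry(row[lookup_key]).or_default().push(row);
    }
    // Probe: each stream row is concatenated with every matching lookup row.
    let mut out = Vec::new();
    for s in stream {
        if let Some(matches) = index.get(&s[stream_key]) {
            for l in matches {
                let mut joined = s.clone();
                joined.extend(l.iter().copied());
                out.push(joined);
            }
        }
    }
    out
}

fn main() {
    let stream = vec![vec![1, 10], vec![2, 20]];
    let lookup = vec![vec![1, 100], vec![3, 300]];
    assert_eq!(join_stage(&stream, &lookup, 0, 0), vec![vec![1, 10, 1, 100]]);
}
```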

50
src/flow/src/plan/reduce.rs
Normal file
@@ -0,0 +1,50 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::expr::{AggregateExpr, Id, LocalId, MapFilterProject, SafeMfpPlan, ScalarExpr};

#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct KeyValPlan {
    pub key_plan: SafeMfpPlan,
    pub val_plan: SafeMfpPlan,
}

/// TODO(discord9): def & impl of hierarchical aggregates (for min/max with support for deletion)
/// and basic aggregates (for other aggregate functions) and mixed aggregates
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub enum ReducePlan {
    /// Plan for not computing any aggregations, just determining the set of
    /// distinct keys.
    Distinct,
    /// Plan for computing only accumulable aggregations,
    /// including simple functions like `sum`, `count`, `min/max` (without deletion)
    Accumulable(AccumulablePlan),
}

/// Accumulable plan for the execution of a reduction.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Deserialize, Serialize)]
pub struct AccumulablePlan {
    /// All of the aggregations we were asked to compute, stored
    /// in order.
    pub full_aggrs: Vec<AggregateExpr>,
    /// All of the non-distinct accumulable aggregates.
    /// Each element represents:
    /// (index of aggr output, index of value among inputs, aggr expr)
    /// These will all be rendered together in one dataflow fragment.
    pub simple_aggrs: Vec<(usize, usize, AggregateExpr)>,
    /// Same as above but for all of the `DISTINCT` accumulable aggregations.
    pub distinct_aggrs: Vec<(usize, usize, AggregateExpr)>,
}
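The `(output index, input index, expr)` triples in `simple_aggrs` can be read as: each aggregate consumes one column of the value row and produces one column of the output row. An illustrative sketch with the `AggregateExpr` elided and a stand-in sum as the only aggregation:

```rust
fn apply_simple_aggrs(
    value_rows: &[Vec<i64>],
    simple_aggrs: &[(usize, usize)], // (output index, input index); expr elided
    num_outputs: usize,
) -> Vec<i64> {
    let mut output = vec![0; num_outputs];
    for &(out_idx, in_idx) in simple_aggrs {
        // Stand-in aggregation: sum the selected input column.
        output[out_idx] = value_rows.iter().map(|row| row[in_idx]).sum();
    }
    output
}

fn main() {
    let rows = vec![vec![1, 10], vec![2, 20]];
    // Output column 0 sums input column 1; output column 1 sums input column 0.
    assert_eq!(apply_simple_aggrs(&rows, &[(0, 1), (1, 0)], 2), vec![30, 3]);
}
```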

@@ -33,7 +33,10 @@ use snafu::ResultExt;

use crate::expr::error::{CastValueSnafu, EvalError};

/// System-wide Record count difference type.
/// System-wide Record count difference type. Useful for capturing data changes
///
/// i.e. +1 means insert one record, -1 means remove,
/// and +/-n means insert/remove multiple duplicate records.
pub type Diff = i64;

/// System-wide default timestamp type
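The `Diff` annotation turns a collection into a multiset of rows: applying updates adds multiplicities, and consolidation cancels records whose net diff reaches zero — the behavior `consolidate_output` on `Plan::Union` asks for. A small sketch:

```rust
use std::collections::BTreeMap;

type Diff = i64;

fn consolidate(updates: &[(&'static str, Diff)]) -> BTreeMap<&'static str, Diff> {
    let mut net = BTreeMap::new();
    for (row, diff) in updates {
        *net.entry(*row).or_insert(0) += diff;
    }
    // Records whose multiplicities cancel out disappear entirely.
    net.retain(|_, d| *d != 0);
    net
}

fn main() {
    let updates = [("a", 1), ("a", 1), ("b", 1), ("b", -1)];
    let net = consolidate(&updates);
    assert_eq!(net.get("a"), Some(&2)); // multiset union: multiplicities add
    assert_eq!(net.get("b"), None);     // insert + delete cancel
}
```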

@@ -28,6 +28,7 @@ use api::v1::meta::Role;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use catalog::CatalogManagerRef;
use client::OutputData;
use common_base::Plugins;
use common_config::KvBackendConfig;
use common_error::ext::BoxedError;

@@ -401,13 +402,13 @@ impl SqlQueryHandler for Instance {

/// Attaches a timer to the output and observes it once the output is exhausted.
pub fn attach_timer(output: Output, timer: HistogramTimer) -> Output {
    match output {
        Output::AffectedRows(_) | Output::RecordBatches(_) => output,
        Output::Stream(stream, plan) => {
    match output.data {
        OutputData::AffectedRows(_) | OutputData::RecordBatches(_) => output,
        OutputData::Stream(stream) => {
            let stream = OnDone::new(stream, move || {
                timer.observe_duration();
            });
            Output::Stream(Box::pin(stream), plan)
            Output::new(OutputData::Stream(Box::pin(stream)), output.meta)
        }
    }
}
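For reference, the `OnDone` used by `attach_timer` wraps a stream and fires a callback once the inner stream is exhausted — here, observing the timer when the last batch is drained. A hedged standalone sketch of that shape, assuming `futures` and `tokio` as dependencies; this is not the crate's actual implementation:

```rust
use std::pin::Pin;
use std::task::{Context, Poll};

use futures::stream::{self, Stream, StreamExt};

struct OnDone<S, F: FnOnce()> {
    inner: S,
    on_done: Option<F>,
}

impl<S: Stream + Unpin, F: FnOnce() + Unpin> Stream for OnDone<S, F> {
    type Item = S::Item;

    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        let this = self.get_mut();
        match Pin::new(&mut this.inner).poll_next(cx) {
            Poll::Ready(None) => {
                // Inner stream exhausted: fire the callback exactly once.
                if let Some(f) = this.on_done.take() {
                    f();
                }
                Poll::Ready(None)
            }
            other => other,
        }
    }
}

#[tokio::main]
async fn main() {
    let s = OnDone {
        inner: stream::iter([1, 2, 3]),
        on_done: Some(|| println!("timer observed")),
    };
    assert_eq!(s.collect::<Vec<_>>().await, vec![1, 2, 3]);
}
```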

@@ -113,7 +113,7 @@ impl GrpcQueryHandler for Instance {
                    .statement_executor
                    .create_table_inner(&mut expr, None, &ctx)
                    .await?;
                Output::AffectedRows(0)
                Output::new_with_affected_rows(0)
            }
            DdlExpr::Alter(expr) => self.statement_executor.alter_table_inner(expr).await?,
            DdlExpr::CreateDatabase(expr) => {

@@ -47,8 +47,8 @@ impl OpentsdbProtocolHandler for Instance {
            .map_err(BoxedError::new)
            .context(servers::error::ExecuteGrpcQuerySnafu)?;

        Ok(match output {
            common_query::Output::AffectedRows(rows) => rows,
        Ok(match output.data {
            common_query::OutputData::AffectedRows(rows) => rows,
            _ => unreachable!(),
        })
    }

@@ -19,6 +19,7 @@ use api::prom_store::remote::{Query, QueryResult, ReadRequest, ReadResponse, Wri
use api::v1::RowInsertRequests;
use async_trait::async_trait;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq};
use client::OutputData;
use common_catalog::format_full_table_name;
use common_error::ext::BoxedError;
use common_query::prelude::GREPTIME_PHYSICAL_TABLE;

@@ -77,7 +78,7 @@ fn negotiate_response_type(accepted_response_types: &[i32]) -> ServerResult<Resp
}

async fn to_query_result(table_name: &str, output: Output) -> ServerResult<QueryResult> {
    let Output::Stream(stream, _) = output else {
    let OutputData::Stream(stream) = output.data else {
        unreachable!()
    };
    let recordbatches = RecordBatches::try_collect(stream)

@@ -152,6 +152,10 @@ impl TxnService for RaftEngineBackend {
            responses,
        })
    }

    fn max_txn_ops(&self) -> usize {
        usize::MAX
    }
}

#[async_trait::async_trait]

@@ -24,7 +24,9 @@ fn main() {

#[tokio::main]
async fn run() {
    let kv_backend = EtcdStore::with_endpoints(["127.0.0.1:2380"]).await.unwrap();
    let kv_backend = EtcdStore::with_endpoints(["127.0.0.1:2380"], 128)
        .await
        .unwrap();

    // put
    let put_req = PutRequest {

@@ -193,7 +193,8 @@ pub async fn metasrv_builder(
        (None, false) => {
            let etcd_client = create_etcd_client(opts).await?;
            let kv_backend = {
                let etcd_backend = EtcdStore::with_etcd_client(etcd_client.clone());
                let etcd_backend =
                    EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops);
                if !opts.store_key_prefix.is_empty() {
                    Arc::new(ChrootKvBackend::new(
                        opts.store_key_prefix.clone().into_bytes(),

@@ -79,6 +79,17 @@ pub struct MetaSrvOptions {
    pub wal: MetaSrvWalConfig,
    pub export_metrics: ExportMetricsOption,
    pub store_key_prefix: String,
    /// The max operations per txn
    ///
    /// This value is usually limited by which store is used for the `KvBackend`.
    /// For example, if using etcd, this value should ensure that it is less than
    /// or equal to the `--max-txn-ops` option value of etcd.
    ///
    /// TODO(jeremy): Currently, this option only affects the etcd store, but it may
    /// also affect other stores in the future. In other words, each store needs to
    /// limit the number of operations in a txn because an infinitely large txn could
    /// potentially block other operations.
    pub max_txn_ops: usize,
}

impl MetaSrvOptions {

@@ -112,6 +123,7 @@ impl Default for MetaSrvOptions {
            wal: MetaSrvWalConfig::default(),
            export_metrics: ExportMetricsOption::default(),
            store_key_prefix: String::new(),
            max_txn_ops: 128,
        }
    }
}
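Because etcd rejects a transaction whose operation count exceeds its `--max-txn-ops`, a caller holding more operations than `max_txn_ops` is expected to split them into several transactions first. A sketch of that chunking with a hypothetical helper (`chunked_txns` is invented for illustration):

```rust
fn chunked_txns<T: Clone>(ops: &[T], max_txn_ops: usize) -> Vec<Vec<T>> {
    // Each chunk fits in one backend transaction; the last may be smaller.
    ops.chunks(max_txn_ops).map(|chunk| chunk.to_vec()).collect()
}

fn main() {
    let ops: Vec<u32> = (0..300).collect();
    let txns = chunked_txns(&ops, 128);
    assert_eq!(txns.len(), 3);
    assert!(txns.iter().all(|txn| txn.len() <= 128));
}
```

Note that splitting one logical transaction into several loses atomicity across the chunks, which is presumably why the limit is surfaced as an option rather than handled silently.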

@@ -42,7 +42,7 @@ pub async fn mock_with_memstore() -> MockInfo {
}

pub async fn mock_with_etcdstore(addr: &str) -> MockInfo {
    let kv_backend = EtcdStore::with_endpoints([addr]).await.unwrap();
    let kv_backend = EtcdStore::with_endpoints([addr], 128).await.unwrap();
    mock(Default::default(), kv_backend, None, None).await
}

@@ -380,6 +380,10 @@ impl TxnService for LeaderCachedKvBackend {

        Ok(res)
    }

    fn max_txn_ops(&self) -> usize {
        self.store.max_txn_ops()
    }
}

impl ResettableKvBackend for LeaderCachedKvBackend {

@@ -79,5 +79,6 @@ rand.workspace = true
toml.workspace = true

[[bench]]
name = "bench_merge_tree"
name = "memtable_bench"
harness = false
required-features = ["test"]

@@ -7,3 +7,9 @@ The Alfa Romeo [MiTo](https://en.wikipedia.org/wiki/Alfa_Romeo_MiTo) is a front-

> "You can't be a true petrolhead until you've owned an Alfa Romeo."
> <div align="right">-- by Jeremy Clarkson</div>

## Benchmarks
Run benchmarks in this crate:
```bash
cargo bench -p mito2 -F test
```
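
Criterion also accepts a name filter after `--`, so `cargo bench -p mito2 -F test -- full_scan` should run only the matching benchmark group (assuming criterion's standard CLI filtering).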

352
src/mito2/benches/memtable_bench.rs
Normal file
@@ -0,0 +1,352 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::value::ValueData;
use api::v1::{Row, Rows, SemanticType};
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion_common::Column;
use datafusion_expr::{lit, Expr};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::ColumnSchema;
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
use mito2::memtable::time_series::TimeSeriesMemtable;
use mito2::memtable::{KeyValues, Memtable};
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
use rand::Rng;
use store_api::metadata::{
    ColumnMetadata, RegionMetadata, RegionMetadataBuilder, RegionMetadataRef,
};
use store_api::storage::RegionId;
use table::predicate::Predicate;

/// Writes rows.
fn write_rows(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let timestamps = (0..100).collect::<Vec<_>>();

    // Note that this test only generates one time series.
    let mut group = c.benchmark_group("write");
    group.bench_function("merge_tree", |b| {
        let memtable =
            MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());
        let kvs =
            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
        b.iter(|| {
            memtable.write(&kvs).unwrap();
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        let kvs =
            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
        b.iter(|| {
            memtable.write(&kvs).unwrap();
        });
    });
}

/// Scans all rows.
fn full_scan(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    let start_sec = 1710043200;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

    let mut group = c.benchmark_group("full_scan");
    group.sample_size(10);
    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }

        b.iter(|| {
            let iter = memtable.iter(None, None).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }

        b.iter(|| {
            let iter = memtable.iter(None, None).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
}

/// Filters 1 host.
fn filter_1_host(c: &mut Criterion) {
    let metadata = Arc::new(cpu_metadata());
    let config = MergeTreeConfig::default();
    let start_sec = 1710043200;
    let generator = CpuDataGenerator::new(metadata.clone(), 4000, start_sec, start_sec + 3600 * 2);

    let mut group = c.benchmark_group("filter_1_host");
    group.sample_size(10);
    group.bench_function("merge_tree", |b| {
        let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &config);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }
        let predicate = generator.random_host_filter();

        b.iter(|| {
            let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
    group.bench_function("time_series", |b| {
        let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None);
        for kvs in generator.iter() {
            memtable.write(&kvs).unwrap();
        }
        let predicate = generator.random_host_filter();

        b.iter(|| {
            let iter = memtable.iter(None, Some(predicate.clone())).unwrap();
            for batch in iter {
                let _batch = batch.unwrap();
            }
        });
    });
}

struct Host {
    hostname: String,
    region: String,
    datacenter: String,
    rack: String,
    os: String,
    arch: String,
    team: String,
    service: String,
    service_version: String,
    service_environment: String,
}

impl Host {
    fn random_with_id(id: usize) -> Host {
        let mut rng = rand::thread_rng();
        let region = format!("ap-southeast-{}", rng.gen_range(0..10));
        let datacenter = format!(
            "{}{}",
            region,
            ['a', 'b', 'c', 'd', 'e'].choose(&mut rng).unwrap()
        );
        Host {
            hostname: format!("host_{id}"),
            region,
            datacenter,
            rack: rng.gen_range(0..100).to_string(),
            os: "Ubuntu16.04LTS".to_string(),
            arch: "x86".to_string(),
            team: "CHI".to_string(),
            service: rng.gen_range(0..100).to_string(),
            service_version: rng.gen_range(0..10).to_string(),
            service_environment: "test".to_string(),
        }
    }

    fn fill_values(&self, values: &mut Vec<api::v1::Value>) {
        let tags = [
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.hostname.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.region.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.datacenter.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.rack.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.os.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.arch.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.team.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.service.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.service_version.clone())),
            },
            api::v1::Value {
                value_data: Some(ValueData::StringValue(self.service_environment.clone())),
            },
        ];
        for tag in tags {
            values.push(tag);
        }
    }
}

struct CpuDataGenerator {
    metadata: RegionMetadataRef,
    column_schemas: Vec<api::v1::ColumnSchema>,
    hosts: Vec<Host>,
    start_sec: i64,
    end_sec: i64,
}

impl CpuDataGenerator {
    fn new(metadata: RegionMetadataRef, num_hosts: usize, start_sec: i64, end_sec: i64) -> Self {
        let column_schemas = region_metadata_to_row_schema(&metadata);
        Self {
            metadata,
            column_schemas,
            hosts: Self::generate_hosts(num_hosts),
            start_sec,
            end_sec,
        }
    }

    fn iter(&self) -> impl Iterator<Item = KeyValues> + '_ {
        // point per 10s.
        (self.start_sec..self.end_sec)
            .step_by(10)
            .enumerate()
            .map(|(seq, ts)| self.build_key_values(seq, ts))
    }

    fn build_key_values(&self, seq: usize, current_sec: i64) -> KeyValues {
        let rows = self
            .hosts
            .iter()
            .map(|host| {
                let mut rng = rand::thread_rng();
                let mut values = Vec::with_capacity(21);
                values.push(api::v1::Value {
                    value_data: Some(ValueData::TimestampMillisecondValue(current_sec * 1000)),
                });
                host.fill_values(&mut values);
                for _ in 0..10 {
                    values.push(api::v1::Value {
                        value_data: Some(ValueData::F64Value(Self::random_f64(&mut rng))),
                    });
                }
                Row { values }
            })
            .collect();
        let mutation = api::v1::Mutation {
            op_type: api::v1::OpType::Put as i32,
            sequence: seq as u64,
            rows: Some(Rows {
                schema: self.column_schemas.clone(),
                rows,
            }),
        };

        KeyValues::new(&self.metadata, mutation).unwrap()
    }

    fn random_host_filter(&self) -> Predicate {
        let host = self.random_hostname();
        let expr = Expr::Column(Column::from_name("hostname")).eq(lit(host));
        Predicate::new(vec![expr.into()])
    }

    fn random_hostname(&self) -> String {
        let mut rng = rand::thread_rng();
        self.hosts.choose(&mut rng).unwrap().hostname.clone()
    }

    fn random_f64(rng: &mut ThreadRng) -> f64 {
        let base: u32 = rng.gen_range(30..95);
        base as f64
    }

    fn generate_hosts(num_hosts: usize) -> Vec<Host> {
        (0..num_hosts).map(Host::random_with_id).collect()
    }
}

/// Creates a metadata for TSBS cpu-like table.
fn cpu_metadata() -> RegionMetadata {
    let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
    builder.push_column_metadata(ColumnMetadata {
        column_schema: ColumnSchema::new(
            "ts",
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        ),
        semantic_type: SemanticType::Timestamp,
        column_id: 0,
    });
    let mut column_id = 1;
    let tags = [
        "hostname",
        "region",
        "datacenter",
        "rack",
        "os",
        "arch",
        "team",
        "service",
        "service_version",
        "service_environment",
    ];
    for tag in tags {
        builder.push_column_metadata(ColumnMetadata {
            column_schema: ColumnSchema::new(tag, ConcreteDataType::string_datatype(), true),
            semantic_type: SemanticType::Tag,
            column_id,
        });
        column_id += 1;
    }
    let fields = [
        "usage_user",
        "usage_system",
        "usage_idle",
        "usage_nice",
        "usage_iowait",
        "usage_irq",
        "usage_softirq",
        "usage_steal",
        "usage_guest",
        "usage_guest_nice",
    ];
    for field in fields {
        builder.push_column_metadata(ColumnMetadata {
            column_schema: ColumnSchema::new(field, ConcreteDataType::float64_datatype(), true),
            semantic_type: SemanticType::Field,
            column_id,
        });
        column_id += 1;
    }
    builder.primary_key(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    builder.build().unwrap()
}

criterion_group!(benches, write_rows, full_scan, filter_1_host);
criterion_main!(benches);

@@ -1,36 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use criterion::{criterion_group, criterion_main, Criterion};
use mito2::memtable::merge_tree::{MergeTreeConfig, MergeTreeMemtable};
use mito2::memtable::Memtable;
use mito2::test_util::memtable_util;

fn bench_merge_tree_memtable(c: &mut Criterion) {
    let metadata = memtable_util::metadata_with_primary_key(vec![1, 0], true);
    let timestamps = (0..100).collect::<Vec<_>>();

    let memtable = MergeTreeMemtable::new(1, metadata.clone(), None, &MergeTreeConfig::default());

    let _ = c.bench_function("MergeTreeMemtable", |b| {
        let kvs =
            memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
        b.iter(|| {
            memtable.write(&kvs).unwrap();
        });
    });
}

criterion_group!(benches, bench_merge_tree_memtable);
criterion_main!(benches);
|
||||
@@ -158,7 +158,7 @@ impl CacheManager {
        }
    }

    /// Gets the the write cache.
    /// Gets the write cache.
    pub(crate) fn write_cache(&self) -> Option<&WriteCacheRef> {
        self.write_cache.as_ref()
    }
@@ -26,6 +26,7 @@ use table::predicate::Predicate;

use crate::error::Result;
use crate::flush::WriteBufferManagerRef;
use crate::memtable::key_values::KeyValue;
pub use crate::memtable::key_values::KeyValues;
use crate::memtable::merge_tree::MergeTreeConfig;
use crate::metrics::WRITE_BUFFER_BYTES;
@@ -33,6 +34,7 @@ use crate::read::Batch;

pub mod key_values;
pub mod merge_tree;
pub mod time_partition;
pub mod time_series;
pub(crate) mod version;

@@ -82,9 +84,12 @@ pub trait Memtable: Send + Sync + fmt::Debug {
    /// Returns the id of this memtable.
    fn id(&self) -> MemtableId;

    /// Write key values into the memtable.
    /// Writes key values into the memtable.
    fn write(&self, kvs: &KeyValues) -> Result<()>;

    /// Writes one key value pair into the memtable.
    fn write_one(&self, key_value: KeyValue) -> Result<()>;

    /// Scans the memtable.
    /// `projection` selects columns to read, `None` means reading all columns.
    /// `filters` are the predicates to be pushed down to memtable.
@@ -71,7 +71,7 @@ impl KeyValues {
/// Primary key columns have the same order as region's primary key. Field
/// columns are ordered by their position in the region schema (The same order
/// as users defined while creating the region).
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub struct KeyValue<'a> {
    row: &'a Row,
    schema: &'a Vec<ColumnSchema>,
@@ -36,6 +36,7 @@ use table::predicate::Predicate;

use crate::error::Result;
use crate::flush::WriteBufferManagerRef;
use crate::memtable::key_values::KeyValue;
use crate::memtable::merge_tree::metrics::WriteMetrics;
use crate::memtable::merge_tree::tree::MergeTree;
use crate::memtable::{
@@ -85,7 +86,7 @@ impl Default for MergeTreeConfig {

        Self {
            index_max_keys_per_shard: 8192,
            data_freeze_threshold: 32768,
            data_freeze_threshold: 131072,
            dedup: true,
            fork_dictionary_bytes,
        }
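The default `data_freeze_threshold` grows from 32768 to 131072 rows. Callers can still override a single field while keeping the other defaults; a self-contained sketch of that struct-update pattern (the same `..Default::default()` idiom the tests in this change use; `Config` here is a stand-in, not the real `MergeTreeConfig`):

```rust
// Stand-in config to illustrate the override pattern; field names mirror
// MergeTreeConfig but this is not the real type.
#[derive(Debug)]
struct Config {
    index_max_keys_per_shard: usize,
    data_freeze_threshold: usize,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            index_max_keys_per_shard: 8192,
            data_freeze_threshold: 131072,
        }
    }
}

fn main() {
    // Restore the previous freeze threshold while keeping other defaults.
    let config = Config {
        data_freeze_threshold: 32768,
        ..Default::default()
    };
    println!("{config:?}");
}
```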
@@ -127,6 +128,17 @@ impl Memtable for MergeTreeMemtable {
        res
    }

    fn write_one(&self, key_value: KeyValue) -> Result<()> {
        let mut metrics = WriteMetrics::default();
        let mut pk_buffer = Vec::new();
        // Ensures the memtable always updates stats.
        let res = self.tree.write_one(key_value, &mut pk_buffer, &mut metrics);

        self.update_stats(&metrics);

        res
    }

    fn iter(
        &self,
        projection: Option<&[ColumnId]>,
@@ -290,14 +302,14 @@ impl MemtableBuilder for MergeTreeMemtableBuilder {

#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use common_time::Timestamp;
    use datafusion_common::{Column, ScalarValue};
    use datafusion_expr::{BinaryExpr, Expr, Operator};
    use datatypes::scalars::ScalarVector;
    use datatypes::vectors::{Int64Vector, TimestampMillisecondVector};
    use datatypes::vectors::Int64Vector;

    use super::*;
    use crate::test_util::memtable_util;
    use crate::test_util::memtable_util::{self, collect_iter_timestamps};

    #[test]
    fn test_memtable_sorted_input() {
@@ -320,23 +332,10 @@ mod tests {
        let expected_ts = kvs
            .iter()
            .map(|kv| kv.timestamp().as_timestamp().unwrap().unwrap().value())
            .collect::<BTreeSet<_>>();
            .collect::<Vec<_>>();

        let iter = memtable.iter(None, None).unwrap();
        let read = iter
            .flat_map(|batch| {
                batch
                    .unwrap()
                    .timestamps()
                    .as_any()
                    .downcast_ref::<TimestampMillisecondVector>()
                    .unwrap()
                    .iter_data()
                    .collect::<Vec<_>>()
                    .into_iter()
            })
            .map(|v| v.unwrap().0.value())
            .collect::<BTreeSet<_>>();
        let read = collect_iter_timestamps(iter);
        assert_eq!(expected_ts, read);

        let stats = memtable.stats();
@@ -384,20 +383,7 @@ mod tests {
        memtable.write(&kvs).unwrap();

        let iter = memtable.iter(None, None).unwrap();
        let read = iter
            .flat_map(|batch| {
                batch
                    .unwrap()
                    .timestamps()
                    .as_any()
                    .downcast_ref::<TimestampMillisecondVector>()
                    .unwrap()
                    .iter_data()
                    .collect::<Vec<_>>()
                    .into_iter()
            })
            .map(|v| v.unwrap().0.value())
            .collect::<Vec<_>>();
        let read = collect_iter_timestamps(iter);
        assert_eq!(vec![0, 1, 2, 3, 4, 5, 6, 7], read);

        let iter = memtable.iter(None, None).unwrap();
@@ -512,20 +498,45 @@ mod tests {

        let expect = data.into_iter().map(|x| x.2).collect::<Vec<_>>();
        let iter = memtable.iter(None, None).unwrap();
        let read = iter
            .flat_map(|batch| {
                batch
                    .unwrap()
                    .timestamps()
                    .as_any()
                    .downcast_ref::<TimestampMillisecondVector>()
                    .unwrap()
                    .iter_data()
                    .collect::<Vec<_>>()
                    .into_iter()
            })
            .map(|v| v.unwrap().0.value())
            .collect::<Vec<_>>();
        let read = collect_iter_timestamps(iter);
        assert_eq!(expect, read);
    }

    #[test]
    fn test_memtable_filter() {
        let metadata = memtable_util::metadata_with_primary_key(vec![0, 1], false);
        // Try to build a memtable via the builder.
        let memtable = MergeTreeMemtableBuilder::new(
            MergeTreeConfig {
                index_max_keys_per_shard: 40,
                ..Default::default()
            },
            None,
        )
        .build(1, &metadata);

        for i in 0..100 {
            let timestamps: Vec<_> = (0..10).map(|v| i as i64 * 1000 + v).collect();
            let kvs =
                memtable_util::build_key_values(&metadata, "hello".to_string(), i, &timestamps, 1);
            memtable.write(&kvs).unwrap();
        }

        for i in 0..100 {
            let timestamps: Vec<_> = (0..10).map(|v| i as i64 * 1000 + v).collect();
            let expr = Expr::BinaryExpr(BinaryExpr {
                left: Box::new(Expr::Column(Column {
                    relation: None,
                    name: "k1".to_string(),
                })),
                op: Operator::Eq,
                right: Box::new(Expr::Literal(ScalarValue::UInt32(Some(i)))),
            });
            let iter = memtable
                .iter(None, Some(Predicate::new(vec![expr.into()])))
                .unwrap();
            let read = collect_iter_timestamps(iter);
            assert_eq!(timestamps, read);
        }
    }
}
@@ -957,6 +957,18 @@ impl DataParts {
        self.active.write_row(pk_index, kv)
    }

    /// Returns the number of rows in the active buffer.
    pub fn num_active_rows(&self) -> usize {
        self.active.num_rows()
    }

    /// Freezes active buffer and creates a new active buffer.
    pub fn freeze(&mut self) -> Result<()> {
        let part = self.active.freeze(None, false)?;
        self.frozen.push(part);
        Ok(())
    }

    /// Reads data from all parts including active and frozen parts.
    /// The returned iterator yields a record batch of one primary key at a time.
    /// The order of yielding primary keys is determined by provided weights.
@@ -976,6 +988,11 @@ impl DataParts {
    pub(crate) fn is_empty(&self) -> bool {
        self.active.is_empty() && self.frozen.iter().all(|part| part.is_empty())
    }

    #[cfg(test)]
    pub(crate) fn frozen_len(&self) -> usize {
        self.frozen.len()
    }
}

pub struct DataPartsReaderBuilder {
@@ -994,9 +1011,11 @@ impl DataPartsReaderBuilder {
        for p in self.parts {
            nodes.push(DataNode::new(DataSource::Part(p)));
        }
        let num_parts = nodes.len();
        let merger = Merger::try_new(nodes)?;
        Ok(DataPartsReader {
            merger,
            num_parts,
            elapsed: Default::default(),
        })
    }
@@ -1005,6 +1024,7 @@ impl DataPartsReaderBuilder {
/// Reader for all parts inside a `DataParts`.
pub struct DataPartsReader {
    merger: Merger<DataNode>,
    num_parts: usize,
    elapsed: Duration,
}

@@ -1032,6 +1052,10 @@ impl DataPartsReader {
    pub(crate) fn is_valid(&self) -> bool {
        self.merger.is_valid()
    }

    pub(crate) fn num_parts(&self) -> usize {
        self.num_parts
    }
}

#[cfg(test)]
@@ -45,7 +45,7 @@ impl<T: DataBatchSource> DataBatchSource for DedupReader<T> {
    }

    fn next(&mut self) -> Result<()> {
        loop {
        while self.inner.is_valid() {
            match &mut self.prev_batch_last_row {
                None => {
                    // First shot, fill prev_batch_last_row and current_batch_range with first batch.
@@ -78,7 +78,7 @@ impl Partition {

        // Finds key in shards, now we ensure one key only exists in one shard.
        if let Some(pk_id) = inner.find_key_in_shards(primary_key) {
            inner.write_to_shard(pk_id, &key_value);
            inner.write_to_shard(pk_id, &key_value)?;
            inner.num_rows += 1;
            return Ok(());
        }
@@ -106,7 +106,7 @@ impl Partition {
    }

    /// Writes to the partition without a primary key.
    pub fn write_no_key(&self, key_value: KeyValue) {
    pub fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
        let mut inner = self.inner.write().unwrap();
        // If no primary key, always write to the first shard.
        debug_assert!(!inner.shards.is_empty());
@@ -117,12 +117,24 @@ impl Partition {
            shard_id: 0,
            pk_index: 0,
        };
        inner.shards[0].write_with_pk_id(pk_id, &key_value);
        inner.shards[0].write_with_pk_id(pk_id, &key_value)?;
        inner.num_rows += 1;

        Ok(())
    }

    /// Scans data in the partition.
    pub fn read(&self, mut context: ReadPartitionContext) -> Result<PartitionReader> {
        let start = Instant::now();
        let key_filter = if context.need_prune_key {
            Some(PrimaryKeyFilter::new(
                context.metadata.clone(),
                context.filters.clone(),
                context.row_codec.clone(),
            ))
        } else {
            None
        };
        let (builder_source, shard_reader_builders) = {
            let inner = self.inner.read().unwrap();
            let mut shard_source = Vec::with_capacity(inner.shards.len() + 1);
@@ -141,14 +153,21 @@ impl Partition {
            (builder_reader, shard_source)
        };

        context.metrics.num_shards += shard_reader_builders.len();
        let mut nodes = shard_reader_builders
            .into_iter()
            .map(|builder| Ok(ShardNode::new(ShardSource::Shard(builder.build()?))))
            .map(|builder| {
                Ok(ShardNode::new(ShardSource::Shard(
                    builder.build(key_filter.clone())?,
                )))
            })
            .collect::<Result<Vec<_>>>()?;

        if let Some(builder) = builder_source {
            context.metrics.num_builder += 1;
            // Move the initialization of ShardBuilderReader out of read lock.
            let shard_builder_reader = builder.build(Some(&context.pk_weights))?;
            let shard_builder_reader =
                builder.build(Some(&context.pk_weights), key_filter.clone())?;
            nodes.push(ShardNode::new(ShardSource::Builder(shard_builder_reader)));
        }

@@ -156,8 +175,10 @@ impl Partition {
        let merger = ShardMerger::try_new(nodes)?;
        if self.dedup {
            let source = DedupReader::try_new(merger)?;
            context.metrics.build_partition_reader += start.elapsed();
            PartitionReader::new(context, Box::new(source))
        } else {
            context.metrics.build_partition_reader += start.elapsed();
            PartitionReader::new(context, Box::new(merger))
        }
    }
@@ -266,11 +287,11 @@ pub(crate) struct PartitionStats {

#[derive(Default)]
struct PartitionReaderMetrics {
    prune_pk: Duration,
    build_partition_reader: Duration,
    read_source: Duration,
    data_batch_to_batch: Duration,
    keys_before_pruning: usize,
    keys_after_pruning: usize,
    num_builder: usize,
    num_shards: usize,
}

/// Reader to scan rows in a partition.
@@ -279,18 +300,11 @@ struct PartitionReaderMetrics {
pub struct PartitionReader {
    context: ReadPartitionContext,
    source: BoxedDataBatchSource,
    last_yield_pk_id: Option<PkId>,
}

impl PartitionReader {
    fn new(context: ReadPartitionContext, source: BoxedDataBatchSource) -> Result<Self> {
        let mut reader = Self {
            context,
            source,
            last_yield_pk_id: None,
        };
        // Find next valid batch.
        reader.prune_batch_by_key()?;
        let reader = Self { context, source };

        Ok(reader)
    }
@@ -305,8 +319,7 @@ impl PartitionReader {
    /// # Panics
    /// Panics if the reader is invalid.
    pub fn next(&mut self) -> Result<()> {
        self.advance_source()?;
        self.prune_batch_by_key()
        self.advance_source()
    }

    /// Converts current data batch into a [Batch].
@@ -336,106 +349,77 @@ impl PartitionReader {
        self.context.metrics.read_source += read_source.elapsed();
        Ok(())
    }

    fn prune_batch_by_key(&mut self) -> Result<()> {
        if self.context.metadata.primary_key.is_empty() || !self.context.need_prune_key {
            // Nothing to prune.
            return Ok(());
        }

        while self.source.is_valid() {
            let pk_id = self.source.current_pk_id();
            if let Some(yield_pk_id) = self.last_yield_pk_id {
                if pk_id == yield_pk_id {
                    // If this batch has the same key as last returned batch.
                    // We can return it without evaluating filters.
                    break;
                }
            }
            let key = self.source.current_key().unwrap();
            self.context.metrics.keys_before_pruning += 1;
            // Prune batch by primary key.
            if prune_primary_key(
                &self.context.metadata,
                &self.context.filters,
                &self.context.row_codec,
                key,
                &mut self.context.metrics,
            ) {
                // We need this key.
                self.last_yield_pk_id = Some(pk_id);
                self.context.metrics.keys_after_pruning += 1;
                break;
            }
            self.advance_source()?;
        }
        Ok(())
    }
}

fn prune_primary_key(
    metadata: &RegionMetadataRef,
    filters: &[SimpleFilterEvaluator],
    codec: &McmpRowCodec,
    pk: &[u8],
    metrics: &mut PartitionReaderMetrics,
) -> bool {
    let start = Instant::now();
    let res = prune_primary_key_inner(metadata, filters, codec, pk);
    metrics.prune_pk += start.elapsed();
    res
#[derive(Clone)]
pub(crate) struct PrimaryKeyFilter {
    metadata: RegionMetadataRef,
    filters: Arc<Vec<SimpleFilterEvaluator>>,
    codec: Arc<McmpRowCodec>,
    offsets_buf: Vec<usize>,
}

// TODO(yingwen): Improve performance of key pruning. Now we need to find index and
// then decode and convert each value.
/// Returns true if the `pk` is still needed.
fn prune_primary_key_inner(
    metadata: &RegionMetadataRef,
    filters: &[SimpleFilterEvaluator],
    codec: &McmpRowCodec,
    pk: &[u8],
) -> bool {
    if filters.is_empty() {
        return true;
impl PrimaryKeyFilter {
    pub(crate) fn new(
        metadata: RegionMetadataRef,
        filters: Arc<Vec<SimpleFilterEvaluator>>,
        codec: Arc<McmpRowCodec>,
    ) -> Self {
        Self {
            metadata,
            filters,
            codec,
            offsets_buf: Vec::new(),
        }
    }

    // no primary key, we simply return true.
    if metadata.primary_key.is_empty() {
        return true;
    }

    let pk_values = match codec.decode(pk) {
        Ok(values) => values,
        Err(e) => {
            common_telemetry::error!(e; "Failed to decode primary key");
    pub(crate) fn prune_primary_key(&mut self, pk: &[u8]) -> bool {
        if self.filters.is_empty() {
            return true;
        }
    };

    // evaluate filters against primary key values
    let mut result = true;
    for filter in filters {
        if Partition::is_partition_column(filter.column_name()) {
            continue;
        // no primary key, we simply return true.
        if self.metadata.primary_key.is_empty() {
            return true;
        }
        let Some(column) = metadata.column_by_name(filter.column_name()) else {
            continue;
        };
        // ignore filters that are not referencing primary key columns
        if column.semantic_type != SemanticType::Tag {
            continue;

        // evaluate filters against primary key values
        let mut result = true;
        self.offsets_buf.clear();
        for filter in &*self.filters {
            if Partition::is_partition_column(filter.column_name()) {
                continue;
            }
            let Some(column) = self.metadata.column_by_name(filter.column_name()) else {
                continue;
            };
            // ignore filters that are not referencing primary key columns
            if column.semantic_type != SemanticType::Tag {
                continue;
            }
            // index of the column in primary keys.
            // Safety: A tag column is always in primary key.
            let index = self.metadata.primary_key_index(column.column_id).unwrap();
            let value = match self.codec.decode_value_at(pk, index, &mut self.offsets_buf) {
                Ok(v) => v,
                Err(e) => {
                    common_telemetry::error!(e; "Failed to decode primary key");
                    return true;
                }
            };

            // TODO(yingwen): `evaluate_scalar()` creates temporary arrays to compare scalars. We
            // can compare the bytes directly without allocation and matching types as we use
            // comparable encoding.
            // Safety: arrow schema and datatypes are constructed from the same source.
            let scalar_value = value
                .try_to_scalar_value(&column.column_schema.data_type)
                .unwrap();
            result &= filter.evaluate_scalar(&scalar_value).unwrap_or(true);
        }
        // index of the column in primary keys.
        // Safety: A tag column is always in primary key.
        let index = metadata.primary_key_index(column.column_id).unwrap();
        // Safety: arrow schema and datatypes are constructed from the same source.
        let scalar_value = pk_values[index]
            .try_to_scalar_value(&column.column_schema.data_type)
            .unwrap();
        result &= filter.evaluate_scalar(&scalar_value).unwrap_or(true);

        result
    }

    result
}

/// Structs to reuse across readers to avoid allocating for each reader.
@@ -443,7 +427,7 @@ pub(crate) struct ReadPartitionContext {
    metadata: RegionMetadataRef,
    row_codec: Arc<McmpRowCodec>,
    projection: HashSet<ColumnId>,
    filters: Vec<SimpleFilterEvaluator>,
    filters: Arc<Vec<SimpleFilterEvaluator>>,
    /// Buffer to store pk weights.
    pk_weights: Vec<u16>,
    need_prune_key: bool,
@@ -452,10 +436,6 @@ pub(crate) struct ReadPartitionContext {

impl Drop for ReadPartitionContext {
    fn drop(&mut self) {
        let partition_prune_pk = self.metrics.prune_pk.as_secs_f64();
        MERGE_TREE_READ_STAGE_ELAPSED
            .with_label_values(&["partition_prune_pk"])
            .observe(partition_prune_pk);
        let partition_read_source = self.metrics.read_source.as_secs_f64();
        MERGE_TREE_READ_STAGE_ELAPSED
            .with_label_values(&["partition_read_source"])
@@ -465,16 +445,19 @@ impl Drop for ReadPartitionContext {
            .with_label_values(&["partition_data_batch_to_batch"])
            .observe(partition_data_batch_to_batch);

        if self.metrics.keys_before_pruning != 0 {
            common_telemetry::debug!(
                "TreeIter pruning, before: {}, after: {}, partition_read_source: {}s, partition_prune_pk: {}s, partition_data_batch_to_batch: {}s",
                self.metrics.keys_before_pruning,
                self.metrics.keys_after_pruning,
                partition_read_source,
                partition_prune_pk,
                partition_data_batch_to_batch,
            );
        }
        common_telemetry::debug!(
            "TreeIter partitions metrics, \
            num_builder: {}, \
            num_shards: {}, \
            build_partition_reader: {}s, \
            partition_read_source: {}s, \
            partition_data_batch_to_batch: {}s",
            self.metrics.num_builder,
            self.metrics.num_shards,
            self.metrics.build_partition_reader.as_secs_f64(),
            partition_read_source,
            partition_data_batch_to_batch,
        );
    }
}

@@ -490,7 +473,7 @@ impl ReadPartitionContext {
            metadata,
            row_codec,
            projection,
            filters,
            filters: Arc::new(filters),
            pk_weights: Vec::new(),
            need_prune_key,
            metrics: Default::default(),
@@ -578,7 +561,16 @@ impl Inner {
    fn new(metadata: RegionMetadataRef, config: &MergeTreeConfig) -> Self {
        let (shards, current_shard_id) = if metadata.primary_key.is_empty() {
            let data_parts = DataParts::new(metadata.clone(), DATA_INIT_CAP, config.dedup);
            (vec![Shard::new(0, None, data_parts, config.dedup)], 1)
            (
                vec![Shard::new(
                    0,
                    None,
                    data_parts,
                    config.dedup,
                    config.data_freeze_threshold,
                )],
                1,
            )
        } else {
            (Vec::new(), 0)
        };
@@ -598,18 +590,22 @@ impl Inner {
        self.pk_to_pk_id.get(primary_key).copied()
    }

    fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) {
    fn write_to_shard(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
        if pk_id.shard_id == self.shard_builder.current_shard_id() {
            self.shard_builder.write_with_pk_id(pk_id, key_value);
            return;
        }
        for shard in &mut self.shards {
            if shard.shard_id == pk_id.shard_id {
                shard.write_with_pk_id(pk_id, key_value);
                self.num_rows += 1;
                return;
            }
            return Ok(());
        }

        // Safety: We find the shard by shard id.
        let shard = self
            .shards
            .iter_mut()
            .find(|shard| shard.shard_id == pk_id.shard_id)
            .unwrap();
        shard.write_with_pk_id(pk_id, key_value)?;
        self.num_rows += 1;

        Ok(())
    }

    fn freeze_active_shard(&mut self) -> Result<()> {
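The old free function `prune_primary_key` becomes a reusable `PrimaryKeyFilter`, and pruning moves down into the shard readers, which re-evaluate the filter only when the current pk index differs from the last yielded one. A self-contained sketch of that evaluate-once-per-key caching (plain integers standing in for encoded keys, not the mito2 reader API):

```rust
// Sketch of evaluate-once-per-key pruning: the predicate runs only when
// the key index changes, mirroring the `last_yield_pk_index` caching.
fn prune<F: FnMut(u16) -> bool>(batches: &[u16], mut keep: F) -> Vec<u16> {
    let mut last_yield: Option<u16> = None;
    let mut out = Vec::new();
    for &pk_index in batches {
        let keep_batch = match last_yield {
            // Same key as the last yielded batch: skip re-evaluation.
            Some(prev) if prev == pk_index => true,
            _ => {
                let ok = keep(pk_index);
                if ok {
                    last_yield = Some(pk_index);
                }
                ok
            }
        };
        if keep_batch {
            out.push(pk_index);
        }
    }
    out
}

fn main() {
    // Keep only even key indexes; consecutive batches of one key are
    // accepted without re-running the filter.
    let kept = prune(&[1, 2, 2, 3, 4, 4], |pk| pk % 2 == 0);
    assert_eq!(kept, vec![2, 2, 4, 4]);
}
```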
@@ -15,6 +15,7 @@
//! Shard in a partition.

use std::cmp::Ordering;
use std::time::{Duration, Instant};

use store_api::metadata::RegionMetadataRef;

@@ -25,8 +26,10 @@ use crate::memtable::merge_tree::data::{
};
use crate::memtable::merge_tree::dict::KeyDictRef;
use crate::memtable::merge_tree::merger::{Merger, Node};
use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
use crate::memtable::merge_tree::shard_builder::ShardBuilderReader;
use crate::memtable::merge_tree::{PkId, ShardId};
use crate::memtable::merge_tree::{PkId, PkIndex, ShardId};
use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;

/// Shard stores data related to the same key dictionary.
pub struct Shard {
@@ -36,6 +39,8 @@ pub struct Shard {
    /// Data in the shard.
    data_parts: DataParts,
    dedup: bool,
    /// Number of rows to freeze a data part.
    data_freeze_threshold: usize,
}

impl Shard {
@@ -45,20 +50,29 @@ impl Shard {
        key_dict: Option<KeyDictRef>,
        data_parts: DataParts,
        dedup: bool,
        data_freeze_threshold: usize,
    ) -> Shard {
        Shard {
            shard_id,
            key_dict,
            data_parts,
            dedup,
            data_freeze_threshold,
        }
    }

    /// Writes a key value into the shard.
    pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) {
    ///
    /// It freezes the active buffer if it is full.
    pub fn write_with_pk_id(&mut self, pk_id: PkId, key_value: &KeyValue) -> Result<()> {
        debug_assert_eq!(self.shard_id, pk_id.shard_id);

        if self.data_parts.num_active_rows() >= self.data_freeze_threshold {
            self.data_parts.freeze()?;
        }

        self.data_parts.write_row(pk_id.pk_index, key_value);
        Ok(())
    }

    /// Scans the shard.
@@ -80,6 +94,7 @@ impl Shard {
            key_dict: self.key_dict.clone(),
            data_parts: DataParts::new(metadata, DATA_INIT_CAP, self.dedup),
            dedup: self.dedup,
            data_freeze_threshold: self.data_freeze_threshold,
        }
    }

@@ -131,18 +146,15 @@ pub struct ShardReaderBuilder {
}

impl ShardReaderBuilder {
    pub(crate) fn build(self) -> Result<ShardReader> {
    pub(crate) fn build(self, key_filter: Option<PrimaryKeyFilter>) -> Result<ShardReader> {
        let ShardReaderBuilder {
            shard_id,
            key_dict,
            inner,
        } = self;
        let now = Instant::now();
        let parts_reader = inner.build()?;
        Ok(ShardReader {
            shard_id,
            key_dict,
            parts_reader,
        })
        ShardReader::new(shard_id, key_dict, parts_reader, key_filter, now.elapsed())
    }
}

@@ -151,15 +163,46 @@ pub struct ShardReader {
    shard_id: ShardId,
    key_dict: Option<KeyDictRef>,
    parts_reader: DataPartsReader,
    key_filter: Option<PrimaryKeyFilter>,
    last_yield_pk_index: Option<PkIndex>,
    keys_before_pruning: usize,
    keys_after_pruning: usize,
    prune_pk_cost: Duration,
    data_build_cost: Duration,
}

impl ShardReader {
    fn new(
        shard_id: ShardId,
        key_dict: Option<KeyDictRef>,
        parts_reader: DataPartsReader,
        key_filter: Option<PrimaryKeyFilter>,
        data_build_cost: Duration,
    ) -> Result<Self> {
        let has_pk = key_dict.is_some();
        let mut reader = Self {
            shard_id,
            key_dict,
            parts_reader,
            key_filter: if has_pk { key_filter } else { None },
            last_yield_pk_index: None,
            keys_before_pruning: 0,
            keys_after_pruning: 0,
            prune_pk_cost: Duration::default(),
            data_build_cost,
        };
        reader.prune_batch_by_key()?;

        Ok(reader)
    }

    fn is_valid(&self) -> bool {
        self.parts_reader.is_valid()
    }

    fn next(&mut self) -> Result<()> {
        self.parts_reader.next()
        self.parts_reader.next()?;
        self.prune_batch_by_key()
    }

    fn current_key(&self) -> Option<&[u8]> {
@@ -180,6 +223,54 @@ impl ShardReader {
    fn current_data_batch(&self) -> DataBatch {
        self.parts_reader.current_data_batch()
    }

    fn prune_batch_by_key(&mut self) -> Result<()> {
        let Some(key_filter) = &mut self.key_filter else {
            return Ok(());
        };

        while self.parts_reader.is_valid() {
            let pk_index = self.parts_reader.current_data_batch().pk_index();
            if let Some(yield_pk_index) = self.last_yield_pk_index {
                if pk_index == yield_pk_index {
                    break;
                }
            }
            self.keys_before_pruning += 1;
            // Safety: `key_filter` is some so the shard has primary keys.
            let key = self.key_dict.as_ref().unwrap().key_by_pk_index(pk_index);
            let now = Instant::now();
            if key_filter.prune_primary_key(key) {
                self.prune_pk_cost += now.elapsed();
                self.last_yield_pk_index = Some(pk_index);
                self.keys_after_pruning += 1;
                break;
            }
            self.prune_pk_cost += now.elapsed();
            self.parts_reader.next()?;
        }

        Ok(())
    }
}

impl Drop for ShardReader {
    fn drop(&mut self) {
        let shard_prune_pk = self.prune_pk_cost.as_secs_f64();
        MERGE_TREE_READ_STAGE_ELAPSED
            .with_label_values(&["shard_prune_pk"])
            .observe(shard_prune_pk);
        if self.keys_before_pruning > 0 {
            common_telemetry::debug!(
                "ShardReader metrics, data parts: {}, before pruning: {}, after pruning: {}, prune cost: {}s, build cost: {}s",
                self.parts_reader.num_parts(),
                self.keys_before_pruning,
                self.keys_after_pruning,
                shard_prune_pk,
                self.data_build_cost.as_secs_f64(),
            );
        }
    }
}

/// A merger that merges batches from multiple shards.
@@ -388,6 +479,7 @@ mod tests {
        shard_id: ShardId,
        metadata: RegionMetadataRef,
        input: &[(KeyValues, PkIndex)],
        data_freeze_threshold: usize,
    ) -> Shard {
        let mut dict_builder = KeyDictBuilder::new(1024);
        let mut metrics = WriteMetrics::default();
@@ -402,27 +494,17 @@ mod tests {
        let dict = dict_builder.finish(&mut BTreeMap::new()).unwrap();
        let data_parts = DataParts::new(metadata, DATA_INIT_CAP, true);

        Shard::new(shard_id, Some(Arc::new(dict)), data_parts, true)
        Shard::new(
            shard_id,
            Some(Arc::new(dict)),
            data_parts,
            true,
            data_freeze_threshold,
        )
    }

    #[test]
    fn test_write_read_shard() {
        let metadata = metadata_for_test();
        let input = input_with_key(&metadata);
        let mut shard = new_shard_with_dict(8, metadata, &input);
        assert!(shard.is_empty());
        for (key_values, pk_index) in &input {
            for kv in key_values.iter() {
                let pk_id = PkId {
                    shard_id: shard.shard_id,
                    pk_index: *pk_index,
                };
                shard.write_with_pk_id(pk_id, &kv);
            }
        }
        assert!(!shard.is_empty());

        let mut reader = shard.read().unwrap().build().unwrap();
    fn collect_timestamps(shard: &Shard) -> Vec<i64> {
        let mut reader = shard.read().unwrap().build(None).unwrap();
        let mut timestamps = Vec::new();
        while reader.is_valid() {
            let rb = reader.current_data_batch().slice_record_batch();
@@ -432,6 +514,64 @@ mod tests {

            reader.next().unwrap();
        }
        timestamps
    }

    #[test]
    fn test_write_read_shard() {
        let metadata = metadata_for_test();
        let input = input_with_key(&metadata);
        let mut shard = new_shard_with_dict(8, metadata, &input, 100);
        assert!(shard.is_empty());
        for (key_values, pk_index) in &input {
            for kv in key_values.iter() {
                let pk_id = PkId {
                    shard_id: shard.shard_id,
                    pk_index: *pk_index,
                };
                shard.write_with_pk_id(pk_id, &kv).unwrap();
            }
        }
        assert!(!shard.is_empty());

        let timestamps = collect_timestamps(&shard);
        assert_eq!(vec![0, 1, 10, 11, 20, 21], timestamps);
    }

    #[test]
    fn test_shard_freeze() {
        let metadata = metadata_for_test();
        let kvs = build_key_values_with_ts_seq_values(
            &metadata,
            "shard".to_string(),
            0,
            [0].into_iter(),
            [Some(0.0)].into_iter(),
            0,
        );
        let mut shard = new_shard_with_dict(8, metadata.clone(), &[(kvs, 0)], 50);
        let expected: Vec<_> = (0..200).collect();
        for i in &expected {
            let kvs = build_key_values_with_ts_seq_values(
                &metadata,
                "shard".to_string(),
                0,
                [*i].into_iter(),
                [Some(0.0)].into_iter(),
                *i as u64,
            );
            let pk_id = PkId {
                shard_id: shard.shard_id,
                pk_index: *i as PkIndex,
            };
            for kv in kvs.iter() {
                shard.write_with_pk_id(pk_id, &kv).unwrap();
            }
        }
        assert!(!shard.is_empty());
        assert_eq!(3, shard.data_parts.frozen_len());

        let timestamps = collect_timestamps(&shard);
        assert_eq!(expected, timestamps);
    }
}
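`write_with_pk_id` now freezes the active buffer once it reaches `data_freeze_threshold` rows, so a hot shard accumulates a list of frozen parts. A standalone sketch of that bookkeeping; with a threshold of 50 and 200 writes it reproduces the three frozen parts asserted in `test_shard_freeze`:

```rust
// Freeze-on-threshold sketch: once the active buffer holds `threshold`
// rows, it is frozen and a fresh active buffer starts.
struct Parts {
    active: Vec<i64>,
    frozen: Vec<Vec<i64>>,
    threshold: usize,
}

impl Parts {
    fn write(&mut self, row: i64) {
        if self.active.len() >= self.threshold {
            // Move the full active buffer into the frozen list.
            self.frozen.push(std::mem::take(&mut self.active));
        }
        self.active.push(row);
    }
}

fn main() {
    let mut parts = Parts { active: Vec::new(), frozen: Vec::new(), threshold: 50 };
    for i in 0..200 {
        parts.write(i);
    }
    // Matches the expectation in `test_shard_freeze`: three frozen parts
    // and a partially filled active buffer.
    assert_eq!(3, parts.frozen.len());
    assert_eq!(50, parts.active.len());
}
```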
@@ -16,6 +16,7 @@

use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use std::time::{Duration, Instant};

use store_api::metadata::RegionMetadataRef;

@@ -26,8 +27,9 @@ use crate::memtable::merge_tree::data::{
};
use crate::memtable::merge_tree::dict::{DictBuilderReader, KeyDictBuilder};
use crate::memtable::merge_tree::metrics::WriteMetrics;
use crate::memtable::merge_tree::partition::PrimaryKeyFilter;
use crate::memtable::merge_tree::shard::Shard;
use crate::memtable::merge_tree::{MergeTreeConfig, PkId, ShardId};
use crate::memtable::merge_tree::{MergeTreeConfig, PkId, PkIndex, ShardId};
use crate::metrics::MERGE_TREE_READ_STAGE_ELAPSED;

/// Builder to write keys and data to a shard that the key dictionary
@@ -136,7 +138,13 @@ impl ShardBuilder {
        let shard_id = self.current_shard_id;
        self.current_shard_id += 1;

        Ok(Some(Shard::new(shard_id, key_dict, data_parts, self.dedup)))
        Ok(Some(Shard::new(
            shard_id,
            key_dict,
            data_parts,
            self.dedup,
            self.data_freeze_threshold,
        )))
    }

    /// Scans the shard builder.
@@ -176,13 +184,20 @@ pub(crate) struct ShardBuilderReaderBuilder {
}

impl ShardBuilderReaderBuilder {
    pub(crate) fn build(self, pk_weights: Option<&[u16]>) -> Result<ShardBuilderReader> {
    pub(crate) fn build(
        self,
        pk_weights: Option<&[u16]>,
        key_filter: Option<PrimaryKeyFilter>,
    ) -> Result<ShardBuilderReader> {
        let now = Instant::now();
        let data_reader = self.data_reader.build(pk_weights)?;
        Ok(ShardBuilderReader {
            shard_id: self.shard_id,
            dict_reader: self.dict_reader,
        ShardBuilderReader::new(
            self.shard_id,
            self.dict_reader,
            data_reader,
        })
            key_filter,
            now.elapsed(),
        )
    }
}

@@ -191,15 +206,45 @@ pub struct ShardBuilderReader {
    shard_id: ShardId,
    dict_reader: DictBuilderReader,
    data_reader: DataBufferReader,
    key_filter: Option<PrimaryKeyFilter>,
    last_yield_pk_index: Option<PkIndex>,
    keys_before_pruning: usize,
    keys_after_pruning: usize,
    prune_pk_cost: Duration,
    data_build_cost: Duration,
}

impl ShardBuilderReader {
    fn new(
        shard_id: ShardId,
        dict_reader: DictBuilderReader,
        data_reader: DataBufferReader,
        key_filter: Option<PrimaryKeyFilter>,
        data_build_cost: Duration,
    ) -> Result<Self> {
        let mut reader = ShardBuilderReader {
            shard_id,
            dict_reader,
            data_reader,
            key_filter,
            last_yield_pk_index: None,
            keys_before_pruning: 0,
            keys_after_pruning: 0,
            prune_pk_cost: Duration::default(),
            data_build_cost,
        };
        reader.prune_batch_by_key()?;

        Ok(reader)
    }

    pub fn is_valid(&self) -> bool {
        self.data_reader.is_valid()
    }

    pub fn next(&mut self) -> Result<()> {
        self.data_reader.next()
        self.data_reader.next()?;
        self.prune_batch_by_key()
    }

    pub fn current_key(&self) -> Option<&[u8]> {
@@ -218,6 +263,52 @@ impl ShardBuilderReader {
    pub fn current_data_batch(&self) -> DataBatch {
        self.data_reader.current_data_batch()
    }

    fn prune_batch_by_key(&mut self) -> Result<()> {
        let Some(key_filter) = &mut self.key_filter else {
            return Ok(());
        };

        while self.data_reader.is_valid() {
            let pk_index = self.data_reader.current_data_batch().pk_index();
            if let Some(yield_pk_index) = self.last_yield_pk_index {
                if pk_index == yield_pk_index {
                    break;
                }
            }
            self.keys_before_pruning += 1;
            let key = self.dict_reader.key_by_pk_index(pk_index);
            let now = Instant::now();
            if key_filter.prune_primary_key(key) {
                self.prune_pk_cost += now.elapsed();
                self.last_yield_pk_index = Some(pk_index);
                self.keys_after_pruning += 1;
                break;
            }
            self.prune_pk_cost += now.elapsed();
            self.data_reader.next()?;
        }

        Ok(())
    }
}

impl Drop for ShardBuilderReader {
    fn drop(&mut self) {
        let shard_builder_prune_pk = self.prune_pk_cost.as_secs_f64();
        MERGE_TREE_READ_STAGE_ELAPSED
            .with_label_values(&["shard_builder_prune_pk"])
            .observe(shard_builder_prune_pk);
        if self.keys_before_pruning > 0 {
            common_telemetry::debug!(
                "ShardBuilderReader metrics, before pruning: {}, after pruning: {}, prune cost: {}s, build cost: {}s",
                self.keys_before_pruning,
                self.keys_after_pruning,
                shard_builder_prune_pk,
                self.data_build_cost.as_secs_f64(),
            );
        }
    }
}

#[cfg(test)]
@@ -306,7 +397,7 @@ mod tests {
        let mut reader = shard_builder
            .read(&mut pk_weights)
            .unwrap()
            .build(Some(&pk_weights))
            .build(Some(&pk_weights), None)
            .unwrap();
        let mut timestamps = Vec::new();
        while reader.is_valid() {
@@ -124,7 +124,7 @@ impl MergeTree {

        if !has_pk {
            // No primary key.
            self.write_no_key(kv);
            self.write_no_key(kv)?;
            continue;
        }

@@ -148,6 +148,54 @@ impl MergeTree {
        Ok(())
    }

    /// Write one key value pair into the tree.
    ///
    /// # Panics
    /// Panics if the tree is immutable (frozen).
    pub fn write_one(
        &self,
        kv: KeyValue,
        pk_buffer: &mut Vec<u8>,
        metrics: &mut WriteMetrics,
    ) -> Result<()> {
        let has_pk = !self.metadata.primary_key.is_empty();

        ensure!(
            kv.num_primary_keys() == self.row_codec.num_fields(),
            PrimaryKeyLengthMismatchSnafu {
                expect: self.row_codec.num_fields(),
                actual: kv.num_primary_keys(),
            }
        );
        // Safety: timestamp of kv must be both present and a valid timestamp value.
        let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
        metrics.min_ts = metrics.min_ts.min(ts);
        metrics.max_ts = metrics.max_ts.max(ts);
        metrics.value_bytes += kv.fields().map(|v| v.data_size()).sum::<usize>();

        if !has_pk {
            // No primary key.
            return self.write_no_key(kv);
        }

        // Encode primary key.
        pk_buffer.clear();
        if self.is_partitioned {
            // Use sparse encoder for metric engine.
            self.sparse_encoder
                .encode_to_vec(kv.primary_keys(), pk_buffer)?;
        } else {
            self.row_codec.encode_to_vec(kv.primary_keys(), pk_buffer)?;
        }

        // Write the row with the encoded primary key.
        self.write_with_key(pk_buffer, kv, metrics)?;

        metrics.value_bytes += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();

        Ok(())
    }

    /// Scans the tree.
    pub fn read(
        &self,
@@ -299,7 +347,7 @@ impl MergeTree {
        )
    }

    fn write_no_key(&self, key_value: KeyValue) {
    fn write_no_key(&self, key_value: KeyValue) -> Result<()> {
        let partition_key = Partition::get_partition_key(&key_value, self.is_partitioned);
        let partition = self.get_or_create_partition(partition_key);
551
src/mito2/src/memtable/time_partition.rs
Normal file
@@ -0,0 +1,551 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Partitions memtables by time.

use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::Duration;

use common_telemetry::debug;
use common_time::timestamp::TimeUnit;
use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;
use smallvec::{smallvec, SmallVec};
use snafu::OptionExt;
use store_api::metadata::RegionMetadataRef;

use crate::error::{InvalidRequestSnafu, Result};
use crate::memtable::key_values::KeyValue;
use crate::memtable::version::SmallMemtableVec;
use crate::memtable::{KeyValues, MemtableBuilderRef, MemtableId, MemtableRef};

/// A partition holds rows with timestamps between `[min, max)`.
#[derive(Debug, Clone)]
pub struct TimePartition {
    /// Memtable of the partition.
    memtable: MemtableRef,
    /// Time range of the partition. `None` means there is no time range. The time
    /// range is `None` if and only if the [TimePartitions::part_duration] is `None`.
    time_range: Option<PartTimeRange>,
}

impl TimePartition {
    /// Returns whether the `ts` belongs to the partition.
    fn contains_timestamp(&self, ts: Timestamp) -> bool {
        let Some(range) = self.time_range else {
            return true;
        };

        range.contains_timestamp(ts)
    }

    /// Write rows to the part.
    fn write(&self, kvs: &KeyValues) -> Result<()> {
        self.memtable.write(kvs)
    }
}

type PartitionVec = SmallVec<[TimePartition; 2]>;

/// Partitions.
#[derive(Debug)]
pub struct TimePartitions {
    /// Mutable data of partitions.
    inner: Mutex<PartitionsInner>,
    /// Duration of a partition.
    ///
    /// `None` means there is only one partition and the [TimePartition::time_range] is
    /// also `None`.
    part_duration: Option<Duration>,
    /// Metadata of the region.
    metadata: RegionMetadataRef,
    /// Builder of memtables.
    builder: MemtableBuilderRef,
}

pub type TimePartitionsRef = Arc<TimePartitions>;

impl TimePartitions {
    /// Returns a new empty partition list with optional duration.
    pub fn new(
        metadata: RegionMetadataRef,
        builder: MemtableBuilderRef,
        next_memtable_id: MemtableId,
        part_duration: Option<Duration>,
    ) -> Self {
        let mut inner = PartitionsInner::new(next_memtable_id);
        if part_duration.is_none() {
            // If `part_duration` is None, then we create a partition with `None` time
            // range so we will write all rows to that partition.
            let memtable = builder.build(inner.alloc_memtable_id(), &metadata);
            debug!(
                "Creates a time partition for all timestamps, region: {}, memtable_id: {}",
                metadata.region_id,
                memtable.id(),
            );
            let part = TimePartition {
                memtable,
                time_range: None,
            };
            inner.parts.push(part);
        }

        Self {
            inner: Mutex::new(inner),
            part_duration,
            metadata,
            builder,
        }
    }

    /// Write key values to memtables.
    ///
    /// It creates new partitions if necessary.
    pub fn write(&self, kvs: &KeyValues) -> Result<()> {
        // Get all parts.
        let parts = self.list_partitions();

        // Checks whether all rows belong to a single part. Checks in reverse order as we usually
        // put to latest part.
        for part in parts.iter().rev() {
            let mut all_in_partition = true;
            for kv in kvs.iter() {
                // Safety: We checked the schema in the write request.
                let ts = kv.timestamp().as_timestamp().unwrap().unwrap();
                if !part.contains_timestamp(ts) {
                    all_in_partition = false;
                    break;
                }
            }
            if !all_in_partition {
                continue;
            }

            // We can write all rows to this part.
            return part.write(kvs);
        }

        // Slow path: We have to split kvs by partitions.
        self.write_multi_parts(kvs, &parts)
    }
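The fast path above writes the whole batch to one partition; only mixed batches reach `write_multi_parts`. A self-contained sketch of the split step, grouping timestamps by their bucket start (plain `i64` milliseconds standing in for `Timestamp`):

```rust
use std::collections::HashMap;

// Sketch of the slow path: group each timestamp (ms) by the start of its
// bucket, so each group can go to one partition's memtable.
fn split_by_bucket(ts_ms: &[i64], bucket_ms: i64) -> HashMap<i64, Vec<i64>> {
    let mut groups: HashMap<i64, Vec<i64>> = HashMap::new();
    for &ts in ts_ms {
        // Floor division keeps negative timestamps in the right bucket.
        let start = ts.div_euclid(bucket_ms) * bucket_ms;
        groups.entry(start).or_default().push(ts);
    }
    groups
}

fn main() {
    // The same rows the `test_write_multi_parts` test below writes with a
    // 5s bucket: two land in [0, 5000) and two in [5000, 10000).
    let groups = split_by_bucket(&[3000, 7000, 4000, 5000], 5000);
    assert_eq!(groups[&0], vec![3000, 4000]);
    assert_eq!(groups[&5000], vec![7000, 5000]);
}
```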
    /// Append memtables in partitions to `memtables`.
    pub fn list_memtables(&self, memtables: &mut Vec<MemtableRef>) {
        let inner = self.inner.lock().unwrap();
        memtables.extend(inner.parts.iter().map(|part| part.memtable.clone()));
    }

    /// Returns the number of partitions.
    pub fn num_partitions(&self) -> usize {
        let inner = self.inner.lock().unwrap();
        inner.parts.len()
    }

    /// Returns true if all memtables are empty.
    pub fn is_empty(&self) -> bool {
        let inner = self.inner.lock().unwrap();
        inner.parts.iter().all(|part| part.memtable.is_empty())
    }

    /// Freezes all memtables.
    pub fn freeze(&self) -> Result<()> {
        let inner = self.inner.lock().unwrap();
        for part in &*inner.parts {
            part.memtable.freeze()?;
        }
        Ok(())
    }

    /// Forks latest partition.
    pub fn fork(&self, metadata: &RegionMetadataRef) -> Self {
        let mut inner = self.inner.lock().unwrap();
        let latest_part = inner
            .parts
            .iter()
            .max_by_key(|part| part.time_range.map(|range| range.min_timestamp))
            .cloned();

        let Some(old_part) = latest_part else {
            return Self::new(
                metadata.clone(),
                self.builder.clone(),
                inner.next_memtable_id,
                self.part_duration,
            );
        };
        let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
        let new_part = TimePartition {
            memtable,
            time_range: old_part.time_range,
        };
        Self {
            inner: Mutex::new(PartitionsInner::with_partition(
                new_part,
                inner.next_memtable_id,
            )),
            part_duration: self.part_duration,
            metadata: metadata.clone(),
            builder: self.builder.clone(),
        }
    }

    /// Returns partition duration.
    pub(crate) fn part_duration(&self) -> Option<Duration> {
        self.part_duration
    }

    /// Returns memory usage.
    pub(crate) fn memory_usage(&self) -> usize {
        let inner = self.inner.lock().unwrap();
        inner
            .parts
            .iter()
            .map(|part| part.memtable.stats().estimated_bytes)
            .sum()
    }

    /// Append memtables in partitions to small vec.
    pub(crate) fn list_memtables_to_small_vec(&self, memtables: &mut SmallMemtableVec) {
        let inner = self.inner.lock().unwrap();
        memtables.extend(inner.parts.iter().map(|part| part.memtable.clone()));
    }

    /// Returns the next memtable id.
    pub(crate) fn next_memtable_id(&self) -> MemtableId {
        let inner = self.inner.lock().unwrap();
        inner.next_memtable_id
    }

    /// Returns all partitions.
    fn list_partitions(&self) -> PartitionVec {
        let inner = self.inner.lock().unwrap();
        inner.parts.clone()
    }

    /// Write to multiple partitions.
    fn write_multi_parts(&self, kvs: &KeyValues, parts: &PartitionVec) -> Result<()> {
        // If part duration is `None` then there is always one partition and all rows
        // will be put in that partition before invoking this method.
        debug_assert!(self.part_duration.is_some());

        let mut parts_to_write = HashMap::new();
        let mut missing_parts = HashMap::new();
        for kv in kvs.iter() {
            let mut part_found = false;
            // Safety: We used the timestamp before.
            let ts = kv.timestamp().as_timestamp().unwrap().unwrap();
            for part in parts {
                if part.contains_timestamp(ts) {
                    // Safety: Since part duration is `Some`, all time ranges should be `Some`.
                    parts_to_write
                        .entry(part.time_range.unwrap().min_timestamp)
                        .or_insert_with(|| PartitionToWrite {
                            partition: part.clone(),
                            key_values: Vec::new(),
                        })
                        .key_values
                        .push(kv);
                    part_found = true;
                    break;
                }
            }

            if !part_found {
                // We need to write it to a new part.
                // Safety: `new()` ensures duration is always Some if we get to this method.
                let part_duration = self.part_duration.unwrap();
                let part_start =
                    partition_start_timestamp(ts, part_duration).with_context(|| {
                        InvalidRequestSnafu {
                            region_id: self.metadata.region_id,
                            reason: format!(
                                "timestamp {ts:?} and bucket {part_duration:?} are out of range"
                            ),
                        }
                    })?;
                missing_parts
                    .entry(part_start)
                    .or_insert_with(Vec::new)
                    .push(kv);
            }
        }

        // Writes rows to existing parts.
        for part_to_write in parts_to_write.into_values() {
            for kv in part_to_write.key_values {
                part_to_write.partition.memtable.write_one(kv)?;
            }
        }

        let part_duration = self.part_duration.unwrap();
        // Creates new parts and writes to them. Acquires the lock to avoid others creating
        // the same partition.
        let mut inner = self.inner.lock().unwrap();
        for (part_start, key_values) in missing_parts {
            let part_pos = match inner
                .parts
                .iter()
                .position(|part| part.time_range.unwrap().min_timestamp == part_start)
            {
                Some(pos) => pos,
                None => {
                    let range = PartTimeRange::from_start_duration(part_start, part_duration)
                        .with_context(|| InvalidRequestSnafu {
                            region_id: self.metadata.region_id,
                            reason: format!(
                                "Partition time range for {part_start:?} is out of bound, bucket size: {part_duration:?}",
                            ),
                        })?;
                    let memtable = self
                        .builder
                        .build(inner.alloc_memtable_id(), &self.metadata);
                    debug!(
                        "Create time partition {:?} for region {}, duration: {:?}, memtable_id: {}, parts_total: {}",
                        range,
                        self.metadata.region_id,
                        part_duration,
                        memtable.id(),
                        inner.parts.len() + 1
                    );
                    let pos = inner.parts.len();
                    inner.parts.push(TimePartition {
                        memtable,
                        time_range: Some(range),
                    });
                    pos
                }
            };

            let memtable = &inner.parts[part_pos].memtable;
            for kv in key_values {
                memtable.write_one(kv)?;
            }
        }

        Ok(())
    }
}

/// Computes the start timestamp of the partition for `ts`.
///
/// It always uses the bucket size in seconds, which fits all timestamp resolutions.
fn partition_start_timestamp(ts: Timestamp, bucket: Duration) -> Option<Timestamp> {
    // Safety: We convert it to seconds so it never returns `None`.
    let ts_sec = ts.convert_to(TimeUnit::Second).unwrap();
    let bucket_sec: i64 = bucket.as_secs().try_into().ok()?;
    let start_sec = ts_sec.align_by_bucket(bucket_sec)?;
    start_sec.convert_to(ts.unit())
}
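A worked sketch of the alignment arithmetic: converting to seconds first lets one bucket computation serve every timestamp resolution, and flooring keeps pre-epoch values in the right bucket (plain `i64` math, not the actual `BucketAligned` trait):

```rust
// Alignment sketch: convert to seconds, floor to the bucket start, then
// convert back to the original resolution (here: milliseconds).
fn partition_start_ms(ts_ms: i64, bucket_secs: i64) -> i64 {
    let ts_sec = ts_ms.div_euclid(1000);
    let start_sec = ts_sec.div_euclid(bucket_secs) * bucket_secs;
    start_sec * 1000
}

fn main() {
    // 11.5s with 5s buckets starts at 10s.
    assert_eq!(10_000, partition_start_ms(11_500, 5));
    // Pre-epoch timestamps floor toward the earlier bucket.
    assert_eq!(-5_000, partition_start_ms(-1, 5));
}
```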
#[derive(Debug)]
|
||||
struct PartitionsInner {
|
||||
/// All partitions.
|
||||
parts: PartitionVec,
|
||||
/// Next memtable id.
|
||||
next_memtable_id: MemtableId,
|
||||
}
|
||||
|
||||
impl PartitionsInner {
|
||||
fn new(next_memtable_id: MemtableId) -> Self {
|
||||
Self {
|
||||
parts: Default::default(),
|
||||
next_memtable_id,
|
||||
}
|
||||
}
|
||||
|
||||
fn with_partition(part: TimePartition, next_memtable_id: MemtableId) -> Self {
|
||||
Self {
|
||||
parts: smallvec![part],
|
||||
next_memtable_id,
|
||||
}
    }

    fn alloc_memtable_id(&mut self) -> MemtableId {
        let id = self.next_memtable_id;
        self.next_memtable_id += 1;
        id
    }
}

/// Time range of a partition.
#[derive(Debug, Clone, Copy)]
struct PartTimeRange {
    /// Inclusive min timestamp of rows in the partition.
    min_timestamp: Timestamp,
    /// Exclusive max timestamp of rows in the partition.
    max_timestamp: Timestamp,
}

impl PartTimeRange {
    fn from_start_duration(start: Timestamp, duration: Duration) -> Option<Self> {
        let start_sec = start.convert_to(TimeUnit::Second)?;
        let end_sec = start_sec.add_duration(duration).ok()?;
        let min_timestamp = start_sec.convert_to(start.unit())?;
        let max_timestamp = end_sec.convert_to(start.unit())?;

        Some(Self {
            min_timestamp,
            max_timestamp,
        })
    }

    /// Returns whether the `ts` belongs to the partition.
    fn contains_timestamp(&self, ts: Timestamp) -> bool {
        self.min_timestamp <= ts && ts < self.max_timestamp
    }
}

struct PartitionToWrite<'a> {
    partition: TimePartition,
    key_values: Vec<KeyValue<'a>>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::memtable::merge_tree::MergeTreeMemtableBuilder;
    use crate::test_util::memtable_util::{self, collect_iter_timestamps};

    #[test]
    fn test_no_duration() {
        let metadata = memtable_util::metadata_for_test();
        let builder = Arc::new(MergeTreeMemtableBuilder::default());
        let partitions = TimePartitions::new(metadata.clone(), builder, 0, None);
        assert_eq!(1, partitions.num_partitions());
        assert!(partitions.is_empty());

        let kvs = memtable_util::build_key_values(
            &metadata,
            "hello".to_string(),
            0,
            &[1000, 3000, 7000, 5000, 6000],
            0, // sequence 0, 1, 2, 3, 4
        );
        partitions.write(&kvs).unwrap();

        assert_eq!(1, partitions.num_partitions());
        assert!(!partitions.is_empty());
        let mut memtables = Vec::new();
        partitions.list_memtables(&mut memtables);

        let iter = memtables[0].iter(None, None).unwrap();
        let timestamps = collect_iter_timestamps(iter);
        assert_eq!(&[1000, 3000, 5000, 6000, 7000], &timestamps[..]);
    }

    #[test]
    fn test_write_single_part() {
        let metadata = memtable_util::metadata_for_test();
        let builder = Arc::new(MergeTreeMemtableBuilder::default());
        let partitions =
            TimePartitions::new(metadata.clone(), builder, 0, Some(Duration::from_secs(10)));
        assert_eq!(0, partitions.num_partitions());

        let kvs = memtable_util::build_key_values(
            &metadata,
            "hello".to_string(),
            0,
            &[5000, 2000, 0],
            0, // sequence 0, 1, 2
        );
        // It should create a new partition.
        partitions.write(&kvs).unwrap();
        assert_eq!(1, partitions.num_partitions());
        assert!(!partitions.is_empty());

        let kvs = memtable_util::build_key_values(
            &metadata,
            "hello".to_string(),
            0,
            &[3000, 7000, 4000],
            3, // sequence 3, 4, 5
        );
        // Still writes to the same partition.
        partitions.write(&kvs).unwrap();
        assert_eq!(1, partitions.num_partitions());

        let mut memtables = Vec::new();
        partitions.list_memtables(&mut memtables);
        let iter = memtables[0].iter(None, None).unwrap();
        let timestamps = collect_iter_timestamps(iter);
        assert_eq!(&[0, 2000, 3000, 4000, 5000, 7000], &timestamps[..]);
        let parts = partitions.list_partitions();
        assert_eq!(
            Timestamp::new_millisecond(0),
            parts[0].time_range.unwrap().min_timestamp
        );
        assert_eq!(
            Timestamp::new_millisecond(10000),
            parts[0].time_range.unwrap().max_timestamp
        );
    }

    #[test]
    fn test_write_multi_parts() {
        let metadata = memtable_util::metadata_for_test();
        let builder = Arc::new(MergeTreeMemtableBuilder::default());
        let partitions =
            TimePartitions::new(metadata.clone(), builder, 0, Some(Duration::from_secs(5)));
        assert_eq!(0, partitions.num_partitions());

        let kvs = memtable_util::build_key_values(
            &metadata,
            "hello".to_string(),
            0,
            &[2000, 0],
            0, // sequence 0, 1
        );
        // It should create a new partition.
        partitions.write(&kvs).unwrap();
        assert_eq!(1, partitions.num_partitions());
        assert!(!partitions.is_empty());

        let kvs = memtable_util::build_key_values(
            &metadata,
            "hello".to_string(),
            0,
            &[3000, 7000, 4000, 5000],
            2, // sequence 2, 3, 4, 5
        );
        // Writes 2 rows to the old partition and 2 rows to a new partition.
        partitions.write(&kvs).unwrap();
        assert_eq!(2, partitions.num_partitions());

        let parts = partitions.list_partitions();
        let iter = parts[0].memtable.iter(None, None).unwrap();
        let timestamps = collect_iter_timestamps(iter);
        assert_eq!(
            Timestamp::new_millisecond(0),
            parts[0].time_range.unwrap().min_timestamp
        );
        assert_eq!(
            Timestamp::new_millisecond(5000),
            parts[0].time_range.unwrap().max_timestamp
        );
        assert_eq!(&[0, 2000, 3000, 4000], &timestamps[..]);
        let iter = parts[1].memtable.iter(None, None).unwrap();
        let timestamps = collect_iter_timestamps(iter);
        assert_eq!(&[5000, 7000], &timestamps[..]);
        assert_eq!(
            Timestamp::new_millisecond(5000),
            parts[1].time_range.unwrap().min_timestamp
        );
        assert_eq!(
            Timestamp::new_millisecond(10000),
            parts[1].time_range.unwrap().max_timestamp
        );
    }
}
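The partition math above hinges on aligning a timestamp down to a multiple of the partition duration and testing it against a half-open range. A minimal self-contained sketch of that bucketing, using plain i64 seconds instead of the crate's Timestamp type (an illustrative assumption, not the crate API):

fn partition_range(ts_sec: i64, duration_sec: i64) -> (i64, i64) {
    // Align down to a multiple of the duration; div_euclid keeps the
    // floor behavior correct for negative timestamps as well.
    let start = ts_sec.div_euclid(duration_sec) * duration_sec;
    (start, start + duration_sec)
}

fn main() {
    // A 7s timestamp with a 5s partition duration lands in [5, 10).
    let (min, max) = partition_range(7, 5);
    assert_eq!((5, 10), (min, max));
    // The max bound is exclusive, matching `contains_timestamp` above.
    assert!(min <= 7 && 7 < max);
}

This mirrors why the tests expect a row at timestamp 5000 to open a second partition when the duration is 5 seconds.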
@@ -38,6 +38,7 @@ use table::predicate::Predicate;

 use crate::error::{ComputeArrowSnafu, ConvertVectorSnafu, PrimaryKeyLengthMismatchSnafu, Result};
 use crate::flush::WriteBufferManagerRef;
+use crate::memtable::key_values::KeyValue;
 use crate::memtable::{
     AllocTracker, BoxedBatchIterator, KeyValues, Memtable, MemtableBuilder, MemtableId,
     MemtableRef, MemtableStats,
@@ -110,49 +111,75 @@ impl TimeSeriesMemtable {
     }

     /// Updates memtable stats.
-    fn update_stats(&self, request_size: usize, min: i64, max: i64) {
-        self.alloc_tracker.on_allocation(request_size);
+    fn update_stats(&self, stats: LocalStats) {
+        self.alloc_tracker.on_allocation(stats.allocated);

         loop {
             let current_min = self.min_timestamp.load(Ordering::Relaxed);
-            if min >= current_min {
+            if stats.min_ts >= current_min {
                 break;
             }

             let Err(updated) = self.min_timestamp.compare_exchange(
                 current_min,
-                min,
+                stats.min_ts,
                 Ordering::Relaxed,
                 Ordering::Relaxed,
             ) else {
                 break;
             };

-            if updated == min {
+            if updated == stats.min_ts {
                 break;
             }
         }

         loop {
             let current_max = self.max_timestamp.load(Ordering::Relaxed);
-            if max <= current_max {
+            if stats.max_ts <= current_max {
                 break;
             }

             let Err(updated) = self.max_timestamp.compare_exchange(
                 current_max,
-                max,
+                stats.max_ts,
                 Ordering::Relaxed,
                 Ordering::Relaxed,
             ) else {
                 break;
             };

-            if updated == max {
+            if updated == stats.max_ts {
                 break;
             }
         }
     }
+
+    fn write_key_value(&self, kv: KeyValue, stats: &mut LocalStats) -> Result<()> {
+        ensure!(
+            kv.num_primary_keys() == self.row_codec.num_fields(),
+            PrimaryKeyLengthMismatchSnafu {
+                expect: self.row_codec.num_fields(),
+                actual: kv.num_primary_keys()
+            }
+        );
+        let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
+        let fields = kv.fields().collect::<Vec<_>>();
+
+        stats.allocated += fields.iter().map(|v| v.data_size()).sum::<usize>();
+        let (series, series_allocated) = self.series_set.get_or_add_series(primary_key_encoded);
+        stats.allocated += series_allocated;
+
+        // safety: timestamp of kv must be both present and a valid timestamp value.
+        let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
+        stats.min_ts = stats.min_ts.min(ts);
+        stats.max_ts = stats.max_ts.max(ts);
+
+        let mut guard = series.write().unwrap();
+        guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), fields);
+
+        Ok(())
+    }
 }

 impl Debug for TimeSeriesMemtable {
@@ -167,43 +194,30 @@ impl Memtable for TimeSeriesMemtable {
     }

     fn write(&self, kvs: &KeyValues) -> Result<()> {
-        let mut allocated = 0;
-        let mut min_ts = i64::MAX;
-        let mut max_ts = i64::MIN;
+        let mut local_stats = LocalStats::default();

         for kv in kvs.iter() {
-            ensure!(
-                kv.num_primary_keys() == self.row_codec.num_fields(),
-                PrimaryKeyLengthMismatchSnafu {
-                    expect: self.row_codec.num_fields(),
-                    actual: kv.num_primary_keys()
-                }
-            );
-            let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
-            let fields = kv.fields().collect::<Vec<_>>();
-
-            allocated += fields.iter().map(|v| v.data_size()).sum::<usize>();
-            let (series, series_allocated) = self.series_set.get_or_add_series(primary_key_encoded);
-            allocated += series_allocated;
-
-            // safety: timestamp of kv must be both present and a valid timestamp value.
-            let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
-            min_ts = min_ts.min(ts);
-            max_ts = max_ts.max(ts);
-
-            let mut guard = series.write().unwrap();
-            guard.push(kv.timestamp(), kv.sequence(), kv.op_type(), fields);
+            self.write_key_value(kv, &mut local_stats)?;
         }
-        allocated += kvs.num_rows() * std::mem::size_of::<Timestamp>();
-        allocated += kvs.num_rows() * std::mem::size_of::<OpType>();
+        local_stats.allocated += kvs.num_rows() * std::mem::size_of::<Timestamp>();
+        local_stats.allocated += kvs.num_rows() * std::mem::size_of::<OpType>();

         // TODO(hl): this maybe inaccurate since for-iteration may return early.
         // We may lift the primary key length check out of Memtable::write
         // so that we can ensure writing to memtable will succeed.
-        self.update_stats(allocated, min_ts, max_ts);
+        self.update_stats(local_stats);
         Ok(())
     }

+    fn write_one(&self, key_value: KeyValue) -> Result<()> {
+        let mut local_stats = LocalStats::default();
+        let res = self.write_key_value(key_value, &mut local_stats);
+        local_stats.allocated += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();
+
+        self.update_stats(local_stats);
+        res
+    }
+
     fn iter(
         &self,
         projection: Option<&[ColumnId]>,
@@ -267,6 +281,22 @@ impl Memtable for TimeSeriesMemtable {
     }
 }

+struct LocalStats {
+    allocated: usize,
+    min_ts: i64,
+    max_ts: i64,
+}
+
+impl Default for LocalStats {
+    fn default() -> Self {
+        LocalStats {
+            allocated: 0,
+            min_ts: i64::MAX,
+            max_ts: i64::MIN,
+        }
+    }
+}
+
 type SeriesRwLockMap = RwLock<BTreeMap<Vec<u8>, Arc<RwLock<Series>>>>;

 struct SeriesSet {
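The compare-exchange loops in update_stats are the standard lock-free pattern for maintaining an atomic minimum or maximum under concurrent writers. A stripped-down sketch of the same pattern on a bare AtomicI64 (names are illustrative, not the crate's):

use std::sync::atomic::{AtomicI64, Ordering};

/// Lower `current` to `candidate` if the candidate is smaller, retrying
/// when another writer updates the value between the load and the CAS.
fn update_min(current: &AtomicI64, candidate: i64) {
    let mut observed = current.load(Ordering::Relaxed);
    while candidate < observed {
        match current.compare_exchange(observed, candidate, Ordering::Relaxed, Ordering::Relaxed) {
            // CAS succeeded; the minimum now holds our candidate.
            Ok(_) => break,
            // Someone else won the race; retry against the new value.
            Err(actual) => observed = actual,
        }
    }
}

fn main() {
    let min = AtomicI64::new(i64::MAX);
    update_min(&min, 42);
    update_min(&min, 100); // no-op: 100 is not smaller
    assert_eq!(42, min.load(Ordering::Relaxed));
}

The diff's version reaches the same fixed point: it stops either when the stored value is already at least as tight as the candidate or when its own CAS lands.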
@@ -20,26 +20,29 @@ use smallvec::SmallVec;
 use store_api::metadata::RegionMetadataRef;

 use crate::error::Result;
+use crate::memtable::time_partition::TimePartitionsRef;
 use crate::memtable::{MemtableId, MemtableRef};

+pub(crate) type SmallMemtableVec = SmallVec<[MemtableRef; 2]>;
+
 /// A version of current memtables in a region.
 #[derive(Debug, Clone)]
 pub(crate) struct MemtableVersion {
     /// Mutable memtable.
-    pub(crate) mutable: MemtableRef,
+    pub(crate) mutable: TimePartitionsRef,
     /// Immutable memtables.
     ///
     /// We only allow one flush job per region but if a flush job failed, then we
     /// might need to store more than one immutable memtable on the next time we
     /// flush the region.
-    immutables: SmallVec<[MemtableRef; 2]>,
+    immutables: SmallMemtableVec,
 }

 pub(crate) type MemtableVersionRef = Arc<MemtableVersion>;

 impl MemtableVersion {
     /// Returns a new [MemtableVersion] with specific mutable memtable.
-    pub(crate) fn new(mutable: MemtableRef) -> MemtableVersion {
+    pub(crate) fn new(mutable: TimePartitionsRef) -> MemtableVersion {
         MemtableVersion {
             mutable,
             immutables: SmallVec::new(),
@@ -53,8 +56,8 @@ impl MemtableVersion {

     /// Lists mutable and immutable memtables.
     pub(crate) fn list_memtables(&self) -> Vec<MemtableRef> {
-        let mut mems = Vec::with_capacity(self.immutables.len() + 1);
-        mems.push(self.mutable.clone());
+        let mut mems = Vec::with_capacity(self.immutables.len() + self.mutable.num_partitions());
+        self.mutable.list_memtables(&mut mems);
         mems.extend_from_slice(&self.immutables);
         mems
     }
@@ -76,15 +79,13 @@ impl MemtableVersion {
         // soft limit.
         self.mutable.freeze()?;
         // Fork the memtable.
-        let mutable = self.mutable.fork(self.next_memtable_id(), metadata);
+        let mutable = Arc::new(self.mutable.fork(metadata));

         // Pushes the mutable memtable to immutable list.
-        let immutables = self
-            .immutables
-            .iter()
-            .cloned()
-            .chain([self.mutable.clone()])
-            .collect();
+        let mut immutables =
+            SmallVec::with_capacity(self.immutables.len() + self.mutable.num_partitions());
+        self.mutable.list_memtables_to_small_vec(&mut immutables);
+        immutables.extend(self.immutables.iter().cloned());
         Ok(Some(MemtableVersion {
             mutable,
             immutables,
@@ -103,7 +104,7 @@ impl MemtableVersion {

     /// Returns the memory usage of the mutable memtable.
     pub(crate) fn mutable_usage(&self) -> usize {
-        self.mutable.stats().estimated_bytes
+        self.mutable.memory_usage()
     }

     /// Returns the memory usage of the immutable memtables.
@@ -121,9 +122,4 @@ impl MemtableVersion {
     pub(crate) fn is_empty(&self) -> bool {
         self.mutable.is_empty() && self.immutables.is_empty()
     }
-
-    /// Returns the next memtable id.
-    pub(crate) fn next_memtable_id(&self) -> MemtableId {
-        self.mutable.id() + 1
-    }
 }
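The freeze path above is a copy-on-write rotation: the current mutable partitions are frozen, appended to the immutable list, and a fresh mutable is installed, while concurrent readers keep whatever Arc'd version they already cloned. A minimal sketch of that rotation with a toy Version type standing in for MemtableVersion (illustrative only, not the crate's types):

use std::sync::{Arc, RwLock};

struct Version {
    mutable: Arc<Vec<i64>>,         // stand-in for the mutable partitions
    immutables: Vec<Arc<Vec<i64>>>, // stand-in for frozen memtables
}

fn freeze(current: &RwLock<Arc<Version>>) {
    let old = current.read().unwrap().clone();
    // Build the next version: the old mutable joins the immutable list.
    let mut immutables = old.immutables.clone();
    immutables.push(old.mutable.clone());
    let next = Arc::new(Version {
        mutable: Arc::new(Vec::new()),
        immutables,
    });
    // Swap atomically; readers still holding `old` are unaffected.
    *current.write().unwrap() = next;
}

fn main() {
    let v = RwLock::new(Arc::new(Version {
        mutable: Arc::new(vec![1, 2]),
        immutables: vec![],
    }));
    freeze(&v);
    let now = v.read().unwrap().clone();
    assert_eq!(1, now.immutables.len());
    assert!(now.mutable.is_empty());
}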
@@ -37,6 +37,7 @@ use crate::error::{
 };
 use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
 use crate::manifest::storage::manifest_compress_type;
+use crate::memtable::time_partition::TimePartitions;
 use crate::memtable::MemtableBuilderRef;
 use crate::region::options::RegionOptions;
 use crate::region::version::{VersionBuilder, VersionControl, VersionControlRef};
@@ -169,7 +170,15 @@ impl RegionOpener {
         RegionManifestManager::new(metadata.clone(), region_manifest_options).await?;

         // Initial memtable id is 0.
-        let mutable = self.memtable_builder.build(0, &metadata);
+        let part_duration = options.compaction.time_window();
+        let mutable = Arc::new(TimePartitions::new(
+            metadata.clone(),
+            self.memtable_builder,
+            0,
+            part_duration,
+        ));
+
+        debug!("Create region {} with options: {:?}", region_id, options);

         let version = VersionBuilder::new(metadata, mutable)
             .options(options)
@@ -249,6 +258,9 @@ impl RegionOpener {

         let region_id = self.region_id;
         let object_store = self.object_store(&region_options.storage)?.clone();
+
+        debug!("Open region {} with options: {:?}", region_id, self.options);
+
         let access_layer = Arc::new(AccessLayer::new(
             self.region_dir.clone(),
             object_store,
@@ -260,7 +272,13 @@ impl RegionOpener {
             self.cache_manager.clone(),
         ));
         // Initial memtable id is 0.
-        let mutable = self.memtable_builder.build(0, &metadata);
+        let part_duration = region_options.compaction.time_window();
+        let mutable = Arc::new(TimePartitions::new(
+            metadata.clone(),
+            self.memtable_builder.clone(),
+            0,
+            part_duration,
+        ));
         let version = VersionBuilder::new(metadata, mutable)
             .add_files(file_purger.clone(), manifest.files.values().cloned())
             .flushed_entry_id(manifest.flushed_entry_id)
@@ -13,6 +13,8 @@
 // limitations under the License.

 //! Options for a region.
+//!
+//! If we add options in this mod, we also need to modify [store_api::mito_engine_options].

 use std::collections::HashMap;
 use std::time::Duration;
@@ -92,6 +94,14 @@ pub enum CompactionOptions {
     Twcs(TwcsOptions),
 }

+impl CompactionOptions {
+    pub(crate) fn time_window(&self) -> Option<Duration> {
+        match self {
+            CompactionOptions::Twcs(opts) => opts.time_window,
+        }
+    }
+}
+
 impl Default for CompactionOptions {
     fn default() -> Self {
         Self::Twcs(TwcsOptions::default())
@@ -358,6 +368,7 @@ mod tests {
             ("compaction.type", "twcs"),
             ("storage", "S3"),
             ("index.inverted_index.ignore_column_ids", "1,2,3"),
+            ("index.inverted_index.segment_row_count", "512"),
             (
                 WAL_OPTIONS_KEY,
                 &serde_json::to_string(&wal_options).unwrap(),
@@ -376,7 +387,7 @@ mod tests {
             index_options: IndexOptions {
                 inverted_index: InvertedIndexOptions {
                     ignore_column_ids: vec![1, 2, 3],
-                    segment_row_count: 1024,
+                    segment_row_count: 512,
                 },
             },
         };
@@ -31,8 +31,9 @@ use store_api::storage::SequenceNumber;

 use crate::error::Result;
 use crate::manifest::action::RegionEdit;
+use crate::memtable::time_partition::{TimePartitions, TimePartitionsRef};
 use crate::memtable::version::{MemtableVersion, MemtableVersionRef};
-use crate::memtable::{MemtableBuilderRef, MemtableId, MemtableRef};
+use crate::memtable::{MemtableBuilderRef, MemtableId};
 use crate::region::options::RegionOptions;
 use crate::sst::file::FileMeta;
 use crate::sst::file_purger::FilePurgerRef;
@@ -122,8 +123,14 @@ impl VersionControl {
     /// Mark all opened files as deleted and set the delete marker in [VersionControlData]
     pub(crate) fn mark_dropped(&self, memtable_builder: &MemtableBuilderRef) {
         let version = self.current().version;
-        let new_mutable =
-            memtable_builder.build(version.memtables.next_memtable_id(), &version.metadata);
+        let part_duration = version.memtables.mutable.part_duration();
+        let next_memtable_id = version.memtables.mutable.next_memtable_id();
+        let new_mutable = Arc::new(TimePartitions::new(
+            version.metadata.clone(),
+            memtable_builder.clone(),
+            next_memtable_id,
+            part_duration,
+        ));

         let mut data = self.data.write().unwrap();
         data.is_dropped = true;
@@ -140,7 +147,14 @@ impl VersionControl {
     /// new schema. Memtables of the version must be empty.
     pub(crate) fn alter_schema(&self, metadata: RegionMetadataRef, builder: &MemtableBuilderRef) {
         let version = self.current().version;
-        let new_mutable = builder.build(version.memtables.next_memtable_id(), &metadata);
+        let part_duration = version.memtables.mutable.part_duration();
+        let next_memtable_id = version.memtables.mutable.next_memtable_id();
+        let new_mutable = Arc::new(TimePartitions::new(
+            metadata.clone(),
+            builder.clone(),
+            next_memtable_id,
+            part_duration,
+        ));
         debug_assert!(version.memtables.mutable.is_empty());
         debug_assert!(version.memtables.immutables().is_empty());
         let new_version = Arc::new(
@@ -163,8 +177,14 @@ impl VersionControl {
     ) {
         let version = self.current().version;

-        let new_mutable =
-            memtable_builder.build(version.memtables.next_memtable_id(), &version.metadata);
+        let part_duration = version.memtables.mutable.part_duration();
+        let next_memtable_id = version.memtables.mutable.next_memtable_id();
+        let new_mutable = Arc::new(TimePartitions::new(
+            version.metadata.clone(),
+            memtable_builder.clone(),
+            next_memtable_id,
+            part_duration,
+        ));
         let new_version = Arc::new(
             VersionBuilder::new(version.metadata.clone(), new_mutable)
                 .flushed_entry_id(truncated_entry_id)
@@ -242,7 +262,7 @@ pub(crate) struct VersionBuilder {

 impl VersionBuilder {
     /// Returns a new builder.
-    pub(crate) fn new(metadata: RegionMetadataRef, mutable: MemtableRef) -> Self {
+    pub(crate) fn new(metadata: RegionMetadataRef, mutable: TimePartitionsRef) -> Self {
         VersionBuilder {
             metadata,
             memtables: Arc::new(MemtableVersion::new(mutable)),
@@ -215,6 +215,61 @@ impl SortField {
             Decimal128, Decimal128
         )
     }
+
+    /// Skip deserializing this field, returns the length of it.
+    fn skip_deserialize(
+        &self,
+        bytes: &[u8],
+        deserializer: &mut Deserializer<&[u8]>,
+    ) -> Result<usize> {
+        let pos = deserializer.position();
+        if bytes[pos] == 0 {
+            deserializer.advance(1);
+            return Ok(1);
+        }
+
+        let to_skip = match &self.data_type {
+            ConcreteDataType::Boolean(_) => 2,
+            ConcreteDataType::Int8(_) | ConcreteDataType::UInt8(_) => 2,
+            ConcreteDataType::Int16(_) | ConcreteDataType::UInt16(_) => 3,
+            ConcreteDataType::Int32(_) | ConcreteDataType::UInt32(_) => 5,
+            ConcreteDataType::Int64(_) | ConcreteDataType::UInt64(_) => 9,
+            ConcreteDataType::Float32(_) => 5,
+            ConcreteDataType::Float64(_) => 9,
+            ConcreteDataType::Binary(_) => {
+                // Now the encoder encode binary as a list of bytes so we can't use
+                // skip bytes.
+                let pos_before = deserializer.position();
+                let mut current = pos_before + 1;
+                while bytes[current] == 1 {
+                    current += 2;
+                }
+                let to_skip = current - pos_before + 1;
+                deserializer.advance(to_skip);
+                return Ok(to_skip);
+            }
+            ConcreteDataType::String(_) => {
+                let pos_before = deserializer.position();
+                deserializer.advance(1);
+                deserializer
+                    .skip_bytes()
+                    .context(error::DeserializeFieldSnafu)?;
+                return Ok(deserializer.position() - pos_before);
+            }
+            ConcreteDataType::Date(_) => 5,
+            ConcreteDataType::DateTime(_) => 9,
+            ConcreteDataType::Timestamp(_) => 9, // We treat timestamp as Option<i64>
+            ConcreteDataType::Time(_) => 10,     // i64 and 1 byte time unit
+            ConcreteDataType::Duration(_) => 10,
+            ConcreteDataType::Interval(_) => 18,
+            ConcreteDataType::Decimal128(_) => 19,
+            ConcreteDataType::Null(_)
+            | ConcreteDataType::List(_)
+            | ConcreteDataType::Dictionary(_) => 0,
+        };
+        deserializer.advance(to_skip);
+        Ok(to_skip)
+    }
 }

 /// A memory-comparable row [Value] encoder/decoder.
@@ -236,6 +291,52 @@ impl McmpRowCodec {
     pub fn estimated_size(&self) -> usize {
         self.fields.iter().map(|f| f.estimated_size()).sum()
     }
+
+    /// Decode value at `pos` in `bytes`.
+    ///
+    /// The i-th element in offsets buffer is how many bytes to skip in order to read value at `pos`.
+    pub fn decode_value_at(
+        &self,
+        bytes: &[u8],
+        pos: usize,
+        offsets_buf: &mut Vec<usize>,
+    ) -> Result<Value> {
+        let mut deserializer = Deserializer::new(bytes);
+        if pos < offsets_buf.len() {
+            // We computed the offset before.
+            let to_skip = offsets_buf[pos];
+            deserializer.advance(to_skip);
+            return self.fields[pos].deserialize(&mut deserializer);
+        }
+
+        if offsets_buf.is_empty() {
+            let mut offset = 0;
+            // Skip values before `pos`.
+            for i in 0..pos {
+                // Offset to skip before reading value i.
+                offsets_buf.push(offset);
+                let skip = self.fields[i].skip_deserialize(bytes, &mut deserializer)?;
+                offset += skip;
+            }
+            // Offset to skip before reading this value.
+            offsets_buf.push(offset);
+        } else {
+            // Offsets are not enough.
+            let value_start = offsets_buf.len() - 1;
+            // Advances to decode value at `value_start`.
+            let mut offset = offsets_buf[value_start];
+            deserializer.advance(offset);
+            for i in value_start..pos {
+                // Skip value i.
+                let skip = self.fields[i].skip_deserialize(bytes, &mut deserializer)?;
+                // Offset for the value at i + 1.
+                offset += skip;
+                offsets_buf.push(offset);
+            }
+        }
+
+        self.fields[pos].deserialize(&mut deserializer)
+    }
 }

 impl RowCodec for McmpRowCodec {
@@ -274,7 +375,7 @@ impl RowCodec for McmpRowCodec {
 #[cfg(test)]
 mod tests {
     use common_base::bytes::StringBytes;
-    use common_time::Timestamp;
+    use common_time::{DateTime, Timestamp};
     use datatypes::value::Value;

     use super::*;
@@ -292,6 +393,18 @@ mod tests {
         let result = encoder.encode(value_ref.iter().cloned()).unwrap();
         let decoded = encoder.decode(&result).unwrap();
         assert_eq!(decoded, row);
+        let mut decoded = Vec::new();
+        let mut offsets = Vec::new();
+        // Iter two times to test offsets buffer.
+        for _ in 0..2 {
+            decoded.clear();
+            for i in 0..data_types.len() {
+                let value = encoder.decode_value_at(&result, i, &mut offsets).unwrap();
+                decoded.push(value);
+            }
+            assert_eq!(data_types.len(), offsets.len(), "offsets: {:?}", offsets);
+            assert_eq!(decoded, row);
+        }
     }

     #[test]
@@ -416,5 +529,53 @@ mod tests {
             ],
             vec![Value::Null, Value::Int64(43), Value::Boolean(true)],
         );
+
+        // All types.
+        check_encode_and_decode(
+            &[
+                ConcreteDataType::boolean_datatype(),
+                ConcreteDataType::int8_datatype(),
+                ConcreteDataType::uint8_datatype(),
+                ConcreteDataType::int16_datatype(),
+                ConcreteDataType::uint16_datatype(),
+                ConcreteDataType::int32_datatype(),
+                ConcreteDataType::uint32_datatype(),
+                ConcreteDataType::int64_datatype(),
+                ConcreteDataType::uint64_datatype(),
+                ConcreteDataType::float32_datatype(),
+                ConcreteDataType::float64_datatype(),
+                ConcreteDataType::binary_datatype(),
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::date_datatype(),
+                ConcreteDataType::datetime_datatype(),
+                ConcreteDataType::timestamp_millisecond_datatype(),
+                ConcreteDataType::time_millisecond_datatype(),
+                ConcreteDataType::duration_millisecond_datatype(),
+                ConcreteDataType::interval_month_day_nano_datatype(),
+                ConcreteDataType::decimal128_default_datatype(),
+            ],
+            vec![
+                Value::Boolean(true),
+                Value::Int8(8),
+                Value::UInt8(8),
+                Value::Int16(16),
+                Value::UInt16(16),
+                Value::Int32(32),
+                Value::UInt32(32),
+                Value::Int64(64),
+                Value::UInt64(64),
+                Value::Float32(1.0.into()),
+                Value::Float64(1.0.into()),
+                Value::Binary(b"hello"[..].into()),
+                Value::String("world".into()),
+                Value::Date(Date::new(10)),
+                Value::DateTime(DateTime::new(11)),
+                Value::Timestamp(Timestamp::new_millisecond(12)),
+                Value::Time(Time::new_millisecond(13)),
+                Value::Duration(Duration::new_millisecond(14)),
+                Value::Interval(Interval::from_month_day_nano(1, 1, 15)),
+                Value::Decimal128(Decimal128::from(16)),
+            ],
+        );
     }
 }
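The decode_value_at/skip_deserialize pair implements lazy field access over a memory-comparable key: instead of decoding the whole row, it skips fields and memoizes each field's byte offset so repeated lookups on the same key stay cheap. The idea in miniature, over a toy fixed-width encoding (1 presence byte, then 8 value bytes when present; the real codec is variable-width, so this is only an illustration):

/// Return the byte offset of field `pos`, extending the memoized
/// offsets only as far as needed (mirroring `offsets_buf` above).
fn offset_of(bytes: &[u8], pos: usize, offsets: &mut Vec<usize>) -> usize {
    if offsets.is_empty() {
        offsets.push(0);
    }
    while offsets.len() <= pos {
        let last = *offsets.last().unwrap();
        // Null marker (0) occupies 1 byte; a present value occupies 9.
        let width = if bytes[last] == 0 { 1 } else { 9 };
        offsets.push(last + width);
    }
    offsets[pos]
}

fn main() {
    // Row: [null, 7u64, null] in the toy encoding.
    let mut row = vec![0u8];
    row.push(1);
    row.extend_from_slice(&7u64.to_be_bytes());
    row.push(0);
    let mut offsets = Vec::new();
    assert_eq!(1, offset_of(&row, 1, &mut offsets));
    // The second lookup reuses the memoized offsets.
    assert_eq!(10, offset_of(&row, 2, &mut offsets));
}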
@@ -21,7 +21,9 @@ use api::v1::value::ValueData;
 use api::v1::{Row, Rows, SemanticType};
 use datatypes::arrow::array::UInt64Array;
 use datatypes::data_type::ConcreteDataType;
+use datatypes::scalars::ScalarVector;
 use datatypes::schema::ColumnSchema;
+use datatypes::vectors::TimestampMillisecondVector;
 use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder, RegionMetadataRef};
 use store_api::storage::{ColumnId, RegionId, SequenceNumber};
 use table::predicate::Predicate;
@@ -58,6 +60,10 @@ impl Memtable for EmptyMemtable {
         Ok(())
     }

+    fn write_one(&self, _key_value: KeyValue) -> Result<()> {
+        Ok(())
+    }
+
     fn iter(
         &self,
         _projection: Option<&[ColumnId]>,
@@ -219,25 +225,14 @@ pub(crate) fn extract_data_batch(batch: &DataBatch) -> (u16, Vec<(i64, u64)>) {

 /// Builds key values with timestamps (ms) and sequences for test.
 pub(crate) fn build_key_values_with_ts_seq_values(
-    schema: &RegionMetadataRef,
+    metadata: &RegionMetadataRef,
     k0: String,
     k1: u32,
     timestamps: impl Iterator<Item = i64>,
     values: impl Iterator<Item = Option<f64>>,
     sequence: SequenceNumber,
 ) -> KeyValues {
-    let column_schema = schema
-        .column_metadatas
-        .iter()
-        .map(|c| api::v1::ColumnSchema {
-            column_name: c.column_schema.name.clone(),
-            datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
-                .unwrap()
-                .datatype() as i32,
-            semantic_type: c.semantic_type as i32,
-            ..Default::default()
-        })
-        .collect();
+    let column_schema = region_metadata_to_row_schema(metadata);

     let rows = timestamps
         .zip(values)
@@ -269,7 +264,23 @@ pub(crate) fn build_key_values_with_ts_seq_values(
             rows,
         }),
     };
-    KeyValues::new(schema.as_ref(), mutation).unwrap()
+    KeyValues::new(metadata.as_ref(), mutation).unwrap()
 }

+/// Converts the region metadata to column schemas for a row.
+pub fn region_metadata_to_row_schema(metadata: &RegionMetadataRef) -> Vec<api::v1::ColumnSchema> {
+    metadata
+        .column_metadatas
+        .iter()
+        .map(|c| api::v1::ColumnSchema {
+            column_name: c.column_schema.name.clone(),
+            datatype: ColumnDataTypeWrapper::try_from(c.column_schema.data_type.clone())
+                .unwrap()
+                .datatype() as i32,
+            semantic_type: c.semantic_type as i32,
+            ..Default::default()
+        })
+        .collect()
+}
+
 /// Encode keys.
@@ -298,3 +309,20 @@ pub(crate) fn encode_key_by_kv(key_value: &KeyValue) -> Vec<u8> {
     ]);
     row_codec.encode(key_value.primary_keys()).unwrap()
 }
+
+/// Collects timestamps from the batch iter.
+pub(crate) fn collect_iter_timestamps(iter: BoxedBatchIterator) -> Vec<i64> {
+    iter.flat_map(|batch| {
+        batch
+            .unwrap()
+            .timestamps()
+            .as_any()
+            .downcast_ref::<TimestampMillisecondVector>()
+            .unwrap()
+            .iter_data()
+            .collect::<Vec<_>>()
+            .into_iter()
+    })
+    .map(|v| v.unwrap().0.value())
+    .collect()
+}
@@ -25,7 +25,7 @@ use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataBuilder}
 use store_api::storage::RegionId;

 use crate::manifest::action::RegionEdit;
-use crate::memtable::MemtableBuilder;
+use crate::memtable::time_partition::TimePartitions;
 use crate::region::version::{Version, VersionBuilder, VersionControl};
 use crate::sst::file::{FileId, FileMeta};
 use crate::sst::file_purger::FilePurgerRef;
@@ -101,7 +101,12 @@ impl VersionControlBuilder {

     pub(crate) fn build_version(&self) -> Version {
         let metadata = Arc::new(self.metadata.clone());
-        let mutable = self.memtable_builder.build(0, &metadata);
+        let mutable = Arc::new(TimePartitions::new(
+            metadata.clone(),
+            self.memtable_builder.clone(),
+            0,
+            None,
+        ));
         VersionBuilder::new(metadata, mutable)
             .add_files(self.file_purger.clone(), self.files.values().cloned())
             .build()
@@ -18,7 +18,7 @@ futures.workspace = true
 lazy_static.workspace = true
 md5 = "0.7"
 moka = { workspace = true, features = ["future"] }
-opendal = { version = "0.44", features = [
+opendal = { version = "0.45", features = [
     "layers-tracing",
 ] }
 prometheus.workspace = true
@@ -91,7 +91,8 @@ impl Deleter {
             .await?;

         let affected_rows = self.do_request(deletes, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+
+        Ok(Output::new_with_affected_rows(affected_rows))
     }

     pub async fn handle_table_delete(
@@ -111,7 +111,7 @@ impl Inserter {
             .await?;

         let affected_rows = self.do_request(inserts, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }

     /// Handle row inserts request with metric engine.
@@ -149,7 +149,7 @@ impl Inserter {
             .await?;

         let affected_rows = self.do_request(inserts, &ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }

     pub async fn handle_table_insert(
@@ -185,7 +185,7 @@ impl Inserter {
             .await?;

         let affected_rows = self.do_request(inserts, ctx).await?;
-        Ok(Output::AffectedRows(affected_rows as _))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }
 }

@@ -468,8 +468,6 @@ impl Inserter {
             &req.table_name,
         );

-        info!("Logical table `{table_ref}` does not exist, try creating table");
-
         let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
         let mut create_table_expr = build_create_table_expr(&table_ref, request_schema)?;
@@ -40,12 +40,13 @@ use query::plan::LogicalPlan;
 use query::QueryEngineRef;
 use session::context::QueryContextRef;
 use session::table_name::table_idents_to_full_name;
-use snafu::{OptionExt, ResultExt};
+use snafu::{ensure, OptionExt, ResultExt};
 use sql::statements::copy::{CopyDatabase, CopyDatabaseArgument, CopyTable, CopyTableArgument};
 use sql::statements::set_variables::SetVariables;
 use sql::statements::statement::Statement;
 use sql::statements::OptionMap;
 use sql::util::format_raw_object_name;
-use sqlparser::ast::{Expr, ObjectName, Value};
+use sqlparser::ast::{Expr, Ident, ObjectName, Value};
 use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest};
 use table::table_reference::TableReference;
 use table::TableRef;
@@ -122,11 +123,11 @@ impl StatementExecutor {
             CopyDirection::Export => self
                 .copy_table_to(req, query_ctx)
                 .await
-                .map(Output::AffectedRows),
+                .map(Output::new_with_affected_rows),
             CopyDirection::Import => self
                 .copy_table_from(req, query_ctx)
                 .await
-                .map(Output::AffectedRows),
+                .map(Output::new_with_affected_rows),
         }
     }

@@ -151,15 +152,15 @@ impl StatementExecutor {

             Statement::CreateTable(stmt) => {
                 let _ = self.create_table(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::CreateTableLike(stmt) => {
                 let _ = self.create_table_like(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::CreateExternalTable(stmt) => {
                 let _ = self.create_external_table(stmt, query_ctx).await?;
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::Alter(alter_table) => self.alter_table(alter_table, query_ctx).await,
             Statement::DropTable(stmt) => {
@@ -207,6 +208,22 @@ impl StatementExecutor {
                 let var_name = set_var.variable.to_string().to_uppercase();
                 match var_name.as_str() {
                     "TIMEZONE" | "TIME_ZONE" => set_timezone(set_var.value, query_ctx)?,
+
+                    // Some postgresql client app may submit a "SET bytea_output" stmt upon connection.
+                    // However, currently we lack the support for it (tracked in https://github.com/GreptimeTeam/greptimedb/issues/3438),
+                    // so we just ignore it here instead of returning an error to break the connection.
+                    // Since the "bytea_output" only determines the output format of binary values,
+                    // it won't cause much trouble if we do so.
+                    // TODO(#3438): Remove this temporary workaround after the feature is implemented.
+                    "BYTEA_OUTPUT" => (),
+
+                    // Same as "bytea_output", we just ignore it here.
+                    // Not harmful since it only relates to how date is viewed in client app's output.
+                    // The tracked issue is https://github.com/GreptimeTeam/greptimedb/issues/3442.
+                    // TODO(#3442): Remove this temporary workaround after the feature is implemented.
+                    "DATESTYLE" => (),
+
+                    "CLIENT_ENCODING" => validate_client_encoding(set_var)?,
                     _ => {
                         return NotSupportedSnafu {
                             feat: format!("Unsupported set variable {}", var_name),
@@ -214,7 +231,7 @@ impl StatementExecutor {
                         .fail()
                     }
                 }
-                Ok(Output::AffectedRows(0))
+                Ok(Output::new_with_affected_rows(0))
             }
             Statement::ShowVariables(show_variable) => self.show_variable(show_variable, query_ctx),
         }
@@ -257,6 +274,39 @@ impl StatementExecutor {
     }
 }

+fn validate_client_encoding(set: SetVariables) -> Result<()> {
+    let Some((encoding, [])) = set.value.split_first() else {
+        return InvalidSqlSnafu {
+            err_msg: "must provide one and only one client encoding value",
+        }
+        .fail();
+    };
+    let encoding = match encoding {
+        Expr::Value(Value::SingleQuotedString(x))
+        | Expr::Identifier(Ident {
+            value: x,
+            quote_style: _,
+        }) => x.to_uppercase(),
+        _ => {
+            return InvalidSqlSnafu {
+                err_msg: format!("client encoding must be a string, actual: {:?}", encoding),
+            }
+            .fail();
+        }
+    };
+    // For the sake of simplicity, we only support "UTF8" ("UNICODE" is the alias for it,
+    // see https://www.postgresql.org/docs/current/multibyte.html#MULTIBYTE-CHARSET-SUPPORTED).
+    // "UTF8" is universal and sufficient for almost all cases.
+    // GreptimeDB itself is always using "UTF8" as the internal encoding.
+    ensure!(
+        encoding == "UTF8" || encoding == "UNICODE",
+        NotSupportedSnafu {
+            feat: format!("client encoding of '{}'", encoding)
+        }
+    );
+    Ok(())
+}
+
 fn set_timezone(exprs: Vec<Expr>, ctx: QueryContextRef) -> Result<()> {
     let tz_expr = exprs.first().context(NotSupportedSnafu {
         feat: "No timezone find in set variable statement",
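validate_client_encoding leans on a slice pattern, split_first matching Some((first, [])), to insist on exactly one value: it rejects both zero and multiple values in a single pattern. The idiom in isolation (a generic sketch, not the executor code):

fn exactly_one<T>(values: &[T]) -> Option<&T> {
    // `[only]` matches slices of length one and nothing else.
    match values {
        [only] => Some(only),
        _ => None,
    }
}

fn main() {
    assert_eq!(Some(&"UTF8"), exactly_one(&["UTF8"]));
    assert_eq!(None, exactly_one::<&str>(&[]));
    assert_eq!(None, exactly_one(&["UTF8", "LATIN1"]));
}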
@@ -15,10 +15,10 @@
 use std::path::Path;
 use std::str::FromStr;

-use client::Output;
 use common_datasource::file_format::Format;
 use common_datasource::lister::{Lister, Source};
 use common_datasource::object_store::build_backend;
+use common_query::Output;
 use common_telemetry::{debug, error, info, tracing};
 use object_store::Entry;
 use regex::Regex;
@@ -96,7 +96,7 @@ impl StatementExecutor {
                 .await?;
             exported_rows += exported;
         }
-        Ok(Output::AffectedRows(exported_rows))
+        Ok(Output::new_with_affected_rows(exported_rows))
     }

     /// Imports data to database from a given location and returns total rows imported.
@@ -169,7 +169,7 @@ impl StatementExecutor {
                 }
             }
         }
-        Ok(Output::AffectedRows(rows_inserted))
+        Ok(Output::new_with_affected_rows(rows_inserted))
     }
 }
@@ -14,6 +14,7 @@

 use std::sync::Arc;

+use client::OutputData;
 use common_base::readable_size::ReadableSize;
 use common_datasource::file_format::csv::stream_to_csv;
 use common_datasource::file_format::json::stream_to_json;
@@ -21,7 +22,6 @@ use common_datasource::file_format::parquet::stream_to_parquet;
 use common_datasource::file_format::Format;
 use common_datasource::object_store::{build_backend, parse_url};
 use common_datasource::util::find_dir_and_filename;
-use common_query::Output;
 use common_recordbatch::adapter::DfRecordBatchStreamAdapter;
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::{debug, tracing};
@@ -134,9 +134,9 @@ impl StatementExecutor {
             .execute(LogicalPlan::DfPlan(plan), query_ctx)
             .await
             .context(ExecLogicalPlanSnafu)?;
-        let stream = match output {
-            Output::Stream(stream, _) => stream,
-            Output::RecordBatches(record_batches) => record_batches.as_stream(),
+        let stream = match output.data {
+            OutputData::Stream(stream) => stream,
+            OutputData::RecordBatches(record_batches) => record_batches.as_stream(),
             _ => unreachable!(),
         };
@@ -338,10 +338,10 @@ impl StatementExecutor {
                 .await
                 .context(error::InvalidateTableCacheSnafu)?;

-            Ok(Output::AffectedRows(0))
+            Ok(Output::new_with_affected_rows(0))
         } else if drop_if_exists {
             // DROP TABLE IF EXISTS meets table not found - ignored
-            Ok(Output::AffectedRows(0))
+            Ok(Output::new_with_affected_rows(0))
         } else {
             Err(TableNotFoundSnafu {
                 table_name: table_name.to_string(),
@@ -367,7 +367,7 @@ impl StatementExecutor {
         let table_id = table.table_info().table_id();
         self.truncate_table_procedure(&table_name, table_id).await?;

-        Ok(Output::AffectedRows(0))
+        Ok(Output::new_with_affected_rows(0))
     }

     fn verify_alter(
@@ -471,7 +471,7 @@ impl StatementExecutor {
             .await
             .context(error::InvalidateTableCacheSnafu)?;

-        Ok(Output::AffectedRows(0))
+        Ok(Output::new_with_affected_rows(0))
     }

     async fn create_table_procedure(
@@ -580,7 +580,7 @@ impl StatementExecutor {

         if exists {
             return if create_if_not_exists {
-                Ok(Output::AffectedRows(1))
+                Ok(Output::new_with_affected_rows(1))
             } else {
                 error::SchemaExistsSnafu { name: database }.fail()
             };
@@ -592,7 +592,7 @@ impl StatementExecutor {
             .await
             .context(TableMetadataManagerSnafu)?;

-        Ok(Output::AffectedRows(1))
+        Ok(Output::new_with_affected_rows(1))
     }
 }
@@ -429,7 +429,7 @@ mod test {
             ts_range,
             value_range,
             timestamps,
-            // that two `2.0` is because `duration_to_start` are shrunk to to
+            // that two `2.0` is because `duration_to_start` are shrunk to
            // `duration_to_zero`, and causes `duration_to_zero` less than
             // `extrapolation_threshold`.
             vec![2.0, 1.5, 1.5, 1.5, 2.0, 1.5, 1.5, 1.5],
@@ -28,7 +28,7 @@ use common_function::function::FunctionRef;
 use common_function::scalars::aggregate::AggregateFunctionMetaRef;
 use common_query::physical_plan::{DfPhysicalPlanAdapter, PhysicalPlan, PhysicalPlanAdapter};
 use common_query::prelude::ScalarUdf;
-use common_query::Output;
+use common_query::{Output, OutputData, OutputMeta};
 use common_recordbatch::adapter::RecordBatchStreamAdapter;
 use common_recordbatch::{EmptyRecordBatchStream, SendableRecordBatchStream};
 use common_telemetry::tracing;
@@ -90,9 +90,9 @@ impl DatafusionQueryEngine {
             optimized_physical_plan
         };

-        Ok(Output::Stream(
-            self.execute_stream(&ctx, &physical_plan)?,
-            Some(physical_plan),
+        Ok(Output::new(
+            OutputData::Stream(self.execute_stream(&ctx, &physical_plan)?),
+            OutputMeta::new_with_plan(physical_plan),
         ))
     }

@@ -121,9 +121,9 @@ impl DatafusionQueryEngine {
         let output = self
             .exec_query_plan(LogicalPlan::DfPlan((*dml.input).clone()), query_ctx.clone())
             .await?;
-        let mut stream = match output {
-            Output::RecordBatches(batches) => batches.as_stream(),
-            Output::Stream(stream, _) => stream,
+        let mut stream = match output.data {
+            OutputData::RecordBatches(batches) => batches.as_stream(),
+            OutputData::Stream(stream) => stream,
             _ => unreachable!(),
         };

@@ -148,7 +148,7 @@ impl DatafusionQueryEngine {
             };
             affected_rows += rows;
         }
-        Ok(Output::AffectedRows(affected_rows))
+        Ok(Output::new_with_affected_rows(affected_rows))
     }

     #[tracing::instrument(skip_all)]
@@ -471,7 +471,6 @@ mod tests {

     use catalog::RegisterTableRequest;
     use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, NUMBERS_TABLE_ID};
-    use common_query::Output;
     use common_recordbatch::util;
     use datafusion::prelude::{col, lit};
     use datatypes::prelude::ConcreteDataType;
@@ -534,8 +533,8 @@ mod tests {

         let output = engine.execute(plan, QueryContext::arc()).await.unwrap();

-        match output {
-            Output::Stream(recordbatch, _) => {
+        match output.data {
+            OutputData::Stream(recordbatch) => {
                 let numbers = util::collect(recordbatch).await.unwrap();
                 assert_eq!(1, numbers.len());
                 assert_eq!(numbers[0].num_columns(), 1);
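The mechanical Output::AffectedRows(n) → Output::new_with_affected_rows(n) rewrites across these files follow a refactor of Output from a plain enum into a struct pairing the payload with metadata (such as the physical plan). A hedged sketch of the shape implied by the call sites, not the exact definition in common_query:

struct Output {
    data: OutputData,
    meta: OutputMeta,
}

enum OutputData {
    AffectedRows(usize),
    // Stream / RecordBatches variants elided in this sketch.
}

#[derive(Default)]
struct OutputMeta {
    plan: Option<String>, // stand-in for the optional physical plan
}

impl Output {
    fn new(data: OutputData, meta: OutputMeta) -> Self {
        Output { data, meta }
    }

    // Convenience used throughout the diff: payload only, default meta.
    fn new_with_affected_rows(rows: usize) -> Self {
        Output::new(OutputData::AffectedRows(rows), OutputMeta::default())
    }
}

fn main() {
    let out = Output::new_with_affected_rows(3);
    assert!(matches!(out.data, OutputData::AffectedRows(3)));
    assert!(out.meta.plan.is_none());
}

This is why call sites now match on output.data instead of output directly.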
@@ -15,7 +15,8 @@
 use std::pin::Pin;
 use std::task::{Context, Poll};

-use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
+use common_recordbatch::adapter::RecordBatchMetrics;
+use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
 use datatypes::schema::SchemaRef;
 use futures::Stream;
 use futures_util::ready;
@@ -78,6 +79,14 @@ impl<F: FnOnce() + Unpin> RecordBatchStream for OnDone<F> {
     fn schema(&self) -> SchemaRef {
         self.stream.schema()
     }
+
+    fn output_ordering(&self) -> Option<&[OrderOption]> {
+        self.stream.output_ordering()
+    }
+
+    fn metrics(&self) -> Option<RecordBatchMetrics> {
+        self.stream.metrics()
+    }
 }

 impl<F: FnOnce() + Unpin> Stream for OnDone<F> {
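The point of the two new methods is that a wrapping stream must forward every metadata accessor, not just poll_next; otherwise wrapping silently erases ordering and metrics. A toy illustration of the delegation pattern with a local trait (stand-ins, not the common_recordbatch API):

trait MetaStream {
    fn ordering(&self) -> Option<&str>;
    fn metrics(&self) -> Option<u64>;
}

struct Inner;

impl MetaStream for Inner {
    fn ordering(&self) -> Option<&str> { Some("ts ASC") }
    fn metrics(&self) -> Option<u64> { Some(128) }
}

/// Wrapper that adds behavior (like OnDone's drop callback) but must
/// not hide the inner stream's metadata, so it forwards both accessors.
struct OnDoneLike<S> { inner: S }

impl<S: MetaStream> MetaStream for OnDoneLike<S> {
    fn ordering(&self) -> Option<&str> { self.inner.ordering() }
    fn metrics(&self) -> Option<u64> { self.inner.metrics() }
}

fn main() {
    let wrapped = OnDoneLike { inner: Inner };
    assert_eq!(Some("ts ASC"), wrapped.ordering());
    assert_eq!(Some(128), wrapped.metrics());
}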
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::collections::hash_map::Entry;
-use std::collections::HashMap;
+use std::any::Any;
+use std::cmp::Ordering;
+use std::collections::btree_map::Entry;
+use std::collections::{BTreeMap, HashMap};
 use std::fmt::Display;
 use std::pin::Pin;
 use std::sync::Arc;
@@ -21,8 +23,8 @@ use std::task::{Context, Poll};
 use std::time::Duration;

 use ahash::RandomState;
-use arrow::compute::{self, cast_with_options, CastOptions};
-use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit};
+use arrow::compute::{self, cast_with_options, CastOptions, SortColumn};
+use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions, TimeUnit};
 use common_query::DfPhysicalPlan;
 use common_recordbatch::DfSendableRecordBatchStream;
 use datafusion::common::{Result as DataFusionResult, Statistics};
@@ -35,10 +37,14 @@ use datafusion::physical_plan::{
     SendableRecordBatchStream,
 };
 use datafusion::physical_planner::create_physical_sort_expr;
-use datafusion_common::utils::get_arrayref_at_indices;
+use datafusion_common::utils::{get_arrayref_at_indices, get_row_at_idx};
 use datafusion_common::{DFField, DFSchema, DFSchemaRef, DataFusionError, ScalarValue};
-use datafusion_expr::utils::exprlist_to_fields;
-use datafusion_expr::{Accumulator, Expr, ExprSchemable, LogicalPlan, UserDefinedLogicalNodeCore};
+use datafusion_expr::utils::{exprlist_to_fields, COUNT_STAR_EXPANSION};
+use datafusion_expr::{
+    lit, Accumulator, AggregateFunction, Expr, ExprSchemable, LogicalPlan,
+    UserDefinedLogicalNodeCore,
+};
+use datafusion_physical_expr::aggregate::utils::down_cast_any_ref;
 use datafusion_physical_expr::expressions::create_aggregate_expr as create_aggr_expr;
 use datafusion_physical_expr::hash_utils::create_hashes;
 use datafusion_physical_expr::{
||||
|
||||
type Millisecond = <TimestampMillisecondType as ArrowPrimitiveType>::Native;
|
||||
|
||||
/// Implementation of `first_value`/`last_value`
|
||||
/// aggregate function adapted to range query
|
||||
#[derive(Debug)]
|
||||
struct RangeFirstListValue {
|
||||
/// calculate expr
|
||||
expr: Arc<dyn PhysicalExpr>,
|
||||
order_bys: Vec<PhysicalSortExpr>,
|
||||
}
|
||||
|
||||
impl RangeFirstListValue {
|
||||
pub fn new_aggregate_expr(
|
||||
expr: Arc<dyn PhysicalExpr>,
|
||||
order_bys: Vec<PhysicalSortExpr>,
|
||||
) -> Arc<dyn AggregateExpr> {
|
||||
Arc::new(Self { expr, order_bys })
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<dyn Any> for RangeFirstListValue {
|
||||
fn eq(&self, other: &dyn Any) -> bool {
|
||||
down_cast_any_ref(other)
|
||||
.downcast_ref::<Self>()
|
||||
.map(|x| self.expr.eq(&x.expr) && self.order_bys.iter().eq(x.order_bys.iter()))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl AggregateExpr for RangeFirstListValue {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn create_accumulator(&self) -> DataFusionResult<Box<dyn Accumulator>> {
|
||||
Ok(Box::new(RangeFirstListValueAcc::new(
|
||||
self.order_bys.iter().map(|order| order.options).collect(),
|
||||
)))
|
||||
}
|
||||
|
||||
fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
|
||||
let mut exprs: Vec<_> = self
|
||||
.order_bys
|
||||
.iter()
|
||||
.map(|order| order.expr.clone())
|
||||
.collect();
|
||||
exprs.push(self.expr.clone());
|
||||
exprs
|
||||
}
|
||||
|
||||
fn field(&self) -> DataFusionResult<Field> {
|
||||
unreachable!("AggregateExpr::field will not be used in range query")
|
||||
}
|
||||
|
||||
fn state_fields(&self) -> DataFusionResult<Vec<Field>> {
|
||||
unreachable!("AggregateExpr::state_fields will not be used in range query")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RangeFirstListValueAcc {
|
||||
pub sort_options: Vec<SortOptions>,
|
||||
pub sort_columns: Vec<ScalarValue>,
|
||||
pub data: Option<ScalarValue>,
|
||||
}
|
||||
|
||||
impl RangeFirstListValueAcc {
|
||||
pub fn new(sort_options: Vec<SortOptions>) -> Self {
|
||||
Self {
|
||||
sort_options,
|
||||
sort_columns: vec![],
|
||||
data: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Accumulator for RangeFirstListValueAcc {
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> DataFusionResult<()> {
|
||||
let columns: Vec<_> = values
|
||||
.iter()
|
||||
.zip(self.sort_options.iter())
|
||||
.map(|(v, s)| SortColumn {
|
||||
values: v.clone(),
|
||||
options: Some(*s),
|
||||
})
|
||||
.collect();
|
||||
// finding the Top1 problem with complexity O(n)
|
||||
let idx = compute::lexsort_to_indices(&columns, Some(1))?.value(0);
|
||||
let vs = get_row_at_idx(values, idx as usize)?;
|
||||
let need_update = self.data.is_none()
|
||||
|| vs
|
||||
.iter()
|
||||
.zip(self.sort_columns.iter())
|
||||
.zip(self.sort_options.iter())
|
||||
.find_map(|((new_value, old_value), sort_option)| {
|
||||
if new_value.is_null() && old_value.is_null() {
|
||||
None
|
||||
} else if sort_option.nulls_first
|
||||
&& (new_value.is_null() || old_value.is_null())
|
||||
{
|
||||
Some(new_value.is_null())
|
||||
} else {
|
||||
new_value.partial_cmp(old_value).map(|x| {
|
||||
(x == Ordering::Greater && sort_option.descending)
|
||||
|| (x == Ordering::Less && !sort_option.descending)
|
||||
})
|
||||
}
|
||||
})
|
||||
.unwrap_or(false);
|
||||
if need_update {
|
||||
self.sort_columns = vs;
|
||||
self.data = Some(ScalarValue::try_from_array(
|
||||
&values[self.sort_options.len()],
|
||||
idx as usize,
|
||||
)?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> DataFusionResult<ScalarValue> {
|
||||
Ok(self.data.clone().unwrap_or(ScalarValue::Null))
|
||||
}
|
||||
|
||||
fn size(&self) -> usize {
|
||||
std::mem::size_of_val(self)
|
||||
}
|
||||
|
||||
fn state(&self) -> DataFusionResult<Vec<ScalarValue>> {
|
||||
unreachable!("Accumulator::state will not be used in range query")
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, _states: &[ArrayRef]) -> DataFusionResult<()> {
|
||||
unreachable!("Accumulator::merge_batch will not be used in range query")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Hash, Clone)]
|
||||
pub enum Fill {
|
||||
Null,
|
||||
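The accumulator above gets first/last semantics from compute::lexsort_to_indices(&columns, Some(1)): a sort with limit 1 degenerates into a linear Top-1 scan, so each batch costs O(n) instead of O(n log n). A standalone sketch against the arrow crate (the array contents are made up for illustration):

use std::sync::Arc;

use arrow::array::Int64Array;
use arrow::compute::{lexsort_to_indices, SortColumn};
use arrow_schema::SortOptions;

fn main() -> arrow::error::Result<()> {
    let columns = vec![SortColumn {
        values: Arc::new(Int64Array::from(vec![3, 1, 2])),
        options: Some(SortOptions {
            descending: false, // ascending: limit 1 yields the minimum
            nulls_first: false,
        }),
    }];
    // Limit 1: only the single smallest row index is produced.
    let indices = lexsort_to_indices(&columns, Some(1))?;
    assert_eq!(1, indices.value(0)); // row 1 holds the smallest value
    Ok(())
}

Flipping descending (as the diff does for last_value) turns the same limited sort into a maximum lookup.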
@@ -78,14 +218,15 @@ impl Display for Fill {
 }

 impl Fill {
-    pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Self> {
+    pub fn try_from_str(value: &str, datatype: &DataType) -> DfResult<Option<Self>> {
         let s = value.to_uppercase();
         match s.as_str() {
-            "NULL" | "" => Ok(Self::Null),
-            "PREV" => Ok(Self::Prev),
+            "" => Ok(None),
+            "NULL" => Ok(Some(Self::Null)),
+            "PREV" => Ok(Some(Self::Prev)),
             "LINEAR" => {
                 if datatype.is_numeric() {
-                    Ok(Self::Linear)
+                    Ok(Some(Self::Linear))
                 } else {
                     Err(DataFusionError::Plan(format!(
                         "Use FILL LINEAR on Non-numeric DataType {}",
@@ -100,13 +241,17 @@ impl Fill {
                         s, err
                     ))
                 })
-                .map(Fill::Const),
+                .map(|x| Some(Fill::Const(x))),
         }
     }

     /// The input `data` contains data on a complete time series.
     /// If the filling strategy is `PREV` or `LINEAR`, caller must be ensured that the incoming `ts`&`data` is ascending time order.
     pub fn apply_fill_strategy(&self, ts: &[i64], data: &mut [ScalarValue]) -> DfResult<()> {
+        // No calculation need in `Fill::Null`
+        if matches!(self, Fill::Null) {
+            return Ok(());
+        }
         let len = data.len();
         if *self == Fill::Linear {
             return Self::fill_linear(ts, data);
@@ -114,7 +259,6 @@ impl Fill {
         for i in 0..len {
             if data[i].is_null() {
                 match self {
-                    Fill::Null => continue,
                     Fill::Prev => {
                         if i != 0 {
                             data[i] = data[i - 1].clone()
@@ -122,7 +266,8 @@ impl Fill {
                     }
                     // The calculation of linear interpolation is relatively complicated.
                     // `Self::fill_linear` is used to dispose `Fill::Linear`.
-                    Fill::Linear => unreachable!(),
+                    // No calculation need in `Fill::Null`
+                    Fill::Linear | Fill::Null => unreachable!(),
                     Fill::Const(v) => data[i] = v.clone(),
                 }
             }
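Concretely, apply_fill_strategy patches null slots after aggregation: PREV copies the previous slot, CONST writes a constant, LINEAR interpolates, and the new early return makes NULL a no-op. A toy rendering of the PREV case over Option<f64> slots (ScalarValue is swapped out for simplicity):

/// PREV fill: each null slot inherits the previous slot's value;
/// a leading null stays null, matching the `i != 0` guard above.
fn fill_prev(data: &mut [Option<f64>]) {
    for i in 1..data.len() {
        if data[i].is_none() {
            data[i] = data[i - 1];
        }
    }
}

fn main() {
    let mut series = vec![None, Some(1.0), None, None, Some(4.0)];
    fill_prev(&mut series);
    assert_eq!(vec![None, Some(1.0), Some(1.0), Some(1.0), Some(4.0)], series);
}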
@@ -219,12 +364,12 @@ fn linear_interpolation(
|
||||
|
||||
#[derive(Eq, Clone, Debug)]
|
||||
pub struct RangeFn {
|
||||
/// with format like `max(a) RANGE 300s FILL NULL`
|
||||
/// with format like `max(a) RANGE 300s [FILL NULL]`
|
||||
pub name: String,
|
||||
pub data_type: DataType,
|
||||
pub expr: Expr,
|
||||
pub range: Duration,
|
||||
pub fill: Fill,
|
||||
pub fill: Option<Fill>,
|
||||
/// If the `FIll` strategy is `Linear` and the output is an integer,
|
||||
/// it is possible to calculate a floating point number.
|
||||
/// So for `FILL==LINEAR`, the entire data will be implicitly converted to Float type
|
||||
@@ -271,6 +416,7 @@ pub struct RangeSelect {
|
||||
pub align: Duration,
|
||||
pub align_to: i64,
|
||||
pub time_index: String,
|
||||
pub time_expr: Expr,
|
||||
pub by: Vec<Expr>,
|
||||
pub schema: DFSchemaRef,
|
||||
pub by_schema: DFSchemaRef,
|
||||
@@ -324,7 +470,7 @@ impl RangeSelect {
|
||||
name,
|
||||
data_type.clone(),
|
||||
// Only when data fill with Const option, the data can't be null
|
||||
!matches!(fill, Fill::Const(..)),
|
||||
!matches!(fill, Some(Fill::Const(..))),
|
||||
))
|
||||
},
|
||||
)
|
||||
@@ -382,6 +528,7 @@ impl RangeSelect {
|
||||
align,
|
||||
align_to,
|
||||
time_index: time_index_name,
|
||||
time_expr: time_index,
|
||||
schema,
|
||||
by_schema,
|
||||
by,
|
||||
@@ -440,6 +587,7 @@ impl UserDefinedLogicalNodeCore for RangeSelect {
|
||||
range_expr: self.range_expr.clone(),
|
||||
input: Arc::new(inputs[0].clone()),
|
||||
time_index: self.time_index.clone(),
|
||||
time_expr: self.time_expr.clone(),
|
||||
schema: self.schema.clone(),
|
||||
by: self.by.clone(),
|
||||
by_schema: self.by_schema.clone(),
|
||||
@@ -452,6 +600,7 @@ impl UserDefinedLogicalNodeCore for RangeSelect {
|
||||
impl RangeSelect {
|
||||
fn create_physical_expr_list(
|
||||
&self,
|
||||
is_count_aggr: bool,
|
||||
exprs: &[Expr],
|
||||
df_schema: &Arc<DFSchema>,
|
||||
schema: &Schema,
|
||||
@@ -459,7 +608,20 @@ impl RangeSelect {
|
||||
) -> DfResult<Vec<Arc<dyn PhysicalExpr>>> {
|
||||
exprs
|
||||
.iter()
|
||||
.map(|by| create_physical_expr(by, df_schema, schema, session_state.execution_props()))
|
||||
.map(|e| match e {
|
||||
// `count(*)` will be rewritten by `CountWildcardRule` into `count(1)` when optimizing logical plan.
|
||||
// The modification occurs after range plan rewrite.
|
||||
// At this time, aggregate plan has been replaced by a custom range plan,
|
||||
// so `CountWildcardRule` has not been applied.
|
||||
// We manually modify it when creating the physical plan.
|
||||
Expr::Wildcard if is_count_aggr => create_physical_expr(
|
||||
&lit(COUNT_STAR_EXPANSION),
|
||||
df_schema,
|
||||
schema,
|
||||
session_state.execution_props(),
|
||||
),
|
||||
_ => create_physical_expr(e, df_schema, schema, session_state.execution_props()),
|
||||
})
|
||||
.collect::<DfResult<Vec<_>>>()
|
||||
}
|
||||
|
||||
@@ -488,6 +650,72 @@ impl RangeSelect {
             .iter()
             .map(|range_fn| {
                 let expr = match &range_fn.expr {
+                    Expr::AggregateFunction(aggr)
+                        if aggr.fun == AggregateFunction::FirstValue
+                            || aggr.fun == AggregateFunction::LastValue =>
+                    {
+                        // Because we only need the first_value/last_value, sorting the
+                        // entire batch would cost O(n log n). Instead we sort with limit 1,
+                        // so the algorithm degenerates into a Top-1 scan with complexity O(n).
+                        // We need to reverse the sort order for last_value so that
+                        // limit 1 picks the correct row.
+                        let order_by = if let Some(exprs) = &aggr.order_by {
+                            exprs
+                                .iter()
+                                .map(|x| {
+                                    create_physical_sort_expr(
+                                        x,
+                                        input_dfschema,
+                                        &input_schema,
+                                        session_state.execution_props(),
+                                    )
+                                    .map(|expr| {
+                                        // reverse the last_value sort
+                                        if aggr.fun == AggregateFunction::LastValue {
+                                            PhysicalSortExpr {
+                                                expr: expr.expr,
+                                                options: SortOptions {
+                                                    descending: !expr.options.descending,
+                                                    nulls_first: !expr.options.nulls_first,
+                                                },
+                                            }
+                                        } else {
+                                            expr
+                                        }
+                                    })
+                                })
+                                .collect::<DfResult<Vec<_>>>()?
+                        } else {
+                            // if the user does not specify ORDER BY, the time index is used as the default ordering
+                            let time_index = create_physical_expr(
+                                &self.time_expr,
+                                input_dfschema,
+                                &input_schema,
+                                session_state.execution_props(),
+                            )?;
+                            vec![PhysicalSortExpr {
+                                expr: time_index,
+                                options: SortOptions {
+                                    descending: aggr.fun == AggregateFunction::LastValue,
+                                    nulls_first: false,
+                                },
+                            }]
+                        };
+                        let arg = self.create_physical_expr_list(
+                            false,
+                            &aggr.args,
+                            input_dfschema,
+                            &input_schema,
+                            session_state,
+                        )?;
+                        // first_value/last_value takes exactly one parameter, and that
+                        // parameter has already been checked by DataFusion in the logical
+                        // plan stage, so we can safely take the single element here.
+                        Ok(RangeFirstListValue::new_aggregate_expr(
+                            arg[0].clone(),
+                            order_by,
+                        ))
+                    }
                     Expr::AggregateFunction(aggr) => {
                         let order_by = if let Some(exprs) = &aggr.order_by {
                             exprs
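The Top-1 trick above can be illustrated in isolation: last_value under a given ordering is first_value under the fully reversed ordering, so a single O(n) scan replaces an O(n log n) sort. A sketch with toy types (this SortOptions mirrors the shape of arrow's struct; first_value_by_key is a hypothetical helper, not the crate's code):

#[derive(Clone, Copy, Debug)]
struct SortOptions {
    descending: bool,
    nulls_first: bool,
}

// Reversing both flags turns an ordering into its mirror image.
fn reverse(opts: SortOptions) -> SortOptions {
    SortOptions {
        descending: !opts.descending,
        nulls_first: !opts.nulls_first,
    }
}

// first_value == the row that sorts first; one linear pass suffices.
fn first_value_by_key(data: &[(i64, f64)], opts: SortOptions) -> Option<f64> {
    data.iter()
        .min_by_key(|(key, _)| if opts.descending { -key } else { *key })
        .map(|(_, v)| *v)
}

fn main() {
    let data = [(1, 10.0), (3, 30.0), (2, 20.0)];
    let asc = SortOptions { descending: false, nulls_first: false };
    assert_eq!(first_value_by_key(&data, asc), Some(10.0));
    // last_value under `asc` equals first_value under the reversed options.
    assert_eq!(first_value_by_key(&data, reverse(asc)), Some(30.0));
}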
@@ -508,6 +736,7 @@ impl RangeSelect {
                             &aggr.fun,
                             false,
                             &self.create_physical_expr_list(
+                                aggr.fun == AggregateFunction::Count,
                                 &aggr.args,
                                 input_dfschema,
                                 &input_schema,
@@ -523,6 +752,7 @@ impl RangeSelect {
                         let expr = create_aggr_udf_expr(
                             &aggr_udf.fun,
                             &self.create_physical_expr_list(
+                                false,
                                 &aggr_udf.args,
                                 input_dfschema,
                                 &input_schema,
@@ -564,6 +794,7 @@ impl RangeSelect {
             align: self.align.as_millis() as Millisecond,
             align_to: self.align_to,
             by: self.create_physical_expr_list(
+                false,
                 &self.by,
                 input_dfschema,
                 &input_schema,
@@ -584,10 +815,26 @@ struct RangeFnExec {
     pub expr: Arc<dyn AggregateExpr>,
     pub args: Vec<Arc<dyn PhysicalExpr>>,
     pub range: Millisecond,
-    pub fill: Fill,
+    pub fill: Option<Fill>,
     pub need_cast: Option<DataType>,
 }

+impl Display for RangeFnExec {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(fill) = &self.fill {
+            write!(
+                f,
+                "{} RANGE {}s FILL {}",
+                self.expr.name(),
+                self.range / 1000,
+                fill
+            )
+        } else {
+            write!(f, "{} RANGE {}s", self.expr.name(), self.range / 1000)
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct RangeSelectExec {
     input: Arc<dyn ExecutionPlan>,
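With fill now optional, the rendered plan only carries a FILL clause when one was requested: a RangeFnExec computing MIN(value) over a 5000 ms range displays as MIN(value) RANGE 5s FILL NULL when fill is Some(Fill::Null) (assuming Fill::Null renders as NULL), and as just MIN(value) RANGE 5s when fill is None.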
@@ -608,18 +855,8 @@ impl DisplayAs for RangeSelectExec {
         match t {
             DisplayFormatType::Default | DisplayFormatType::Verbose => {
                 write!(f, "RangeSelectExec: ")?;
-                let range_expr_strs: Vec<String> = self
-                    .range_exec
-                    .iter()
-                    .map(|e| {
-                        format!(
-                            "{} RANGE {}s FILL {}",
-                            e.expr.name(),
-                            e.range / 1000,
-                            e.fill
-                        )
-                    })
-                    .collect();
+                let range_expr_strs: Vec<String> =
+                    self.range_exec.iter().map(RangeFnExec::to_string).collect();
                 let by: Vec<String> = self.by.iter().map(|e| e.to_string()).collect();
                 write!(
                     f,
@@ -713,7 +950,7 @@ impl ExecutionPlan for RangeSelectExec {
             by: self.by.clone(),
             series_map: HashMap::new(),
             exec_state: ExecutionState::ReadingInput,
-            output_num_rows: 0,
+            num_not_null_rows: 0,
             row_converter,
             modify_map: HashMap::new(),
             metric: baseline_metric,
@@ -753,8 +990,8 @@ struct RangeSelectStream {
     /// value: `[row_ids]`
     /// It is used to record the data that needs to be aggregated in each time slot during the data update process
     modify_map: HashMap<(u64, Millisecond), Vec<u32>>,
-    /// The number of rows in the final output
-    output_num_rows: usize,
+    /// The number of not-null rows in the final output
+    num_not_null_rows: usize,
     metric: BaselineMetrics,
     schema_project: Option<Vec<usize>>,
     schema_before_project: SchemaRef,
@@ -766,7 +1003,7 @@ struct SeriesState {
     row: OwnedRow,
     /// key: align_ts
     /// value: a vector; each element is a range_fn, following the order of `range_exec`
-    align_ts_accumulator: HashMap<Millisecond, Vec<Box<dyn Accumulator>>>,
+    align_ts_accumulator: BTreeMap<Millisecond, Vec<Box<dyn Accumulator>>>,
 }

 /// Use `align_to` as time origin.
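A sketch of the alignment arithmetic this doc comment implies, assuming the usual floor-division bucketing (an illustration, not the crate's exact code):

// Map a raw timestamp into the lower bound of its `align`-sized slot,
// anchored at `align_to`; div_euclid keeps pre-origin timestamps in the
// correct (lower) slot instead of rounding toward zero.
fn align_timestamp(ts: i64, align: i64, align_to: i64) -> i64 {
    (ts - align_to).div_euclid(align) * align + align_to
}

fn main() {
    // 5s slots anchored at 0: 12_345 ms falls into the 10_000 ms slot.
    assert_eq!(align_timestamp(12_345, 5_000, 0), 10_000);
    // A timestamp before the origin still maps to its slot's lower bound.
    assert_eq!(align_timestamp(-1, 5_000, 0), -5_000);
}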
@@ -882,7 +1119,7 @@ impl RangeSelectStream {
                 let accumulators_map =
                     self.series_map.entry(*hash).or_insert_with(|| SeriesState {
                         row: by_rows.row(*row as usize).owned(),
-                        align_ts_accumulator: HashMap::new(),
+                        align_ts_accumulator: BTreeMap::new(),
                     });
                 match accumulators_map.align_ts_accumulator.entry(*ts) {
                     Entry::Occupied(mut e) => {
@@ -890,7 +1127,7 @@ impl RangeSelectStream {
                         accumulators[i].update_batch(&sliced_arrays)
                     }
                     Entry::Vacant(e) => {
-                        self.output_num_rows += 1;
+                        self.num_not_null_rows += 1;
                         let mut accumulators = self
                             .range_exec
                             .iter()
@@ -915,29 +1152,47 @@ impl RangeSelectStream {
         // 1 for time index column
         let mut columns: Vec<Arc<dyn Array>> =
             Vec::with_capacity(1 + self.range_exec.len() + self.by.len());
-        let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.output_num_rows);
-        let mut all_scalar = vec![Vec::with_capacity(self.output_num_rows); self.range_exec.len()];
-        let mut by_rows = Vec::with_capacity(self.output_num_rows);
+        let mut ts_builder = TimestampMillisecondBuilder::with_capacity(self.num_not_null_rows);
+        let mut all_scalar =
+            vec![Vec::with_capacity(self.num_not_null_rows); self.range_exec.len()];
+        let mut by_rows = Vec::with_capacity(self.num_not_null_rows);
         let mut start_index = 0;
-        // RangePlan is calculated on a row basis. If a column uses the PREV or LINEAR filling strategy,
-        // we must arrange the data in the entire data row to determine the NULL filling value.
-        let need_sort_output = self
-            .range_exec
-            .iter()
-            .any(|range| range.fill == Fill::Linear || range.fill == Fill::Prev);
+        // If any range expr needs fill, we must fill both the missing align_ts slots and the null values.
+        let need_fill_output = self.range_exec.iter().any(|range| range.fill.is_some());
+        // The padding value for each accumulator
+        let padding_values = self
+            .range_exec
+            .iter()
+            .map(|e| e.expr.create_accumulator()?.evaluate())
+            .collect::<DfResult<Vec<_>>>()?;
         for SeriesState {
             row,
             align_ts_accumulator,
         } in self.series_map.values()
         {
-            // collect data on time series
-            let mut align_ts = align_ts_accumulator.keys().copied().collect::<Vec<_>>();
-            if need_sort_output {
-                align_ts.sort();
-            }
+            // skip empty time series
+            if align_ts_accumulator.is_empty() {
+                continue;
+            }
+            // find the first and last align_ts
+            let begin_ts = *align_ts_accumulator.first_key_value().unwrap().0;
+            let end_ts = *align_ts_accumulator.last_key_value().unwrap().0;
+            let align_ts = if need_fill_output {
+                // fill in the empty align_ts slots that have no data
+                (begin_ts..=end_ts).step_by(self.align as usize).collect()
+            } else {
+                align_ts_accumulator.keys().copied().collect::<Vec<_>>()
+            };
             for ts in &align_ts {
-                for (i, accumulator) in align_ts_accumulator.get(ts).unwrap().iter().enumerate() {
-                    all_scalar[i].push(accumulator.evaluate()?);
-                }
+                if let Some(slot) = align_ts_accumulator.get(ts) {
+                    for (column, acc) in all_scalar.iter_mut().zip(slot.iter()) {
+                        column.push(acc.evaluate()?);
+                    }
+                } else {
+                    // fill null into the empty time slots
+                    for (column, padding) in all_scalar.iter_mut().zip(padding_values.iter()) {
+                        column.push(padding.clone())
+                    }
+                }
             }
             ts_builder.append_slice(&align_ts);
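The BTreeMap switch above is what makes this loop work: ordered keys give the first and last align_ts directly, and the missing slots in between can be synthesized by stepping align milliseconds at a time. A toy sketch of that expansion (expand_slots is hypothetical, and a plain Option<f64> padding stands in for the accumulator's default evaluation):

use std::collections::BTreeMap;

fn expand_slots(
    series: &BTreeMap<i64, f64>,
    align: i64,
    fill: Option<f64>,
) -> Vec<(i64, Option<f64>)> {
    // Skip empty time series.
    let (Some((&begin, _)), Some((&end, _))) =
        (series.first_key_value(), series.last_key_value())
    else {
        return Vec::new();
    };
    if fill.is_none() {
        // Without FILL, emit only the slots that actually hold data.
        return series.iter().map(|(&ts, &v)| (ts, Some(v))).collect();
    }
    // With FILL, synthesize every slot from begin to end, `align` ms apart.
    (begin..=end)
        .step_by(align as usize)
        .map(|ts| (ts, series.get(&ts).copied().or(fill)))
        .collect()
}

fn main() {
    let series = BTreeMap::from([(0, 1.0), (15_000, 4.0)]);
    // Slots 0, 5_000, 10_000, 15_000; the middle two carry the padding value.
    let rows = expand_slots(&series, 5_000, Some(0.0));
    assert_eq!(rows.len(), 4);
    assert_eq!(rows[1], (5_000, Some(0.0)));
}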
@@ -950,14 +1205,16 @@ impl RangeSelectStream {
         ) in self.range_exec.iter().enumerate()
         {
             let time_series_data =
-                &mut all_scalar[i][start_index..start_index + align_ts_accumulator.len()];
+                &mut all_scalar[i][start_index..start_index + align_ts.len()];
             if let Some(data_type) = need_cast {
                 cast_scalar_values(time_series_data, data_type)?;
             }
-            fill.apply_fill_strategy(&align_ts, time_series_data)?;
+            if let Some(fill) = fill {
+                fill.apply_fill_strategy(&align_ts, time_series_data)?;
+            }
         }
-        by_rows.resize(by_rows.len() + align_ts_accumulator.len(), row.row());
-        start_index += align_ts_accumulator.len();
+        by_rows.resize(by_rows.len() + align_ts.len(), row.row());
+        start_index += align_ts.len();
     }
     for column_scalar in all_scalar {
         columns.push(ScalarValue::iter_to_array(column_scalar)?);
@@ -1078,7 +1335,7 @@ mod test {

     const TIME_INDEX_COLUMN: &str = "timestamp";

-    fn prepare_test_data(is_float: bool) -> MemoryExec {
+    fn prepare_test_data(is_float: bool, is_gap: bool) -> MemoryExec {
         let schema = Arc::new(Schema::new(vec![
             Field::new(TIME_INDEX_COLUMN, TimestampMillisecondType::DATA_TYPE, true),
             Field::new(
@@ -1092,16 +1349,23 @@ mod test {
             ),
             Field::new("host", DataType::Utf8, true),
         ]));
-        let timestamp_column: Arc<dyn Array> = Arc::new(TimestampMillisecondArray::from(vec![
-            0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
-            0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
-        ])) as _;
-        let mut host = vec!["host1"; 5];
-        host.extend(vec!["host2"; 5]);
-        let value_column: Arc<dyn Array> = if is_float {
-            Arc::new(nullable_array!(Float64;
-                0.0, null, 1.0, null, 2.0, // data for host 1
-                3.0, null, 4.0, null, 5.0 // data for host 2
+        let timestamp_column: Arc<dyn Array> = if !is_gap {
+            Arc::new(TimestampMillisecondArray::from(vec![
+                0, 5_000, 10_000, 15_000, 20_000, // host 1 every 5s
+                0, 5_000, 10_000, 15_000, 20_000, // host 2 every 5s
+            ])) as _
+        } else {
+            Arc::new(TimestampMillisecondArray::from(vec![
+                0, 15_000, // host 1 every 5s, missing data at 5_000 and 10_000
+                0, 15_000, // host 2 every 5s, missing data at 5_000 and 10_000
+            ])) as _
+        };
+        let mut host = vec!["host1"; timestamp_column.len() / 2];
+        host.extend(vec!["host2"; timestamp_column.len() / 2]);
+        let mut value_column: Arc<dyn Array> = if is_gap {
+            Arc::new(nullable_array!(Int64;
+                0, 6, // data for host 1
+                6, 12 // data for host 2
+            )) as _
+        } else {
+            Arc::new(nullable_array!(Int64;
@@ -1109,6 +1373,11 @@ mod test {
                 3, null, 4, null, 5 // data for host 2
             )) as _
         };
+        if is_float {
+            value_column =
+                cast_with_options(&value_column, &DataType::Float64, &CastOptions::default())
+                    .unwrap();
+        }
         let host_column: Arc<dyn Array> = Arc::new(StringArray::from(host)) as _;
         let data = RecordBatch::try_new(
             schema.clone(),
@@ -1123,8 +1392,9 @@ mod test {
         range1: Millisecond,
         range2: Millisecond,
         align: Millisecond,
-        fill: Fill,
+        fill: Option<Fill>,
         is_float: bool,
+        is_gap: bool,
         expected: String,
     ) {
         let data_type = if is_float {
@@ -1132,13 +1402,13 @@ mod test {
         } else {
             DataType::Int64
         };
-        let (need_cast, schema_data_type) = if !is_float && fill == Fill::Linear {
+        let (need_cast, schema_data_type) = if !is_float && matches!(fill, Some(Fill::Linear)) {
             // data_type = DataType::Float64;
             (Some(DataType::Float64), DataType::Float64)
         } else {
             (None, data_type.clone())
         };
-        let memory_exec = Arc::new(prepare_test_data(is_float));
+        let memory_exec = Arc::new(prepare_test_data(is_float, is_gap));
         let schema = Arc::new(Schema::new(vec![
             Field::new("MIN(value)", schema_data_type.clone(), true),
             Field::new("MAX(value)", schema_data_type, true),
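The Float64 cast above exists because linear interpolation between integer samples generally produces non-integer values. A sketch of the interpolation FILL LINEAR implies (linear_fill is a hypothetical helper, not the crate's code):

// Interpolate linearly between the previous and next known points.
fn linear_fill(prev: (i64, f64), next: (i64, f64), ts: i64) -> f64 {
    let (t0, v0) = prev;
    let (t1, v1) = next;
    v0 + (v1 - v0) * (ts - t0) as f64 / (t1 - t0) as f64
}

fn main() {
    // Integer samples 0 and 5 at 0s and 20s interpolate to 1.25 at 5s:
    // not representable as an i64, hence the cast to Float64.
    assert_eq!(linear_fill((0, 0.0), (20_000, 5.0), 5_000), 1.25);
}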
@@ -1223,7 +1493,16 @@ mod test {
             \n| 3.0        | 3.0        | 1970-01-01T00:00:00 | host2 |\
             \n+------------+------------+---------------------+-------+",
         );
-        do_range_select_test(10_000, 10_000, 1_000_000, Fill::Null, true, expected).await;
+        do_range_select_test(
+            10_000,
+            10_000,
+            1_000_000,
+            Some(Fill::Null),
+            true,
+            false,
+            expected,
+        )
+        .await;
     }

     #[tokio::test]
@@ -1246,7 +1525,16 @@ mod test {
             \n| 5.0        | 5.0        | 1970-01-01T00:00:20 | host2 |\
             \n+------------+------------+---------------------+-------+",
         );
-        do_range_select_test(10_000, 5_000, 5_000, Fill::Null, true, expected).await;
+        do_range_select_test(
+            10_000,
+            5_000,
+            5_000,
+            Some(Fill::Null),
+            true,
+            false,
+            expected,
+        )
+        .await;
     }

     #[tokio::test]
@@ -1269,7 +1557,16 @@ mod test {
             \n| 5.0        | 5.0        | 1970-01-01T00:00:20 | host2 |\
             \n+------------+------------+---------------------+-------+",
         );
-        do_range_select_test(10_000, 5_000, 5_000, Fill::Prev, true, expected).await;
+        do_range_select_test(
+            10_000,
+            5_000,
+            5_000,
+            Some(Fill::Prev),
+            true,
+            false,
+            expected,
+        )
+        .await;
     }

     #[tokio::test]
@@ -1292,7 +1589,16 @@ mod test {
             \n| 5.0        | 5.0        | 1970-01-01T00:00:20 | host2 |\
             \n+------------+------------+---------------------+-------+",
         );
-        do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, true, expected).await;
+        do_range_select_test(
+            10_000,
+            5_000,
+            5_000,
+            Some(Fill::Linear),
+            true,
+            false,
+            expected,
+        )
+        .await;
     }

     #[tokio::test]
@@ -1315,7 +1621,16 @@ mod test {
             \n| 5.0        | 5.0        | 1970-01-01T00:00:20 | host2 |\
             \n+------------+------------+---------------------+-------+",
         );
-        do_range_select_test(10_000, 5_000, 5_000, Fill::Linear, false, expected).await;
+        do_range_select_test(
+            10_000,
+            5_000,
+            5_000,
+            Some(Fill::Linear),
+            false,
+            false,
+            expected,
+        )
+        .await;
     }

     #[tokio::test]
@@ -1342,7 +1657,101 @@ mod test {
             10_000,
             5_000,
             5_000,
-            Fill::Const(ScalarValue::Float64(Some(6.6))),
+            Some(Fill::Const(ScalarValue::Float64(Some(6.6)))),
             true,
+            false,
             expected,
         )
         .await;
     }

+    #[tokio::test]
+    async fn range_fill_gap() {
+        let expected = String::from(
+            "+------------+------------+---------------------+-------+\
+            \n| MIN(value) | MAX(value) | timestamp           | host  |\
+            \n+------------+------------+---------------------+-------+\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:00 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:15 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:00 | host2 |\
+            \n| 12.0       | 12.0       | 1970-01-01T00:00:15 | host2 |\
+            \n+------------+------------+---------------------+-------+",
+        );
+        do_range_select_test(5_000, 5_000, 5_000, None, true, true, expected).await;
+        let expected = String::from(
+            "+------------+------------+---------------------+-------+\
+            \n| MIN(value) | MAX(value) | timestamp           | host  |\
+            \n+------------+------------+---------------------+-------+\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:00 | host1 |\
+            \n|            |            | 1970-01-01T00:00:05 | host1 |\
+            \n|            |            | 1970-01-01T00:00:10 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:15 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:00 | host2 |\
+            \n|            |            | 1970-01-01T00:00:05 | host2 |\
+            \n|            |            | 1970-01-01T00:00:10 | host2 |\
+            \n| 12.0       | 12.0       | 1970-01-01T00:00:15 | host2 |\
+            \n+------------+------------+---------------------+-------+",
+        );
+        do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Null), true, true, expected).await;
+        let expected = String::from(
+            "+------------+------------+---------------------+-------+\
+            \n| MIN(value) | MAX(value) | timestamp           | host  |\
+            \n+------------+------------+---------------------+-------+\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:00 | host1 |\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:05 | host1 |\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:10 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:15 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:00 | host2 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:05 | host2 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:10 | host2 |\
+            \n| 12.0       | 12.0       | 1970-01-01T00:00:15 | host2 |\
+            \n+------------+------------+---------------------+-------+",
+        );
+        do_range_select_test(5_000, 5_000, 5_000, Some(Fill::Prev), true, true, expected).await;
+        let expected = String::from(
+            "+------------+------------+---------------------+-------+\
+            \n| MIN(value) | MAX(value) | timestamp           | host  |\
+            \n+------------+------------+---------------------+-------+\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:00 | host1 |\
+            \n| 2.0        | 2.0        | 1970-01-01T00:00:05 | host1 |\
+            \n| 4.0        | 4.0        | 1970-01-01T00:00:10 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:15 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:00 | host2 |\
+            \n| 8.0        | 8.0        | 1970-01-01T00:00:05 | host2 |\
+            \n| 10.0       | 10.0       | 1970-01-01T00:00:10 | host2 |\
+            \n| 12.0       | 12.0       | 1970-01-01T00:00:15 | host2 |\
+            \n+------------+------------+---------------------+-------+",
+        );
+        do_range_select_test(
+            5_000,
+            5_000,
+            5_000,
+            Some(Fill::Linear),
+            true,
+            true,
+            expected,
+        )
+        .await;
+        let expected = String::from(
+            "+------------+------------+---------------------+-------+\
+            \n| MIN(value) | MAX(value) | timestamp           | host  |\
+            \n+------------+------------+---------------------+-------+\
+            \n| 0.0        | 0.0        | 1970-01-01T00:00:00 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:05 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:10 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:15 | host1 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:00 | host2 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:05 | host2 |\
+            \n| 6.0        | 6.0        | 1970-01-01T00:00:10 | host2 |\
+            \n| 12.0       | 12.0       | 1970-01-01T00:00:15 | host2 |\
+            \n+------------+------------+---------------------+-------+",
+        );
+        do_range_select_test(
+            5_000,
+            5_000,
+            5_000,
+            Some(Fill::Const(ScalarValue::Float64(Some(6.0)))),
+            true,
+            true,
+            expected,
+        )
+        .await;
+    }
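The expected tables above can be checked by hand: with a 5 s align and data only at 0 s and 15 s, the 5 s and 10 s slots are synthesized. FILL NULL leaves them empty, FILL PREV repeats the last seen value (0.0 for host1, 6.0 for host2), FILL LINEAR interpolates between the endpoints (for host1, 0.0 + (6.0 - 0.0) * 5/15 = 2.0 at 5 s and 4.0 at 10 s), and FILL 6.0 inserts the constant.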
@@ -1351,7 +1760,8 @@ mod test {

     #[test]
     fn fill_test() {
-        assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Fill::Linear);
+        assert!(Fill::try_from_str("", &DataType::UInt8).unwrap().is_none());
+        assert!(Fill::try_from_str("Linear", &DataType::UInt8).unwrap() == Some(Fill::Linear));
         assert_eq!(
             Fill::try_from_str("Linear", &DataType::Boolean)
                 .unwrap_err()
@@ -1372,7 +1782,7 @@ mod test {
         );
         assert!(
             Fill::try_from_str("8", &DataType::UInt8).unwrap()
-                == Fill::Const(ScalarValue::UInt8(Some(8)))
+                == Some(Fill::Const(ScalarValue::UInt8(Some(8))))
         );
         let mut test1 = vec![
             ScalarValue::UInt8(Some(8)),
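These assertions pin down the new contract of Fill::try_from_str: an empty string now means "no fill" rather than an error. A simplified stand-in that satisfies the same assertions (u8 in place of ScalarValue and a plain String error, both assumptions):

#[derive(Debug, PartialEq)]
enum Fill {
    Null,
    Prev,
    Linear,
    Const(u8),
}

// "" means no fill; known keywords map to variants; anything else must
// parse as a constant of the column's type or the call fails.
fn try_from_str(s: &str) -> Result<Option<Fill>, String> {
    match s.to_uppercase().as_str() {
        "" => Ok(None),
        "NULL" => Ok(Some(Fill::Null)),
        "PREV" => Ok(Some(Fill::Prev)),
        "LINEAR" => Ok(Some(Fill::Linear)),
        other => other
            .parse::<u8>()
            .map(|v| Some(Fill::Const(v)))
            .map_err(|_| format!("{other} is not a valid fill option")),
    }
}

fn main() {
    assert_eq!(try_from_str("").unwrap(), None);
    assert_eq!(try_from_str("Linear").unwrap(), Some(Fill::Linear));
    assert_eq!(try_from_str("8").unwrap(), Some(Fill::Const(8)));
    assert!(try_from_str("abc").is_err());
}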
@@ -1447,4 +1857,44 @@ mod test {
         Fill::Linear.apply_fill_strategy(&ts, &mut test1).unwrap();
         assert_eq!(test, test1);
     }
+
+    #[test]
+    fn test_fist_last_accumulator() {
+        let mut acc = RangeFirstListValueAcc::new(vec![
+            SortOptions {
+                descending: true,
+                nulls_first: false,
+            },
+            SortOptions {
+                descending: false,
+                nulls_first: true,
+            },
+        ]);
+        let batch1: Vec<Arc<dyn Array>> = vec![
+            Arc::new(nullable_array!(Float64;
+                0.0, null, 0.0, null, 1.0
+            )),
+            Arc::new(nullable_array!(Float64;
+                5.0, null, 4.0, null, 3.0
+            )),
+            Arc::new(nullable_array!(Int64;
+                1, 2, 3, 4, 5
+            )),
+        ];
+        let batch2: Vec<Arc<dyn Array>> = vec![
+            Arc::new(nullable_array!(Float64;
+                3.0, 3.0, 3.0, 3.0, 3.0
+            )),
+            Arc::new(nullable_array!(Float64;
+                null, 3.0, 3.0, 3.0, 3.0
+            )),
+            Arc::new(nullable_array!(Int64;
+                6, 7, 8, 9, 10
+            )),
+        ];
+        acc.update_batch(&batch1).unwrap();
+        assert_eq!(acc.evaluate().unwrap(), ScalarValue::Int64(Some(5)));
+        acc.update_batch(&batch2).unwrap();
+        assert_eq!(acc.evaluate().unwrap(), ScalarValue::Int64(Some(6)));
+    }
 }
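Why the two assertions in test_fist_last_accumulator hold: the accumulator keeps the row that sorts first under the given options. In batch1 the first sort column is descending with nulls last, so the largest non-null value 1.0 (the fifth row) wins and evaluate() returns its Int64 payload 5. Every row of batch2 carries 3.0 in that column, beating 1.0; the tie among them is broken by the second column, ascending with nulls first, so the row whose second column is null wins and the result becomes 6.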
Some files were not shown because too many files have changed in this diff.