Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-06 13:22:57 +00:00)

Compare commits: v0.1.0-rel ... v0.1.0-alp (23 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 2c9bcbe885 | |
| | dfd4b10493 | |
| | dd488e8d21 | |
| | 857054f70d | |
| | a41aec0a86 | |
| | cff8fe4e0e | |
| | a2f9b788f1 | |
| | 43f9c40f43 | |
| | af1df2066c | |
| | f34a99ff5a | |
| | 89a3b39728 | |
| | 2137587091 | |
| | 172c9a1e21 | |
| | ae147c2a74 | |
| | c2e1b0857c | |
| | 6e99bb8490 | |
| | eef20887cc | |
| | 16500b045b | |
| | 3d195ff858 | |
| | bc701d3e7f | |
| | 6373bb04f9 | |
| | bfcd74fd16 | |
| | fc6d73b06b | |
.github/workflows/coverage.yml (vendored): 14 lines changed
@@ -18,7 +18,17 @@ jobs:
     timeout-minutes: 60
     steps:
       - uses: actions/checkout@v2
+      - name: Cache LLVM and Clang
+        id: cache-llvm
+        uses: actions/cache@v3
+        with:
+          path: ./llvm
+          key: llvm
       - uses: arduino/setup-protoc@v1
+      - uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "14.0"
+          cached: ${{ steps.cache-llvm.outputs.cache-hit }}
       - name: Install toolchain
         uses: actions-rs/toolchain@v1
         with:
@@ -30,7 +40,7 @@ jobs:
       - name: Cleanup disk
         uses: curoky/cleanup-disk-action@v2.0
         with:
-          retain: 'rust'
+          retain: 'rust,llvm'
       - name: Execute tests
         uses: actions-rs/cargo@v1
         with:
@@ -39,7 +49,7 @@ jobs:
         env:
           RUST_BACKTRACE: 1
           CARGO_INCREMENTAL: 0
-          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
+          RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests -Clink-arg=-fuse-ld=lld"
           GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
           GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
           GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
.github/workflows/develop.yml (vendored): 15 lines changed
@@ -47,7 +47,17 @@ jobs:
     timeout-minutes: 60
     steps:
      - uses: actions/checkout@v2
+     - name: Cache LLVM and Clang
+       id: cache-llvm
+       uses: actions/cache@v3
+       with:
+         path: ./llvm
+         key: llvm
      - uses: arduino/setup-protoc@v1
+     - uses: KyleMayes/install-llvm-action@v1
+       with:
+         version: "14.0"
+         cached: ${{ steps.cache-llvm.outputs.cache-hit }}
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
@@ -55,11 +65,16 @@ jobs:
          override: true
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2.0.0
+     - name: Cleanup disk
+       uses: curoky/cleanup-disk-action@v2.0
+       with:
+         retain: 'rust,llvm'
      - uses: actions-rs/cargo@v1
        with:
          command: test
          args: --workspace
        env:
+         CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
          RUST_BACKTRACE: 1
          GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
          GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
.github/workflows/release.yml (vendored): 9 lines changed
@@ -33,9 +33,11 @@ jobs:
        uses: actions/checkout@v3

      - name: Cache cargo assets
+       id: cache
        uses: actions/cache@v3
        with:
          path: |
+           ./llvm
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
@@ -51,6 +53,11 @@ jobs:
          sudo cp protoc/bin/protoc /usr/local/bin/
          sudo cp -r protoc/include/google /usr/local/include/

+     - uses: KyleMayes/install-llvm-action@v1
+       with:
+         version: "14.0"
+         cached: ${{ steps.cache.outputs.cache-hit }}
+
      - name: Install Protoc for macos
        if: contains(matrix.arch, 'darwin')
        run: |
@@ -78,6 +85,8 @@ jobs:
        with:
          command: build
          args: ${{ matrix.opts }} --release --locked --target ${{ matrix.arch }}
+       env:
+         CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"

      - name: Calculate checksum and rename binary
        shell: bash
.gitignore (vendored): 3 lines changed
@@ -28,3 +28,6 @@ logs/

 # cpython's generated python byte code
 **/__pycache__/
+
+# Benchmark dataset
+benchmarks/data
@@ -9,7 +9,7 @@ repos:
    rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
    hooks:
      - id: cargo-sort
-       args: ["--workspace"]
+       args: ["--workspace", "--print"]

  - repo: https://github.com/doublify/pre-commit-rust
    rev: v1.0
Cargo.lock (generated): 280 lines changed
@@ -93,6 +93,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anes"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.12.1"
|
||||
@@ -112,6 +118,8 @@ checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
|
||||
name = "api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"common-base",
|
||||
"common-time",
|
||||
"datatypes",
|
||||
"prost 0.11.0",
|
||||
"snafu",
|
||||
@@ -158,6 +166,30 @@ version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||
|
||||
[[package]]
|
||||
name = "arrow"
|
||||
version = "10.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1328dbc6d5d76a08b13df3ac630f61a6a31276d9e9d08eb813e98efa624c2382"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"chrono",
|
||||
"csv",
|
||||
"flatbuffers",
|
||||
"half",
|
||||
"hex",
|
||||
"indexmap",
|
||||
"lazy_static",
|
||||
"lexical-core",
|
||||
"multiversion",
|
||||
"num",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrow-format"
|
||||
version = "0.4.0"
|
||||
@@ -428,6 +460,20 @@ dependencies = [
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backoff"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"getrandom",
|
||||
"instant",
|
||||
"pin-project-lite",
|
||||
"rand 0.8.5",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backon"
|
||||
version = "0.1.0"
|
||||
@@ -461,6 +507,19 @@ version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"clap 4.0.18",
|
||||
"client",
|
||||
"indicatif",
|
||||
"itertools",
|
||||
"parquet",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bigdecimal"
|
||||
version = "0.3.0"
|
||||
@@ -663,6 +722,7 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"backoff",
|
||||
"chrono",
|
||||
"common-catalog",
|
||||
"common-error",
|
||||
@@ -760,6 +820,33 @@ version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"ciborium-ll",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-io"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-ll"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.4.0"
|
||||
@@ -794,8 +881,8 @@ checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_derive",
|
||||
"clap_lex",
|
||||
"clap_derive 3.2.18",
|
||||
"clap_lex 0.2.4",
|
||||
"indexmap",
|
||||
"once_cell",
|
||||
"strsim 0.10.0",
|
||||
@@ -803,6 +890,21 @@ dependencies = [
|
||||
"textwrap 0.15.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "335867764ed2de42325fafe6d18b8af74ba97ee0c590fa016f157535b42ab04b"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
"clap_derive 4.0.18",
|
||||
"clap_lex 0.3.0",
|
||||
"once_cell",
|
||||
"strsim 0.10.0",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "3.2.18"
|
||||
@@ -816,6 +918,19 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3"
|
||||
dependencies = [
|
||||
"heck 0.4.0",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.2.4"
|
||||
@@ -825,6 +940,15 @@ dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
|
||||
dependencies = [
|
||||
"os_str_bytes",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "client"
|
||||
version = "0.1.0"
|
||||
@@ -950,6 +1074,7 @@ dependencies = [
|
||||
"common-error",
|
||||
"common-function-macro",
|
||||
"common-query",
|
||||
"common-time",
|
||||
"datafusion-common",
|
||||
"datatypes",
|
||||
"libc",
|
||||
@@ -986,7 +1111,10 @@ dependencies = [
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-runtime",
|
||||
"criterion 0.4.0",
|
||||
"dashmap",
|
||||
"datafusion",
|
||||
"rand 0.8.5",
|
||||
"snafu",
|
||||
"tokio",
|
||||
"tonic",
|
||||
@@ -1197,7 +1325,7 @@ dependencies = [
|
||||
"atty",
|
||||
"cast",
|
||||
"clap 2.34.0",
|
||||
"criterion-plot",
|
||||
"criterion-plot 0.4.5",
|
||||
"csv",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
@@ -1214,6 +1342,32 @@ dependencies = [
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
|
||||
dependencies = [
|
||||
"anes",
|
||||
"atty",
|
||||
"cast",
|
||||
"ciborium",
|
||||
"clap 3.2.22",
|
||||
"criterion-plot 0.5.0",
|
||||
"itertools",
|
||||
"lazy_static",
|
||||
"num-traits",
|
||||
"oorandom",
|
||||
"plotters",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"tinytemplate",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.4.5"
|
||||
@@ -1224,6 +1378,16 @@ dependencies = [
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.8.2"
|
||||
@@ -1366,6 +1530,19 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "5.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"hashbrown",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datafusion"
|
||||
version = "7.0.0"
|
||||
@@ -1469,6 +1646,8 @@ dependencies = [
|
||||
"futures",
|
||||
"hyper",
|
||||
"log-store",
|
||||
"meta-client",
|
||||
"meta-srv",
|
||||
"metrics",
|
||||
"object-store",
|
||||
"query",
|
||||
@@ -1764,6 +1943,17 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499"
|
||||
|
||||
[[package]]
|
||||
name = "flatbuffers"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ea97b4fe4b84e2f2765449bcea21cbdb3ee28cecb88afbf38a0c2e1639f5eb5"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"smallvec",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.24"
|
||||
@@ -1807,6 +1997,7 @@ dependencies = [
|
||||
"arrow2",
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"catalog",
|
||||
"client",
|
||||
"common-base",
|
||||
"common-error",
|
||||
@@ -1822,8 +2013,10 @@ dependencies = [
|
||||
"datanode",
|
||||
"datatypes",
|
||||
"futures",
|
||||
"itertools",
|
||||
"openmetrics-parser",
|
||||
"prost 0.11.0",
|
||||
"query",
|
||||
"serde",
|
||||
"servers",
|
||||
"snafu",
|
||||
@@ -2291,6 +2484,17 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indicatif"
|
||||
version = "0.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfddc9561e8baf264e0e45e197fd7696320026eb10a8180340debc27b18f535b"
|
||||
dependencies = [
|
||||
"console",
|
||||
"number_prefix",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "influxdb_line_protocol"
|
||||
version = "0.1.0"
|
||||
@@ -2311,6 +2515,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "integer-encoding"
|
||||
version = "1.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f"
|
||||
|
||||
[[package]]
|
||||
name = "integer-encoding"
|
||||
version = "3.0.4"
|
||||
@@ -3129,6 +3339,12 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "number_prefix"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.29.0"
|
||||
@@ -3152,9 +3368,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.15.0"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
|
||||
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
@@ -3256,7 +3472,7 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry-semantic-conventions",
|
||||
"thiserror",
|
||||
"thrift",
|
||||
"thrift 0.15.0",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
@@ -3345,6 +3561,37 @@ dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parquet"
|
||||
version = "10.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53e9c8fc20af9b92d85d42ec86e5217b2eaf1340fbba75c4b4296de764ea7921"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"base64",
|
||||
"brotli",
|
||||
"byteorder",
|
||||
"chrono",
|
||||
"flate2",
|
||||
"lz4",
|
||||
"num",
|
||||
"num-bigint",
|
||||
"parquet-format",
|
||||
"rand 0.8.5",
|
||||
"snap",
|
||||
"thrift 0.13.0",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parquet-format"
|
||||
version = "4.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5"
|
||||
dependencies = [
|
||||
"thrift 0.13.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parquet-format-async-temp"
|
||||
version = "0.2.0"
|
||||
@@ -3354,7 +3601,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"futures",
|
||||
"integer-encoding",
|
||||
"integer-encoding 3.0.4",
|
||||
"ordered-float 1.1.1",
|
||||
]
|
||||
|
||||
@@ -4801,12 +5048,14 @@ dependencies = [
|
||||
"metrics",
|
||||
"mysql_async",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"openmetrics-parser",
|
||||
"opensrv-mysql",
|
||||
"pgwire",
|
||||
"prost 0.11.0",
|
||||
"query",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"schemars",
|
||||
"script",
|
||||
"serde",
|
||||
@@ -5073,7 +5322,7 @@ dependencies = [
|
||||
"common-runtime",
|
||||
"common-telemetry",
|
||||
"common-time",
|
||||
"criterion",
|
||||
"criterion 0.3.6",
|
||||
"datatypes",
|
||||
"futures",
|
||||
"futures-util",
|
||||
@@ -5454,6 +5703,19 @@ dependencies = [
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thrift"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding 1.1.7",
|
||||
"log",
|
||||
"ordered-float 1.1.1",
|
||||
"threadpool",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thrift"
|
||||
version = "0.15.0"
|
||||
@@ -5461,7 +5723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b82ca8f46f95b3ce96081fe3dd89160fdea970c254bb72925255d1b62aae692e"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"integer-encoding",
|
||||
"integer-encoding 3.0.4",
|
||||
"log",
|
||||
"ordered-float 1.1.1",
|
||||
"threadpool",
|
||||
|
||||
@@ -1,5 +1,6 @@
 [workspace]
 members = [
+    "benchmarks",
     "src/api",
     "src/catalog",
     "src/client",
@@ -32,3 +33,6 @@ members = [
     "src/table",
     "src/table-engine",
 ]
+
+[profile.release]
+debug = true
benchmarks/Cargo.toml (new file): 14 lines
@@ -0,0 +1,14 @@
+[package]
+name = "benchmarks"
+version = "0.1.0"
+edition = "2021"
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+arrow = "10"
+clap = { version = "4.0", features = ["derive"] }
+client = { path = "../src/client" }
+indicatif = "0.17.1"
+itertools = "0.10.5"
+parquet = { version = "*" }
+tokio = { version = "1.21", features = ["full"] }
benchmarks/src/bin/nyc-taxi.rs (new file): 439 lines
@@ -0,0 +1,439 @@
|
||||
//! Use the taxi trip records from the New York City dataset to benchmark. You can download the dataset from
|
||||
//! [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
|
||||
|
||||
#![feature(once_cell)]
|
||||
#![allow(clippy::print_stdout)]
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
use arrow::{
|
||||
array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray},
|
||||
datatypes::{DataType, Float64Type, Int64Type},
|
||||
record_batch::RecordBatch,
|
||||
};
|
||||
use clap::Parser;
|
||||
use client::{
|
||||
admin::Admin,
|
||||
api::v1::{
|
||||
codec::InsertBatch, column::Values, insert_expr, Column, ColumnDataType, ColumnDef,
|
||||
CreateExpr, InsertExpr,
|
||||
},
|
||||
Client, Database, Select,
|
||||
};
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||
use parquet::{
|
||||
arrow::{ArrowReader, ParquetFileArrowReader},
|
||||
file::{reader::FileReader, serialized_reader::SerializedFileReader},
|
||||
};
|
||||
use tokio::task::JoinSet;
|
||||
|
||||
const DATABASE_NAME: &str = "greptime";
|
||||
const CATALOG_NAME: &str = "greptime";
|
||||
const SCHEMA_NAME: &str = "public";
|
||||
const TABLE_NAME: &str = "nyc_taxi";
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "NYC benchmark runner")]
|
||||
struct Args {
|
||||
/// Path to the dataset
|
||||
#[arg(short, long)]
|
||||
path: Option<String>,
|
||||
|
||||
/// Batch size of insert request.
|
||||
#[arg(short = 's', long = "batch-size", default_value_t = 4096)]
|
||||
batch_size: usize,
|
||||
|
||||
/// Number of client threads on write (parallel on file level)
|
||||
#[arg(short = 't', long = "thread-num", default_value_t = 4)]
|
||||
thread_num: usize,
|
||||
|
||||
/// Number of query iteration
|
||||
#[arg(short = 'i', long = "iter-num", default_value_t = 3)]
|
||||
iter_num: usize,
|
||||
|
||||
#[arg(long = "skip-write")]
|
||||
skip_write: bool,
|
||||
|
||||
#[arg(long = "skip-read")]
|
||||
skip_read: bool,
|
||||
|
||||
#[arg(short, long, default_value_t = String::from("127.0.0.1:3001"))]
|
||||
endpoint: String,
|
||||
}
|
||||
|
||||
fn get_file_list<P: AsRef<Path>>(path: P) -> Vec<PathBuf> {
|
||||
std::fs::read_dir(path)
|
||||
.unwrap()
|
||||
.map(|dir| dir.unwrap().path().canonicalize().unwrap())
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn write_data(
|
||||
batch_size: usize,
|
||||
db: &Database,
|
||||
path: PathBuf,
|
||||
mpb: MultiProgress,
|
||||
pb_style: ProgressStyle,
|
||||
) -> u128 {
|
||||
let file = std::fs::File::open(&path).unwrap();
|
||||
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
|
||||
let row_num = file_reader.metadata().file_metadata().num_rows();
|
||||
let record_batch_reader = ParquetFileArrowReader::new(file_reader)
|
||||
.get_record_reader(batch_size)
|
||||
.unwrap();
|
||||
let progress_bar = mpb.add(ProgressBar::new(row_num as _));
|
||||
progress_bar.set_style(pb_style);
|
||||
progress_bar.set_message(format!("{:?}", path));
|
||||
|
||||
let mut total_rpc_elapsed_ms = 0;
|
||||
|
||||
for record_batch in record_batch_reader {
|
||||
let record_batch = record_batch.unwrap();
|
||||
let row_count = record_batch.num_rows();
|
||||
let insert_batch = convert_record_batch(record_batch).into();
|
||||
let insert_expr = InsertExpr {
|
||||
table_name: TABLE_NAME.to_string(),
|
||||
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
|
||||
values: vec![insert_batch],
|
||||
})),
|
||||
options: HashMap::default(),
|
||||
};
|
||||
let now = Instant::now();
|
||||
db.insert(insert_expr).await.unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
total_rpc_elapsed_ms += elapsed.as_millis();
|
||||
progress_bar.inc(row_count as _);
|
||||
}
|
||||
|
||||
progress_bar.finish_with_message(format!(
|
||||
"file {:?} done in {}ms",
|
||||
path, total_rpc_elapsed_ms
|
||||
));
|
||||
total_rpc_elapsed_ms
|
||||
}
|
||||
|
||||
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
|
||||
let schema = record_batch.schema();
|
||||
let fields = schema.fields();
|
||||
let row_count = record_batch.num_rows();
|
||||
let mut columns = vec![];
|
||||
|
||||
for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
|
||||
let values = build_values(array);
|
||||
let column = Column {
|
||||
column_name: field.name().to_owned(),
|
||||
values: Some(values),
|
||||
null_mask: vec![],
|
||||
// datatype and semantic_type are set to default
|
||||
..Default::default()
|
||||
};
|
||||
columns.push(column);
|
||||
}
|
||||
|
||||
InsertBatch {
|
||||
columns,
|
||||
row_count: row_count as _,
|
||||
}
|
||||
}
|
||||
|
||||
fn build_values(column: &ArrayRef) -> Values {
|
||||
match column.data_type() {
|
||||
DataType::Int64 => {
|
||||
let array = column
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<Int64Type>>()
|
||||
.unwrap();
|
||||
let values = array.values();
|
||||
Values {
|
||||
i64_values: values.to_vec(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
DataType::Float64 => {
|
||||
let array = column
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<Float64Type>>()
|
||||
.unwrap();
|
||||
let values = array.values();
|
||||
Values {
|
||||
f64_values: values.to_vec(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
DataType::Timestamp(_, _) => {
|
||||
let array = column
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampNanosecondArray>()
|
||||
.unwrap();
|
||||
let values = array.values();
|
||||
Values {
|
||||
i64_values: values.to_vec(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
DataType::Utf8 => {
|
||||
let array = column.as_any().downcast_ref::<StringArray>().unwrap();
|
||||
let values = array.iter().filter_map(|s| s.map(String::from)).collect();
|
||||
Values {
|
||||
string_values: values,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
DataType::Null
|
||||
| DataType::Boolean
|
||||
| DataType::Int8
|
||||
| DataType::Int16
|
||||
| DataType::Int32
|
||||
| DataType::UInt8
|
||||
| DataType::UInt16
|
||||
| DataType::UInt32
|
||||
| DataType::UInt64
|
||||
| DataType::Float16
|
||||
| DataType::Float32
|
||||
| DataType::Date32
|
||||
| DataType::Date64
|
||||
| DataType::Time32(_)
|
||||
| DataType::Time64(_)
|
||||
| DataType::Duration(_)
|
||||
| DataType::Interval(_)
|
||||
| DataType::Binary
|
||||
| DataType::FixedSizeBinary(_)
|
||||
| DataType::LargeBinary
|
||||
| DataType::LargeUtf8
|
||||
| DataType::List(_)
|
||||
| DataType::FixedSizeList(_, _)
|
||||
| DataType::LargeList(_)
|
||||
| DataType::Struct(_)
|
||||
| DataType::Union(_, _)
|
||||
| DataType::Dictionary(_, _)
|
||||
| DataType::Decimal(_, _)
|
||||
| DataType::Map(_, _) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_table_expr() -> CreateExpr {
|
||||
CreateExpr {
|
||||
catalog_name: Some(CATALOG_NAME.to_string()),
|
||||
schema_name: Some(SCHEMA_NAME.to_string()),
|
||||
table_name: TABLE_NAME.to_string(),
|
||||
desc: None,
|
||||
column_defs: vec![
|
||||
ColumnDef {
|
||||
name: "VendorID".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tpep_pickup_datetime".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tpep_dropoff_datetime".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "passenger_count".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "trip_distance".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "RatecodeID".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "store_and_fwd_flag".to_string(),
|
||||
datatype: ColumnDataType::String as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "PULocationID".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "DOLocationID".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "payment_type".to_string(),
|
||||
datatype: ColumnDataType::Int64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "fare_amount".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "extra".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "mta_tax".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tip_amount".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "tolls_amount".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "improvement_surcharge".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "total_amount".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "congestion_surcharge".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
ColumnDef {
|
||||
name: "airport_fee".to_string(),
|
||||
datatype: ColumnDataType::Float64 as i32,
|
||||
is_nullable: true,
|
||||
default_constraint: None,
|
||||
},
|
||||
],
|
||||
time_index: "tpep_pickup_datetime".to_string(),
|
||||
primary_keys: vec!["VendorID".to_string()],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn query_set() -> HashMap<String, String> {
|
||||
let mut ret = HashMap::new();
|
||||
|
||||
ret.insert(
|
||||
"count_all".to_string(),
|
||||
format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
|
||||
);
|
||||
|
||||
ret.insert(
|
||||
"fare_amt_by_passenger".to_string(),
|
||||
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
|
||||
);
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
async fn do_write(args: &Args, client: &Client) {
|
||||
let admin = Admin::new("admin", client.clone());
|
||||
|
||||
let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
|
||||
let mut write_jobs = JoinSet::new();
|
||||
|
||||
let create_table_result = admin.create(create_table_expr()).await;
|
||||
println!("Create table result: {:?}", create_table_result);
|
||||
|
||||
let progress_bar_style = ProgressStyle::with_template(
|
||||
"[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
|
||||
)
|
||||
.unwrap()
|
||||
.progress_chars("##-");
|
||||
let multi_progress_bar = MultiProgress::new();
|
||||
let file_progress = multi_progress_bar.add(ProgressBar::new(file_list.len() as _));
|
||||
file_progress.inc(0);
|
||||
|
||||
let batch_size = args.batch_size;
|
||||
for _ in 0..args.thread_num {
|
||||
if let Some(path) = file_list.pop() {
|
||||
let db = Database::new(DATABASE_NAME, client.clone());
|
||||
let mpb = multi_progress_bar.clone();
|
||||
let pb_style = progress_bar_style.clone();
|
||||
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
|
||||
}
|
||||
}
|
||||
while write_jobs.join_next().await.is_some() {
|
||||
file_progress.inc(1);
|
||||
if let Some(path) = file_list.pop() {
|
||||
let db = Database::new(DATABASE_NAME, client.clone());
|
||||
let mpb = multi_progress_bar.clone();
|
||||
let pb_style = progress_bar_style.clone();
|
||||
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_query(num_iter: usize, db: &Database) {
|
||||
for (query_name, query) in query_set() {
|
||||
println!("Running query: {}", query);
|
||||
for i in 0..num_iter {
|
||||
let now = Instant::now();
|
||||
let _res = db.select(Select::Sql(query.clone())).await.unwrap();
|
||||
let elapsed = now.elapsed();
|
||||
println!(
|
||||
"query {}, iteration {}: {}ms",
|
||||
query_name,
|
||||
i,
|
||||
elapsed.as_millis()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(args.thread_num)
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap()
|
||||
.block_on(async {
|
||||
let client = Client::with_urls(vec![&args.endpoint]);
|
||||
|
||||
if !args.skip_write {
|
||||
do_write(&args, &client).await;
|
||||
}
|
||||
|
||||
if !args.skip_read {
|
||||
let db = Database::new(DATABASE_NAME, client.clone());
|
||||
do_query(args.iter_num, &db).await;
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -1,8 +1,9 @@
+node_id = 42
 http_addr = '0.0.0.0:3000'
 rpc_addr = '0.0.0.0:3001'
 wal_dir = '/tmp/greptimedb/wal'
 rpc_runtime_size = 8

 mode = "standalone"
 mysql_addr = '0.0.0.0:3306'
 mysql_runtime_size = 4
@@ -13,3 +14,9 @@ postgres_runtime_size = 4
 [storage]
 type = 'File'
 data_dir = '/tmp/greptimedb/data/'
+
+[meta_client_opts]
+metasrv_addr = "1.1.1.1:3002"
+timeout_millis = 3000
+connect_timeout_millis = 5000
+tcp_nodelay = true
@@ -5,6 +5,8 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
+common-base = { path = "../common/base" }
+common-time = { path = "../common/time" }
 datatypes = { path = "../datatypes" }
 prost = "0.11"
 snafu = { version = "0.7", features = ["backtraces"] }
@@ -8,3 +8,7 @@ message InsertBatch {
   repeated Column columns = 1;
   uint32 row_count = 2;
 }
+
+message RegionId {
+  uint64 id = 1;
+}
@@ -1,8 +1,13 @@
|
||||
use common_base::BitVec;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::v1::column::Values;
|
||||
use crate::v1::Column;
|
||||
use crate::v1::ColumnDataType;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -143,8 +148,47 @@ impl Values {
|
||||
}
|
||||
}
|
||||
|
||||
impl Column {
|
||||
// The types of the pushed values must be the same.
|
||||
pub fn push_vals(&mut self, origin_count: usize, vector: VectorRef) {
|
||||
let values = self.values.get_or_insert_with(Values::default);
|
||||
let mut null_mask = BitVec::from_slice(&self.null_mask);
|
||||
let len = vector.len();
|
||||
null_mask.reserve_exact(origin_count + len);
|
||||
null_mask.extend(BitVec::repeat(false, len));
|
||||
|
||||
(0..len).into_iter().for_each(|idx| match vector.get(idx) {
|
||||
Value::Null => null_mask.set(idx + origin_count, true),
|
||||
Value::Boolean(val) => values.bool_values.push(val),
|
||||
Value::UInt8(val) => values.u8_values.push(val.into()),
|
||||
Value::UInt16(val) => values.u16_values.push(val.into()),
|
||||
Value::UInt32(val) => values.u32_values.push(val),
|
||||
Value::UInt64(val) => values.u64_values.push(val),
|
||||
Value::Int8(val) => values.i8_values.push(val.into()),
|
||||
Value::Int16(val) => values.i16_values.push(val.into()),
|
||||
Value::Int32(val) => values.i32_values.push(val),
|
||||
Value::Int64(val) => values.i64_values.push(val),
|
||||
Value::Float32(val) => values.f32_values.push(*val),
|
||||
Value::Float64(val) => values.f64_values.push(*val),
|
||||
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
|
||||
Value::Binary(val) => values.binary_values.push(val.to_vec()),
|
||||
Value::Date(val) => values.date_values.push(val.val()),
|
||||
Value::DateTime(val) => values.datetime_values.push(val.val()),
|
||||
Value::Timestamp(val) => values
|
||||
.ts_millis_values
|
||||
.push(val.convert_to(TimeUnit::Millisecond)),
|
||||
Value::List(_) => unreachable!(),
|
||||
});
|
||||
self.null_mask = null_mask.into_vec();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::BooleanVector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -358,4 +402,29 @@ mod tests {
|
||||
"Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_put_vector() {
|
||||
use crate::v1::column::SemanticType;
|
||||
// Some(false), None, Some(true), Some(true)
|
||||
let mut column = Column {
|
||||
column_name: "test".to_string(),
|
||||
semantic_type: SemanticType::Field as i32,
|
||||
values: Some(Values {
|
||||
bool_values: vec![false, true, true],
|
||||
..Default::default()
|
||||
}),
|
||||
null_mask: vec![2],
|
||||
datatype: ColumnDataType::Boolean as i32,
|
||||
};
|
||||
let row_count = 4;
|
||||
|
||||
let vector = Arc::new(BooleanVector::from(vec![Some(true), None, Some(false)]));
|
||||
column.push_vals(row_count, vector);
|
||||
// Some(false), None, Some(true), Some(true), Some(true), None, Some(false)
|
||||
let bool_values = column.values.unwrap().bool_values;
|
||||
assert_eq!(vec![false, true, true, true, false], bool_values);
|
||||
let null_mask = column.null_mask;
|
||||
assert_eq!(34, null_mask[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
 pub use prost::DecodeError;
 use prost::Message;

-use crate::v1::codec::{InsertBatch, PhysicalPlanNode, SelectResult};
+use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionId, SelectResult};

 macro_rules! impl_convert_with_bytes {
     ($data_type: ty) => {
@@ -24,6 +24,7 @@ macro_rules! impl_convert_with_bytes {
 impl_convert_with_bytes!(InsertBatch);
 impl_convert_with_bytes!(SelectResult);
 impl_convert_with_bytes!(PhysicalPlanNode);
+impl_convert_with_bytes!(RegionId);

 #[cfg(test)]
 mod tests {
@@ -127,6 +128,16 @@ mod tests {
         );
     }

+    #[test]
+    fn test_convert_region_id() {
+        let region_id = RegionId { id: 12 };
+
+        let bytes: Vec<u8> = region_id.into();
+        let region_id: RegionId = bytes.deref().try_into().unwrap();
+
+        assert_eq!(12, region_id.id);
+    }
+
     fn mock_insert_batch() -> InsertBatch {
         let values = column::Values {
             i32_values: vec![2, 3, 4, 5, 6, 7, 8],
@@ -9,6 +9,7 @@ api = { path = "../api" }
|
||||
arc-swap = "1.0"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
backoff = { version = "0.4", features = ["tokio"] }
|
||||
common-catalog = { path = "../common/catalog" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
|
||||
@@ -4,6 +4,7 @@ use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::prelude::{Snafu, StatusCode};
|
||||
use datafusion::error::DataFusionError;
|
||||
use datatypes::arrow;
|
||||
use datatypes::schema::RawSchema;
|
||||
use snafu::{Backtrace, ErrorCompat};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
@@ -110,6 +111,19 @@ pub enum Error {
|
||||
source: table::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Invalid table schema in catalog entry, table:{}, schema: {:?}, source: {}",
|
||||
table_info,
|
||||
schema,
|
||||
source
|
||||
))]
|
||||
InvalidTableSchema {
|
||||
table_info: String,
|
||||
schema: RawSchema,
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
|
||||
SystemCatalogTableScanExec {
|
||||
#[snafu(backtrace)]
|
||||
@@ -135,6 +149,12 @@ pub enum Error {
|
||||
#[snafu(backtrace)]
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to bump table id"))]
|
||||
BumpTableId { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to parse table id from metasrv, data: {:?}", data))]
|
||||
ParseTableId { data: String, backtrace: Backtrace },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -170,6 +190,10 @@ impl ErrorExt for Error {
|
||||
Error::MetaSrv { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTableScan { source } => source.status_code(),
|
||||
Error::SystemCatalogTableScanExec { source } => source.status_code(),
|
||||
Error::InvalidTableSchema { source, .. } => source.status_code(),
|
||||
Error::BumpTableId { .. } | Error::ParseTableId { .. } => {
|
||||
StatusCode::StorageUnavailable
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -67,32 +67,27 @@ pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
|
||||
#[async_trait::async_trait]
|
||||
pub trait CatalogManager: CatalogList {
|
||||
/// Starts a catalog manager.
|
||||
async fn start(&self) -> error::Result<()>;
|
||||
async fn start(&self) -> Result<()>;
|
||||
|
||||
/// Returns next available table id.
|
||||
fn next_table_id(&self) -> TableId;
|
||||
async fn next_table_id(&self) -> Result<TableId>;
|
||||
|
||||
/// Registers a table under the given catalog/schema to the catalog manager,
|
||||
/// returns table registered.
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> error::Result<usize>;
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
|
||||
|
||||
/// Register a system table, should be called before starting the manager.
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest)
|
||||
-> error::Result<()>;
|
||||
|
||||
/// Returns the table by catalog, schema and table name.
|
||||
fn table(
|
||||
&self,
|
||||
catalog: &str,
|
||||
schema: &str,
|
||||
table_name: &str,
|
||||
) -> error::Result<Option<TableRef>>;
|
||||
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
|
||||
}
|
||||
|
||||
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
|
||||
|
||||
/// Hook called after system table opening.
|
||||
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> error::Result<()> + Send + Sync>;
|
||||
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> Result<()> + Send + Sync>;
|
||||
|
||||
/// Register system table request:
|
||||
/// - When system table is already created and registered, the hook will be called
|
||||
|
||||
@@ -7,7 +7,7 @@ use common_catalog::consts::{
|
||||
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
|
||||
};
|
||||
use common_recordbatch::RecordBatch;
|
||||
use common_telemetry::{debug, info};
|
||||
use common_telemetry::info;
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, UInt8Vector};
|
||||
use futures_util::lock::Mutex;
|
||||
@@ -183,7 +183,6 @@ impl LocalCatalogManager {
|
||||
info!("Registered schema: {:?}", s);
|
||||
}
|
||||
Entry::Table(t) => {
|
||||
debug!("t: {:?}", t);
|
||||
self.open_and_register_table(&t).await?;
|
||||
info!("Registered table: {:?}", t);
|
||||
max_table_id = max_table_id.max(t.table_id);
|
||||
@@ -273,8 +272,8 @@ impl CatalogManager for LocalCatalogManager {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_table_id(&self) -> TableId {
|
||||
self.next_table_id.fetch_add(1, Ordering::Relaxed)
|
||||
async fn next_table_id(&self) -> Result<TableId> {
|
||||
Ok(self.next_table_id.fetch_add(1, Ordering::Relaxed))
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
|
||||
@@ -19,13 +19,27 @@ pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a

 #[async_trait::async_trait]
 pub trait KvBackend: Send + Sync {
-    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, crate::error::Error>
+    fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
     where
         'a: 'b;

-    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), crate::error::Error>;
+    async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;

-    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), crate::error::Error>;
+    /// Compare and set value of key. `expect` is the expected value, if backend's current value associated
+    /// with key is the same as `expect`, the value will be updated to `val`.
+    ///
+    /// - If the compare-and-set operation successfully updated value, this method will return an `Ok(Ok())`
+    /// - If associated value is not the same as `expect`, no value will be updated and an `Ok(Err(Vec<u8>))`
+    ///   will be returned, the `Err(Vec<u8>)` indicates the current associated value of key.
+    /// - If any error happens during operation, an `Err(Error)` will be returned.
+    async fn compare_and_set(
+        &self,
+        key: &[u8],
+        expect: &[u8],
+        val: &[u8],
+    ) -> Result<Result<(), Option<Vec<u8>>>, Error>;
+
+    async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;

     async fn delete(&self, key: &[u8]) -> Result<(), Error> {
         self.delete_range(key, &[]).await
@@ -74,6 +88,15 @@ mod tests {
         unimplemented!()
     }

+    async fn compare_and_set(
+        &self,
+        _key: &[u8],
+        _expect: &[u8],
+        _val: &[u8],
+    ) -> Result<Result<(), Option<Vec<u8>>>, Error> {
+        unimplemented!()
+    }
+
     async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
         unimplemented!()
     }
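The `compare_and_set` contract introduced above (return `Ok(Ok(()))` on success, `Ok(Err(current_value))` when the stored value no longer matches `expect`) is what callers build read-modify-write loops on. The snippet below is a minimal caller-side sketch of such a loop, not code from this change: the trait is re-declared here only in the shape shown in this diff, the error type is simplified to `String`, and `bump_counter` is a hypothetical helper.

```rust
use std::sync::Arc;

// Assumed subset of the KvBackend trait as declared in the diff above (simplified error type).
#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
    async fn compare_and_set(
        &self,
        key: &[u8],
        expect: &[u8],
        val: &[u8],
    ) -> Result<Result<(), Option<Vec<u8>>>, String>;
}

/// Hypothetical helper: bump a little-endian u32 counter, retrying until the CAS succeeds.
pub async fn bump_counter(
    backend: Arc<dyn KvBackend>,
    key: &[u8],
    current: u32,
) -> Result<u32, String> {
    let mut expect = current;
    loop {
        let next = expect + 1;
        match backend
            .compare_and_set(key, &expect.to_le_bytes(), &next.to_le_bytes())
            .await?
        {
            // CAS succeeded: the new value is now stored.
            Ok(()) => return Ok(next),
            // Lost the race: retry from the value another writer stored.
            Err(Some(actual)) => {
                expect = u32::from_le_bytes(
                    actual.try_into().map_err(|_| "unexpected value length".to_string())?,
                );
            }
            // Key disappeared concurrently; treat as an error in this sketch.
            Err(None) => return Err("key deleted concurrently".to_string()),
        }
    }
}
```

In the PR itself, `RemoteCatalogManager::next_table_id` (further down in this diff) wraps the same CAS call in an exponential `backoff` retry policy rather than a bare loop.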
@@ -3,7 +3,7 @@ use std::fmt::Debug;
|
||||
use async_stream::stream;
|
||||
use common_telemetry::info;
|
||||
use meta_client::client::MetaClient;
|
||||
use meta_client::rpc::{DeleteRangeRequest, PutRequest, RangeRequest};
|
||||
use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{Error, MetaSrvSnafu};
|
||||
@@ -68,4 +68,26 @@ impl KvBackend for MetaKvBackend {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
|
||||
let request = CompareAndPutRequest::new()
|
||||
.with_key(key.to_vec())
|
||||
.with_expect(expect.to_vec())
|
||||
.with_value(val.to_vec());
|
||||
let mut response = self
|
||||
.client
|
||||
.compare_and_put(request)
|
||||
.await
|
||||
.context(MetaSrvSnafu)?;
|
||||
if response.is_success() {
|
||||
Ok(Ok(()))
|
||||
} else {
|
||||
Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,32 +1,34 @@
|
||||
use std::any::Any;
|
||||
use std::collections::HashMap;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use async_stream::stream;
|
||||
use backoff::exponential::ExponentialBackoffBuilder;
|
||||
use backoff::ExponentialBackoff;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_catalog::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableKey, TableValue,
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
use common_telemetry::{debug, info};
|
||||
use datatypes::schema::Schema;
|
||||
use common_telemetry::{debug, error, info};
|
||||
use futures::Stream;
|
||||
use futures_util::StreamExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
use table::metadata::{TableId, TableVersion};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{CreateTableRequest, OpenTableRequest};
|
||||
use table::table::numbers::NumbersTable;
|
||||
use table::TableRef;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, OpenTableSnafu,
|
||||
SchemaNotFoundSnafu, TableExistsSnafu,
|
||||
BumpTableIdSnafu, CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu,
|
||||
OpenTableSnafu, ParseTableIdSnafu, SchemaNotFoundSnafu, TableExistsSnafu,
|
||||
};
|
||||
use crate::error::{InvalidTableSchemaSnafu, Result};
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
|
||||
@@ -38,7 +40,6 @@ pub struct RemoteCatalogManager {
|
||||
node_id: u64,
|
||||
backend: KvBackendRef,
|
||||
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
|
||||
next_table_id: Arc<AtomicU32>,
|
||||
engine: TableEngineRef,
|
||||
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
|
||||
mutex: Arc<Mutex<()>>,
|
||||
@@ -51,7 +52,6 @@ impl RemoteCatalogManager {
|
||||
node_id,
|
||||
backend,
|
||||
catalogs: Default::default(),
|
||||
next_table_id: Default::default(),
|
||||
system_table_requests: Default::default(),
|
||||
mutex: Default::default(),
|
||||
}
|
||||
@@ -60,14 +60,12 @@ impl RemoteCatalogManager {
|
||||
fn build_catalog_key(&self, catalog_name: impl AsRef<str>) -> CatalogKey {
|
||||
CatalogKey {
|
||||
catalog_name: catalog_name.as_ref().to_string(),
|
||||
node_id: self.node_id,
|
||||
}
|
||||
}
|
||||
|
||||
fn new_catalog_provider(&self, catalog_name: &str) -> CatalogProviderRef {
|
||||
Arc::new(RemoteCatalogProvider {
|
||||
catalog_name: catalog_name.to_string(),
|
||||
node_id: self.node_id,
|
||||
backend: self.backend.clone(),
|
||||
schemas: Default::default(),
|
||||
mutex: Default::default(),
|
||||
@@ -100,9 +98,7 @@ impl RemoteCatalogManager {
|
||||
}
|
||||
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
if key.node_id == self.node_id {
|
||||
yield Ok(key)
|
||||
}
|
||||
yield Ok(key)
|
||||
}
|
||||
}))
|
||||
}
|
||||
@@ -124,10 +120,7 @@ impl RemoteCatalogManager {
|
||||
|
||||
let schema_key = SchemaKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
|
||||
if schema_key.node_id == self.node_id {
|
||||
yield Ok(schema_key)
|
||||
}
|
||||
yield Ok(schema_key)
|
||||
}
|
||||
}))
|
||||
}
|
||||
@@ -139,8 +132,8 @@ impl RemoteCatalogManager {
|
||||
&self,
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
) -> Pin<Box<dyn Stream<Item = Result<(TableKey, TableValue)>> + Send + '_>> {
|
||||
let table_prefix = build_table_prefix(catalog_name, schema_name);
|
||||
) -> Pin<Box<dyn Stream<Item = Result<(TableGlobalKey, TableGlobalValue)>> + Send + '_>> {
|
||||
let table_prefix = build_table_global_prefix(catalog_name, schema_name);
|
||||
let mut tables = self.backend.range(table_prefix.as_bytes());
|
||||
Box::pin(stream!({
|
||||
while let Some(r) = tables.next().await {
|
||||
@@ -149,12 +142,22 @@ impl RemoteCatalogManager {
|
||||
debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
|
||||
continue;
|
||||
}
|
||||
let table_key = TableKey::parse(&String::from_utf8_lossy(&k))
|
||||
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
let table_value = TableValue::parse(&String::from_utf8_lossy(&v))
|
||||
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
|
||||
if table_value.node_id == self.node_id {
|
||||
debug!(
|
||||
"Found catalog table entry, key: {}, value: {:?}",
|
||||
table_key, table_value
|
||||
);
|
||||
// metasrv has allocated region ids to current datanode
|
||||
if table_value
|
||||
.regions_id_map
|
||||
.get(&self.node_id)
|
||||
.map(|v| !v.is_empty())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
yield Ok((table_key, table_value))
|
||||
}
|
||||
}
|
||||
@@ -164,7 +167,7 @@ impl RemoteCatalogManager {
|
||||
/// Fetch catalogs/schemas/tables from remote catalog manager along with max table id allocated.
|
||||
async fn initiate_catalogs(&self) -> Result<(HashMap<String, CatalogProviderRef>, TableId)> {
|
||||
let mut res = HashMap::new();
|
||||
let max_table_id = MIN_USER_TABLE_ID;
|
||||
let max_table_id = MIN_USER_TABLE_ID - 1;
|
||||
|
||||
// initiate default catalog and schema
|
||||
let default_catalog = self.initiate_default_catalog().await?;
|
||||
@@ -246,18 +249,17 @@ impl RemoteCatalogManager {
|
||||
async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
|
||||
let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
|
||||
let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
|
||||
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
|
||||
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())?;
|
||||
let schema_key = SchemaKey {
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
node_id: self.node_id,
|
||||
}
|
||||
.to_string();
|
||||
self.backend
|
||||
.set(
|
||||
schema_key.as_bytes(),
|
||||
&SchemaValue {}
|
||||
.to_bytes()
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
@@ -265,14 +267,13 @@ impl RemoteCatalogManager {
|
||||
|
||||
let catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
node_id: self.node_id,
|
||||
}
|
||||
.to_string();
|
||||
self.backend
|
||||
.set(
|
||||
catalog_key.as_bytes(),
|
||||
&CatalogValue {}
|
||||
.to_bytes()
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
@@ -282,18 +283,23 @@ impl RemoteCatalogManager {
|
||||
|
||||
async fn open_or_create_table(
|
||||
&self,
|
||||
table_key: &TableKey,
|
||||
table_value: &TableValue,
|
||||
table_key: &TableGlobalKey,
|
||||
table_value: &TableGlobalValue,
|
||||
) -> Result<TableRef> {
|
||||
let context = EngineContext {};
|
||||
let TableKey {
|
||||
let TableGlobalKey {
|
||||
catalog_name,
|
||||
schema_name,
|
||||
table_name,
|
||||
..
|
||||
} = table_key;
|
||||
|
||||
let TableValue { id, meta, .. } = table_value;
|
||||
let TableGlobalValue {
|
||||
id,
|
||||
meta,
|
||||
regions_id_map,
|
||||
..
|
||||
} = table_value;
|
||||
|
||||
let request = OpenTableRequest {
|
||||
catalog_name: catalog_name.clone(),
|
||||
@@ -310,13 +316,22 @@ impl RemoteCatalogManager {
|
||||
})? {
|
||||
Some(table) => Ok(table),
|
||||
None => {
|
||||
let schema = meta
|
||||
.schema
|
||||
.clone()
|
||||
.try_into()
|
||||
.context(InvalidTableSchemaSnafu {
|
||||
table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
|
||||
schema: meta.schema.clone(),
|
||||
})?;
|
||||
let req = CreateTableRequest {
|
||||
id: *id,
|
||||
catalog_name: catalog_name.clone(),
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
desc: None,
|
||||
schema: Arc::new(Schema::new(meta.schema.column_schemas.clone())),
|
||||
schema: Arc::new(schema),
|
||||
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
|
||||
primary_key_indices: meta.primary_key_indices.clone(),
|
||||
create_if_not_exists: true,
|
||||
table_options: meta.options.clone(),
|
||||
@@ -345,18 +360,78 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
catalogs.keys().cloned().collect::<Vec<_>>()
|
||||
);
|
||||
self.catalogs.store(Arc::new(catalogs));
|
||||
self.next_table_id
|
||||
.store(max_table_id + 1, Ordering::Relaxed);
|
||||
info!("Max table id allocated: {}", max_table_id);
|
||||
|
||||
let mut system_table_requests = self.system_table_requests.lock().await;
|
||||
handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
|
||||
info!("All system table opened");
|
||||
|
||||
self.catalog(DEFAULT_CATALOG_NAME)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.schema(DEFAULT_SCHEMA_NAME)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
|
||||
.unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn next_table_id(&self) -> TableId {
|
||||
self.next_table_id.fetch_add(1, Ordering::Relaxed)
|
||||
/// Bump table id in a CAS manner with backoff.
|
||||
async fn next_table_id(&self) -> Result<TableId> {
|
||||
let key = common_catalog::consts::TABLE_ID_KEY_PREFIX.as_bytes();
|
||||
let op = || async {
|
||||
// TODO(hl): optimize this get
|
||||
let (prev, prev_bytes) = match self.backend.get(key).await? {
|
||||
None => (MIN_USER_TABLE_ID, vec![]),
|
||||
Some(kv) => (parse_table_id(&kv.1)?, kv.1),
|
||||
};
|
||||
|
||||
match self
|
||||
.backend
|
||||
.compare_and_set(key, &prev_bytes, &(prev + 1).to_le_bytes())
|
||||
.await
|
||||
{
|
||||
Ok(cas_res) => match cas_res {
|
||||
Ok(_) => Ok(prev),
|
||||
Err(e) => {
|
||||
info!("Table id {:?} already occupied", e);
|
||||
Err(backoff::Error::transient(
|
||||
BumpTableIdSnafu {
|
||||
msg: "Table id occupied",
|
||||
}
|
||||
.build(),
|
||||
))
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
error!(e; "Failed to CAS table id");
|
||||
Err(backoff::Error::permanent(
|
||||
BumpTableIdSnafu {
|
||||
msg: format!("Failed to perform CAS operation: {:?}", e),
|
||||
}
|
||||
.build(),
|
||||
))
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let retry_policy: ExponentialBackoff = ExponentialBackoffBuilder::new()
|
||||
.with_initial_interval(Duration::from_millis(4))
|
||||
.with_multiplier(2.0)
|
||||
.with_max_interval(Duration::from_millis(1000))
|
||||
.with_max_elapsed_time(Some(Duration::from_millis(3000)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(retry_policy, op).await.map_err(|e| {
|
||||
BumpTableIdSnafu {
|
||||
msg: format!(
|
||||
"Bump table id exceeds max fail times, last error msg: {:?}",
|
||||
e
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})
|
||||
}
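With the policy above, retries are spaced roughly 4 ms, 8 ms, 16 ms, ... apart (doubling each time, capped at 1 s per wait) and the whole bump gives up after about 3 s of elapsed time. A minimal, self-contained sketch of the same retry shape with the `backoff` crate; the flaky closure and counts here are illustrative only, not part of the catalog code:

use std::sync::atomic::{AtomicU32, Ordering};
use std::time::Duration;

use backoff::{future::retry, Error as BackoffError, ExponentialBackoffBuilder};

// Illustrative sketch: retry a step that "loses the CAS" on its first two attempts.
async fn bump_with_backoff() -> Result<u32, &'static str> {
    let policy = ExponentialBackoffBuilder::new()
        .with_initial_interval(Duration::from_millis(4))
        .with_multiplier(2.0)
        .with_max_interval(Duration::from_millis(1000))
        .with_max_elapsed_time(Some(Duration::from_millis(3000)))
        .build();

    let attempts = AtomicU32::new(0);
    retry(policy, || async {
        if attempts.fetch_add(1, Ordering::Relaxed) < 2 {
            // Transient errors are retried according to the policy above.
            Err(BackoffError::transient("cas lost"))
        } else {
            Ok(attempts.load(Ordering::Relaxed))
        }
    })
    .await
}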
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
@@ -427,7 +502,7 @@ impl CatalogList for RemoteCatalogManager {
|
||||
.set(
|
||||
key.as_bytes(),
|
||||
&CatalogValue {}
|
||||
.to_bytes()
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
@@ -456,17 +531,15 @@ impl CatalogList for RemoteCatalogManager {
|
||||
|
||||
pub struct RemoteCatalogProvider {
|
||||
catalog_name: String,
|
||||
node_id: u64,
|
||||
backend: KvBackendRef,
|
||||
schemas: Arc<ArcSwap<HashMap<String, SchemaProviderRef>>>,
|
||||
mutex: Arc<Mutex<()>>,
|
||||
}
|
||||
|
||||
impl RemoteCatalogProvider {
|
||||
pub fn new(catalog_name: String, node_id: u64, backend: KvBackendRef) -> Self {
|
||||
pub fn new(catalog_name: String, backend: KvBackendRef) -> Self {
|
||||
Self {
|
||||
catalog_name,
|
||||
node_id,
|
||||
backend,
|
||||
schemas: Default::default(),
|
||||
mutex: Default::default(),
|
||||
@@ -477,7 +550,6 @@ impl RemoteCatalogProvider {
|
||||
SchemaKey {
|
||||
catalog_name: self.catalog_name.clone(),
|
||||
schema_name: schema_name.as_ref().to_string(),
|
||||
node_id: self.node_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -508,10 +580,11 @@ impl CatalogProvider for RemoteCatalogProvider {
|
||||
.set(
|
||||
key.as_bytes(),
|
||||
&SchemaValue {}
|
||||
.to_bytes()
|
||||
.as_bytes()
|
||||
.context(InvalidCatalogValueSnafu)?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let prev_schemas = schemas.load();
|
||||
let mut new_schemas = HashMap::with_capacity(prev_schemas.len() + 1);
|
||||
new_schemas.clone_from(&prev_schemas);
|
||||
@@ -529,6 +602,16 @@ impl CatalogProvider for RemoteCatalogProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse u8 slice to `TableId`
|
||||
fn parse_table_id(val: &[u8]) -> Result<TableId> {
|
||||
Ok(TableId::from_le_bytes(val.try_into().map_err(|_| {
|
||||
ParseTableIdSnafu {
|
||||
data: format!("{:?}", val),
|
||||
}
|
||||
.build()
|
||||
})?))
|
||||
}
|
||||
|
||||
pub struct RemoteSchemaProvider {
|
||||
catalog_name: String,
|
||||
schema_name: String,
|
||||
@@ -555,16 +638,11 @@ impl RemoteSchemaProvider {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_table_key(
|
||||
&self,
|
||||
table_name: impl AsRef<str>,
|
||||
table_version: TableVersion,
|
||||
) -> TableKey {
|
||||
TableKey {
|
||||
fn build_regional_table_key(&self, table_name: impl AsRef<str>) -> TableRegionalKey {
|
||||
TableRegionalKey {
|
||||
catalog_name: self.catalog_name.clone(),
|
||||
schema_name: self.schema_name.clone(),
|
||||
table_name: table_name.as_ref().to_string(),
|
||||
version: table_version,
|
||||
node_id: self.node_id,
|
||||
}
|
||||
}
|
||||
@@ -586,19 +664,14 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
|
||||
let table_info = table.table_info();
|
||||
let table_version = table_info.ident.version;
|
||||
let table_value = TableValue {
|
||||
meta: table_info.meta.clone().into(),
|
||||
id: table_info.ident.table_id,
|
||||
node_id: self.node_id,
|
||||
regions_ids: vec![],
|
||||
let table_value = TableRegionalValue {
|
||||
version: table_version,
|
||||
regions_ids: table.table_info().meta.region_numbers.clone(),
|
||||
};
|
||||
let backend = self.backend.clone();
|
||||
let mutex = self.mutex.clone();
|
||||
let tables = self.tables.clone();
|
||||
|
||||
let table_key = self
|
||||
.build_table_key(name.clone(), table_version)
|
||||
.to_string();
|
||||
let table_key = self.build_regional_table_key(&name).to_string();
|
||||
|
||||
let prev = std::thread::spawn(move || {
|
||||
common_runtime::block_on_read(async move {
|
||||
@@ -628,18 +701,11 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
}
|
||||
|
||||
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
let table_version = match self.tables.load().get(name) {
|
||||
None => return Ok(None),
|
||||
Some(t) => t.table_info().ident.version,
|
||||
};
|
||||
|
||||
let table_name = name.to_string();
|
||||
let table_key = self.build_table_key(&table_name, table_version).to_string();
|
||||
|
||||
let table_key = self.build_regional_table_key(&table_name).to_string();
|
||||
let backend = self.backend.clone();
|
||||
let mutex = self.mutex.clone();
|
||||
let tables = self.tables.clone();
|
||||
|
||||
let prev = std::thread::spawn(move || {
|
||||
common_runtime::block_on_read(async move {
|
||||
let _guard = mutex.lock().await;
|
||||
@@ -667,3 +733,17 @@ impl SchemaProvider for RemoteSchemaProvider {
|
||||
Ok(self.tables.load().contains_key(name))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_table_id() {
|
||||
assert_eq!(12, parse_table_id(&12_i32.to_le_bytes()).unwrap());
|
||||
let mut data = vec![];
|
||||
data.extend_from_slice(&12_i32.to_le_bytes());
|
||||
data.push(0);
|
||||
assert!(parse_table_id(&data).is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,6 +96,7 @@ impl SystemCatalogTable {
|
||||
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
|
||||
desc: Some("System catalog table".to_string()),
|
||||
schema: schema.clone(),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
|
||||
create_if_not_exists: true,
|
||||
table_options: HashMap::new(),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
@@ -68,6 +69,34 @@ impl KvBackend for MockKvBackend {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn compare_and_set(
|
||||
&self,
|
||||
key: &[u8],
|
||||
expect: &[u8],
|
||||
val: &[u8],
|
||||
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
|
||||
let mut map = self.map.write().await;
|
||||
let existing = map.entry(key.to_vec());
|
||||
match existing {
|
||||
Entry::Vacant(e) => {
|
||||
if expect.is_empty() {
|
||||
e.insert(val.to_vec());
|
||||
Ok(Ok(()))
|
||||
} else {
|
||||
Ok(Err(None))
|
||||
}
|
||||
}
|
||||
Entry::Occupied(mut existing) => {
|
||||
if existing.get() == expect {
|
||||
existing.insert(val.to_vec());
|
||||
Ok(Ok(()))
|
||||
} else {
|
||||
Ok(Err(Some(existing.get().clone())))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
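The mock backend gives compare_and_set a simple contract: an empty `expect` means "insert only if the key is absent", and on a mismatch the current value is handed back so the caller (for example `next_table_id`) can refresh and retry. A small usage sketch; the keys and values are made up:

// Illustrative sketch of the CAS contract implemented above.
async fn cas_demo(backend: &MockKvBackend) {
    // Key absent + empty expectation -> the value is inserted.
    assert!(backend.compare_and_set(b"k", b"", b"1").await.unwrap().is_ok());

    // Stale expectation -> Err carries the current value for the retry path.
    let stale = backend.compare_and_set(b"k", b"0", b"2").await.unwrap();
    assert_eq!(Err(Some(b"1".to_vec())), stale);

    // Matching expectation -> the swap succeeds.
    assert!(backend.compare_and_set(b"k", b"1", b"2").await.unwrap().is_ok());
}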
|
||||
|
||||
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
|
||||
let start = key.to_vec();
|
||||
let end = end.to_vec();
|
||||
|
||||
@@ -12,7 +12,7 @@ mod tests {
|
||||
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
|
||||
};
|
||||
use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use datatypes::schema::Schema;
|
||||
use futures_util::StreamExt;
|
||||
@@ -24,19 +24,17 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_backend() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let node_id = 42;
|
||||
let backend = MockKvBackend::default();
|
||||
|
||||
let default_catalog_key = CatalogKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
node_id,
|
||||
}
|
||||
.to_string();
|
||||
|
||||
backend
|
||||
.set(
|
||||
default_catalog_key.as_bytes(),
|
||||
&CatalogValue {}.to_bytes().unwrap(),
|
||||
&CatalogValue {}.as_bytes().unwrap(),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -44,11 +42,10 @@ mod tests {
|
||||
let schema_key = SchemaKey {
|
||||
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
node_id,
|
||||
}
|
||||
.to_string();
|
||||
backend
|
||||
.set(schema_key.as_bytes(), &SchemaValue {}.to_bytes().unwrap())
|
||||
.set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -59,7 +56,7 @@ mod tests {
|
||||
res.insert(String::from_utf8_lossy(&kv.0).to_string());
|
||||
}
|
||||
assert_eq!(
|
||||
vec!["__c-greptime-42".to_string()],
|
||||
vec!["__c-greptime".to_string()],
|
||||
res.into_iter().collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
@@ -114,6 +111,7 @@ mod tests {
|
||||
table_name: table_name.clone(),
|
||||
desc: None,
|
||||
schema: table_schema.clone(),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
@@ -154,7 +152,7 @@ mod tests {
|
||||
.schema(DEFAULT_SCHEMA_NAME)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(Vec::<String>::new(), default_schema.table_names().unwrap());
|
||||
assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());
|
||||
|
||||
// register a new table with a nonexistent catalog
|
||||
let catalog_name = DEFAULT_CATALOG_NAME.to_string();
|
||||
@@ -173,6 +171,7 @@ mod tests {
|
||||
table_name: table_name.clone(),
|
||||
desc: None,
|
||||
schema: table_schema.clone(),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
@@ -188,7 +187,14 @@ mod tests {
|
||||
table,
|
||||
};
|
||||
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert_eq!(vec![table_name], default_schema.table_names().unwrap());
|
||||
assert_eq!(
|
||||
HashSet::from([table_name, "numbers".to_string()]),
|
||||
default_schema
|
||||
.table_names()
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.collect::<HashSet<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -200,7 +206,6 @@ mod tests {
|
||||
let schema_name = "nonexistent_schema".to_string();
|
||||
let catalog = Arc::new(RemoteCatalogProvider::new(
|
||||
catalog_name.clone(),
|
||||
node_id,
|
||||
backend.clone(),
|
||||
));
|
||||
|
||||
@@ -225,6 +230,7 @@ mod tests {
|
||||
table_name: "".to_string(),
|
||||
desc: None,
|
||||
schema: Arc::new(Schema::new(vec![])),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: false,
|
||||
table_options: Default::default(),
|
||||
@@ -271,4 +277,19 @@ mod tests {
|
||||
new_catalog.schema_names().unwrap().into_iter().collect()
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_next_table_id() {
|
||||
let node_id = 42;
|
||||
let (_, _, catalog_manager) = prepare_components(node_id).await;
|
||||
assert_eq!(
|
||||
MIN_USER_TABLE_ID,
|
||||
catalog_manager.next_table_id().await.unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
MIN_USER_TABLE_ID + 1,
|
||||
catalog_manager.next_table_id().await.unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ mod database;
|
||||
mod error;
|
||||
pub mod load_balance;
|
||||
|
||||
pub use api;
|
||||
|
||||
pub use self::{
|
||||
client::Client,
|
||||
database::{Database, ObjectResult, Select},
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name = "cmd"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
default-run = "greptime"
|
||||
|
||||
[[bin]]
|
||||
name = "greptime"
|
||||
@@ -10,7 +11,9 @@ path = "src/bin/greptime.rs"
|
||||
[dependencies]
|
||||
clap = { version = "3.1", features = ["derive"] }
|
||||
common-error = { path = "../common/error" }
|
||||
common-telemetry = { path = "../common/telemetry", features = ["deadlock_detection"] }
|
||||
common-telemetry = { path = "../common/telemetry", features = [
|
||||
"deadlock_detection",
|
||||
] }
|
||||
datanode = { path = "../datanode" }
|
||||
frontend = { path = "../frontend" }
|
||||
futures = "0.3"
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use clap::Parser;
|
||||
use common_telemetry::logging;
|
||||
use datanode::datanode::{Datanode, DatanodeOptions};
|
||||
use datanode::datanode::{Datanode, DatanodeOptions, Mode};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{Error, Result, StartDatanodeSnafu};
|
||||
use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
|
||||
use crate::toml_loader;
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -33,6 +33,8 @@ impl SubCommand {
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct StartCommand {
|
||||
#[clap(long)]
|
||||
node_id: Option<u64>,
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
@@ -41,6 +43,8 @@ struct StartCommand {
|
||||
mysql_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
postgres_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
metasrv_addr: Option<String>,
|
||||
#[clap(short, long)]
|
||||
config_file: Option<String>,
|
||||
}
|
||||
@@ -84,6 +88,31 @@ impl TryFrom<StartCommand> for DatanodeOptions {
|
||||
opts.postgres_addr = addr;
|
||||
}
|
||||
|
||||
match (cmd.metasrv_addr, cmd.node_id) {
|
||||
(Some(meta_addr), Some(node_id)) => {
|
||||
// Running mode is only set to Distributed when
|
||||
// both metasrv addr and node id are set in
|
||||
// command-line options
|
||||
opts.meta_client_opts.metasrv_addr = meta_addr;
|
||||
opts.node_id = node_id;
|
||||
opts.mode = Mode::Distributed;
|
||||
}
|
||||
(None, None) => {
|
||||
opts.mode = Mode::Standalone;
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
return MissingConfigSnafu {
|
||||
msg: "Missing metasrv address option",
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return MissingConfigSnafu {
|
||||
msg: "Missing node id option",
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
Ok(opts)
|
||||
}
|
||||
}
|
||||
@@ -97,10 +126,12 @@ mod tests {
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let cmd = StartCommand {
|
||||
node_id: None,
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
@@ -112,6 +143,13 @@ mod tests {
|
||||
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
|
||||
assert_eq!("0.0.0.0:3306".to_string(), options.mysql_addr);
|
||||
assert_eq!(4, options.mysql_runtime_size);
|
||||
assert_eq!(
|
||||
"1.1.1.1:3002".to_string(),
|
||||
options.meta_client_opts.metasrv_addr
|
||||
);
|
||||
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
|
||||
assert_eq!(3000, options.meta_client_opts.timeout_millis);
|
||||
assert!(options.meta_client_opts.tcp_nodelay);
|
||||
|
||||
assert_eq!("0.0.0.0:5432".to_string(), options.postgres_addr);
|
||||
assert_eq!(4, options.postgres_runtime_size);
|
||||
@@ -122,4 +160,58 @@ mod tests {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_cmd() {
|
||||
assert_eq!(
|
||||
Mode::Standalone,
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None
|
||||
})
|
||||
.unwrap()
|
||||
.mode
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Mode::Distributed,
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None
|
||||
})
|
||||
.unwrap()
|
||||
.mode
|
||||
);
|
||||
|
||||
assert!(DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None,
|
||||
})
|
||||
.is_err());
|
||||
assert!(DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None,
|
||||
})
|
||||
.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,9 @@ pub enum Error {
|
||||
source: toml::de::Error,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Missing config, msg: {}", msg))]
|
||||
MissingConfig { msg: String, backtrace: Backtrace },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -45,7 +48,9 @@ impl ErrorExt for Error {
|
||||
Error::StartDatanode { source } => source.status_code(),
|
||||
Error::StartFrontend { source } => source.status_code(),
|
||||
Error::StartMetaServer { source } => source.status_code(),
|
||||
Error::ReadConfig { .. } | Error::ParseConfig { .. } => StatusCode::InvalidArguments,
|
||||
Error::ReadConfig { .. } | Error::ParseConfig { .. } | Error::MissingConfig { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ impl SubCommand {
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct StartCommand {
|
||||
pub struct StartCommand {
|
||||
#[clap(long)]
|
||||
http_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
|
||||
@@ -14,4 +14,6 @@ pub const SCRIPTS_TABLE_ID: u32 = 1;
|
||||
|
||||
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
pub(crate) const TABLE_KEY_PREFIX: &str = "__t";
|
||||
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::str::FromStr;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
@@ -7,29 +7,38 @@ use serde::{Deserialize, Serialize, Serializer};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::{RawTableMeta, TableId, TableVersion};
|
||||
|
||||
use crate::consts::{CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_KEY_PREFIX};
|
||||
use crate::consts::{
|
||||
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
|
||||
};
|
||||
use crate::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, ParseNodeIdSnafu,
|
||||
SerializeCatalogEntryValueSnafu,
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
|
||||
lazy_static! {
|
||||
static ref CATALOG_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{}-([a-zA-Z_]+)-([0-9]+)$", CATALOG_KEY_PREFIX)).unwrap();
|
||||
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
|
||||
SCHEMA_KEY_PREFIX
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)-([0-9]+)$",
|
||||
TABLE_KEY_PREFIX
|
||||
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)$",
|
||||
TABLE_GLOBAL_KEY_PREFIX
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
@@ -42,26 +51,92 @@ pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
|
||||
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
|
||||
}
|
||||
|
||||
pub fn build_table_prefix(catalog_name: impl AsRef<str>, schema_name: impl AsRef<str>) -> String {
|
||||
pub fn build_table_global_prefix(
|
||||
catalog_name: impl AsRef<str>,
|
||||
schema_name: impl AsRef<str>,
|
||||
) -> String {
|
||||
format!(
|
||||
"{}-{}-{}-",
|
||||
TABLE_KEY_PREFIX,
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
catalog_name.as_ref(),
|
||||
schema_name.as_ref()
|
||||
)
|
||||
}
|
||||
|
||||
pub struct TableKey {
|
||||
pub fn build_table_regional_prefix(
|
||||
catalog_name: impl AsRef<str>,
|
||||
schema_name: impl AsRef<str>,
|
||||
) -> String {
|
||||
format!(
|
||||
"{}-{}-{}-",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
catalog_name.as_ref(),
|
||||
schema_name.as_ref()
|
||||
)
|
||||
}
|
||||
|
||||
/// Table global info has only one key across all datanodes, so it does not have a `node_id` field.
|
||||
pub struct TableGlobalKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
pub table_name: String,
|
||||
}
|
||||
|
||||
impl Display for TableGlobalKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.schema_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.table_name)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableGlobalKey {
|
||||
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = TABLE_GLOBAL_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
|
||||
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
table_name: captures[3].to_string(),
|
||||
})
|
||||
}
|
||||
}
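Concretely, the global key renders as `__tg-{catalog}-{schema}-{table}` and `parse` reverses it; a quick round-trip sketch with made-up names:

// Illustrative round trip of the global table key format (names are examples only).
let key = TableGlobalKey {
    catalog_name: "greptime".to_string(),
    schema_name: "public".to_string(),
    table_name: "demo".to_string(),
};
assert_eq!("__tg-greptime-public-demo", key.to_string());
assert_eq!("demo", TableGlobalKey::parse(key.to_string()).unwrap().table_name);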
|
||||
|
||||
/// Table global info contains necessary info for a datanode to create table regions, including
|
||||
/// table id, table meta (schema, ...) and region id allocation across datanodes.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableGlobalValue {
|
||||
/// Table id is the same across all datanodes.
|
||||
pub id: TableId,
|
||||
/// Id of the datanode that created the global table info kv. Only for debugging.
|
||||
pub node_id: u64,
|
||||
/// Allocation of region ids across all datanodes: node id -> region ids.
|
||||
pub regions_id_map: HashMap<u64, Vec<u32>>,
|
||||
/// Raw table metadata, e.g. the schema and primary key indices.
|
||||
pub meta: RawTableMeta,
|
||||
/// Partition rules for table
|
||||
pub partition_rules: String,
|
||||
}
|
||||
|
||||
/// Table regional info that varies between datanodes, so it contains a `node_id` field.
|
||||
pub struct TableRegionalKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
pub table_name: String,
|
||||
pub version: TableVersion,
|
||||
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl Display for TableKey {
|
||||
impl Display for TableRegionalKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(TABLE_KEY_PREFIX)?;
|
||||
f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
@@ -69,68 +144,47 @@ impl Display for TableKey {
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.table_name)?;
|
||||
f.write_str("-")?;
|
||||
f.serialize_u64(self.version)?;
|
||||
f.write_str("-")?;
|
||||
f.serialize_u64(self.node_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl TableKey {
|
||||
impl TableRegionalKey {
|
||||
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
|
||||
let key = s.as_ref();
|
||||
let captures = TABLE_KEY_PATTERN
|
||||
let captures = TABLE_REGIONAL_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 6, InvalidCatalogSnafu { key });
|
||||
|
||||
let version =
|
||||
u64::from_str(&captures[4]).map_err(|_| InvalidCatalogSnafu { key }.build())?;
|
||||
let node_id_str = captures[5].to_string();
|
||||
let node_id = u64::from_str(&node_id_str)
|
||||
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
|
||||
ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
|
||||
let node_id = captures[4]
|
||||
.to_string()
|
||||
.parse()
|
||||
.map_err(|_| InvalidCatalogSnafu { key }.build())?;
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
table_name: captures[3].to_string(),
|
||||
version,
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct TableValue {
|
||||
pub id: TableId,
|
||||
pub node_id: u64,
|
||||
pub regions_ids: Vec<u64>,
|
||||
pub meta: RawTableMeta,
|
||||
}
|
||||
|
||||
impl TableValue {
|
||||
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
|
||||
serde_json::from_str(s.as_ref())
|
||||
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
.into_bytes())
|
||||
}
|
||||
/// Regional table info of a specific datanode, including the table version on that datanode and
|
||||
/// region ids allocated by metasrv.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct TableRegionalValue {
|
||||
pub version: TableVersion,
|
||||
pub regions_ids: Vec<u32>,
|
||||
}
|
||||
|
||||
pub struct CatalogKey {
|
||||
pub catalog_name: String,
|
||||
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl Display for CatalogKey {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(CATALOG_KEY_PREFIX)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.serialize_u64(self.node_id)
|
||||
f.write_str(&self.catalog_name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,15 +194,9 @@ impl CatalogKey {
|
||||
let captures = CATALOG_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
|
||||
|
||||
let node_id_str = captures[2].to_string();
|
||||
let node_id = u64::from_str(&node_id_str)
|
||||
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
|
||||
|
||||
ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -156,18 +204,9 @@ impl CatalogKey {
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct CatalogValue;
|
||||
|
||||
impl CatalogValue {
|
||||
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
.into_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SchemaKey {
|
||||
pub catalog_name: String,
|
||||
pub schema_name: String,
|
||||
pub node_id: u64,
|
||||
}
|
||||
|
||||
impl Display for SchemaKey {
|
||||
@@ -176,9 +215,7 @@ impl Display for SchemaKey {
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.catalog_name)?;
|
||||
f.write_str("-")?;
|
||||
f.write_str(&self.schema_name)?;
|
||||
f.write_str("-")?;
|
||||
f.serialize_u64(self.node_id)
|
||||
f.write_str(&self.schema_name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,16 +225,10 @@ impl SchemaKey {
|
||||
let captures = SCHEMA_KEY_PATTERN
|
||||
.captures(key)
|
||||
.context(InvalidCatalogSnafu { key })?;
|
||||
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
|
||||
|
||||
let node_id_str = captures[3].to_string();
|
||||
let node_id = u64::from_str(&node_id_str)
|
||||
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
|
||||
|
||||
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
|
||||
Ok(Self {
|
||||
catalog_name: captures[1].to_string(),
|
||||
schema_name: captures[2].to_string(),
|
||||
node_id,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -205,14 +236,32 @@ impl SchemaKey {
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SchemaValue;
|
||||
|
||||
impl SchemaValue {
|
||||
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
.into_bytes())
|
||||
}
|
||||
macro_rules! define_catalog_value {
|
||||
( $($val_ty: ty), *) => {
|
||||
$(
|
||||
impl $val_ty {
|
||||
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
|
||||
serde_json::from_str(s.as_ref())
|
||||
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
.into_bytes())
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
define_catalog_value!(
|
||||
TableRegionalValue,
|
||||
TableGlobalValue,
|
||||
CatalogValue,
|
||||
SchemaValue
|
||||
);
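The macro stamps out the same JSON-based `parse`/`as_bytes` pair for each listed value type, so all of them round-trip through the kv backend uniformly; for instance, a short sketch:

// Sketch: any type listed in define_catalog_value! round-trips through JSON.
let bytes = SchemaValue {}.as_bytes().unwrap();
let text = String::from_utf8(bytes).unwrap();
let _value = SchemaValue::parse(&text).unwrap();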
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
@@ -222,32 +271,28 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_parse_catalog_key() {
|
||||
let key = "__c-C-2";
|
||||
let key = "__c-C";
|
||||
let catalog_key = CatalogKey::parse(key).unwrap();
|
||||
assert_eq!("C", catalog_key.catalog_name);
|
||||
assert_eq!(2, catalog_key.node_id);
|
||||
assert_eq!(key, catalog_key.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_schema_key() {
|
||||
let key = "__s-C-S-3";
|
||||
let key = "__s-C-S";
|
||||
let schema_key = SchemaKey::parse(key).unwrap();
|
||||
assert_eq!("C", schema_key.catalog_name);
|
||||
assert_eq!("S", schema_key.schema_name);
|
||||
assert_eq!(3, schema_key.node_id);
|
||||
assert_eq!(key, schema_key.to_string());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_table_key() {
|
||||
let key = "__t-C-S-T-42-1";
|
||||
let entry = TableKey::parse(key).unwrap();
|
||||
let key = "__tg-C-S-T";
|
||||
let entry = TableGlobalKey::parse(key).unwrap();
|
||||
assert_eq!("C", entry.catalog_name);
|
||||
assert_eq!("S", entry.schema_name);
|
||||
assert_eq!("T", entry.table_name);
|
||||
assert_eq!(1, entry.node_id);
|
||||
assert_eq!(42, entry.version);
|
||||
assert_eq!(key, &entry.to_string());
|
||||
}
|
||||
|
||||
@@ -256,8 +301,8 @@ mod tests {
|
||||
assert_eq!("__c-", build_catalog_prefix());
|
||||
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
|
||||
assert_eq!(
|
||||
"__t-CATALOG-SCHEMA-",
|
||||
build_table_prefix("CATALOG", "SCHEMA")
|
||||
"__tg-CATALOG-SCHEMA-",
|
||||
build_table_global_prefix("CATALOG", "SCHEMA")
|
||||
);
|
||||
}
|
||||
|
||||
@@ -278,16 +323,18 @@ mod tests {
|
||||
engine_options: Default::default(),
|
||||
value_indices: vec![2, 3],
|
||||
options: Default::default(),
|
||||
region_numbers: vec![1],
|
||||
};
|
||||
|
||||
let value = TableValue {
|
||||
let value = TableGlobalValue {
|
||||
id: 42,
|
||||
node_id: 32,
|
||||
regions_ids: vec![1, 2, 3],
|
||||
node_id: 0,
|
||||
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
|
||||
meta,
|
||||
partition_rules: "{}".to_string(),
|
||||
};
|
||||
let serialized = serde_json::to_string(&value).unwrap();
|
||||
let deserialized = TableValue::parse(&serialized).unwrap();
|
||||
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
|
||||
assert_eq!(value, deserialized);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ pub mod error;
|
||||
mod helper;
|
||||
|
||||
pub use helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableKey, TableValue,
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
|
||||
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
|
||||
@@ -8,6 +8,7 @@ arc-swap = "1.0"
|
||||
chrono-tz = "0.6"
|
||||
common-error = { path = "../error" }
|
||||
common-function-macro = { path = "../function-macro" }
|
||||
common-time = { path = "../time" }
|
||||
common-query = { path = "../query" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod math;
|
||||
pub mod numpy;
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test;
|
||||
mod timestamp;
|
||||
pub mod udf;
|
||||
|
||||
pub use aggregate::MedianAccumulatorCreator;
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
|
||||
use crate::scalars::function::FunctionRef;
|
||||
use crate::scalars::math::MathFunction;
|
||||
use crate::scalars::numpy::NumpyFunction;
|
||||
use crate::scalars::timestamp::TimestampFunction;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct FunctionRegistry {
|
||||
@@ -31,6 +32,10 @@ impl FunctionRegistry {
|
||||
.insert(func.name(), func);
|
||||
}
|
||||
|
||||
pub fn get_aggr_function(&self, name: &str) -> Option<AggregateFunctionMetaRef> {
|
||||
self.aggregate_functions.read().unwrap().get(name).cloned()
|
||||
}
|
||||
|
||||
pub fn get_function(&self, name: &str) -> Option<FunctionRef> {
|
||||
self.functions.read().unwrap().get(name).cloned()
|
||||
}
|
||||
@@ -54,6 +59,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
|
||||
|
||||
MathFunction::register(&function_registry);
|
||||
NumpyFunction::register(&function_registry);
|
||||
TimestampFunction::register(&function_registry);
|
||||
|
||||
AggregateFunctions::register(&function_registry);
|
||||
|
||||
|
||||
116
src/common/function/src/scalars/timestamp/from_unixtime.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
//! from_unixtime function.
|
||||
/// TODO(dennis) It can be removed after we upgrade datafusion.
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::compute::arithmetics;
|
||||
use arrow::datatypes::DataType as ArrowDatatype;
|
||||
use arrow::scalar::PrimitiveScalar;
|
||||
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::TimestampVector;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct FromUnixtimeFunction;
|
||||
|
||||
const NAME: &str = "from_unixtime";
|
||||
|
||||
impl Function for FromUnixtimeFunction {
|
||||
fn name(&self) -> &str {
|
||||
NAME
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::timestamp_millis_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::uniform(
|
||||
1,
|
||||
vec![ConcreteDataType::int64_datatype()],
|
||||
Volatility::Immutable,
|
||||
)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
match columns[0].data_type() {
|
||||
ConcreteDataType::Int64(_) => {
|
||||
let array = columns[0].to_arrow_array();
|
||||
// Our timestamp vector's time unit is millisecond
|
||||
let array = arithmetics::mul_scalar(
|
||||
&*array,
|
||||
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
|
||||
);
|
||||
|
||||
Ok(Arc::new(
|
||||
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
|
||||
data_type: ArrowDatatype::Int64,
|
||||
})?,
|
||||
))
|
||||
}
|
||||
_ => UnsupportedInputDataTypeSnafu {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail()
|
||||
.map_err(|e| e.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FromUnixtimeFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "FROM_UNIXTIME")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::Int64Vector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_from_unixtime() {
|
||||
let f = FromUnixtimeFunction::default();
|
||||
assert_eq!("from_unixtime", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
|
||||
assert!(matches!(f.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(1, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == vec![ConcreteDataType::int64_datatype()]
|
||||
));
|
||||
|
||||
let times = vec![Some(1494410783), None, Some(1494410983)];
|
||||
let args: Vec<VectorRef> = vec![Arc::new(Int64Vector::from(times.clone()))];
|
||||
|
||||
let vector = f.eval(FunctionContext::default(), &args).unwrap();
|
||||
assert_eq!(3, vector.len());
|
||||
for (i, t) in times.iter().enumerate() {
|
||||
let v = vector.get(i);
|
||||
if i == 1 {
|
||||
assert_eq!(Value::Null, v);
|
||||
continue;
|
||||
}
|
||||
match v {
|
||||
Value::Timestamp(ts) => {
|
||||
assert_eq!(ts.value(), t.unwrap() * 1000);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
14
src/common/function/src/scalars/timestamp/mod.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use std::sync::Arc;
|
||||
mod from_unixtime;
|
||||
|
||||
use from_unixtime::FromUnixtimeFunction;
|
||||
|
||||
use crate::scalars::function_registry::FunctionRegistry;
|
||||
|
||||
pub(crate) struct TimestampFunction;
|
||||
|
||||
impl TimestampFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(FromUnixtimeFunction::default()));
|
||||
}
|
||||
}
|
||||
@@ -9,6 +9,7 @@ async-trait = "0.1"
|
||||
common-base = { path = "../base" }
|
||||
common-error = { path = "../error" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
@@ -19,3 +20,11 @@ tower = "0.4"
|
||||
package = "arrow2"
|
||||
version = "0.10"
|
||||
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.4"
|
||||
rand = "0.8"
|
||||
|
||||
[[bench]]
|
||||
name = "bench_main"
|
||||
harness = false
|
||||
|
||||
7
src/common/grpc/benches/bench_main.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
use criterion::criterion_main;
|
||||
|
||||
mod channel_manager;
|
||||
|
||||
criterion_main! {
|
||||
channel_manager::benches
|
||||
}
|
||||
34
src/common/grpc/benches/channel_manager.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use common_grpc::channel_manager::ChannelManager;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
#[tokio::main]
|
||||
async fn do_bench_channel_manager() {
|
||||
let m = ChannelManager::new();
|
||||
let task_count = 8;
|
||||
let mut joins = Vec::with_capacity(task_count);
|
||||
|
||||
for _ in 0..task_count {
|
||||
let m_clone = m.clone();
|
||||
let join = tokio::spawn(async move {
|
||||
for _ in 0..10000 {
|
||||
let idx = rand::random::<usize>() % 100;
|
||||
let ret = m_clone.get(format!("{}", idx));
|
||||
assert!(ret.is_ok());
|
||||
}
|
||||
});
|
||||
joins.push(join);
|
||||
}
|
||||
|
||||
for join in joins {
|
||||
let _ = join.await;
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_channel_manager(c: &mut Criterion) {
|
||||
c.bench_function("bench channel manager", |b| {
|
||||
b.iter(do_bench_channel_manager);
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_channel_manager);
|
||||
criterion_main!(benches);
|
||||
@@ -1,8 +1,10 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
use dashmap::mapref::entry::Entry;
|
||||
use dashmap::DashMap;
|
||||
use snafu::ResultExt;
|
||||
use tonic::transport::Channel as InnerChannel;
|
||||
use tonic::transport::Endpoint;
|
||||
@@ -17,7 +19,7 @@ const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ChannelManager {
|
||||
config: ChannelConfig,
|
||||
pool: Arc<Mutex<Pool>>,
|
||||
pool: Arc<Pool>,
|
||||
}
|
||||
|
||||
impl Default for ChannelManager {
|
||||
@@ -32,17 +34,14 @@ impl ChannelManager {
|
||||
}
|
||||
|
||||
pub fn with_config(config: ChannelConfig) -> Self {
|
||||
let pool = Pool {
|
||||
channels: HashMap::default(),
|
||||
};
|
||||
let pool = Arc::new(Mutex::new(pool));
|
||||
let pool = Arc::new(Pool::default());
|
||||
let cloned_pool = pool.clone();
|
||||
|
||||
common_runtime::spawn_bg(async move {
|
||||
common_runtime::spawn_bg(async {
|
||||
recycle_channel_in_loop(cloned_pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
|
||||
});
|
||||
|
||||
Self { pool, config }
|
||||
Self { config, pool }
|
||||
}
|
||||
|
||||
pub fn config(&self) -> &ChannelConfig {
|
||||
@@ -51,23 +50,30 @@ impl ChannelManager {
|
||||
|
||||
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
|
||||
let addr = addr.as_ref();
|
||||
let mut pool = self.pool.lock().unwrap();
|
||||
if let Some(ch) = pool.get_mut(addr) {
|
||||
ch.access += 1;
|
||||
return Ok(ch.channel.clone());
|
||||
// It will acquire the read lock.
|
||||
if let Some(inner_ch) = self.pool.get(addr) {
|
||||
return Ok(inner_ch);
|
||||
}
|
||||
|
||||
let endpoint = self.build_endpoint(addr)?;
|
||||
// It will acquire the write lock.
|
||||
let entry = match self.pool.entry(addr.to_string()) {
|
||||
Entry::Occupied(entry) => {
|
||||
entry.get().increase_access();
|
||||
entry.into_ref()
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
let endpoint = self.build_endpoint(addr)?;
|
||||
let inner_channel = endpoint.connect_lazy();
|
||||
|
||||
let inner_channel = endpoint.connect_lazy();
|
||||
let channel = Channel {
|
||||
channel: inner_channel.clone(),
|
||||
access: 1,
|
||||
use_default_connector: true,
|
||||
let channel = Channel {
|
||||
channel: inner_channel,
|
||||
access: AtomicUsize::new(1),
|
||||
use_default_connector: true,
|
||||
};
|
||||
entry.insert(channel)
|
||||
}
|
||||
};
|
||||
pool.put(addr, channel);
|
||||
|
||||
Ok(inner_channel)
|
||||
Ok(entry.channel.clone())
|
||||
}
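The lookup above is the usual two-phase pattern with `DashMap`: try the cheap read path first, then fall back to `entry()` (which takes the shard's write lock) and handle the case where another caller raced in between. A generic sketch of the same shape, with a plain `String` value standing in for `Channel`:

use dashmap::mapref::entry::Entry;
use dashmap::DashMap;

// Illustrative sketch of the read-then-entry pattern used by ChannelManager::get.
fn get_or_insert(map: &DashMap<String, String>, key: &str) -> String {
    // Fast path: only a read lock on the shard.
    if let Some(v) = map.get(key) {
        return v.clone();
    }
    // Slow path: write lock; another thread may have inserted meanwhile,
    // which is why the Occupied arm still has to be handled.
    match map.entry(key.to_string()) {
        Entry::Occupied(e) => e.get().clone(),
        Entry::Vacant(e) => e.insert(format!("value-for-{key}")).clone(),
    }
}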
|
||||
|
||||
pub fn reset_with_connector<C>(
|
||||
@@ -86,11 +92,10 @@ impl ChannelManager {
|
||||
let inner_channel = endpoint.connect_with_connector_lazy(connector);
|
||||
let channel = Channel {
|
||||
channel: inner_channel.clone(),
|
||||
access: 1,
|
||||
access: AtomicUsize::new(1),
|
||||
use_default_connector: false,
|
||||
};
|
||||
let mut pool = self.pool.lock().unwrap();
|
||||
pool.put(addr, channel);
|
||||
self.pool.put(addr, channel);
|
||||
|
||||
Ok(inner_channel)
|
||||
}
|
||||
@@ -99,8 +104,7 @@ impl ChannelManager {
|
||||
where
|
||||
F: FnMut(&String, &mut Channel) -> bool,
|
||||
{
|
||||
let mut pool = self.pool.lock().unwrap();
|
||||
pool.retain_channel(f);
|
||||
self.pool.retain_channel(f);
|
||||
}
|
||||
|
||||
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
|
||||
@@ -297,39 +301,56 @@ impl ChannelConfig {
|
||||
#[derive(Debug)]
|
||||
pub struct Channel {
|
||||
channel: InnerChannel,
|
||||
access: usize,
|
||||
access: AtomicUsize,
|
||||
use_default_connector: bool,
|
||||
}
|
||||
|
||||
impl Channel {
|
||||
#[inline]
|
||||
pub fn access(&self) -> usize {
|
||||
self.access
|
||||
self.access.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn use_default_connector(&self) -> bool {
|
||||
self.use_default_connector
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn increase_access(&self) {
|
||||
self.access.fetch_add(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
#[derive(Debug)]
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct Pool {
|
||||
channels: HashMap<String, Channel>,
|
||||
channels: DashMap<String, Channel>,
|
||||
}
|
||||
|
||||
impl Pool {
|
||||
#[inline]
|
||||
fn get_mut(&mut self, addr: &str) -> Option<&mut Channel> {
|
||||
self.channels.get_mut(addr)
|
||||
fn get(&self, addr: &str) -> Option<InnerChannel> {
|
||||
let channel = self.channels.get(addr);
|
||||
channel.map(|ch| {
|
||||
ch.increase_access();
|
||||
ch.channel.clone()
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn put(&mut self, addr: &str, channel: Channel) {
|
||||
fn entry(&self, addr: String) -> Entry<String, Channel> {
|
||||
self.channels.entry(addr)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn get_access(&self, addr: &str) -> Option<usize> {
|
||||
let channel = self.channels.get(addr);
|
||||
channel.map(|ch| ch.access())
|
||||
}
|
||||
|
||||
fn put(&self, addr: &str, channel: Channel) {
|
||||
self.channels.insert(addr.to_string(), channel);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn retain_channel<F>(&mut self, f: F)
|
||||
fn retain_channel<F>(&self, f: F)
|
||||
where
|
||||
F: FnMut(&String, &mut Channel) -> bool,
|
||||
{
|
||||
@@ -337,20 +358,12 @@ impl Pool {
|
||||
}
|
||||
}
|
||||
|
||||
async fn recycle_channel_in_loop(pool: Arc<Mutex<Pool>>, interval_secs: u64) {
|
||||
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
|
||||
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let mut pool = pool.lock().unwrap();
|
||||
pool.retain_channel(|_, c| {
|
||||
if c.access == 0 {
|
||||
false
|
||||
} else {
|
||||
c.access = 0;
|
||||
true
|
||||
}
|
||||
})
|
||||
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
|
||||
}
|
||||
}
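`swap(0, ..)` reads and clears the access counter in one atomic step, so a channel survives a recycle tick only if it was used at least once since the previous tick. The same idiom in isolation:

use std::sync::atomic::{AtomicUsize, Ordering};

// Keep the entry only if it was accessed since the last tick; reset the counter either way.
fn used_since_last_tick(access: &AtomicUsize) -> bool {
    access.swap(0, Ordering::Relaxed) != 0
}

With the pool no longer behind a `Mutex`, the single swap also avoids losing an increment that could otherwise land between a separate load and store.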
|
||||
|
||||
@@ -363,10 +376,7 @@ mod tests {
|
||||
#[should_panic]
|
||||
#[test]
|
||||
fn test_invalid_addr() {
|
||||
let pool = Pool {
|
||||
channels: HashMap::default(),
|
||||
};
|
||||
let pool = Arc::new(Mutex::new(pool));
|
||||
let pool = Arc::new(Pool::default());
|
||||
let mgr = ChannelManager {
|
||||
pool,
|
||||
..Default::default()
|
||||
@@ -378,36 +388,31 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_access_count() {
|
||||
let pool = Pool {
|
||||
channels: HashMap::default(),
|
||||
};
|
||||
let pool = Arc::new(Mutex::new(pool));
|
||||
let pool = Arc::new(Pool::default());
|
||||
let config = ChannelConfig::new();
|
||||
let mgr = ChannelManager { pool, config };
|
||||
let mgr = Arc::new(ChannelManager { pool, config });
|
||||
let addr = "test_uri";
|
||||
|
||||
for i in 0..10 {
|
||||
{
|
||||
let _ = mgr.get(addr).unwrap();
|
||||
let mut pool = mgr.pool.lock().unwrap();
|
||||
assert_eq!(i + 1, pool.get_mut(addr).unwrap().access);
|
||||
}
|
||||
let mut joins = Vec::with_capacity(10);
|
||||
for _ in 0..10 {
|
||||
let mgr_clone = mgr.clone();
|
||||
let join = tokio::spawn(async move {
|
||||
for _ in 0..100 {
|
||||
let _ = mgr_clone.get(addr);
|
||||
}
|
||||
});
|
||||
joins.push(join);
|
||||
}
|
||||
for join in joins {
|
||||
join.await.unwrap();
|
||||
}
|
||||
|
||||
let mut pool = mgr.pool.lock().unwrap();
|
||||
assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
|
||||
|
||||
assert_eq!(10, pool.get_mut(addr).unwrap().access);
|
||||
mgr.pool
|
||||
.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);
|
||||
|
||||
pool.retain_channel(|_, c| {
|
||||
if c.access == 0 {
|
||||
false
|
||||
} else {
|
||||
c.access = 0;
|
||||
true
|
||||
}
|
||||
});
|
||||
|
||||
assert_eq!(0, pool.get_mut(addr).unwrap().access);
|
||||
assert_eq!(0, mgr.pool.get_access(addr).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -466,10 +471,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_build_endpoint() {
|
||||
let pool = Pool {
|
||||
channels: HashMap::default(),
|
||||
};
|
||||
let pool = Arc::new(Mutex::new(pool));
|
||||
let pool = Arc::new(Pool::default());
|
||||
let config = ChannelConfig::new()
|
||||
.timeout(Duration::from_secs(3))
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
@@ -493,9 +495,11 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn test_channel_with_connector() {
|
||||
let pool = Pool {
|
||||
channels: HashMap::default(),
|
||||
channels: DashMap::default(),
|
||||
};
|
||||
let pool = Arc::new(Mutex::new(pool));
|
||||
|
||||
let pool = Arc::new(pool);
|
||||
|
||||
let config = ChannelConfig::new();
|
||||
let mgr = ChannelManager { pool, config };
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use arrow::datatypes::DataType as ArrowDatatype;
|
||||
use common_error::prelude::*;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use statrs::StatsError;
|
||||
|
||||
common_error::define_opaque_error!(Error);
|
||||
@@ -17,6 +18,13 @@ pub enum InnerError {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Unsupported input datatypes {:?} in function {}", datatypes, function))]
|
||||
UnsupportedInputDataType {
|
||||
function: String,
|
||||
datatypes: Vec<ConcreteDataType>,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Fail to generate function, source: {}", source))]
|
||||
GenerateFunction {
|
||||
source: StatsError,
|
||||
@@ -116,6 +124,8 @@ impl ErrorExt for InnerError {
|
||||
| InnerError::GeneralDataFusion { .. }
|
||||
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
|
||||
|
||||
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,9 +4,12 @@ mod recordbatch;
|
||||
pub mod util;
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use datafusion::arrow_print;
|
||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::prelude::VectorRef;
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
use error::Result;
|
||||
use futures::task::{Context, Poll};
|
||||
use futures::Stream;
|
||||
@@ -54,6 +57,35 @@ pub struct RecordBatches {
|
||||
}
|
||||
|
||||
impl RecordBatches {
|
||||
pub fn try_from_columns<I: IntoIterator<Item = VectorRef>>(
|
||||
schema: SchemaRef,
|
||||
columns: I,
|
||||
) -> Result<Self> {
|
||||
let batches = vec![RecordBatch::new(schema.clone(), columns)?];
|
||||
Ok(Self { schema, batches })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
schema: Arc::new(Schema::new(vec![])),
|
||||
batches: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &RecordBatch> {
|
||||
self.batches.iter()
|
||||
}
|
||||
|
||||
pub fn pretty_print(&self) -> String {
|
||||
arrow_print::write(
|
||||
&self
|
||||
.iter()
|
||||
.map(|x| x.df_recordbatch.clone())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
|
||||
for batch in batches.iter() {
|
||||
ensure!(
|
||||
@@ -124,7 +156,26 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_recordbatches() {
|
||||
fn test_recordbatches_try_from_columns() {
|
||||
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
|
||||
"a",
|
||||
ConcreteDataType::int32_datatype(),
|
||||
false,
|
||||
)]));
|
||||
let result = RecordBatches::try_from_columns(
|
||||
schema.clone(),
|
||||
vec![Arc::new(StringVector::from(vec!["hello", "world"])) as _],
|
||||
);
|
||||
assert!(result.is_err());
|
||||
|
||||
let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
|
||||
let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
|
||||
let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
|
||||
assert_eq!(r.take(), expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_recordbatches_try_new() {
|
||||
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
|
||||
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
|
||||
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
|
||||
@@ -150,6 +201,15 @@ mod tests {
|
||||
);
|
||||
|
||||
let batches = RecordBatches::try_new(schema1.clone(), vec![batch1.clone()]).unwrap();
|
||||
let expected = "\
|
||||
+---+-------+
|
||||
| a | b |
|
||||
+---+-------+
|
||||
| 1 | hello |
|
||||
| 2 | world |
|
||||
+---+-------+";
|
||||
assert_eq!(batches.pretty_print(), expected);
|
||||
|
||||
assert_eq!(schema1, batches.schema());
|
||||
assert_eq!(vec![batch1], batches.take());
|
||||
}
|
||||
|
||||
@@ -377,6 +377,7 @@ mod test {
|
||||
table_name: table_name.to_string(),
|
||||
desc: None,
|
||||
schema: Arc::new(Schema::new(supported_types())),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![],
|
||||
create_if_not_exists: true,
|
||||
table_options: Default::default(),
|
||||
|
||||
@@ -74,7 +74,7 @@ pub trait BucketAligned {
|
||||
|
||||
impl<T: Into<i64>> BucketAligned for T {
|
||||
fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis> {
|
||||
assert!(bucket_duration > 0);
|
||||
assert!(bucket_duration > 0, "{}", bucket_duration);
|
||||
self.into()
|
||||
.checked_div_euclid(bucket_duration)
|
||||
.and_then(|val| val.checked_mul(bucket_duration))
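The alignment is a floor to the start of the bucket: Euclidean division rounds toward negative infinity, so negative timestamps land in the correct bucket instead of being truncated toward zero. A plain-i64 sketch of the same arithmetic:

// Illustrative sketch of the alignment arithmetic (not the trait impl itself).
fn align(ts: i64, bucket: i64) -> Option<i64> {
    ts.checked_div_euclid(bucket).and_then(|v| v.checked_mul(bucket))
}

#[test]
fn align_examples() {
    assert_eq!(Some(5), align(7, 5)); // 7 ms falls in the bucket starting at 5 ms
    assert_eq!(Some(-10), align(-7, 5)); // -7 ms floors to -10 ms, not -5 ms
}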
|
||||
|
||||
@@ -29,6 +29,8 @@ datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
log-store = { path = "../log-store" }
|
||||
meta-client = { path = "../meta-client" }
|
||||
meta-srv = { path = "../meta-srv", features = ["mock"] }
|
||||
metrics = "0.20"
|
||||
object-store = { path = "../object-store" }
|
||||
query = { path = "../query" }
|
||||
|
||||
@@ -20,8 +20,16 @@ impl Default for ObjectStoreConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum Mode {
|
||||
Standalone,
|
||||
Distributed,
|
||||
}
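With `rename_all = "lowercase"`, the serialized spelling is `standalone` / `distributed`, which is what a config file (or any serde format) is expected to contain; a quick sketch using serde_json, which follows the same renaming:

// Sketch of the lowercase spelling serde expects (serde_json used for brevity).
assert_eq!("\"distributed\"", serde_json::to_string(&Mode::Distributed).unwrap());
assert_eq!(Mode::Standalone, serde_json::from_str::<Mode>("\"standalone\"").unwrap());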
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct DatanodeOptions {
|
||||
pub node_id: u64,
|
||||
pub http_addr: String,
|
||||
pub rpc_addr: String,
|
||||
pub rpc_runtime_size: usize,
|
||||
@@ -29,13 +37,16 @@ pub struct DatanodeOptions {
|
||||
pub mysql_runtime_size: usize,
|
||||
pub postgres_addr: String,
|
||||
pub postgres_runtime_size: usize,
|
||||
pub meta_client_opts: MetaClientOpts,
|
||||
pub wal_dir: String,
|
||||
pub storage: ObjectStoreConfig,
|
||||
pub mode: Mode,
|
||||
}
|
||||
|
||||
impl Default for DatanodeOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
node_id: 0,
|
||||
http_addr: "0.0.0.0:3000".to_string(),
|
||||
rpc_addr: "0.0.0.0:3001".to_string(),
|
||||
rpc_runtime_size: 8,
|
||||
@@ -43,8 +54,10 @@ impl Default for DatanodeOptions {
|
||||
mysql_runtime_size: 2,
|
||||
postgres_addr: "0.0.0.0:5432".to_string(),
|
||||
postgres_runtime_size: 2,
|
||||
meta_client_opts: MetaClientOpts::default(),
|
||||
wal_dir: "/tmp/greptimedb/wal".to_string(),
|
||||
storage: ObjectStoreConfig::default(),
|
||||
mode: Mode::Standalone,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,3 +85,23 @@ impl Datanode {
|
||||
self.services.start(&self.opts).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Options for the meta client in a datanode instance.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetaClientOpts {
    pub metasrv_addr: String,
    pub timeout_millis: u64,
    pub connect_timeout_millis: u64,
    pub tcp_nodelay: bool,
}

impl Default for MetaClientOpts {
    fn default() -> Self {
        Self {
            metasrv_addr: "127.0.0.1:3002".to_string(),
            timeout_millis: 3_000u64,
            connect_timeout_millis: 5_000u64,
            tcp_nodelay: true,
        }
    }
}

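As a rough illustration (not part of this patch), a datanode running in distributed mode would override the defaults above roughly like this; the node id and metasrv address are placeholders. In a serialized config, `#[serde(rename_all = "lowercase")]` means the mode is spelled "standalone" or "distributed".

let opts = DatanodeOptions {
    node_id: 42, // placeholder node id
    mode: Mode::Distributed,
    meta_client_opts: MetaClientOpts {
        metasrv_addr: "10.0.0.1:3002".to_string(), // placeholder address
        ..MetaClientOpts::default()
    },
    ..DatanodeOptions::default()
};
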
@@ -279,6 +279,12 @@ pub enum Error {
|
||||
table_name: String,
|
||||
source: catalog::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to initialize meta client, source: {}", source))]
|
||||
MetaClientInit {
|
||||
#[snafu(backtrace)]
|
||||
source: meta_client::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -346,6 +352,7 @@ impl ErrorExt for Error {
|
||||
| Error::CollectRecordBatches { source } => source.status_code(),
|
||||
|
||||
Error::ArrowComputation { .. } => StatusCode::Unexpected,
|
||||
Error::MetaClientInit { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
109
src/datanode/src/heartbeat.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use api::v1::meta::{HeartbeatRequest, HeartbeatResponse, Peer};
|
||||
use common_telemetry::{error, info, warn};
|
||||
use meta_client::client::{HeartbeatSender, MetaClient};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{MetaClientInitSnafu, Result};
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct HeartbeatTask {
|
||||
node_id: u64,
|
||||
server_addr: String,
|
||||
running: Arc<AtomicBool>,
|
||||
meta_client: MetaClient,
|
||||
interval: u64,
|
||||
}
|
||||
|
||||
impl Drop for HeartbeatTask {
|
||||
fn drop(&mut self) {
|
||||
self.running.store(false, Ordering::Release);
|
||||
}
|
||||
}
|
||||
|
||||
impl HeartbeatTask {
|
||||
/// Create a new heartbeat task instance.
|
||||
pub fn new(node_id: u64, server_addr: String, meta_client: MetaClient) -> Self {
|
||||
Self {
|
||||
node_id,
|
||||
server_addr,
|
||||
running: Arc::new(AtomicBool::new(false)),
|
||||
meta_client,
|
||||
interval: 5_000, // default interval is set to 5 secs
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_streams(
|
||||
meta_client: &MetaClient,
|
||||
running: Arc<AtomicBool>,
|
||||
) -> Result<HeartbeatSender> {
|
||||
let (tx, mut rx) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
|
||||
common_runtime::spawn_bg(async move {
|
||||
while let Some(res) = match rx.message().await {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
error!(e; "Error while reading heartbeat response");
|
||||
None
|
||||
}
|
||||
} {
|
||||
Self::handle_response(res).await;
|
||||
if !running.load(Ordering::Acquire) {
|
||||
info!("Heartbeat task shutdown");
|
||||
}
|
||||
}
|
||||
info!("Heartbeat handling loop exit.")
|
||||
});
|
||||
Ok(tx)
|
||||
}
|
||||
|
||||
async fn handle_response(resp: HeartbeatResponse) {
|
||||
info!("heartbeat response: {:?}", resp);
|
||||
}
|
||||
|
||||
/// Start the heartbeat task by spawning a background sending loop.
|
||||
pub async fn start(&self) -> Result<()> {
|
||||
let running = self.running.clone();
|
||||
if running
|
||||
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_err()
|
||||
{
|
||||
warn!("Heartbeat task started multiple times");
|
||||
return Ok(());
|
||||
}
|
||||
let interval = self.interval;
|
||||
let node_id = self.node_id;
|
||||
let server_addr = self.server_addr.clone();
|
||||
let meta_client = self.meta_client.clone();
|
||||
|
||||
let mut tx = Self::create_streams(&meta_client, running.clone()).await?;
|
||||
common_runtime::spawn_bg(async move {
|
||||
while running.load(Ordering::Acquire) {
|
||||
let req = HeartbeatRequest {
|
||||
peer: Some(Peer {
|
||||
id: node_id,
|
||||
addr: server_addr.clone(),
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
if let Err(e) = tx.send(req).await {
|
||||
error!("Failed to send heartbeat to metasrv, error: {:?}", e);
|
||||
match Self::create_streams(&meta_client, running.clone()).await {
|
||||
Ok(new_tx) => {
|
||||
info!("Reconnected to metasrv");
|
||||
tx = new_tx;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(e;"Failed to reconnect to metasrv!");
|
||||
}
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(interval)).await;
|
||||
}
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
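A minimal usage sketch of the task above, assuming a `node_id` and an already-started `MetaClient` are at hand (both are assumptions here, not part of the patch):

let task = HeartbeatTask::new(node_id, "0.0.0.0:3001".to_string(), meta_client);
// `start` spawns the background sending loop and returns immediately; a second
// call only logs a warning thanks to the compare_exchange guard.
task.start().await?;
// Dropping the task clears the `running` flag, so the loop exits after its
// next tick instead of being aborted.
drop(task);
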
@@ -1,8 +1,12 @@
|
||||
use std::time::Duration;
|
||||
use std::{fs, path, sync::Arc};
|
||||
|
||||
use catalog::remote::MetaKvBackend;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
|
||||
use common_telemetry::logging::info;
|
||||
use log_store::fs::{config::LogConfig, log::LocalFileLogStore};
|
||||
use meta_client::client::{MetaClient, MetaClientBuilder};
|
||||
use object_store::{services::fs::Builder, util, ObjectStore};
|
||||
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
|
||||
use snafu::prelude::*;
|
||||
@@ -10,8 +14,9 @@ use storage::{config::EngineConfig as StorageEngineConfig, EngineImpl};
|
||||
use table_engine::config::EngineConfig as TableEngineConfig;
|
||||
use table_engine::engine::MitoEngine;
|
||||
|
||||
use crate::datanode::{DatanodeOptions, ObjectStoreConfig};
|
||||
use crate::error::{self, NewCatalogSnafu, Result};
|
||||
use crate::datanode::{DatanodeOptions, MetaClientOpts, Mode, ObjectStoreConfig};
|
||||
use crate::error::{self, CatalogSnafu, MetaClientInitSnafu, NewCatalogSnafu, Result};
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::script::ScriptExecutor;
|
||||
use crate::server::grpc::plan::PhysicalPlanner;
|
||||
use crate::sql::SqlHandler;
|
||||
@@ -19,15 +24,18 @@ use crate::sql::SqlHandler;
|
||||
mod grpc;
|
||||
mod sql;
|
||||
|
||||
type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
|
||||
pub(crate) type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
|
||||
|
||||
// An abstraction to read/write services.
|
||||
pub struct Instance {
|
||||
query_engine: QueryEngineRef,
|
||||
sql_handler: SqlHandler,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
physical_planner: PhysicalPlanner,
|
||||
script_executor: ScriptExecutor,
|
||||
pub(crate) query_engine: QueryEngineRef,
|
||||
pub(crate) sql_handler: SqlHandler,
|
||||
pub(crate) catalog_manager: CatalogManagerRef,
|
||||
pub(crate) physical_planner: PhysicalPlanner,
|
||||
pub(crate) script_executor: ScriptExecutor,
|
||||
#[allow(unused)]
|
||||
pub(crate) meta_client: Option<MetaClient>,
|
||||
pub(crate) heartbeat_task: Option<HeartbeatTask>,
|
||||
}
|
||||
|
||||
pub type InstanceRef = Arc<Instance>;
|
||||
@@ -37,6 +45,13 @@ impl Instance {
|
||||
let object_store = new_object_store(&opts.storage).await?;
|
||||
let log_store = create_local_file_log_store(opts).await?;
|
||||
|
||||
let meta_client = match opts.mode {
|
||||
Mode::Standalone => None,
|
||||
Mode::Distributed => {
|
||||
Some(new_metasrv_client(opts.node_id, &opts.meta_client_opts).await?)
|
||||
}
|
||||
};
|
||||
|
||||
let table_engine = Arc::new(DefaultEngine::new(
|
||||
TableEngineConfig::default(),
|
||||
EngineImpl::new(
|
||||
@@ -46,22 +61,52 @@ impl Instance {
|
||||
),
|
||||
object_store,
|
||||
));
|
||||
let catalog_manager = Arc::new(
|
||||
catalog::local::LocalCatalogManager::try_new(table_engine.clone())
|
||||
.await
|
||||
.context(NewCatalogSnafu)?,
|
||||
);
|
||||
let factory = QueryEngineFactory::new(catalog_manager.clone());
|
||||
|
||||
// create remote catalog manager
|
||||
let (catalog_manager, factory) = match opts.mode {
|
||||
Mode::Standalone => {
|
||||
let catalog = Arc::new(
|
||||
catalog::local::LocalCatalogManager::try_new(table_engine.clone())
|
||||
.await
|
||||
.context(CatalogSnafu)?,
|
||||
);
|
||||
let factory = QueryEngineFactory::new(catalog.clone());
|
||||
(catalog as CatalogManagerRef, factory)
|
||||
}
|
||||
|
||||
Mode::Distributed => {
|
||||
let catalog = Arc::new(catalog::remote::RemoteCatalogManager::new(
|
||||
table_engine.clone(),
|
||||
opts.node_id,
|
||||
Arc::new(MetaKvBackend {
|
||||
client: meta_client.as_ref().unwrap().clone(),
|
||||
}),
|
||||
));
|
||||
let factory = QueryEngineFactory::new(catalog.clone());
|
||||
(catalog as CatalogManagerRef, factory)
|
||||
}
|
||||
};
|
||||
|
||||
let query_engine = factory.query_engine().clone();
|
||||
let script_executor =
|
||||
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
|
||||
|
||||
let heartbeat_task = match opts.mode {
|
||||
Mode::Standalone => None,
|
||||
Mode::Distributed => Some(HeartbeatTask::new(
|
||||
opts.node_id, /*node id not set*/
|
||||
opts.rpc_addr.clone(),
|
||||
meta_client.as_ref().unwrap().clone(),
|
||||
)),
|
||||
};
|
||||
Ok(Self {
|
||||
query_engine: query_engine.clone(),
|
||||
sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
|
||||
catalog_manager,
|
||||
physical_planner: PhysicalPlanner::new(query_engine),
|
||||
script_executor,
|
||||
meta_client,
|
||||
heartbeat_task,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -70,6 +115,9 @@ impl Instance {
|
||||
.start()
|
||||
.await
|
||||
.context(NewCatalogSnafu)?;
|
||||
if let Some(task) = &self.heartbeat_task {
|
||||
task.start().await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -80,47 +128,9 @@ impl Instance {
|
||||
pub fn catalog_manager(&self) -> &CatalogManagerRef {
|
||||
&self.catalog_manager
|
||||
}
|
||||
|
||||
// This method is used in other crate's testing codes, so move it out of "cfg(test)".
|
||||
// TODO(LFC): Delete it when callers no longer need it.
|
||||
pub async fn new_mock() -> Result<Self> {
|
||||
use table_engine::table::test_util::new_test_object_store;
|
||||
use table_engine::table::test_util::MockEngine;
|
||||
use table_engine::table::test_util::MockMitoEngine;
|
||||
|
||||
let (_dir, object_store) = new_test_object_store("setup_mock_engine_and_table").await;
|
||||
let mock_engine = Arc::new(MockMitoEngine::new(
|
||||
TableEngineConfig::default(),
|
||||
MockEngine::default(),
|
||||
object_store,
|
||||
));
|
||||
|
||||
let catalog_manager = Arc::new(
|
||||
catalog::local::manager::LocalCatalogManager::try_new(mock_engine.clone())
|
||||
.await
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let factory = QueryEngineFactory::new(catalog_manager.clone());
|
||||
let query_engine = factory.query_engine().clone();
|
||||
|
||||
let sql_handler = SqlHandler::new(mock_engine.clone(), catalog_manager.clone());
|
||||
let physical_planner = PhysicalPlanner::new(query_engine.clone());
|
||||
let script_executor = ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
Ok(Self {
|
||||
query_engine,
|
||||
sql_handler,
|
||||
catalog_manager,
|
||||
physical_planner,
|
||||
script_executor,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
|
||||
pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
|
||||
// TODO(dennis): support other backends
|
||||
let data_dir = util::normalize_dir(match store_config {
|
||||
ObjectStoreConfig::File { data_dir } => data_dir,
|
||||
@@ -139,7 +149,38 @@ async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStor
|
||||
Ok(ObjectStore::new(accessor))
|
||||
}
|
||||
|
||||
async fn create_local_file_log_store(opts: &DatanodeOptions) -> Result<LocalFileLogStore> {
|
||||
/// Create metasrv client instance and spawn heartbeat loop.
|
||||
async fn new_metasrv_client(node_id: u64, meta_config: &MetaClientOpts) -> Result<MetaClient> {
|
||||
let cluster_id = 0; // TODO(hl): read from config
|
||||
let member_id = node_id;
|
||||
|
||||
let config = ChannelConfig::new()
|
||||
.timeout(Duration::from_millis(meta_config.timeout_millis))
|
||||
.connect_timeout(Duration::from_millis(meta_config.connect_timeout_millis))
|
||||
.tcp_nodelay(meta_config.tcp_nodelay);
|
||||
let channel_manager = ChannelManager::with_config(config);
|
||||
let mut meta_client = MetaClientBuilder::new(cluster_id, member_id)
|
||||
.enable_heartbeat()
|
||||
.enable_router()
|
||||
.enable_store()
|
||||
.channel_manager(channel_manager)
|
||||
.build();
|
||||
meta_client
|
||||
.start(&[&meta_config.metasrv_addr])
|
||||
.await
|
||||
.context(MetaClientInitSnafu)?;
|
||||
|
||||
// Required only when the heartbeat client is enabled.
|
||||
meta_client
|
||||
.ask_leader()
|
||||
.await
|
||||
.context(MetaClientInitSnafu)?;
|
||||
Ok(meta_client)
|
||||
}
|
||||
|
||||
pub(crate) async fn create_local_file_log_store(
|
||||
opts: &DatanodeOptions,
|
||||
) -> Result<LocalFileLogStore> {
|
||||
// create WAL directory
|
||||
fs::create_dir_all(path::Path::new(&opts.wal_dir))
|
||||
.context(error::CreateDirSnafu { dir: &opts.wal_dir })?;
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
use std::ops::Deref;
|
||||
|
||||
use api::v1::codec::RegionId;
|
||||
use api::v1::{
|
||||
admin_expr, codec::InsertBatch, insert_expr, object_expr, select_expr, AdminExpr, AdminResult,
|
||||
ObjectExpr, ObjectResult, SelectExpr,
|
||||
@@ -62,7 +65,11 @@ impl Instance {
|
||||
insert_batches: &[InsertBatch],
|
||||
) -> Result<()> {
|
||||
// Create table automatically, build schema from data.
|
||||
let table_id = self.catalog_manager.next_table_id();
|
||||
let table_id = self
|
||||
.catalog_manager
|
||||
.next_table_id()
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
let create_table_request = insert::build_create_table_request(
|
||||
catalog_name,
|
||||
schema_name,
|
||||
@@ -200,6 +207,18 @@ impl GrpcQueryHandler for Instance {
|
||||
.context(servers::error::InvalidQuerySnafu {
|
||||
reason: "missing `expr` in `InsertExpr`",
|
||||
})?;
|
||||
|
||||
// TODO(fys): _region_id is for later use.
|
||||
let _region_id: Option<RegionId> = insert_expr
|
||||
.options
|
||||
.get("region_id")
|
||||
.map(|id| {
|
||||
id.deref()
|
||||
.try_into()
|
||||
.context(servers::error::DecodeRegionIdSnafu)
|
||||
})
|
||||
.transpose()?;
|
||||
|
||||
match expr {
|
||||
insert_expr::Expr::Values(values) => {
|
||||
self.handle_insert(table_name, values).await
|
||||
|
||||
@@ -10,7 +10,7 @@ use servers::query_handler::SqlQueryHandler;
|
||||
use snafu::prelude::*;
|
||||
use sql::statements::statement::Statement;
|
||||
|
||||
use crate::error::{ExecuteSqlSnafu, Result};
|
||||
use crate::error::{CatalogSnafu, ExecuteSqlSnafu, Result};
|
||||
use crate::instance::Instance;
|
||||
use crate::metric;
|
||||
use crate::sql::SqlRequest;
|
||||
@@ -49,7 +49,11 @@ impl Instance {
|
||||
}
|
||||
|
||||
Statement::Create(c) => {
|
||||
let table_id = self.catalog_manager.next_table_id();
|
||||
let table_id = self
|
||||
.catalog_manager
|
||||
.next_table_id()
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
let _engine_name = c.engine.clone();
|
||||
// TODO(hl): Select table engine by engine_name
|
||||
|
||||
@@ -77,6 +81,9 @@ impl Instance {
|
||||
Statement::ShowTables(stmt) => {
|
||||
self.sql_handler.execute(SqlRequest::ShowTables(stmt)).await
|
||||
}
|
||||
Statement::ShowCreateTable(_stmt) => {
|
||||
unimplemented!("SHOW CREATE TABLE is unimplemented yet");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
|
||||
pub mod datanode;
|
||||
pub mod error;
|
||||
mod heartbeat;
|
||||
pub mod instance;
|
||||
mod metric;
|
||||
mod mock;
|
||||
mod script;
|
||||
pub mod server;
|
||||
mod sql;
|
||||
|
||||
126
src/datanode/src/mock.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::remote::MetaKvBackend;
|
||||
use meta_client::client::{MetaClient, MetaClientBuilder};
|
||||
use query::QueryEngineFactory;
|
||||
use storage::config::EngineConfig as StorageEngineConfig;
|
||||
use storage::EngineImpl;
|
||||
use table_engine::config::EngineConfig as TableEngineConfig;
|
||||
|
||||
use crate::datanode::DatanodeOptions;
|
||||
use crate::error::Result;
|
||||
use crate::heartbeat::HeartbeatTask;
|
||||
use crate::instance::{create_local_file_log_store, new_object_store, DefaultEngine, Instance};
|
||||
use crate::script::ScriptExecutor;
|
||||
use crate::server::grpc::plan::PhysicalPlanner;
|
||||
use crate::sql::SqlHandler;
|
||||
|
||||
impl Instance {
|
||||
// This method is used by other crates' testing code, so it is kept out of "cfg(test)".
// TODO(LFC): delete it when callers no longer need it.
|
||||
pub async fn new_mock() -> Result<Self> {
|
||||
use table_engine::table::test_util::new_test_object_store;
|
||||
use table_engine::table::test_util::MockEngine;
|
||||
use table_engine::table::test_util::MockMitoEngine;
|
||||
|
||||
let meta_client = Some(mock_meta_client().await);
|
||||
let (_dir, object_store) = new_test_object_store("setup_mock_engine_and_table").await;
|
||||
let mock_engine = Arc::new(MockMitoEngine::new(
|
||||
TableEngineConfig::default(),
|
||||
MockEngine::default(),
|
||||
object_store,
|
||||
));
|
||||
|
||||
let catalog_manager = Arc::new(
|
||||
catalog::local::manager::LocalCatalogManager::try_new(mock_engine.clone())
|
||||
.await
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let factory = QueryEngineFactory::new(catalog_manager.clone());
|
||||
let query_engine = factory.query_engine().clone();
|
||||
|
||||
let sql_handler = SqlHandler::new(mock_engine.clone(), catalog_manager.clone());
|
||||
let physical_planner = PhysicalPlanner::new(query_engine.clone());
|
||||
let script_executor = ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let heartbeat_task = Some(HeartbeatTask::new(
|
||||
0,
|
||||
"127.0.0.1:3302".to_string(),
|
||||
meta_client.as_ref().unwrap().clone(),
|
||||
));
|
||||
Ok(Self {
|
||||
query_engine,
|
||||
sql_handler,
|
||||
catalog_manager,
|
||||
physical_planner,
|
||||
script_executor,
|
||||
meta_client,
|
||||
heartbeat_task,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn with_mock_meta_client(opts: &DatanodeOptions) -> Result<Self> {
|
||||
let object_store = new_object_store(&opts.storage).await?;
|
||||
let log_store = create_local_file_log_store(opts).await?;
|
||||
let meta_client = mock_meta_client().await;
|
||||
let table_engine = Arc::new(DefaultEngine::new(
|
||||
TableEngineConfig::default(),
|
||||
EngineImpl::new(
|
||||
StorageEngineConfig::default(),
|
||||
Arc::new(log_store),
|
||||
object_store.clone(),
|
||||
),
|
||||
object_store,
|
||||
));
|
||||
|
||||
// create remote catalog manager
|
||||
let catalog_manager = Arc::new(catalog::remote::RemoteCatalogManager::new(
|
||||
table_engine.clone(),
|
||||
opts.node_id,
|
||||
Arc::new(MetaKvBackend {
|
||||
client: meta_client.clone(),
|
||||
}),
|
||||
));
|
||||
|
||||
let factory = QueryEngineFactory::new(catalog_manager.clone());
|
||||
let query_engine = factory.query_engine().clone();
|
||||
let script_executor =
|
||||
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
|
||||
|
||||
let heartbeat_task =
|
||||
HeartbeatTask::new(opts.node_id, opts.rpc_addr.clone(), meta_client.clone());
|
||||
Ok(Self {
|
||||
query_engine: query_engine.clone(),
|
||||
sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
|
||||
catalog_manager,
|
||||
physical_planner: PhysicalPlanner::new(query_engine),
|
||||
script_executor,
|
||||
meta_client: Some(meta_client),
|
||||
heartbeat_task: Some(heartbeat_task),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
async fn mock_meta_client() -> MetaClient {
|
||||
let mock_info = meta_srv::mocks::mock_with_memstore().await;
|
||||
let meta_srv::mocks::MockInfo {
|
||||
server_addr,
|
||||
channel_manager,
|
||||
} = mock_info;
|
||||
|
||||
let id = (1000u64, 2000u64);
|
||||
let mut meta_client = MetaClientBuilder::new(id.0, id.1)
|
||||
.enable_heartbeat()
|
||||
.enable_router()
|
||||
.enable_store()
|
||||
.channel_manager(channel_manager)
|
||||
.build();
|
||||
meta_client.start(&[&server_addr]).await.unwrap();
|
||||
// Required only when the heartbeat client is enabled.
|
||||
meta_client.ask_leader().await.unwrap();
|
||||
|
||||
meta_client
|
||||
}
|
||||
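For orientation, the integration tests further below switch from `Instance::new` to this mock; a hedged sketch of that pattern (the test and directory names are illustrative):

#[tokio::test(flavor = "multi_thread")]
async fn test_with_mock_meta_client() {
    let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("mock_meta_client_demo");
    let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
    instance.start().await.unwrap();
}
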
@@ -11,14 +11,14 @@ use futures::TryFutureExt;
|
||||
use snafu::prelude::*;
|
||||
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
|
||||
|
||||
use crate::error::{self, ColumnDefaultConstraintSnafu, MissingFieldSnafu, Result};
|
||||
use crate::error::{self, CatalogSnafu, ColumnDefaultConstraintSnafu, MissingFieldSnafu, Result};
|
||||
use crate::instance::Instance;
|
||||
use crate::server::grpc::handler::AdminResultBuilder;
|
||||
use crate::sql::SqlRequest;
|
||||
|
||||
impl Instance {
|
||||
pub(crate) async fn handle_create(&self, expr: CreateExpr) -> AdminResult {
|
||||
let request = self.create_expr_to_request(expr);
|
||||
let request = self.create_expr_to_request(expr).await;
|
||||
let result = futures::future::ready(request)
|
||||
.and_then(|request| self.sql_handler().execute(SqlRequest::Create(request)))
|
||||
.await;
|
||||
@@ -63,7 +63,7 @@ impl Instance {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_expr_to_request(&self, expr: CreateExpr) -> Result<CreateTableRequest> {
|
||||
async fn create_expr_to_request(&self, expr: CreateExpr) -> Result<CreateTableRequest> {
|
||||
let schema = create_table_schema(&expr)?;
|
||||
|
||||
let primary_key_indices = expr
|
||||
@@ -76,14 +76,26 @@ impl Instance {
|
||||
})
|
||||
.collect::<Result<Vec<usize>>>()?;
|
||||
|
||||
let table_id = self.catalog_manager().next_table_id();
|
||||
|
||||
let catalog_name = expr
|
||||
.catalog_name
|
||||
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
|
||||
let schema_name = expr
|
||||
.schema_name
|
||||
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
|
||||
|
||||
let table_id = self
|
||||
.catalog_manager()
|
||||
.next_table_id()
|
||||
.await
|
||||
.context(CatalogSnafu)?;
|
||||
|
||||
let region_id = expr
|
||||
.table_options
|
||||
.get(&"region_id".to_string())
|
||||
.unwrap()
|
||||
.parse::<u32>()
|
||||
.unwrap();
|
||||
|
||||
Ok(CreateTableRequest {
|
||||
id: table_id,
|
||||
catalog_name,
|
||||
@@ -91,6 +103,7 @@ impl Instance {
|
||||
table_name: expr.table_name,
|
||||
desc: expr.desc,
|
||||
schema,
|
||||
region_numbers: vec![region_id],
|
||||
primary_key_indices,
|
||||
create_if_not_exists: expr.create_if_not_exists,
|
||||
table_options: expr.table_options,
|
||||
@@ -179,14 +192,15 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::tests::test_util;
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_create_expr_to_request() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("create_expr_to_request");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let expr = testing_create_expr();
|
||||
let request = instance.create_expr_to_request(expr).unwrap();
|
||||
let request = instance.create_expr_to_request(expr).await.unwrap();
|
||||
assert_eq!(request.id, common_catalog::consts::MIN_USER_TABLE_ID);
|
||||
assert_eq!(request.catalog_name, "greptime".to_string());
|
||||
assert_eq!(request.schema_name, "public".to_string());
|
||||
@@ -198,7 +212,7 @@ mod tests {
|
||||
|
||||
let mut expr = testing_create_expr();
|
||||
expr.primary_keys = vec!["host".to_string(), "not-exist-column".to_string()];
|
||||
let result = instance.create_expr_to_request(expr);
|
||||
let result = instance.create_expr_to_request(expr).await;
|
||||
assert!(result.is_err());
|
||||
assert!(result
|
||||
.unwrap_err()
|
||||
@@ -291,6 +305,9 @@ mod tests {
|
||||
default_constraint: None,
|
||||
},
|
||||
];
|
||||
let table_options = [("region_id".to_string(), "0".to_string())]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>();
|
||||
CreateExpr {
|
||||
catalog_name: None,
|
||||
schema_name: None,
|
||||
@@ -300,7 +317,7 @@ mod tests {
|
||||
time_index: "ts".to_string(),
|
||||
primary_keys: vec!["ts".to_string(), "host".to_string()],
|
||||
create_if_not_exists: true,
|
||||
table_options: HashMap::new(),
|
||||
table_options,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -168,6 +168,7 @@ pub fn build_create_table_request(
|
||||
create_if_not_exists: true,
|
||||
primary_key_indices,
|
||||
table_options: HashMap::new(),
|
||||
region_numbers: vec![0],
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -155,6 +155,7 @@ impl SqlHandler {
|
||||
table_name,
|
||||
desc: None,
|
||||
schema,
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: primary_keys,
|
||||
create_if_not_exists: stmt.if_not_exists,
|
||||
table_options: HashMap::new(),
|
||||
|
||||
@@ -34,7 +34,7 @@ impl SqlHandler {
|
||||
stmt: Insert,
|
||||
) -> Result<SqlRequest> {
|
||||
let columns = stmt.columns();
|
||||
let values = stmt.values();
|
||||
let values = stmt.values().context(ParseSqlValueSnafu)?;
|
||||
// TODO(dennis): the table name may be in the form `catalog.schema.table`,
// but we don't handle that yet.
|
||||
let table_name = stmt.table_name();
|
||||
|
||||
@@ -24,7 +24,7 @@ async fn setup_grpc_server(name: &str, port: usize) -> (String, TestGuard, Arc<G
|
||||
let (mut opts, guard) = test_util::create_tmp_dir_and_datanode_opts(name);
|
||||
let addr = format!("127.0.0.1:{}", port);
|
||||
opts.rpc_addr = addr.clone();
|
||||
let instance = Arc::new(Instance::new(&opts).await.unwrap());
|
||||
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let addr_cloned = addr.clone();
|
||||
@@ -50,7 +50,7 @@ async fn setup_grpc_server(name: &str, port: usize) -> (String, TestGuard, Arc<G
|
||||
(addr, guard, grpc_server)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_auto_create_table() {
|
||||
let (addr, _guard, grpc_server) = setup_grpc_server("auto_create_table", 3991).await;
|
||||
|
||||
@@ -116,8 +116,9 @@ fn expect_data() -> (Column, Column, Column, Column) {
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_insert_and_select() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (addr, _guard, grpc_server) = setup_grpc_server("insert_and_select", 3990).await;
|
||||
|
||||
let grpc_client = Client::with_urls(vec![addr]);
|
||||
@@ -247,6 +248,6 @@ fn testing_create_expr() -> CreateExpr {
|
||||
time_index: "ts".to_string(),
|
||||
primary_keys: vec!["ts".to_string(), "host".to_string()],
|
||||
create_if_not_exists: true,
|
||||
table_options: HashMap::new(),
|
||||
table_options: HashMap::from([("region_id".to_string(), "0".to_string())]),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use axum::http::StatusCode;
|
||||
use axum::Router;
|
||||
use axum_test_helper::TestClient;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use servers::http::handler::ScriptExecution;
|
||||
use servers::http::HttpServer;
|
||||
use servers::server::Server;
|
||||
use test_util::TestGuard;
|
||||
@@ -15,7 +14,7 @@ use crate::tests::test_util;
|
||||
|
||||
async fn make_test_app(name: &str) -> (Router, TestGuard) {
|
||||
let (opts, guard) = test_util::create_tmp_dir_and_datanode_opts(name);
|
||||
let instance = Arc::new(Instance::new(&opts).await.unwrap());
|
||||
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
|
||||
instance.start().await.unwrap();
|
||||
test_util::create_test_table(&instance, ConcreteDataType::timestamp_millis_datatype())
|
||||
.await
|
||||
@@ -24,7 +23,7 @@ async fn make_test_app(name: &str) -> (Router, TestGuard) {
|
||||
(http_server.make_app(), guard)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_sql_api() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (app, _guard) = make_test_app("sql_api").await;
|
||||
@@ -84,7 +83,7 @@ async fn test_sql_api() {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_metrics_api() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
common_telemetry::init_default_metrics_recorder();
|
||||
@@ -99,28 +98,26 @@ async fn test_metrics_api() {
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
|
||||
// Call metrics api
|
||||
let res = client.get("/v1/metrics").send().await;
|
||||
let res = client.get("/metrics").send().await;
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
let body = res.text().await;
|
||||
assert!(body.contains("datanode_handle_sql_elapsed"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_scripts_api() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let (app, _guard) = make_test_app("scripts_api").await;
|
||||
let client = TestClient::new(app);
|
||||
let res = client
|
||||
.post("/v1/scripts")
|
||||
.json(&ScriptExecution {
|
||||
name: "test".to_string(),
|
||||
script: r#"
|
||||
.post("/v1/scripts?name=test")
|
||||
.body(
|
||||
r#"
|
||||
@copr(sql='select number from numbers limit 10', args=['number'], returns=['n'])
|
||||
def test(n):
|
||||
return n + 1;
|
||||
"#
|
||||
.to_string(),
|
||||
})
|
||||
"#,
|
||||
)
|
||||
.send()
|
||||
.await;
|
||||
assert_eq!(res.status(), StatusCode::OK);
|
||||
|
||||
@@ -1,20 +1,18 @@
|
||||
use arrow::array::{Int64Array, UInt64Array};
|
||||
use common_query::Output;
|
||||
use common_recordbatch::util;
|
||||
use datafusion::arrow_print;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
|
||||
use crate::instance::Instance;
|
||||
use crate::tests::test_util;
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_insert() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_insert");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
test_util::create_test_table(&instance, ConcreteDataType::timestamp_millis_datatype())
|
||||
@@ -33,12 +31,12 @@ async fn test_execute_insert() {
|
||||
assert!(matches!(output, Output::AffectedRows(2)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_insert_query_with_i64_timestamp() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("insert_query_i64_timestamp");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
test_util::create_test_table(&instance, ConcreteDataType::int64_datatype())
|
||||
@@ -72,10 +70,10 @@ async fn test_execute_insert_query_with_i64_timestamp() {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_query() {
|
||||
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_query");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let output = instance
|
||||
@@ -98,11 +96,11 @@ async fn test_execute_query() {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_execute_show_databases_tables() {
|
||||
let (opts, _guard) =
|
||||
test_util::create_tmp_dir_and_datanode_opts("execute_show_databases_tables");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let output = instance.execute_sql("show databases").await.unwrap();
|
||||
@@ -188,12 +186,12 @@ async fn test_execute_show_databases_tables() {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
pub async fn test_execute_create() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_create");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let output = instance
|
||||
@@ -212,13 +210,13 @@ pub async fn test_execute_create() {
|
||||
assert!(matches!(output, Output::AffectedRows(1)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
pub async fn test_create_table_illegal_timestamp_type() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let (opts, _guard) =
|
||||
test_util::create_tmp_dir_and_datanode_opts("create_table_illegal_timestamp_type");
|
||||
let instance = Instance::new(&opts).await.unwrap();
|
||||
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let output = instance
|
||||
@@ -244,6 +242,8 @@ pub async fn test_create_table_illegal_timestamp_type() {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_alter_table() {
|
||||
use datafusion::arrow_print;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
// TODO(LFC): use the real Mito engine once we can alter its region schema,
// and then delete the `new_mock` method.
|
||||
let instance = Instance::new_mock().await.unwrap();
|
||||
|
||||
@@ -72,6 +72,7 @@ pub async fn create_test_table(instance: &Instance, ts_type: ConcreteDataType) -
|
||||
create_if_not_exists: true,
|
||||
primary_key_indices: vec![3, 0], // "host" and "ts" are primary keys
|
||||
table_options: HashMap::new(),
|
||||
region_numbers: vec![0],
|
||||
},
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -7,6 +7,7 @@ edition = "2021"
|
||||
api = { path = "../api" }
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
catalog = { path = "../catalog" }
|
||||
client = { path = "../client" }
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
@@ -16,10 +17,14 @@ common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
itertools = "0.10"
|
||||
openmetrics-parser = "0.4"
|
||||
prost = "0.11"
|
||||
query = { path = "../query" }
|
||||
serde = "1.0"
|
||||
servers = { path = "../servers" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
@@ -34,8 +39,6 @@ version = "0.10"
|
||||
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
|
||||
|
||||
[dev-dependencies]
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datanode = { path = "../datanode" }
|
||||
futures = "0.3"
|
||||
tempdir = "0.3"
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
use std::any::Any;
|
||||
|
||||
use common_error::prelude::*;
|
||||
use common_query::logical_plan::Expr;
|
||||
use datafusion_common::ScalarValue;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::mock::DatanodeId;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
@@ -83,6 +88,17 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to convert DataFusion's ScalarValue: {:?}, source: {}",
|
||||
value,
|
||||
source
|
||||
))]
|
||||
ConvertScalarValue {
|
||||
value: ScalarValue,
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find partition column: {}", column_name))]
|
||||
FindPartitionColumn {
|
||||
column_name: String,
|
||||
@@ -95,6 +111,24 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find regions by filters: {:?}", filters))]
|
||||
FindRegions {
|
||||
filters: Vec<Expr>,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to find Datanode by region: {:?}", region))]
|
||||
FindDatanode {
|
||||
region: RegionId,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get Datanode instance: {:?}", datanode))]
|
||||
DatanodeInstance {
|
||||
datanode: DatanodeId,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Invaild InsertRequest, reason: {}", reason))]
|
||||
InvalidInsertRequest {
|
||||
reason: String,
|
||||
@@ -107,6 +141,12 @@ pub enum Error {
|
||||
actual: usize,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to join task, source: {}", source))]
|
||||
JoinTask {
|
||||
source: common_runtime::JoinError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -118,19 +158,31 @@ impl ErrorExt for Error {
|
||||
| Error::ParseAddr { .. }
|
||||
| Error::InvalidSql { .. }
|
||||
| Error::FindRegion { .. }
|
||||
| Error::FindRegions { .. }
|
||||
| Error::InvalidInsertRequest { .. }
|
||||
| Error::FindPartitionColumn { .. }
|
||||
| Error::RegionKeysSize { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
Error::RuntimeResource { source, .. } => source.status_code(),
|
||||
|
||||
Error::StartServer { source, .. } => source.status_code(),
|
||||
|
||||
Error::ParseSql { source } => source.status_code(),
|
||||
Error::ConvertColumnDefaultConstraint { source, .. } => source.status_code(),
|
||||
|
||||
Error::ConvertColumnDefaultConstraint { source, .. }
|
||||
| Error::ConvertScalarValue { source, .. } => source.status_code(),
|
||||
|
||||
Error::RequestDatanode { source } => source.status_code(),
|
||||
Error::ColumnDataType { .. } => StatusCode::Internal,
|
||||
|
||||
Error::ColumnDataType { .. }
|
||||
| Error::FindDatanode { .. }
|
||||
| Error::DatanodeInstance { .. } => StatusCode::Internal,
|
||||
|
||||
Error::IllegalFrontendState { .. } | Error::IncompleteGrpcResult { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
Error::ExecOpentsdbPut { .. } => StatusCode::Internal,
|
||||
Error::JoinTask { .. } => StatusCode::Unexpected,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -161,7 +161,10 @@ fn create_to_expr(create: CreateTable) -> Result<CreateExpr> {
|
||||
primary_keys: find_primary_keys(&create.constraints)?,
|
||||
create_if_not_exists: create.if_not_exists,
|
||||
// TODO(LFC): Fill in other table options.
|
||||
table_options: HashMap::from([("engine".to_string(), create.engine)]),
|
||||
table_options: HashMap::from([
|
||||
("engine".to_string(), create.engine),
|
||||
("region_id".to_string(), "0".to_string()),
|
||||
]),
|
||||
..Default::default()
|
||||
};
|
||||
Ok(expr)
|
||||
@@ -282,8 +285,6 @@ mod tests {
|
||||
admin_expr, admin_result, column, column::SemanticType, object_expr, object_result,
|
||||
select_expr, Column, ExprHeader, MutateResult, SelectExpr,
|
||||
};
|
||||
use datafusion::arrow_print;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datatypes::schema::ColumnDefaultConstraint;
|
||||
use datatypes::value::Value;
|
||||
|
||||
@@ -324,12 +325,7 @@ mod tests {
|
||||
let output = SqlQueryHandler::do_query(&*instance, sql).await.unwrap();
|
||||
match output {
|
||||
Output::RecordBatches(recordbatches) => {
|
||||
let recordbatches = recordbatches
|
||||
.take()
|
||||
.into_iter()
|
||||
.map(|r| r.df_recordbatch)
|
||||
.collect::<Vec<DfRecordBatch>>();
|
||||
let pretty_print = arrow_print::write(&recordbatches);
|
||||
let pretty_print = recordbatches.pretty_print();
|
||||
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
|
||||
let expected = vec![
|
||||
"+----------------+---------------------+-----+--------+-----------+",
|
||||
@@ -349,12 +345,7 @@ mod tests {
|
||||
let output = SqlQueryHandler::do_query(&*instance, sql).await.unwrap();
|
||||
match output {
|
||||
Output::RecordBatches(recordbatches) => {
|
||||
let recordbatches = recordbatches
|
||||
.take()
|
||||
.into_iter()
|
||||
.map(|r| r.df_recordbatch)
|
||||
.collect::<Vec<DfRecordBatch>>();
|
||||
let pretty_print = arrow_print::write(&recordbatches);
|
||||
let pretty_print = recordbatches.pretty_print();
|
||||
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
|
||||
let expected = vec![
|
||||
"+----------------+---------------------+-----+--------+-----------+",
|
||||
@@ -550,12 +541,15 @@ mod tests {
|
||||
default_constraint: None,
|
||||
},
|
||||
];
|
||||
let mut table_options = HashMap::with_capacity(1);
|
||||
table_options.insert("region_id".to_string(), "0".to_string());
|
||||
CreateExpr {
|
||||
table_name: "demo".to_string(),
|
||||
column_defs,
|
||||
time_index: "ts".to_string(),
|
||||
primary_keys: vec!["ts".to_string(), "host".to_string()],
|
||||
create_if_not_exists: true,
|
||||
table_options,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ pub mod frontend;
|
||||
pub mod grpc;
|
||||
pub mod influxdb;
|
||||
pub mod instance;
|
||||
pub(crate) mod mock;
|
||||
pub mod mysql;
|
||||
pub mod opentsdb;
|
||||
pub mod partitioning;
|
||||
@@ -12,5 +13,6 @@ pub mod postgres;
|
||||
pub mod prometheus;
|
||||
mod server;
|
||||
pub mod spliter;
|
||||
mod table;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
175
src/frontend/src/mock.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
// FIXME(LFC): remove this mock.
|
||||
|
||||
use std::fmt::Formatter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::InsertExpr;
|
||||
use catalog::CatalogManagerRef;
|
||||
use client::ObjectResult;
|
||||
use client::{Database, Select};
|
||||
use common_query::prelude::Expr;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::util;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datafusion::logical_plan::{LogicalPlan as DfLogicPlan, LogicalPlanBuilder};
|
||||
use datafusion_expr::Expr as DfExpr;
|
||||
use datatypes::prelude::Value;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use query::plan::LogicalPlan;
|
||||
use table::table::adapter::DfTableProviderAdapter;
|
||||
|
||||
pub(crate) type DatanodeId = u64;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct DatanodeInstance {
|
||||
pub(crate) datanode_id: DatanodeId,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
db: Database,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for DatanodeInstance {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("DatanodeInstance")
|
||||
}
|
||||
}
|
||||
|
||||
impl DatanodeInstance {
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn new(
|
||||
datanode_id: DatanodeId,
|
||||
catalog_manager: CatalogManagerRef,
|
||||
db: Database,
|
||||
) -> Self {
|
||||
Self {
|
||||
datanode_id,
|
||||
catalog_manager,
|
||||
db,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn grpc_insert(&self, request: InsertExpr) -> client::Result<ObjectResult> {
|
||||
self.db.insert(request).await
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
pub(crate) async fn grpc_table_scan(&self, plan: TableScanPlan) -> RecordBatches {
|
||||
let logical_plan = self.build_logical_plan(&plan);
|
||||
|
||||
// TODO(LFC): pass the logical plan directly to the gRPC interface once our Substrait codec supports filters.
|
||||
let sql = to_sql(logical_plan);
|
||||
println!("executing sql \"{}\" in datanode {}", sql, self.datanode_id);
|
||||
let result = self.db.select(Select::Sql(sql)).await.unwrap();
|
||||
|
||||
let output: Output = result.try_into().unwrap();
|
||||
let recordbatches = match output {
|
||||
Output::Stream(stream) => util::collect(stream).await.unwrap(),
|
||||
Output::RecordBatches(x) => x.take(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let schema = recordbatches.first().unwrap().schema.clone();
|
||||
RecordBatches::try_new(schema, recordbatches).unwrap()
|
||||
}
|
||||
|
||||
fn build_logical_plan(&self, table_scan: &TableScanPlan) -> LogicalPlan {
|
||||
let catalog = self.catalog_manager.catalog("greptime").unwrap().unwrap();
|
||||
let schema = catalog.schema("public").unwrap().unwrap();
|
||||
let table = schema.table(&table_scan.table_name).unwrap().unwrap();
|
||||
let table_provider = Arc::new(DfTableProviderAdapter::new(table.clone()));
|
||||
|
||||
let mut builder = LogicalPlanBuilder::scan_with_filters(
|
||||
table_scan.table_name.clone(),
|
||||
table_provider,
|
||||
table_scan.projection.clone(),
|
||||
table_scan
|
||||
.filters
|
||||
.iter()
|
||||
.map(|x| x.df_expr().clone())
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.unwrap();
|
||||
if let Some(limit) = table_scan.limit {
|
||||
builder = builder.limit(limit).unwrap();
|
||||
}
|
||||
|
||||
let plan = builder.build().unwrap();
|
||||
LogicalPlan::DfPlan(plan)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct TableScanPlan {
|
||||
pub table_name: String,
|
||||
pub projection: Option<Vec<usize>>,
|
||||
pub filters: Vec<Expr>,
|
||||
pub limit: Option<usize>,
|
||||
}
|
||||
|
||||
fn to_sql(plan: LogicalPlan) -> String {
|
||||
let LogicalPlan::DfPlan(plan) = plan;
|
||||
let table_scan = match plan {
|
||||
DfLogicPlan::TableScan(table_scan) => table_scan,
|
||||
_ => unreachable!("unknown plan: {:?}", plan),
|
||||
};
|
||||
|
||||
let schema: SchemaRef = Arc::new(table_scan.source.schema().try_into().unwrap());
|
||||
let projection = table_scan
|
||||
.projection
|
||||
.map(|x| {
|
||||
x.iter()
|
||||
.map(|i| schema.column_name_by_index(*i).to_string())
|
||||
.collect::<Vec<String>>()
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
schema
|
||||
.column_schemas()
|
||||
.iter()
|
||||
.map(|x| x.name.clone())
|
||||
.collect::<Vec<String>>()
|
||||
})
|
||||
.join(", ");
|
||||
|
||||
let mut sql = format!("select {} from {}", projection, &table_scan.table_name);
|
||||
|
||||
let filters = table_scan
|
||||
.filters
|
||||
.iter()
|
||||
.map(expr_to_sql)
|
||||
.collect::<Vec<String>>()
|
||||
.join(" AND ");
|
||||
if !filters.is_empty() {
|
||||
sql.push_str(" where ");
|
||||
sql.push_str(&filters);
|
||||
}
|
||||
|
||||
if let Some(limit) = table_scan.limit {
|
||||
sql.push_str(" limit ");
|
||||
sql.push_str(&limit.to_string());
|
||||
}
|
||||
sql
|
||||
}
|
||||
|
||||
fn expr_to_sql(expr: &DfExpr) -> String {
|
||||
match expr {
|
||||
DfExpr::BinaryExpr {
|
||||
ref left,
|
||||
ref right,
|
||||
ref op,
|
||||
} => format!(
|
||||
"{} {} {}",
|
||||
expr_to_sql(left.as_ref()),
|
||||
op,
|
||||
expr_to_sql(right.as_ref())
|
||||
),
|
||||
DfExpr::Column(c) => c.name.clone(),
|
||||
DfExpr::Literal(sv) => {
|
||||
let v: Value = Value::try_from(sv.clone()).unwrap();
|
||||
if v.data_type().is_string() {
|
||||
format!("'{}'", sv)
|
||||
} else {
|
||||
format!("{}", sv)
|
||||
}
|
||||
}
|
||||
_ => unimplemented!("not implemented for {:?}", expr),
|
||||
}
|
||||
}
|
||||
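To make the SQL generation above concrete: a scan of a hypothetical table "demo" with projection ["host", "ts"], the single filter `host = 'a'`, and a limit of 10 would come out as sketched below (table and column names are illustrative; `plan` is assumed to be built the same way as in `build_logical_plan`):

// String literals are single-quoted because `expr_to_sql` inspects the
// value's data type; numeric literals are emitted as-is.
let expected = "select host, ts from demo where host = 'a' limit 10";
assert_eq!(to_sql(plan), expected);
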
@@ -1,13 +1,16 @@
|
||||
mod columns;
|
||||
mod range;
|
||||
pub(crate) mod range;
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use datafusion_expr::Operator;
|
||||
use datatypes::prelude::Value;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
pub trait PartitionRule {
|
||||
pub(crate) type PartitionRuleRef<E> = Arc<dyn PartitionRule<Error = E>>;
|
||||
|
||||
pub trait PartitionRule: Sync + Send {
|
||||
type Error: Debug;
|
||||
|
||||
fn partition_columns(&self) -> Vec<String>;
|
||||
@@ -36,6 +39,14 @@ pub struct PartitionExpr {
|
||||
}
|
||||
|
||||
impl PartitionExpr {
|
||||
pub(crate) fn new(column: impl Into<String>, op: Operator, value: Value) -> Self {
|
||||
Self {
|
||||
column: column.into(),
|
||||
op,
|
||||
value,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &Value {
|
||||
&self.value
|
||||
}
|
||||
|
||||
@@ -67,6 +67,19 @@ impl RangeColumnsPartitionRule {
|
||||
value_lists: Vec<Vec<PartitionBound>>,
|
||||
regions: Vec<RegionId>,
|
||||
) -> Self {
|
||||
// An example of how a range-columns partition rule derives the first column bounds and regions:
|
||||
// SQL:
|
||||
// PARTITION p1 VALUES LESS THAN (10, 'c'),
|
||||
// PARTITION p2 VALUES LESS THAN (20, 'h'),
|
||||
// PARTITION p3 VALUES LESS THAN (20, 'm'),
|
||||
// PARTITION p4 VALUES LESS THAN (50, 'p'),
|
||||
// PARTITION p5 VALUES LESS THAN (MAXVALUE, 'x'),
|
||||
// PARTITION p6 VALUES LESS THAN (MAXVALUE, MAXVALUE),
|
||||
// first column bounds:
|
||||
// [10, 20, 50, MAXVALUE]
|
||||
// first column regions:
|
||||
// [[1], [2, 3], [4], [5, 6]]
|
||||
|
||||
let first_column_bounds = value_lists
|
||||
.iter()
|
||||
.map(|x| &x[0])
|
||||
@@ -136,16 +149,6 @@ impl PartitionRule for RangeColumnsPartitionRule {
|
||||
// "unwrap" is safe because we have checked that "self.column_list" contains all columns in "exprs"
|
||||
.unwrap();
|
||||
|
||||
// an example of bounds and regions:
|
||||
// SQL:
|
||||
// PARTITION p1 VALUES LESS THAN (10, 'c'),
|
||||
// PARTITION p2 VALUES LESS THAN (20, 'h'),
|
||||
// PARTITION p3 VALUES LESS THAN (20, 'm'),
|
||||
// PARTITION p4 VALUES LESS THAN (50, 'p'),
|
||||
// PARTITION p5 VALUES LESS THAN (MAXVALUE, 'x'),
|
||||
// PARTITION p6 VALUES LESS THAN (MAXVALUE, MAXVALUE),
|
||||
// bounds: [10, 20, 50, MAXVALUE]
|
||||
// regions: [[1], [2, 3], [4], [5, 6]]
|
||||
let regions = &self.first_column_regions;
|
||||
match self
|
||||
.first_column_bounds
|
||||
|
||||
@@ -41,7 +41,7 @@ use crate::partitioning::{Operator, PartitionExpr, PartitionRule, RegionId};
|
||||
///
|
||||
// TODO(LFC): Further clarify "partition" and "region".
|
||||
// Could be creating an extra layer between partition and region.
|
||||
struct RangePartitionRule {
|
||||
pub(crate) struct RangePartitionRule {
|
||||
column_name: String,
|
||||
// The last "MAXVALUE" bound is not stored, because that makes our binary search for
// partitions easier (besides, "MAXVALUE" is hard to represent in our `Value`).
|
||||
@@ -51,6 +51,20 @@ struct RangePartitionRule {
|
||||
}
|
||||
|
||||
impl RangePartitionRule {
|
||||
// FIXME(LFC): this `allow` is temporary, only to silence clippy.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn new(
|
||||
column_name: impl Into<String>,
|
||||
bounds: Vec<Value>,
|
||||
regions: Vec<RegionId>,
|
||||
) -> Self {
|
||||
Self {
|
||||
column_name: column_name.into(),
|
||||
bounds,
|
||||
regions,
|
||||
}
|
||||
}
|
||||
|
||||
fn column_name(&self) -> &String {
|
||||
&self.column_name
|
||||
}
|
||||
@@ -72,6 +86,9 @@ impl PartitionRule for RangePartitionRule {
|
||||
}
|
||||
|
||||
fn find_regions(&self, exprs: &[PartitionExpr]) -> Result<Vec<RegionId>, Self::Error> {
|
||||
if exprs.is_empty() {
|
||||
return Ok(self.regions.clone());
|
||||
}
|
||||
debug_assert_eq!(
|
||||
exprs.len(),
|
||||
1,
|
||||
|
||||
@@ -8,23 +8,21 @@ use snafu::OptionExt;
|
||||
use store_api::storage::RegionId;
|
||||
use table::requests::InsertRequest;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::error::FindPartitionColumnSnafu;
|
||||
use crate::error::FindRegionSnafu;
|
||||
use crate::error::InvalidInsertRequestSnafu;
|
||||
use crate::error::Result;
|
||||
use crate::partitioning::PartitionRule;
|
||||
use crate::partitioning::PartitionRuleRef;
|
||||
|
||||
pub type DistInsertRequest = HashMap<RegionId, InsertRequest>;
|
||||
|
||||
pub struct WriteSpliter<'a, P> {
|
||||
partition_rule: &'a P,
|
||||
pub struct WriteSpliter {
|
||||
partition_rule: PartitionRuleRef<Error>,
|
||||
}
|
||||
|
||||
impl<'a, P> WriteSpliter<'a, P>
|
||||
where
|
||||
P: PartitionRule,
|
||||
{
|
||||
pub fn with_patition_rule(rule: &'a P) -> Self {
|
||||
impl WriteSpliter {
|
||||
pub fn with_patition_rule(rule: PartitionRuleRef<Error>) -> Self {
|
||||
Self {
|
||||
partition_rule: rule,
|
||||
}
|
||||
@@ -156,7 +154,7 @@ fn partition_insert_request(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{collections::HashMap, result::Result};
|
||||
use std::{collections::HashMap, result::Result, sync::Arc};
|
||||
|
||||
use datatypes::{
|
||||
data_type::ConcreteDataType,
|
||||
@@ -167,10 +165,13 @@ mod tests {
|
||||
use table::requests::InsertRequest;
|
||||
|
||||
use super::{
|
||||
check_req, find_partitioning_values, partition_insert_request, partition_values,
|
||||
PartitionRule, RegionId, WriteSpliter,
|
||||
check_req, find_partitioning_values, partition_insert_request, partition_values, RegionId,
|
||||
WriteSpliter,
|
||||
};
|
||||
use crate::{
|
||||
error::Error,
|
||||
partitioning::{PartitionExpr, PartitionRule, PartitionRuleRef},
|
||||
};
|
||||
use crate::partitioning::PartitionExpr;
|
||||
|
||||
#[test]
|
||||
fn test_insert_req_check() {
|
||||
@@ -186,7 +187,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_writer_spliter() {
|
||||
let insert = mock_insert_request();
|
||||
let spliter = WriteSpliter::with_patition_rule(&MockPartitionRule);
|
||||
let rule = Arc::new(MockPartitionRule) as PartitionRuleRef<Error>;
|
||||
let spliter = WriteSpliter::with_patition_rule(rule);
|
||||
let ret = spliter.split(insert).unwrap();
|
||||
|
||||
assert_eq!(2, ret.len());
|
||||
@@ -406,7 +408,7 @@ mod tests {
|
||||
// PARTITION r1 VALUES IN(2, 3),
|
||||
// );
|
||||
impl PartitionRule for MockPartitionRule {
|
||||
type Error = String;
|
||||
type Error = Error;
|
||||
|
||||
fn partition_columns(&self) -> Vec<String> {
|
||||
vec!["id".to_string()]
|
||||
|
||||
622
src/frontend/src/table.rs
Normal file
@@ -0,0 +1,622 @@
|
||||
mod insert;
|
||||
|
||||
use std::any::Any;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_query::error::Result as QueryResult;
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::{PhysicalPlan, PhysicalPlanRef};
|
||||
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
use datafusion::logical_plan::Expr as DfExpr;
|
||||
use datafusion::physical_plan::Partitioning;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use snafu::prelude::*;
|
||||
use store_api::storage::RegionId;
|
||||
use table::error::Error as TableError;
|
||||
use table::metadata::{FilterPushDownType, TableInfoRef};
|
||||
use table::requests::InsertRequest;
|
||||
use table::Table;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::error::{self, Error, Result};
|
||||
use crate::mock::{DatanodeId, DatanodeInstance, TableScanPlan};
|
||||
use crate::partitioning::{Operator, PartitionExpr, PartitionRuleRef};
|
||||
use crate::spliter::WriteSpliter;
|
||||
|
||||
struct DistTable {
|
||||
table_name: String,
|
||||
schema: SchemaRef,
|
||||
partition_rule: PartitionRuleRef<Error>,
|
||||
region_dist_map: HashMap<RegionId, DatanodeId>,
|
||||
datanode_instances: HashMap<DatanodeId, DatanodeInstance>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Table for DistTable {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn table_info(&self) -> TableInfoRef {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn insert(&self, request: InsertRequest) -> table::Result<usize> {
|
||||
let spliter = WriteSpliter::with_patition_rule(self.partition_rule.clone());
|
||||
let inserts = spliter.split(request).map_err(TableError::new)?;
|
||||
let result = match self.dist_insert(inserts).await.map_err(TableError::new)? {
|
||||
client::ObjectResult::Select(_) => unreachable!(),
|
||||
client::ObjectResult::Mutate(result) => result,
|
||||
};
|
||||
Ok(result.success as usize)
|
||||
}
|
||||
|
||||
async fn scan(
|
||||
&self,
|
||||
projection: &Option<Vec<usize>>,
|
||||
filters: &[Expr],
|
||||
limit: Option<usize>,
|
||||
) -> table::Result<PhysicalPlanRef> {
|
||||
let regions = self.find_regions(filters).map_err(TableError::new)?;
|
||||
let datanodes = self.find_datanodes(regions).map_err(TableError::new)?;
|
||||
|
||||
let partition_execs = datanodes
|
||||
.iter()
|
||||
.map(|(datanode, _regions)| {
|
||||
let datanode_instance = self
|
||||
.datanode_instances
|
||||
.get(datanode)
|
||||
.context(error::DatanodeInstanceSnafu {
|
||||
datanode: *datanode,
|
||||
})?
|
||||
.clone();
|
||||
// TODO(LFC): Pass in "regions" when Datanode supports multi regions for a table.
|
||||
Ok(PartitionExec {
|
||||
table_name: self.table_name.clone(),
|
||||
datanode_instance,
|
||||
projection: projection.clone(),
|
||||
filters: filters.to_vec(),
|
||||
limit,
|
||||
batches: Arc::new(RwLock::new(None)),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<PartitionExec>>>()
|
||||
.map_err(TableError::new)?;
|
||||
|
||||
let dist_scan = DistTableScan {
|
||||
schema: project_schema(self.schema(), projection),
|
||||
partition_execs,
|
||||
};
|
||||
Ok(Arc::new(dist_scan))
|
||||
}
|
||||
|
||||
fn supports_filter_pushdown(&self, _filter: &Expr) -> table::Result<FilterPushDownType> {
|
||||
Ok(FilterPushDownType::Inexact)
|
||||
}
|
||||
}
|
||||
|
||||
impl DistTable {
|
||||
// TODO(LFC): Finding regions is currently not very efficient; it should be looked into further.
|
||||
fn find_regions(&self, filters: &[Expr]) -> Result<Vec<RegionId>> {
|
||||
let regions = if let Some((first, rest)) = filters.split_first() {
|
||||
let mut target = self.find_regions0(first)?;
|
||||
for filter in rest {
|
||||
let regions = self.find_regions0(filter)?;
|
||||
|
||||
// When all filters are provided as a collection, it often implicitly states that
|
||||
// "all filters must be satisfied". So we join all the results here.
|
||||
target.retain(|x| regions.contains(x));
|
||||
|
||||
// Fail fast: once the intersection is empty, it stays empty.
|
||||
if target.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
target.into_iter().collect::<Vec<RegionId>>()
|
||||
} else {
|
||||
self.partition_rule.find_regions(&[])?
|
||||
};
|
||||
ensure!(
|
||||
!regions.is_empty(),
|
||||
error::FindRegionsSnafu {
|
||||
filters: filters.to_vec()
|
||||
}
|
||||
);
|
||||
Ok(regions)
|
||||
}
|
||||
|
||||
// TODO(LFC): Support other types of filter expr:
|
||||
// - BETWEEN and IN (maybe more)
|
||||
// - expr with arithmetic like "a + 1 < 10" (should have been optimized in logic plan?)
|
||||
// - not comparison or neither "AND" nor "OR" operations, for example, "a LIKE x"
|
||||
fn find_regions0(&self, filter: &Expr) -> Result<HashSet<RegionId>> {
|
||||
let expr = filter.df_expr();
|
||||
match expr {
|
||||
DfExpr::BinaryExpr { left, op, right } if is_compare_op(op) => {
|
||||
let column_op_value = match (left.as_ref(), right.as_ref()) {
|
||||
(DfExpr::Column(c), DfExpr::Literal(v)) => Some((&c.name, *op, v)),
|
||||
(DfExpr::Literal(v), DfExpr::Column(c)) => {
|
||||
Some((&c.name, reverse_operator(op), v))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
if let Some((column, op, sv)) = column_op_value {
|
||||
let value = sv
|
||||
.clone()
|
||||
.try_into()
|
||||
.with_context(|_| error::ConvertScalarValueSnafu { value: sv.clone() })?;
|
||||
return Ok(self
|
||||
.partition_rule
|
||||
.find_regions(&[PartitionExpr::new(column, op, value)])?
|
||||
.into_iter()
|
||||
.collect::<HashSet<RegionId>>());
|
||||
}
|
||||
}
|
||||
DfExpr::BinaryExpr { left, op, right }
|
||||
if matches!(op, Operator::And | Operator::Or) =>
|
||||
{
|
||||
let left_regions = self.find_regions0(&(*left.clone()).into())?;
|
||||
let right_regions = self.find_regions0(&(*right.clone()).into())?;
|
||||
let regions = match op {
|
||||
Operator::And => left_regions
|
||||
.intersection(&right_regions)
|
||||
.cloned()
|
||||
.collect::<HashSet<RegionId>>(),
|
||||
Operator::Or => left_regions
|
||||
.union(&right_regions)
|
||||
.cloned()
|
||||
.collect::<HashSet<RegionId>>(),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
return Ok(regions);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// As a safety hatch, return all regions for unsupported partition exprs.
|
||||
Ok(self
|
||||
.partition_rule
|
||||
.find_regions(&[])?
|
||||
.into_iter()
|
||||
.collect::<HashSet<RegionId>>())
|
||||
}
|
||||
|
||||
fn find_datanodes(&self, regions: Vec<RegionId>) -> Result<HashMap<DatanodeId, Vec<RegionId>>> {
|
||||
let mut datanodes = HashMap::new();
|
||||
for region in regions.iter() {
|
||||
let datanode = *self
|
||||
.region_dist_map
|
||||
.get(region)
|
||||
.context(error::FindDatanodeSnafu { region: *region })?;
|
||||
datanodes
|
||||
.entry(datanode)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(*region);
|
||||
}
|
||||
Ok(datanodes)
|
||||
}
|
||||
}
|
||||
|
||||
fn project_schema(table_schema: SchemaRef, projection: &Option<Vec<usize>>) -> SchemaRef {
|
||||
if let Some(projection) = &projection {
|
||||
let columns = table_schema.column_schemas();
|
||||
let projected = projection
|
||||
.iter()
|
||||
.map(|x| columns[*x].clone())
|
||||
.collect::<Vec<ColumnSchema>>();
|
||||
Arc::new(Schema::new(projected))
|
||||
} else {
|
||||
table_schema
|
||||
}
|
||||
}
|
||||
|
||||
fn is_compare_op(op: &Operator) -> bool {
|
||||
matches!(
|
||||
*op,
|
||||
Operator::Eq
|
||||
| Operator::NotEq
|
||||
| Operator::Lt
|
||||
| Operator::LtEq
|
||||
| Operator::Gt
|
||||
| Operator::GtEq
|
||||
)
|
||||
}
|
||||
|
||||
fn reverse_operator(op: &Operator) -> Operator {
|
||||
match *op {
|
||||
Operator::Lt => Operator::Gt,
|
||||
Operator::Gt => Operator::Lt,
|
||||
Operator::LtEq => Operator::GtEq,
|
||||
Operator::GtEq => Operator::LtEq,
|
||||
_ => *op,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct DistTableScan {
|
||||
schema: SchemaRef,
|
||||
partition_execs: Vec<PartitionExec>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl PhysicalPlan for DistTableScan {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn output_partitioning(&self) -> Partitioning {
|
||||
Partitioning::UnknownPartitioning(self.partition_execs.len())
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<PhysicalPlanRef> {
|
||||
vec![]
|
||||
}
|
||||
|
||||
fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> QueryResult<PhysicalPlanRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
_runtime: Arc<RuntimeEnv>,
|
||||
) -> QueryResult<SendableRecordBatchStream> {
|
||||
let exec = &self.partition_execs[partition];
|
||||
exec.maybe_init().await;
|
||||
Ok(exec.as_stream().await)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct PartitionExec {
|
||||
table_name: String,
|
||||
datanode_instance: DatanodeInstance,
|
||||
projection: Option<Vec<usize>>,
|
||||
filters: Vec<Expr>,
|
||||
limit: Option<usize>,
|
||||
batches: Arc<RwLock<Option<RecordBatches>>>,
|
||||
}
|
||||
|
||||
impl PartitionExec {
|
||||
async fn maybe_init(&self) {
|
||||
if self.batches.read().await.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut batches = self.batches.write().await;
|
||||
if batches.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
let plan = TableScanPlan {
|
||||
table_name: self.table_name.clone(),
|
||||
projection: self.projection.clone(),
|
||||
filters: self.filters.clone(),
|
||||
limit: self.limit,
|
||||
};
|
||||
let result = self.datanode_instance.grpc_table_scan(plan).await;
|
||||
let _ = batches.insert(result);
|
||||
}
|
||||
|
||||
async fn as_stream(&self) -> SendableRecordBatchStream {
|
||||
let batches = self.batches.read().await;
|
||||
batches
|
||||
.as_ref()
|
||||
.expect("should have been initialized in \"maybe_init\"")
|
||||
.as_stream()
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME(LFC): no allow, for clippy temporarily
|
||||
#[allow(clippy::print_stdout)]
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use catalog::RegisterTableRequest;
|
||||
use client::Database;
|
||||
use common_recordbatch::{util, RecordBatch};
|
||||
use datafusion::arrow_print;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datafusion_expr::expr_fn::col;
|
||||
use datafusion_expr::expr_fn::{and, binary_expr, or};
|
||||
use datafusion_expr::lit;
|
||||
use datanode::datanode::{DatanodeOptions, ObjectStoreConfig};
|
||||
use datanode::instance::Instance;
|
||||
use datatypes::prelude::{ConcreteDataType, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::{Int32Vector, UInt32Vector};
|
||||
use table::test_util::MemTable;
|
||||
use table::TableRef;
|
||||
use tempdir::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::partitioning::range::RangePartitionRule;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_dist_table_scan() {
|
||||
let table = Arc::new(new_dist_table().await);
|
||||
|
||||
// should scan all regions
|
||||
// select * from numbers
|
||||
let projection = None;
|
||||
let filters = vec![];
|
||||
exec_table_scan(table.clone(), projection, filters, None).await;
|
||||
println!();
|
||||
|
||||
// should scan only region 1
|
||||
// select a, row_id from numbers where a < 10
|
||||
let projection = Some(vec![0, 1]);
|
||||
let filters = vec![binary_expr(col("a"), Operator::Lt, lit(10)).into()];
|
||||
exec_table_scan(table.clone(), projection, filters, None).await;
|
||||
println!();
|
||||
|
||||
// should scan region 1 and 2
|
||||
// select a, row_id from numbers where a < 15
|
||||
let projection = Some(vec![0, 1]);
|
||||
let filters = vec![binary_expr(col("a"), Operator::Lt, lit(15)).into()];
|
||||
exec_table_scan(table.clone(), projection, filters, None).await;
|
||||
println!();
|
||||
|
||||
// should scan region 2 and 3
|
||||
// select a, row_id from numbers where a < 40 and a >= 10
|
||||
let projection = Some(vec![0, 1]);
|
||||
let filters = vec![and(
|
||||
binary_expr(col("a"), Operator::Lt, lit(40)),
|
||||
binary_expr(col("a"), Operator::GtEq, lit(10)),
|
||||
)
|
||||
.into()];
|
||||
exec_table_scan(table.clone(), projection, filters, None).await;
|
||||
println!();
|
||||
|
||||
// should scan all regions
|
||||
// select a, row_id from numbers where a < 1000 and row_id == 1
|
||||
let projection = Some(vec![0, 1]);
|
||||
let filters = vec![and(
|
||||
binary_expr(col("a"), Operator::Lt, lit(1000)),
|
||||
binary_expr(col("row_id"), Operator::Eq, lit(1)),
|
||||
)
|
||||
.into()];
|
||||
exec_table_scan(table.clone(), projection, filters, None).await;
|
||||
}
|
||||
|
||||
async fn exec_table_scan(
|
||||
table: TableRef,
|
||||
projection: Option<Vec<usize>>,
|
||||
filters: Vec<Expr>,
|
||||
limit: Option<usize>,
|
||||
) {
|
||||
let table_scan = table
|
||||
.scan(&projection, filters.as_slice(), limit)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
for partition in 0..table_scan.output_partitioning().partition_count() {
|
||||
let result = table_scan
|
||||
.execute(partition, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let recordbatches = util::collect(result).await.unwrap();
|
||||
|
||||
let df_recordbatch = recordbatches
|
||||
.into_iter()
|
||||
.map(|r| r.df_recordbatch)
|
||||
.collect::<Vec<DfRecordBatch>>();
|
||||
|
||||
println!("DataFusion partition {}:", partition);
|
||||
let pretty_print = arrow_print::write(&df_recordbatch);
|
||||
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
|
||||
pretty_print.iter().for_each(|x| println!("{}", x));
|
||||
}
|
||||
}
|
||||
|
||||
async fn new_dist_table() -> DistTable {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("a", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new("row_id", ConcreteDataType::uint32_datatype(), true),
|
||||
]));
|
||||
|
||||
// PARTITION BY RANGE (a) (
|
||||
// PARTITION r1 VALUES LESS THAN (10),
|
||||
// PARTITION r2 VALUES LESS THAN (20),
|
||||
// PARTITION r3 VALUES LESS THAN (50),
|
||||
// PARTITION r4 VALUES LESS THAN (MAXVALUE),
|
||||
// )
|
||||
let partition_rule = RangePartitionRule::new(
|
||||
"a",
|
||||
vec![10_i32.into(), 20_i32.into(), 50_i32.into()],
|
||||
vec![1_u64, 2, 3, 4],
|
||||
);
|
||||
|
||||
let table1 = new_memtable(schema.clone(), (0..5).collect::<Vec<i32>>());
|
||||
let table2 = new_memtable(schema.clone(), (10..15).collect::<Vec<i32>>());
|
||||
let table3 = new_memtable(schema.clone(), (30..35).collect::<Vec<i32>>());
|
||||
let table4 = new_memtable(schema.clone(), (100..105).collect::<Vec<i32>>());
|
||||
|
||||
let instance1 = create_datanode_instance(1, table1).await;
|
||||
let instance2 = create_datanode_instance(2, table2).await;
|
||||
let instance3 = create_datanode_instance(3, table3).await;
|
||||
let instance4 = create_datanode_instance(4, table4).await;
|
||||
|
||||
let datanode_instances = HashMap::from([
|
||||
(instance1.datanode_id, instance1),
|
||||
(instance2.datanode_id, instance2),
|
||||
(instance3.datanode_id, instance3),
|
||||
(instance4.datanode_id, instance4),
|
||||
]);
|
||||
|
||||
DistTable {
|
||||
table_name: "dist_numbers".to_string(),
|
||||
schema,
|
||||
partition_rule: Arc::new(partition_rule),
|
||||
region_dist_map: HashMap::from([(1_u64, 1), (2_u64, 2), (3_u64, 3), (4_u64, 4)]),
|
||||
datanode_instances,
|
||||
}
|
||||
}
|
||||
|
||||
fn new_memtable(schema: SchemaRef, data: Vec<i32>) -> MemTable {
|
||||
let rows = data.len() as u32;
|
||||
let columns: Vec<VectorRef> = vec![
|
||||
// column "a"
|
||||
Arc::new(Int32Vector::from_slice(data)),
|
||||
// column "row_id"
|
||||
Arc::new(UInt32Vector::from_slice((1..=rows).collect::<Vec<u32>>())),
|
||||
];
|
||||
let recordbatch = RecordBatch::new(schema, columns).unwrap();
|
||||
MemTable::new("dist_numbers", recordbatch)
|
||||
}
|
||||
|
||||
async fn create_datanode_instance(
|
||||
datanode_id: DatanodeId,
|
||||
table: MemTable,
|
||||
) -> DatanodeInstance {
|
||||
let wal_tmp_dir = TempDir::new_in("/tmp", "gt_wal_dist_table_test").unwrap();
|
||||
let data_tmp_dir = TempDir::new_in("/tmp", "gt_data_dist_table_test").unwrap();
|
||||
let opts = DatanodeOptions {
|
||||
wal_dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
|
||||
storage: ObjectStoreConfig::File {
|
||||
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
|
||||
instance.start().await.unwrap();
|
||||
|
||||
let catalog_manager = instance.catalog_manager().clone();
|
||||
catalog_manager
|
||||
.register_table(RegisterTableRequest {
|
||||
catalog: "greptime".to_string(),
|
||||
schema: "public".to_string(),
|
||||
table_name: table.table_name().to_string(),
|
||||
table_id: 1234,
|
||||
table: Arc::new(table),
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let client = crate::tests::create_datanode_client(instance).await;
|
||||
DatanodeInstance::new(
|
||||
datanode_id,
|
||||
catalog_manager,
|
||||
Database::new("greptime", client),
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread")]
|
||||
async fn test_find_regions() {
|
||||
let table = new_dist_table().await;
|
||||
|
||||
let test = |filters: Vec<Expr>, expect_regions: Vec<u64>| {
|
||||
let mut regions = table.find_regions(filters.as_slice()).unwrap();
|
||||
regions.sort();
|
||||
|
||||
assert_eq!(regions, expect_regions);
|
||||
};
|
||||
|
||||
// test simple filter
|
||||
test(
|
||||
vec![binary_expr(col("a"), Operator::Lt, lit(10)).into()], // a < 10
|
||||
vec![1],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(col("a"), Operator::LtEq, lit(10)).into()], // a <= 10
|
||||
vec![1, 2],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(lit(20), Operator::Gt, col("a")).into()], // 20 > a
|
||||
vec![1, 2],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(lit(20), Operator::GtEq, col("a")).into()], // 20 >= a
|
||||
vec![1, 2, 3],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(lit(45), Operator::Eq, col("a")).into()], // 45 == a
|
||||
vec![3],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(col("a"), Operator::NotEq, lit(45)).into()], // a != 45
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(col("a"), Operator::Gt, lit(50)).into()], // a > 50
|
||||
vec![4],
|
||||
);
|
||||
|
||||
// test multiple filters
|
||||
test(
|
||||
vec![
|
||||
binary_expr(col("a"), Operator::Gt, lit(10)).into(),
|
||||
binary_expr(col("a"), Operator::Gt, lit(50)).into(),
|
||||
], // [a > 10, a > 50]
|
||||
vec![4],
|
||||
);
|
||||
|
||||
// test finding all regions when provided with not supported filters or not partition column
|
||||
test(
|
||||
vec![binary_expr(col("row_id"), Operator::LtEq, lit(123)).into()], // row_id <= 123
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(col("b"), Operator::Like, lit("foo%")).into()], // b LIKE 'foo%'
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
test(
|
||||
vec![binary_expr(col("c"), Operator::Gt, lit(123)).into()], // c > 789
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
|
||||
// test complex "AND" or "OR" filters
|
||||
test(
|
||||
vec![and(
|
||||
binary_expr(col("row_id"), Operator::Lt, lit(1)),
|
||||
or(
|
||||
binary_expr(col("row_id"), Operator::Lt, lit(1)),
|
||||
binary_expr(col("a"), Operator::Lt, lit(1)),
|
||||
),
|
||||
)
|
||||
.into()], // row_id < 1 OR (row_id < 1 AND a > 1)
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
test(
|
||||
vec![or(
|
||||
binary_expr(col("a"), Operator::Lt, lit(20)),
|
||||
binary_expr(col("a"), Operator::GtEq, lit(20)),
|
||||
)
|
||||
.into()], // a < 20 OR a >= 20
|
||||
vec![1, 2, 3, 4],
|
||||
);
|
||||
test(
|
||||
vec![and(
|
||||
binary_expr(col("a"), Operator::Lt, lit(20)),
|
||||
binary_expr(col("a"), Operator::Lt, lit(50)),
|
||||
)
|
||||
.into()], // a < 20 AND a < 50
|
||||
vec![1, 2],
|
||||
);
|
||||
|
||||
// test failed to find regions by contradictory filters
|
||||
let regions = table.find_regions(
|
||||
vec![and(
|
||||
binary_expr(col("a"), Operator::Lt, lit(20)),
|
||||
binary_expr(col("a"), Operator::GtEq, lit(20)),
|
||||
)
|
||||
.into()]
|
||||
.as_slice(),
|
||||
); // a < 20 AND a >= 20
|
||||
assert!(matches!(
|
||||
regions.unwrap_err(),
|
||||
error::Error::FindRegions { .. }
|
||||
));
|
||||
}
|
||||
}
|
||||
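As a reading aid for the `RangePartitionRule` used in `new_dist_table` above (bounds [10, 20, 50] mapped to regions [1, 2, 3, 4]), here is a small self-contained sketch of the mapping those tests assume: a value goes to the first region whose upper bound it is still below, and to the last (MAXVALUE) region otherwise. This is an illustration, not the actual `RangePartitionRule` implementation.

// Assumed semantics of the range rule in the tests above.
fn region_for(value: i32, bounds: &[i32], regions: &[u64]) -> u64 {
    bounds
        .iter()
        .position(|b| value < *b)
        .map(|i| regions[i])
        .unwrap_or_else(|| *regions.last().unwrap()) // the MAXVALUE partition
}

fn main() {
    let (bounds, regions) = ([10, 20, 50], [1u64, 2, 3, 4]);
    assert_eq!(1, region_for(5, &bounds, &regions));   // a < 10       -> region 1
    assert_eq!(2, region_for(12, &bounds, &regions));  // 10 <= a < 20 -> region 2
    assert_eq!(4, region_for(100, &bounds, &regions)); // a >= 50      -> region 4
}
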
202
src/frontend/src/table/insert.rs
Normal file
@@ -0,0 +1,202 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::codec;
|
||||
use api::v1::insert_expr;
|
||||
use api::v1::insert_expr::Expr;
|
||||
use api::v1::Column;
|
||||
use api::v1::InsertExpr;
|
||||
use api::v1::MutateResult;
|
||||
use client::ObjectResult;
|
||||
use snafu::ensure;
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::RegionId;
|
||||
use table::requests::InsertRequest;
|
||||
|
||||
use super::DistTable;
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
|
||||
impl DistTable {
|
||||
pub async fn dist_insert(
|
||||
&self,
|
||||
inserts: HashMap<RegionId, InsertRequest>,
|
||||
) -> Result<ObjectResult> {
|
||||
let mut joins = Vec::with_capacity(inserts.len());
|
||||
|
||||
for (region_id, insert) in inserts {
|
||||
let db = self
|
||||
.region_dist_map
|
||||
.get(®ion_id)
|
||||
.context(error::FindDatanodeSnafu { region: region_id })?;
|
||||
|
||||
let instance = self
|
||||
.datanode_instances
|
||||
.get(db)
|
||||
.context(error::DatanodeInstanceSnafu { datanode: *db })?;
|
||||
|
||||
let instance = instance.clone();
|
||||
|
||||
// TODO(fys): a separate runtime should be used here.
|
||||
let join = tokio::spawn(async move {
|
||||
instance
|
||||
.grpc_insert(to_insert_expr(region_id, insert)?)
|
||||
.await
|
||||
.context(error::RequestDatanodeSnafu)
|
||||
});
|
||||
|
||||
joins.push(join);
|
||||
}
|
||||
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
|
||||
for join in joins {
|
||||
let object_result = join.await.context(error::JoinTaskSnafu)??;
|
||||
let result = match object_result {
|
||||
client::ObjectResult::Select(_) => unreachable!(),
|
||||
client::ObjectResult::Mutate(result) => result,
|
||||
};
|
||||
success += result.success;
|
||||
failure += result.failure;
|
||||
}
|
||||
|
||||
Ok(ObjectResult::Mutate(MutateResult { success, failure }))
|
||||
}
|
||||
}
|
||||
|
||||
fn to_insert_expr(region_id: RegionId, insert: InsertRequest) -> Result<InsertExpr> {
|
||||
let mut row_count = None;
|
||||
|
||||
let columns = insert
|
||||
.columns_values
|
||||
.into_iter()
|
||||
.map(|(column_name, vector)| {
|
||||
match row_count {
|
||||
Some(rows) => ensure!(
|
||||
rows == vector.len(),
|
||||
error::InvalidInsertRequestSnafu {
|
||||
reason: "The row count of columns is not the same."
|
||||
}
|
||||
),
|
||||
|
||||
None => row_count = Some(vector.len()),
|
||||
}
|
||||
|
||||
let datatype: ColumnDataTypeWrapper = vector
|
||||
.data_type()
|
||||
.try_into()
|
||||
.context(error::ColumnDataTypeSnafu)?;
|
||||
|
||||
let mut column = Column {
|
||||
column_name,
|
||||
datatype: datatype.datatype() as i32,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
column.push_vals(0, vector);
|
||||
Ok(column)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
let insert_batch = codec::InsertBatch {
|
||||
columns,
|
||||
row_count: row_count.map(|rows| rows as u32).unwrap_or(0),
|
||||
};
|
||||
|
||||
let mut options = HashMap::with_capacity(1);
|
||||
options.insert(
|
||||
// TODO(fys): Temporarily hard code here
|
||||
"region_id".to_string(),
|
||||
codec::RegionId { id: region_id }.into(),
|
||||
);
|
||||
|
||||
Ok(InsertExpr {
|
||||
table_name: insert.table_name,
|
||||
options,
|
||||
expr: Some(Expr::Values(insert_expr::Values {
|
||||
values: vec![insert_batch.into()],
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::{collections::HashMap, ops::Deref};
|
||||
|
||||
use api::v1::{
|
||||
codec::{self, InsertBatch},
|
||||
insert_expr::Expr,
|
||||
ColumnDataType, InsertExpr,
|
||||
};
|
||||
use datatypes::{prelude::ConcreteDataType, types::StringType, vectors::VectorBuilder};
|
||||
use table::requests::InsertRequest;
|
||||
|
||||
use super::to_insert_expr;
|
||||
|
||||
#[test]
|
||||
fn test_to_insert_expr() {
|
||||
let insert_request = mock_insert_request();
|
||||
|
||||
let insert_expr = to_insert_expr(12, insert_request).unwrap();
|
||||
|
||||
verify_insert_expr(insert_expr);
|
||||
}
|
||||
|
||||
fn mock_insert_request() -> InsertRequest {
|
||||
let mut columns_values = HashMap::with_capacity(4);
|
||||
|
||||
let mut builder = VectorBuilder::new(ConcreteDataType::String(StringType));
|
||||
builder.push(&"host1".into());
|
||||
builder.push_null();
|
||||
builder.push(&"host3".into());
|
||||
columns_values.insert("host".to_string(), builder.finish());
|
||||
|
||||
let mut builder = VectorBuilder::new(ConcreteDataType::int16_datatype());
|
||||
builder.push(&1_i16.into());
|
||||
builder.push(&2_i16.into());
|
||||
builder.push(&3_i16.into());
|
||||
columns_values.insert("id".to_string(), builder.finish());
|
||||
|
||||
InsertRequest {
|
||||
table_name: "demo".to_string(),
|
||||
columns_values,
|
||||
}
|
||||
}
|
||||
|
||||
fn verify_insert_expr(insert_expr: InsertExpr) {
|
||||
let table_name = insert_expr.table_name;
|
||||
assert_eq!("demo", table_name);
|
||||
|
||||
let expr = insert_expr.expr.as_ref().unwrap();
|
||||
let vals = match expr {
|
||||
Expr::Values(vals) => vals,
|
||||
Expr::Sql(_) => unreachable!(),
|
||||
};
|
||||
|
||||
let batch: &[u8] = vals.values[0].as_ref();
|
||||
let vals: InsertBatch = batch.try_into().unwrap();
|
||||
|
||||
for column in vals.columns {
|
||||
let name = column.column_name;
|
||||
if name == "id" {
|
||||
assert_eq!(0, column.null_mask[0]);
|
||||
assert_eq!(ColumnDataType::Int16 as i32, column.datatype);
|
||||
assert_eq!(vec![1, 2, 3], column.values.as_ref().unwrap().i16_values);
|
||||
}
|
||||
if name == "host" {
|
||||
assert_eq!(2, column.null_mask[0]);
|
||||
assert_eq!(ColumnDataType::String as i32, column.datatype);
|
||||
assert_eq!(
|
||||
vec!["host1", "host3"],
|
||||
column.values.as_ref().unwrap().string_values
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let bytes = insert_expr.options.get("region_id").unwrap();
|
||||
let region_id: codec::RegionId = bytes.deref().try_into().unwrap();
|
||||
assert_eq!(12, region_id.id);
|
||||
}
|
||||
}
|
||||
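The `null_mask` assertions in `verify_insert_expr` above read the mask as a bitmap where bit i of a byte is set when row i is null; that bit ordering is an assumption consistent with the expected values, not spelled out in this diff. A tiny standalone check of that reading:

fn main() {
    // Rows of the "host" column in the test: ["host1", NULL, "host3"].
    let nulls = [false, true, false];
    let mut mask0: u8 = 0;
    for (row, is_null) in nulls.iter().enumerate() {
        if *is_null {
            mask0 |= 1 << row; // set bit `row` when that row is null
        }
    }
    assert_eq!(2, mask0); // matches `assert_eq!(2, column.null_mask[0])` for "host"
    // The "id" column has no nulls, hence `assert_eq!(0, column.null_mask[0])`.
}
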
@@ -20,7 +20,11 @@ async fn create_datanode_instance() -> Arc<DatanodeInstance> {

pub(crate) async fn create_frontend_instance() -> Arc<Instance> {
    let datanode_instance = create_datanode_instance().await;
    let client = create_datanode_client(datanode_instance).await;
    Arc::new(Instance::with_client(client))
}

pub(crate) async fn create_datanode_client(datanode_instance: Arc<DatanodeInstance>) -> Client {
    let (client, server) = tokio::io::duplex(1024);

    let runtime = Arc::new(
@@ -67,6 +71,5 @@ pub(crate) async fn create_frontend_instance() -> Arc<Instance> {
        }),
    )
    .unwrap();
    let client = Client::with_manager_and_urls(channel_manager, vec![addr]);
    Arc::new(Instance::with_client(client))
    Client::with_manager_and_urls(channel_manager, vec![addr])
}

@@ -9,8 +9,8 @@ use byteorder::ByteOrder;
use byteorder::LittleEndian;
use bytes::{Bytes, BytesMut};
use common_error::ext::BoxedError;
use common_telemetry::debug;
use common_telemetry::logging::{error, info};
use common_telemetry::{debug, trace};
use futures::Stream;
use futures_util::StreamExt;
use snafu::ResultExt;
@@ -380,7 +380,7 @@ impl LogFile {
                }
            }
        }
        debug!("Yield batch size: {}", batch.len());
        trace!("Yield batch size: {}", batch.len());
        yield Ok(batch);
    }
});

@@ -11,8 +11,8 @@ use router::Client as RouterClient;
|
||||
use snafu::OptionExt;
|
||||
use store::Client as StoreClient;
|
||||
|
||||
use self::heartbeat::HeartbeatSender;
|
||||
use self::heartbeat::HeartbeatStream;
|
||||
pub use self::heartbeat::HeartbeatSender;
|
||||
pub use self::heartbeat::HeartbeatStream;
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::rpc::BatchPutRequest;
|
||||
@@ -92,13 +92,13 @@ impl MetaClientBuilder {
|
||||
let mgr = client.channel_manager.clone();
|
||||
|
||||
if self.enable_heartbeat {
|
||||
client.heartbeat_client = Some(HeartbeatClient::new(self.id, mgr.clone()));
|
||||
client.heartbeat = Some(HeartbeatClient::new(self.id, mgr.clone()));
|
||||
}
|
||||
if self.enable_router {
|
||||
client.router_client = Some(RouterClient::new(self.id, mgr.clone()));
|
||||
client.router = Some(RouterClient::new(self.id, mgr.clone()));
|
||||
}
|
||||
if self.enable_store {
|
||||
client.store_client = Some(StoreClient::new(self.id, mgr));
|
||||
client.store = Some(StoreClient::new(self.id, mgr));
|
||||
}
|
||||
|
||||
client
|
||||
@@ -109,9 +109,9 @@ impl MetaClientBuilder {
|
||||
pub struct MetaClient {
|
||||
id: Id,
|
||||
channel_manager: ChannelManager,
|
||||
heartbeat_client: Option<HeartbeatClient>,
|
||||
router_client: Option<RouterClient>,
|
||||
store_client: Option<StoreClient>,
|
||||
heartbeat: Option<HeartbeatClient>,
|
||||
router: Option<RouterClient>,
|
||||
store: Option<StoreClient>,
|
||||
}
|
||||
|
||||
impl MetaClient {
|
||||
@@ -137,52 +137,46 @@ impl MetaClient {
|
||||
{
|
||||
info!("MetaClient channel config: {:?}", self.channel_config());
|
||||
|
||||
if let Some(heartbeat_client) = &mut self.heartbeat_client {
|
||||
heartbeat_client.start(urls.clone()).await?;
|
||||
if let Some(client) = &mut self.heartbeat {
|
||||
client.start(urls.clone()).await?;
|
||||
info!("Heartbeat client started");
|
||||
}
|
||||
if let Some(router_client) = &mut self.router_client {
|
||||
router_client.start(urls.clone()).await?;
|
||||
if let Some(client) = &mut self.router {
|
||||
client.start(urls.clone()).await?;
|
||||
info!("Router client started");
|
||||
}
|
||||
if let Some(store_client) = &mut self.store_client {
|
||||
store_client.start(urls).await?;
|
||||
if let Some(client) = &mut self.store {
|
||||
client.start(urls).await?;
|
||||
info!("Store client started");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
    /// Asks for the leader address of `metasrv`; the heartbeat component
    /// needs to create a bidirectional stream to the leader.
|
||||
pub async fn ask_leader(&self) -> Result<()> {
|
||||
self.heartbeat_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "heartbeat_client",
|
||||
})?
|
||||
.ask_leader()
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn refresh_members(&mut self) {
|
||||
todo!()
|
||||
self.heartbeat_client()?.ask_leader().await
|
||||
}
|
||||
|
||||
    /// Returns a bidirectional heartbeat stream as a (sender, receiver) pair; the
    /// other end is the leader of `metasrv`.
    ///
    /// The `datanode` uses the sender to continuously send heartbeat
    /// packets (some self-state data), and the receiver receives responses
    /// from `metasrv` (which may contain scheduling instructions).
|
||||
pub async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream)> {
|
||||
self.heartbeat_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "heartbeat_client",
|
||||
})?
|
||||
.heartbeat()
|
||||
.await
|
||||
self.heartbeat_client()?.heartbeat().await
|
||||
}
|
||||
|
||||
/// Provides routing information for distributed create table requests.
|
||||
///
|
||||
/// When a distributed create table request is received, this method returns
|
||||
/// a list of `datanode` addresses that are generated based on the partition
|
||||
/// information contained in the request and using some intelligent policies,
|
||||
/// such as load-based.
|
||||
pub async fn create_route(&self, req: CreateRequest) -> Result<RouteResponse> {
|
||||
self.router_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "route_client",
|
||||
})?
|
||||
.create(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
self.router_client()?.create(req.into()).await?.try_into()
|
||||
}
|
||||
|
||||
/// Fetch routing information for tables. The smallest unit is the complete
|
||||
@@ -205,46 +199,22 @@ impl MetaClient {
|
||||
/// ```
|
||||
///
|
||||
pub async fn route(&self, req: RouteRequest) -> Result<RouteResponse> {
|
||||
self.router_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "route_client",
|
||||
})?
|
||||
.route(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
self.router_client()?.route(req.into()).await?.try_into()
|
||||
}
|
||||
|
||||
/// Range gets the keys in the range from the key-value store.
|
||||
pub async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
|
||||
self.store_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})?
|
||||
.range(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
self.store_client()?.range(req.into()).await?.try_into()
|
||||
}
|
||||
|
||||
/// Put puts the given key into the key-value store.
|
||||
pub async fn put(&self, req: PutRequest) -> Result<PutResponse> {
|
||||
self.store_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})?
|
||||
.put(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
self.store_client()?.put(req.into()).await?.try_into()
|
||||
}
|
||||
|
||||
/// BatchPut atomically puts the given keys into the key-value store.
|
||||
pub async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
|
||||
self.store_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})?
|
||||
.batch_put(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
self.store_client()?.batch_put(req.into()).await?.try_into()
|
||||
}
|
||||
|
||||
/// CompareAndPut atomically puts the value to the given updated
|
||||
@@ -253,10 +223,7 @@ impl MetaClient {
|
||||
&self,
|
||||
req: CompareAndPutRequest,
|
||||
) -> Result<CompareAndPutResponse> {
|
||||
self.store_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})?
|
||||
self.store_client()?
|
||||
.compare_and_put(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
@@ -264,28 +231,31 @@ impl MetaClient {
|
||||
|
||||
/// DeleteRange deletes the given range from the key-value store.
|
||||
pub async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
|
||||
self.store_client()
|
||||
.context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})?
|
||||
self.store_client()?
|
||||
.delete_range(req.into())
|
||||
.await?
|
||||
.try_into()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn heartbeat_client(&self) -> Option<HeartbeatClient> {
|
||||
self.heartbeat_client.clone()
|
||||
pub fn heartbeat_client(&self) -> Result<HeartbeatClient> {
|
||||
self.heartbeat.clone().context(error::NotStartedSnafu {
|
||||
name: "heartbeat_client",
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn router_client(&self) -> Option<RouterClient> {
|
||||
self.router_client.clone()
|
||||
pub fn router_client(&self) -> Result<RouterClient> {
|
||||
self.router.clone().context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn store_client(&self) -> Option<StoreClient> {
|
||||
self.store_client.clone()
|
||||
pub fn store_client(&self) -> Result<StoreClient> {
|
||||
self.store.clone().context(error::NotStartedSnafu {
|
||||
name: "store_client",
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -320,23 +290,23 @@ mod tests {
|
||||
let urls = &["127.0.0.1:3001", "127.0.0.1:3002"];
|
||||
|
||||
let mut meta_client = MetaClientBuilder::new(0, 0).enable_heartbeat().build();
|
||||
assert!(meta_client.heartbeat_client().is_some());
|
||||
assert!(meta_client.router_client().is_none());
|
||||
assert!(meta_client.store_client().is_none());
|
||||
assert!(meta_client.heartbeat_client().is_ok());
|
||||
assert!(meta_client.router_client().is_err());
|
||||
assert!(meta_client.store_client().is_err());
|
||||
meta_client.start(urls).await.unwrap();
|
||||
assert!(meta_client.heartbeat_client().unwrap().is_started().await);
|
||||
|
||||
let mut meta_client = MetaClientBuilder::new(0, 0).enable_router().build();
|
||||
assert!(meta_client.heartbeat_client().is_none());
|
||||
assert!(meta_client.router_client().is_some());
|
||||
assert!(meta_client.store_client().is_none());
|
||||
assert!(meta_client.heartbeat_client().is_err());
|
||||
assert!(meta_client.router_client().is_ok());
|
||||
assert!(meta_client.store_client().is_err());
|
||||
meta_client.start(urls).await.unwrap();
|
||||
assert!(meta_client.router_client().unwrap().is_started().await);
|
||||
|
||||
let mut meta_client = MetaClientBuilder::new(0, 0).enable_store().build();
|
||||
assert!(meta_client.heartbeat_client().is_none());
|
||||
assert!(meta_client.router_client().is_none());
|
||||
assert!(meta_client.store_client().is_some());
|
||||
assert!(meta_client.heartbeat_client().is_err());
|
||||
assert!(meta_client.router_client().is_err());
|
||||
assert!(meta_client.store_client().is_ok());
|
||||
meta_client.start(urls).await.unwrap();
|
||||
assert!(meta_client.store_client().unwrap().is_started().await);
|
||||
|
||||
@@ -347,9 +317,9 @@ mod tests {
|
||||
.build();
|
||||
assert_eq!(1, meta_client.id().0);
|
||||
assert_eq!(2, meta_client.id().1);
|
||||
assert!(meta_client.heartbeat_client().is_some());
|
||||
assert!(meta_client.router_client().is_some());
|
||||
assert!(meta_client.store_client().is_some());
|
||||
assert!(meta_client.heartbeat_client().is_ok());
|
||||
assert!(meta_client.router_client().is_ok());
|
||||
assert!(meta_client.store_client().is_ok());
|
||||
meta_client.start(urls).await.unwrap();
|
||||
assert!(meta_client.heartbeat_client().unwrap().is_started().await);
|
||||
assert!(meta_client.router_client().unwrap().is_started().await);
|
||||
@@ -648,23 +618,26 @@ mod tests {
|
||||
let res = client.compare_and_put(req).await;
|
||||
assert!(!res.unwrap().is_success());
|
||||
|
||||
// empty expect key is not allowed
|
||||
// create if absent
|
||||
let req = CompareAndPutRequest::new()
|
||||
.with_key(b"key".to_vec())
|
||||
.with_value(b"value".to_vec());
|
||||
let res = client.compare_and_put(req).await;
|
||||
let mut res = res.unwrap();
|
||||
assert!(!res.is_success());
|
||||
let mut kv = res.take_prev_kv().unwrap();
|
||||
assert_eq!(b"key".to_vec(), kv.take_key());
|
||||
assert!(kv.take_value().is_empty());
|
||||
assert!(res.is_success());
|
||||
assert!(res.take_prev_kv().is_none());
|
||||
|
||||
let req = PutRequest::new()
|
||||
// compare and put fail
|
||||
let req = CompareAndPutRequest::new()
|
||||
.with_key(b"key".to_vec())
|
||||
.with_value(b"value".to_vec());
|
||||
let res = client.put(req).await;
|
||||
assert!(res.is_ok());
|
||||
.with_expect(b"not_eq".to_vec())
|
||||
.with_value(b"value2".to_vec());
|
||||
let res = client.compare_and_put(req).await;
|
||||
let mut res = res.unwrap();
|
||||
assert!(!res.is_success());
|
||||
assert_eq!(b"value".to_vec(), res.take_prev_kv().unwrap().take_value());
|
||||
|
||||
// compare and put success
|
||||
let req = CompareAndPutRequest::new()
|
||||
.with_key(b"key".to_vec())
|
||||
.with_expect(b"value".to_vec())
|
||||
|
||||
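Putting the builder, the renamed fields, and the new `Result`-returning accessors together, here is a hedged usage sketch assembled only from the tests and doc comments above; the addresses are placeholders and error handling is elided with `?`.

// Sketch only; types and method names are taken from the diff above.
async fn demo() -> Result<()> {
    let mut meta_client = MetaClientBuilder::new(0, 0)
        .enable_heartbeat()
        .enable_router()
        .enable_store()
        .build();
    meta_client.start(&["127.0.0.1:3001", "127.0.0.1:3002"]).await?;

    // Accessors now return Result instead of Option: asking for a client that was
    // not enabled in the builder surfaces as a NotStarted error rather than a None.
    let (_sender, _stream) = meta_client.heartbeat().await?;

    let req = PutRequest::new()
        .with_key(b"key".to_vec())
        .with_value(b"value".to_vec());
    let _res = meta_client.put(req).await?;
    Ok(())
}
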
@@ -29,7 +29,7 @@ pub struct HeartbeatSender {
|
||||
|
||||
impl HeartbeatSender {
|
||||
#[inline]
|
||||
const fn new(id: Id, sender: mpsc::Sender<HeartbeatRequest>) -> Self {
|
||||
fn new(id: Id, sender: mpsc::Sender<HeartbeatRequest>) -> Self {
|
||||
Self { id, sender }
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ pub struct HeartbeatStream {
|
||||
|
||||
impl HeartbeatStream {
|
||||
#[inline]
|
||||
const fn new(id: Id, stream: Streaming<HeartbeatResponse>) -> Self {
|
||||
fn new(id: Id, stream: Streaming<HeartbeatResponse>) -> Self {
|
||||
Self { id, stream }
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
mod router;
mod store;
mod util;
pub mod util;

use api::v1::meta::KeyValue as PbKeyValue;
use api::v1::meta::Peer as PbPeer;

@@ -143,14 +143,17 @@ impl KvStore for EtcdStore {
|
||||
options,
|
||||
} = req.try_into()?;
|
||||
|
||||
let txn = Txn::new()
|
||||
.when(vec![Compare::value(
|
||||
key.clone(),
|
||||
CompareOp::Equal,
|
||||
expect.clone(),
|
||||
)])
|
||||
.and_then(vec![TxnOp::put(key.clone(), value, options)])
|
||||
.or_else(vec![TxnOp::get(key.clone(), None)]);
|
||||
let put_op = vec![TxnOp::put(key.clone(), value, options)];
|
||||
let get_op = vec![TxnOp::get(key.clone(), None)];
|
||||
let mut txn = if expect.is_empty() {
|
||||
// create if absent
|
||||
// a create revision of 0 means the key does not exist yet
|
||||
Txn::new().when(vec![Compare::create_revision(key, CompareOp::Equal, 0)])
|
||||
} else {
|
||||
// compare and put
|
||||
Txn::new().when(vec![Compare::value(key, CompareOp::Equal, expect)])
|
||||
};
|
||||
txn = txn.and_then(put_op).or_else(get_op);
|
||||
|
||||
let txn_res = self
|
||||
.client
|
||||
@@ -158,6 +161,7 @@ impl KvStore for EtcdStore {
|
||||
.txn(txn)
|
||||
.await
|
||||
.context(error::EtcdFailedSnafu)?;
|
||||
|
||||
let success = txn_res.succeeded();
|
||||
let op_res = txn_res
|
||||
.op_responses()
|
||||
@@ -165,26 +169,26 @@ impl KvStore for EtcdStore {
|
||||
.context(error::InvalidTxnResultSnafu {
|
||||
err_msg: "empty response",
|
||||
})?;
|
||||
let prev_kv = if success {
|
||||
Some(KeyValue { key, value: expect })
|
||||
} else {
|
||||
match op_res {
|
||||
TxnOpResponse::Get(get_res) => {
|
||||
if get_res.count() == 0 {
|
||||
// the key does not exist
|
||||
Some(KeyValue { key, value: vec![] })
|
||||
} else {
|
||||
ensure!(
|
||||
get_res.count() == 1,
|
||||
error::InvalidTxnResultSnafu {
|
||||
err_msg: format!("expect 1 response, actual {}", get_res.count())
|
||||
}
|
||||
);
|
||||
Some(KeyValue::from(KvPair::new(&get_res.kvs()[0])))
|
||||
}
|
||||
}
|
||||
_ => unreachable!(), // never get here
|
||||
|
||||
let prev_kv = match op_res {
|
||||
TxnOpResponse::Put(put_res) => {
|
||||
put_res.prev_key().map(|kv| KeyValue::from(KvPair::new(kv)))
|
||||
}
|
||||
TxnOpResponse::Get(get_res) => {
|
||||
if get_res.count() == 0 {
|
||||
// the key does not exist
|
||||
None
|
||||
} else {
|
||||
ensure!(
|
||||
get_res.count() == 1,
|
||||
error::InvalidTxnResultSnafu {
|
||||
err_msg: format!("expect 1 response, actual {}", get_res.count())
|
||||
}
|
||||
);
|
||||
Some(KeyValue::from(KvPair::new(&get_res.kvs()[0])))
|
||||
}
|
||||
}
|
||||
_ => unreachable!(), // never get here
|
||||
};
|
||||
|
||||
let header = Some(ResponseHeader::success(cluster_id));
|
||||
|
||||
@@ -145,27 +145,16 @@ impl KvStore for MemStore {
        } = req;

        let mut memory = self.inner.write();
        let (success, prev_kv) = if expect.is_empty() {
            (
                false,
                Some(KeyValue {
                    key: key.clone(),
                    value: vec![],
                }),
            )
        } else {
            let prev_val = memory.get(&key);
            let success = prev_val
                .map(|v| expect.cmp(v) == Ordering::Equal)
                .unwrap_or(false);
            (
                success,
                prev_val.map(|v| KeyValue {
                    key: key.clone(),
                    value: v.clone(),
                }),
            )
        };

        let prev_val = memory.get(&key);

        let success = prev_val
            .map(|v| expect.cmp(v) == Ordering::Equal)
            .unwrap_or(false | expect.is_empty());
        let prev_kv = prev_val.map(|v| KeyValue {
            key: key.clone(),
            value: v.clone(),
        });

        if success {
            memory.insert(key, value);

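Both the etcd and in-memory stores above now share the same compare-and-put contract: an empty `expect` turns the call into create-if-absent, otherwise it succeeds only when the stored value equals `expect`, and the previous key-value (if any) is returned. Below is a small self-contained model written only to illustrate that contract; it is not the `EtcdStore`/`MemStore` code itself.

use std::collections::HashMap;

// Toy model of the compare-and-put contract.
fn compare_and_put(
    store: &mut HashMap<Vec<u8>, Vec<u8>>,
    key: Vec<u8>,
    expect: Vec<u8>,
    value: Vec<u8>,
) -> (bool, Option<Vec<u8>>) {
    let prev = store.get(&key).cloned();
    let success = match &prev {
        None => expect.is_empty(), // create if absent
        Some(v) => *v == expect,   // classic compare-and-swap
    };
    if success {
        store.insert(key, value);
    }
    (success, prev)
}

fn main() {
    let mut store = HashMap::new();
    // create if absent
    assert_eq!((true, None), compare_and_put(&mut store, b"key".to_vec(), vec![], b"value".to_vec()));
    // compare and put fails against the wrong expected value, returning the previous value
    let (ok, prev) = compare_and_put(&mut store, b"key".to_vec(), b"not_eq".to_vec(), b"value2".to_vec());
    assert!(!ok);
    assert_eq!(Some(b"value".to_vec()), prev);
    // compare and put succeeds when the expected value matches
    assert!(compare_and_put(&mut store, b"key".to_vec(), b"value".to_vec(), b"value3".to_vec()).0);
}
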
@@ -48,6 +48,7 @@ where
            Statement::Query(qb) => self.query_to_plan(qb),
            Statement::ShowTables(_)
            | Statement::ShowDatabases(_)
            | Statement::ShowCreateTable(_)
            | Statement::Create(_)
            | Statement::Alter(_)
            | Statement::Insert(_) => unreachable!(),

@@ -270,31 +270,45 @@ pub(crate) mod greptime_builtin {
|
||||
// P.S.: not extracted into a separate file because non-inlined proc macro attributes are *unstable*
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_function::scalars::math::PowFunction;
|
||||
use common_function::scalars::{function::FunctionContext, Function};
|
||||
use datafusion::arrow::compute::comparison::{gt_eq_scalar, lt_eq_scalar};
|
||||
use datafusion::arrow::datatypes::DataType;
|
||||
use datafusion::arrow::error::ArrowError;
|
||||
use datafusion::arrow::scalar::{PrimitiveScalar, Scalar};
|
||||
use datafusion::physical_plan::expressions;
|
||||
use common_function::scalars::{
|
||||
function::FunctionContext, math::PowFunction, Function, FunctionRef, FUNCTION_REGISTRY,
|
||||
};
|
||||
use datafusion::{
|
||||
arrow::{
|
||||
compute::comparison::{gt_eq_scalar, lt_eq_scalar},
|
||||
datatypes::DataType,
|
||||
error::ArrowError,
|
||||
scalar::{PrimitiveScalar, Scalar},
|
||||
},
|
||||
physical_plan::expressions,
|
||||
};
|
||||
use datafusion_expr::ColumnarValue as DFColValue;
|
||||
use datafusion_physical_expr::math_expressions;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::array::{ArrayRef, NullArray};
|
||||
use datatypes::arrow::compute;
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, Int64Vector};
|
||||
use datatypes::{
|
||||
arrow::{
|
||||
self,
|
||||
array::{ArrayRef, NullArray},
|
||||
compute,
|
||||
},
|
||||
vectors::VectorRef,
|
||||
};
|
||||
use paste::paste;
|
||||
use rustpython_vm::builtins::{PyFloat, PyFunction, PyInt, PyStr};
|
||||
use rustpython_vm::function::{FuncArgs, KwArgs, OptionalArg};
|
||||
use rustpython_vm::{AsObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine};
|
||||
use rustpython_vm::{
|
||||
builtins::{PyFloat, PyFunction, PyInt, PyStr},
|
||||
function::{FuncArgs, KwArgs, OptionalArg},
|
||||
AsObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
|
||||
};
|
||||
|
||||
use crate::python::builtins::{
|
||||
all_to_f64, eval_aggr_fn, from_df_err, try_into_columnar_value, try_into_py_obj,
|
||||
type_cast_error,
|
||||
};
|
||||
use crate::python::utils::PyVectorRef;
|
||||
use crate::python::utils::{is_instance, py_vec_obj_to_array};
|
||||
use crate::python::PyVector;
|
||||
use crate::python::{
|
||||
utils::{is_instance, py_vec_obj_to_array, PyVectorRef},
|
||||
vector::val_to_pyobj,
|
||||
PyVector,
|
||||
};
|
||||
|
||||
#[pyfunction]
|
||||
fn vector(args: OptionalArg<PyObjectRef>, vm: &VirtualMachine) -> PyResult<PyVector> {
|
||||
@@ -303,10 +317,135 @@ pub(crate) mod greptime_builtin {
|
||||
|
||||
    // The main binding code. Due to proc macro limitations it can't directly use a simpler macro,
    // because `pyfunction` is not an ordinary attribute.
|
||||
// ------
|
||||
// GrepTime DB's own UDF&UDAF
|
||||
// ------
|
||||
|
||||
fn eval_func(name: &str, v: &[PyVectorRef], vm: &VirtualMachine) -> PyResult<PyVector> {
|
||||
let v: Vec<VectorRef> = v.iter().map(|v| v.as_vector_ref()).collect();
|
||||
let func: Option<FunctionRef> = FUNCTION_REGISTRY.get_function(name);
|
||||
let res = match func {
|
||||
Some(f) => f.eval(Default::default(), &v),
|
||||
None => return Err(vm.new_type_error(format!("Can't find function {}", name))),
|
||||
};
|
||||
match res {
|
||||
Ok(v) => Ok(v.into()),
|
||||
Err(err) => {
|
||||
Err(vm.new_runtime_error(format!("Fail to evaluate the function,: {}", err)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_aggr_func(
|
||||
name: &str,
|
||||
args: &[PyVectorRef],
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<PyObjectRef> {
|
||||
let v: Vec<VectorRef> = args.iter().map(|v| v.as_vector_ref()).collect();
|
||||
let func = FUNCTION_REGISTRY.get_aggr_function(name);
|
||||
let f = match func {
|
||||
Some(f) => f.create().creator(),
|
||||
None => return Err(vm.new_type_error(format!("Can't find function {}", name))),
|
||||
};
|
||||
let types: Vec<_> = v.iter().map(|v| v.data_type()).collect();
|
||||
let acc = f(&types);
|
||||
let mut acc = match acc {
|
||||
Ok(acc) => acc,
|
||||
Err(err) => {
|
||||
return Err(vm.new_runtime_error(format!("Failed to create accumulator: {}", err)))
|
||||
}
|
||||
};
|
||||
match acc.update_batch(&v) {
|
||||
Ok(_) => (),
|
||||
Err(err) => {
|
||||
return Err(vm.new_runtime_error(format!("Failed to update batch: {}", err)))
|
||||
}
|
||||
};
|
||||
let res = match acc.evaluate() {
|
||||
Ok(r) => r,
|
||||
Err(err) => {
|
||||
return Err(vm.new_runtime_error(format!("Failed to evaluate accumulator: {}", err)))
|
||||
}
|
||||
};
|
||||
let res = val_to_pyobj(res, vm);
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// GrepTime's own impl of pow function
|
||||
#[pyfunction]
|
||||
fn pow_gp(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyVector> {
|
||||
eval_func("pow", &[v0, v1], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn clip(
|
||||
v0: PyVectorRef,
|
||||
v1: PyVectorRef,
|
||||
v2: PyVectorRef,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<PyVector> {
|
||||
eval_func("clip", &[v0, v1, v2], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn median(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("median", &[v], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn diff(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("diff", &[v], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn mean(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("mean", &[v], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn polyval(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("polyval", &[v0, v1], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn argmax(v0: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("argmax", &[v0], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn argmin(v0: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("argmin", &[v0], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn percentile(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("percentile", &[v0, v1], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn scipy_stats_norm_cdf(
|
||||
v0: PyVectorRef,
|
||||
v1: PyVectorRef,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("scipystatsnormcdf", &[v0, v1], vm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn scipy_stats_norm_pdf(
|
||||
v0: PyVectorRef,
|
||||
v1: PyVectorRef,
|
||||
vm: &VirtualMachine,
|
||||
) -> PyResult<PyObjectRef> {
|
||||
eval_aggr_func("scipystatsnormpdf", &[v0, v1], vm)
|
||||
}
|
||||
|
||||
    // The math functions return a general PyObjectRef,
    // so they can return either a PyVector or a scalar PyInt/Float/Bool.
|
||||
|
||||
// ------
|
||||
// DataFusion's UDF&UDAF
|
||||
// ------
|
||||
    /// Simple math function; the backing implementation is datafusion's `sqrt` math function.
|
||||
#[pyfunction]
|
||||
fn sqrt(val: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
|
||||
|
||||
@@ -924,5 +924,198 @@ sum(prev(values))"#,
|
||||
ty: Float64,
|
||||
value: Float(3.0)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"values": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([1.0, 2.0, 3.0])
|
||||
),
|
||||
"pows": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([1.0, 2.0, 3.0])
|
||||
),
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
pow_gp(values, pows)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: FloatVec([1.0, 4.0, 27.0])
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"values": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 0.5])
|
||||
),
|
||||
"lower": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([0.0, 0.0, 0.0])
|
||||
),
|
||||
"upper": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([1.0, 1.0, 1.0])
|
||||
),
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
clip(values, lower, upper)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: FloatVec([0.0, 1.0, 0.5])
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"values": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 2.0, 0.5])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
median(values)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(1.25)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"values": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 2.0, 0.5])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
diff(values)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: FloatVec([3.0, 0.0, -1.5])
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"values": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 2.0, 0.0])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
mean(values)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(0.75)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0])
|
||||
),
|
||||
"x": Var(
|
||||
ty: Int64,
|
||||
value: IntVec([1, 1])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
polyval(p, x)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(1.0)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 3.0])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
argmax(p)"#,
|
||||
expect: Ok((
|
||||
ty: Int64,
|
||||
value: Int(2)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 3.0])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
argmin(p)"#,
|
||||
expect: Ok((
|
||||
ty: Int64,
|
||||
value: Int(0)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"x": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 3.0])
|
||||
),
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([0.5, 0.5, 0.5])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
percentile(x, p)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(-0.97)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"x": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 3.0])
|
||||
),
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([0.5, 0.5, 0.5])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
scipy_stats_norm_cdf(x, p)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(0.3444602779022303)
|
||||
))
|
||||
),
|
||||
TestCase(
|
||||
input: {
|
||||
"x": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([-1.0, 2.0, 3.0])
|
||||
),
|
||||
"p": Var(
|
||||
ty: Float64,
|
||||
value: FloatVec([0.5, 0.5, 0.5])
|
||||
)
|
||||
},
|
||||
script: r#"
|
||||
from greptime import *
|
||||
scipy_stats_norm_pdf(x, p)"#,
|
||||
expect: Ok((
|
||||
ty: Float64,
|
||||
value: Float(0.1768885735289059)
|
||||
))
|
||||
)
|
||||
]
|
||||
|
||||
@@ -939,7 +939,16 @@ pub fn val_to_pyobj(val: value::Value, vm: &VirtualMachine) -> PyObjectRef {
        value::Value::DateTime(v) => vm.ctx.new_int(v.val()).into(),
        // FIXME(dennis): the timestamp unit is lost here
        Value::Timestamp(v) => vm.ctx.new_int(v.value()).into(),
        value::Value::List(_) => unreachable!(),
        value::Value::List(list) => {
            let list = list.items().as_ref();
            match list {
                Some(list) => {
                    let list: Vec<_> = list.iter().map(|v| val_to_pyobj(v.clone(), vm)).collect();
                    vm.ctx.new_list(list).into()
                }
                None => vm.ctx.new_list(Vec::new()).into(),
            }
        }
    }
}


@@ -47,6 +47,7 @@ impl ScriptsTable {
            desc: Some("Scripts table".to_string()),
            schema,
            // name and timestamp as primary key
            region_numbers: vec![0],
            primary_key_indices: vec![0, 3],
            create_if_not_exists: true,
            table_options: HashMap::default(),

@@ -26,10 +26,12 @@ hyper = { version = "0.14", features = ["full"] }
influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" }
metrics = "0.20"
num_cpus = "1.13"
once_cell = "1.16"
openmetrics-parser = "0.4"
opensrv-mysql = "0.1"
pgwire = { version = "0.4" }
prost = "0.11"
regex = "1.6"
schemars = "0.8"
serde = "1.0"
serde_json = "1.0"

166
src/servers/src/context.rs
Normal file
@@ -0,0 +1,166 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::context::AuthMethod::Token;
|
||||
use crate::context::Channel::HTTP;
|
||||
|
||||
type CtxFnRef = Arc<dyn Fn(&Context) -> bool + Send + Sync>;
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct Context {
|
||||
pub exec_info: ExecInfo,
|
||||
pub client_info: ClientInfo,
|
||||
pub user_info: UserInfo,
|
||||
pub quota: Quota,
|
||||
#[serde(skip)]
|
||||
pub predicates: Vec<CtxFnRef>,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn new() -> Self {
|
||||
Context::default()
|
||||
}
|
||||
|
||||
pub fn add_predicate(&mut self, predicate: CtxFnRef) {
|
||||
self.predicates.push(predicate);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct ExecInfo {
|
||||
pub catalog: Option<String>,
|
||||
pub schema: Option<String>,
|
||||
// should opts be thread-safe?
|
||||
pub extra_opts: HashMap<String, String>,
|
||||
pub trace_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct ClientInfo {
|
||||
pub client_host: Option<String>,
|
||||
}
|
||||
|
||||
impl ClientInfo {
|
||||
pub fn new(host: Option<String>) -> Self {
|
||||
ClientInfo { client_host: host }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct UserInfo {
|
||||
pub username: Option<String>,
|
||||
pub from_channel: Option<Channel>,
|
||||
pub auth_method: Option<AuthMethod>,
|
||||
}
|
||||
|
||||
impl UserInfo {
|
||||
pub fn with_http_token(token: String) -> Self {
|
||||
UserInfo {
|
||||
username: None,
|
||||
from_channel: Some(HTTP),
|
||||
auth_method: Some(Token(token)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum Channel {
|
||||
GRPC,
|
||||
HTTP,
|
||||
MYSQL,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum AuthMethod {
|
||||
None,
|
||||
Password {
|
||||
hash_method: AuthHashMethod,
|
||||
hashed_value: Vec<u8>,
|
||||
},
|
||||
Token(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum AuthHashMethod {
|
||||
DoubleSha1,
|
||||
Sha256,
|
||||
}
|
||||
|
||||
#[derive(Default, Serialize, Deserialize)]
|
||||
pub struct Quota {
|
||||
pub total: u64,
|
||||
pub consumed: u64,
|
||||
pub estimated: u64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::context::AuthMethod::Token;
|
||||
use crate::context::Channel::HTTP;
|
||||
use crate::context::{ClientInfo, Context, ExecInfo, Quota, UserInfo};
|
||||
|
||||
#[test]
|
||||
fn test_predicate() {
|
||||
let mut ctx = Context::default();
|
||||
ctx.add_predicate(Arc::new(|ctx: &Context| {
|
||||
ctx.quota.total > ctx.quota.consumed
|
||||
}));
|
||||
ctx.quota.total = 10;
|
||||
ctx.quota.consumed = 5;
|
||||
|
||||
let predicates = ctx.predicates.clone();
|
||||
let mut re = true;
|
||||
for predicate in predicates {
|
||||
re &= predicate(&ctx);
|
||||
}
|
||||
assert!(re);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build() {
|
||||
let ctx = Context {
|
||||
exec_info: ExecInfo {
|
||||
catalog: Some(String::from("greptime")),
|
||||
schema: Some(String::from("public")),
|
||||
extra_opts: HashMap::new(),
|
||||
trace_id: None,
|
||||
},
|
||||
client_info: ClientInfo::new(Some(String::from("127.0.0.1:4001"))),
|
||||
user_info: UserInfo::with_http_token(String::from("HELLO")),
|
||||
quota: Quota {
|
||||
total: 10,
|
||||
consumed: 5,
|
||||
estimated: 2,
|
||||
},
|
||||
predicates: vec![],
|
||||
};
|
||||
|
||||
assert_eq!(ctx.exec_info.catalog.unwrap(), String::from("greptime"));
|
||||
assert_eq!(ctx.exec_info.schema.unwrap(), String::from("public"));
|
||||
assert_eq!(ctx.exec_info.extra_opts.capacity(), 0);
|
||||
assert_eq!(ctx.exec_info.trace_id, None);
|
||||
|
||||
assert_eq!(
|
||||
ctx.client_info.client_host.unwrap(),
|
||||
String::from("127.0.0.1:4001")
|
||||
);
|
||||
|
||||
assert_eq!(ctx.user_info.username, None);
|
||||
assert_eq!(ctx.user_info.from_channel.unwrap(), HTTP);
|
||||
assert_eq!(
|
||||
ctx.user_info.auth_method.unwrap(),
|
||||
Token(String::from("HELLO"))
|
||||
);
|
||||
|
||||
assert!(ctx.quota.total > 0);
|
||||
assert!(ctx.quota.consumed > 0);
|
||||
assert!(ctx.quota.estimated > 0);
|
||||
|
||||
assert_eq!(ctx.predicates.capacity(), 0);
|
||||
}
|
||||
}
|
||||
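The `predicates` field lets callers attach arbitrary request checks (quota, auth, and so on) that are evaluated against the finished `Context`, as `test_predicate` does above. The following is a stand-alone sketch of that `Arc<dyn Fn(..) -> bool + Send + Sync>` pattern; the `Limits` struct and `PredicateRef` alias are hypothetical stand-ins, not types from the servers crate.

use std::sync::Arc;

// Hypothetical stand-in for the request context; only the fields the
// predicates below need.
struct Limits {
    total: u64,
    consumed: u64,
}

// Same shape as `CtxFnRef` above: a shareable, thread-safe predicate.
type PredicateRef = Arc<dyn Fn(&Limits) -> bool + Send + Sync>;

fn main() {
    let predicates: Vec<PredicateRef> = vec![
        Arc::new(|l: &Limits| l.total > l.consumed),
        Arc::new(|l: &Limits| l.consumed < 100),
    ];

    let limits = Limits { total: 10, consumed: 5 };
    // The request passes only if every attached predicate holds.
    let allowed = predicates.iter().all(|p| p(&limits));
    assert!(allowed);
}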
@@ -152,6 +152,9 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Invalid prometheus remote read query result, msg: {}", msg))]
|
||||
InvalidPromRemoteReadQueryResult { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to decode region id, source: {}", source))]
|
||||
DecodeRegionId { source: api::DecodeError },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -186,6 +189,7 @@ impl ErrorExt for Error {
|
||||
| DecodePromRemoteRequest { .. }
|
||||
| DecompressPromRemoteRequest { .. }
|
||||
| InvalidPromRemoteRequest { .. }
|
||||
| DecodeRegionId { .. }
|
||||
| TimePrecision { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
InfluxdbLinesWrite { source, .. } => source.status_code(),
|
||||
|
||||
@@ -304,8 +304,7 @@ impl HttpServer {
|
||||
router = router.nest(&format!("/{}/prometheus", HTTP_API_VERSION), prom_router);
|
||||
}
|
||||
|
||||
let metrics_router = Router::new().route("/", routing::get(handler::metrics));
|
||||
router = router.nest(&format!("/{}/metrics", HTTP_API_VERSION), metrics_router);
|
||||
router = router.route("/metrics", routing::get(handler::metrics));
|
||||
|
||||
router
|
||||
// middlewares
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use aide::transform::TransformOperation;
|
||||
use axum::extract::{Json, Query, State};
|
||||
use axum::extract::{Json, Query, RawBody, State};
|
||||
use common_error::prelude::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_telemetry::metric;
|
||||
@@ -47,54 +47,67 @@ pub async fn metrics(Query(_params): Query<HashMap<String, String>>) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
|
||||
pub struct ScriptExecution {
|
||||
pub name: String,
|
||||
pub script: String,
|
||||
macro_rules! json_err {
|
||||
($e: expr) => {{
|
||||
return Json(JsonResponse::with_error(
|
||||
format!("Invalid argument: {}", $e),
|
||||
common_error::status_code::StatusCode::InvalidArguments,
|
||||
));
|
||||
}};
|
||||
|
||||
($msg: expr, $code: expr) => {{
|
||||
return Json(JsonResponse::with_error($msg.to_string(), $code));
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! unwrap_or_json_err {
|
||||
($result: expr) => {
|
||||
match $result {
|
||||
Ok(result) => result,
|
||||
Err(e) => json_err!(e),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Handler to insert and compile script
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn scripts(
|
||||
State(query_handler): State<SqlQueryHandlerRef>,
|
||||
Json(payload): Json<ScriptExecution>,
|
||||
Query(params): Query<ScriptQuery>,
|
||||
RawBody(body): RawBody,
|
||||
) -> Json<JsonResponse> {
|
||||
if payload.name.is_empty() || payload.script.is_empty() {
|
||||
return Json(JsonResponse::with_error(
|
||||
"Invalid name or script".to_string(),
|
||||
StatusCode::InvalidArguments,
|
||||
));
|
||||
}
|
||||
let name = params.name.as_ref();
|
||||
|
||||
let body = match query_handler
|
||||
.insert_script(&payload.name, &payload.script)
|
||||
.await
|
||||
{
|
||||
if name.is_none() || name.unwrap().is_empty() {
|
||||
json_err!("invalid name");
|
||||
}
|
||||
let bytes = unwrap_or_json_err!(hyper::body::to_bytes(body).await);
|
||||
|
||||
let script = unwrap_or_json_err!(String::from_utf8(bytes.to_vec()));
|
||||
|
||||
let body = match query_handler.insert_script(name.unwrap(), &script).await {
|
||||
Ok(()) => JsonResponse::with_output(None),
|
||||
Err(e) => JsonResponse::with_error(format!("Insert script error: {}", e), e.status_code()),
|
||||
Err(e) => json_err!(format!("Insert script error: {}", e), e.status_code()),
|
||||
};
|
||||
|
||||
Json(body)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct RunScriptQuery {
|
||||
name: Option<String>,
|
||||
pub struct ScriptQuery {
|
||||
pub name: Option<String>,
|
||||
}
|
||||
|
||||
/// Handler to execute script
|
||||
#[axum_macros::debug_handler]
|
||||
pub async fn run_script(
|
||||
State(query_handler): State<SqlQueryHandlerRef>,
|
||||
Query(params): Query<RunScriptQuery>,
|
||||
Query(params): Query<ScriptQuery>,
|
||||
) -> Json<JsonResponse> {
|
||||
let name = params.name.as_ref();
|
||||
|
||||
if name.is_none() || name.unwrap().is_empty() {
|
||||
return Json(JsonResponse::with_error(
|
||||
"Invalid name".to_string(),
|
||||
StatusCode::InvalidArguments,
|
||||
));
|
||||
json_err!("invalid name");
|
||||
}
|
||||
|
||||
let output = query_handler.execute_script(name.unwrap()).await;
|
||||
|
||||
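`json_err!` and `unwrap_or_json_err!` centralize the "bail out of the handler with an error response" boilerplate. Below is a stand-alone sketch of the same early-return macro idea, returning a plain `String` instead of the real `Json(JsonResponse)` type; the names are illustrative only.

// Returns an error string from the *enclosing function*, mirroring how
// `json_err!` returns a `Json(JsonResponse::with_error(..))` from a handler.
macro_rules! bail_with {
    ($e:expr) => {{
        return format!("Invalid argument: {}", $e);
    }};
}

// Unwraps a Result or bails out of the enclosing function with the error.
macro_rules! unwrap_or_bail {
    ($result:expr) => {
        match $result {
            Ok(v) => v,
            Err(e) => bail_with!(e),
        }
    };
}

fn parse_body(body: &[u8]) -> String {
    let script = unwrap_or_bail!(String::from_utf8(body.to_vec()));
    format!("ok: {}", script)
}

fn main() {
    assert_eq!(parse_body(b"select 1"), "ok: select 1");
    assert!(parse_body(&[0xff, 0xfe]).starts_with("Invalid argument"));
}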
@@ -1,5 +1,6 @@
|
||||
#![feature(assert_matches)]
|
||||
|
||||
pub mod context;
|
||||
pub mod error;
|
||||
pub mod grpc;
|
||||
pub mod http;
|
||||
|
||||
374
src/servers/src/mysql/federated.rs
Normal file
@@ -0,0 +1,374 @@
|
||||
//! Use regexes to filter out statements emitted by some MySQL federated components.
|
||||
//! Inspired by Databend's "[mysql_federated.rs](https://github.com/datafuselabs/databend/blob/ac706bf65845e6895141c96c0a10bad6fdc2d367/src/query/service/src/servers/mysql/mysql_federated.rs)".
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::Output;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::StringVector;
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::bytes::RegexSet;
|
||||
use regex::Regex;
|
||||
|
||||
// TODO(LFC): Include GreptimeDB's version and git commit tag etc.
|
||||
const MYSQL_VERSION: &str = "8.0.26";
|
||||
|
||||
static SELECT_VAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new("(?i)^(SELECT @@(.*))").unwrap());
|
||||
static MYSQL_CONN_JAVA_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(/\\* mysql-connector-java(.*))").unwrap());
|
||||
static SHOW_LOWER_CASE_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'lower_case_table_names'(.*))").unwrap());
|
||||
static SHOW_COLLATION_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(show collation where(.*))").unwrap());
|
||||
static SHOW_VARIABLES_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES(.*))").unwrap());
|
||||
static SELECT_VERSION_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"(?i)^(SELECT VERSION\(\s*\))").unwrap());
|
||||
|
||||
// SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP());
|
||||
static SELECT_TIME_DIFF_FUNC_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(SELECT TIMEDIFF\\(NOW\\(\\), UTC_TIMESTAMP\\(\\)\\))").unwrap());
|
||||
|
||||
// sqlalchemy < 1.4.30
|
||||
static SHOW_SQL_MODE_PATTERN: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'sql_mode'(.*))").unwrap());
|
||||
|
||||
static OTHER_NOT_SUPPORTED_STMT: Lazy<RegexSet> = Lazy::new(|| {
|
||||
RegexSet::new(&[
|
||||
// Txn.
|
||||
"(?i)^(ROLLBACK(.*))",
|
||||
"(?i)^(COMMIT(.*))",
|
||||
"(?i)^(START(.*))",
|
||||
|
||||
// Set.
|
||||
"(?i)^(SET NAMES(.*))",
|
||||
"(?i)^(SET character_set_results(.*))",
|
||||
"(?i)^(SET net_write_timeout(.*))",
|
||||
"(?i)^(SET FOREIGN_KEY_CHECKS(.*))",
|
||||
"(?i)^(SET AUTOCOMMIT(.*))",
|
||||
"(?i)^(SET SQL_LOG_BIN(.*))",
|
||||
"(?i)^(SET sql_mode(.*))",
|
||||
"(?i)^(SET SQL_SELECT_LIMIT(.*))",
|
||||
"(?i)^(SET @@(.*))",
|
||||
|
||||
"(?i)^(SHOW COLLATION)",
|
||||
"(?i)^(SHOW CHARSET)",
|
||||
|
||||
// mysqldump.
|
||||
"(?i)^(SET SESSION(.*))",
|
||||
"(?i)^(SET SQL_QUOTE_SHOW_CREATE(.*))",
|
||||
"(?i)^(LOCK TABLES(.*))",
|
||||
"(?i)^(UNLOCK TABLES(.*))",
|
||||
"(?i)^(SELECT LOGFILE_GROUP_NAME, FILE_NAME, TOTAL_EXTENTS, INITIAL_SIZE, ENGINE, EXTRA FROM INFORMATION_SCHEMA.FILES(.*))",
|
||||
|
||||
// mydumper.
|
||||
"(?i)^(/\\*!80003 SET(.*) \\*/)$",
|
||||
"(?i)^(SHOW MASTER STATUS)",
|
||||
"(?i)^(SHOW ALL SLAVES STATUS)",
|
||||
"(?i)^(LOCK BINLOG FOR BACKUP)",
|
||||
"(?i)^(LOCK TABLES FOR BACKUP)",
|
||||
"(?i)^(UNLOCK BINLOG(.*))",
|
||||
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
|
||||
|
||||
// DBeaver.
|
||||
"(?i)^(SHOW WARNINGS)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW WARNINGS)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW PLUGINS)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW COLLATION)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW CHARSET)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW ENGINES)",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SELECT @@(.*))",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW @@(.*))",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SET net_write_timeout(.*))",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SET SQL_SELECT_LIMIT(.*))",
|
||||
"(?i)^(/\\* ApplicationName=(.*)SHOW VARIABLES(.*))",
|
||||
|
||||
// pt-toolkit
|
||||
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
|
||||
|
||||
// mysqldump 5.7.16
|
||||
"(?i)^(/\\*!40100 SET(.*) \\*/)$",
|
||||
"(?i)^(/\\*!40103 SET(.*) \\*/)$",
|
||||
"(?i)^(/\\*!40111 SET(.*) \\*/)$",
|
||||
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
|
||||
"(?i)^(/\\*!40014 SET(.*) \\*/)$",
|
||||
"(?i)^(/\\*!40000 SET(.*) \\*/)$",
|
||||
]).unwrap()
|
||||
});
|
||||
|
||||
static VAR_VALUES: Lazy<HashMap<&str, &str>> = Lazy::new(|| {
|
||||
HashMap::from([
|
||||
("tx_isolation", "REPEATABLE-READ"),
|
||||
("session.tx_isolation", "REPEATABLE-READ"),
|
||||
("transaction_isolation", "REPEATABLE-READ"),
|
||||
("session.transaction_isolation", "REPEATABLE-READ"),
|
||||
("session.transaction_read_only", "0"),
|
||||
("time_zone", "UTC"),
|
||||
("system_time_zone", "UTC"),
|
||||
("max_allowed_packet", "134217728"),
|
||||
("interactive_timeout", "31536000"),
|
||||
("wait_timeout", "31536000"),
|
||||
("net_write_timeout", "31536000"),
|
||||
("version_comment", "Greptime"),
|
||||
])
|
||||
});
|
||||
|
||||
// Recordbatches for select function.
|
||||
// Format:
|
||||
// |function_name|
|
||||
// |value|
|
||||
fn select_function(name: &str, value: &str) -> RecordBatches {
|
||||
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
|
||||
name,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)]));
|
||||
let columns = vec![Arc::new(StringVector::from(vec![value])) as _];
|
||||
RecordBatches::try_from_columns(schema, columns)
|
||||
// unwrap is safe because the schema and data can always form a recordbatch; they are all string type
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
// Recordbatches for show variable statement.
|
||||
// Format is:
|
||||
// | Variable_name | Value |
|
||||
// | xx | yy |
|
||||
fn show_variables(name: &str, value: &str) -> RecordBatches {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
ColumnSchema::new("Variable_name", ConcreteDataType::string_datatype(), true),
|
||||
ColumnSchema::new("Value", ConcreteDataType::string_datatype(), true),
|
||||
]));
|
||||
let columns = vec![
|
||||
Arc::new(StringVector::from(vec![name])) as _,
|
||||
Arc::new(StringVector::from(vec![value])) as _,
|
||||
];
|
||||
RecordBatches::try_from_columns(schema, columns)
|
||||
// unwrap is safe because the schema and data can always form a recordbatch; they are all string type
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn select_variable(query: &str) -> Option<Output> {
|
||||
let mut fields = vec![];
|
||||
let mut values = vec![];
|
||||
|
||||
// query like "SELECT @@aa, @@bb as cc, @dd..."
|
||||
let query = query.to_lowercase();
|
||||
let vars: Vec<&str> = query.split("@@").collect();
|
||||
if vars.len() <= 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// skip the first "select"
|
||||
for var in vars.iter().skip(1) {
|
||||
let var = var.trim_matches(|c| c == ' ' || c == ',');
|
||||
let var_as: Vec<&str> = var
|
||||
.split(" as ")
|
||||
.map(|x| {
|
||||
x.trim_matches(|c| c == ' ')
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.unwrap_or("")
|
||||
})
|
||||
.collect();
|
||||
match var_as.len() {
|
||||
1 => {
|
||||
// @@aa
|
||||
let value = VAR_VALUES.get(var_as[0]).unwrap_or(&"0");
|
||||
values.push(Arc::new(StringVector::from(vec![*value])) as _);
|
||||
|
||||
// field is '@@aa'
|
||||
fields.push(ColumnSchema::new(
|
||||
&format!("@@{}", var_as[0]),
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
));
|
||||
}
|
||||
2 => {
|
||||
// @@bb as cc:
|
||||
// var is 'bb'.
|
||||
let value = VAR_VALUES.get(var_as[0]).unwrap_or(&"0");
|
||||
values.push(Arc::new(StringVector::from(vec![*value])) as _);
|
||||
|
||||
// field is 'cc'.
|
||||
fields.push(ColumnSchema::new(
|
||||
var_as[1],
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
));
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
|
||||
let schema = Arc::new(Schema::new(fields));
|
||||
// unwrap is safe because the schema and data can always form a recordbatch; they are all string type
|
||||
let batches = RecordBatches::try_from_columns(schema, values).unwrap();
|
||||
Some(Output::RecordBatches(batches))
|
||||
}
|
||||
|
||||
fn check_select_variable(query: &str) -> Option<Output> {
|
||||
if vec![&SELECT_VAR_PATTERN, &MYSQL_CONN_JAVA_PATTERN]
|
||||
.iter()
|
||||
.any(|r| r.is_match(query))
|
||||
{
|
||||
select_variable(query)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn check_show_variables(query: &str) -> Option<Output> {
|
||||
let recordbatches = if SHOW_SQL_MODE_PATTERN.is_match(query) {
|
||||
Some(show_variables("sql_mode", "ONLY_FULL_GROUP_BY STRICT_TRANS_TABLES NO_ZERO_IN_DATE NO_ZERO_DATE ERROR_FOR_DIVISION_BY_ZERO NO_ENGINE_SUBSTITUTION"))
|
||||
} else if SHOW_LOWER_CASE_PATTERN.is_match(query) {
|
||||
Some(show_variables("lower_case_table_names", "0"))
|
||||
} else if SHOW_COLLATION_PATTERN.is_match(query) || SHOW_VARIABLES_PATTERN.is_match(query) {
|
||||
Some(show_variables("", ""))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
recordbatches.map(Output::RecordBatches)
|
||||
}
|
||||
|
||||
// Check for SET or other queries; this is the final check of the federated query.
|
||||
fn check_others(query: &str) -> Option<Output> {
|
||||
if OTHER_NOT_SUPPORTED_STMT.is_match(query.as_bytes()) {
|
||||
return Some(Output::RecordBatches(RecordBatches::empty()));
|
||||
}
|
||||
|
||||
let recordbatches = if SELECT_VERSION_PATTERN.is_match(query) {
|
||||
Some(select_function("version()", MYSQL_VERSION))
|
||||
} else if SELECT_TIME_DIFF_FUNC_PATTERN.is_match(query) {
|
||||
Some(select_function(
|
||||
"TIMEDIFF(NOW(), UTC_TIMESTAMP())",
|
||||
"00:00:00",
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
recordbatches.map(Output::RecordBatches)
|
||||
}
|
||||
|
||||
// Check whether the query is a federated or driver setup command,
|
||||
// and return some faked results if there are any.
|
||||
pub fn check(query: &str) -> Option<Output> {
|
||||
// First, check whether the query is like "select @@variables".
|
||||
let output = check_select_variable(query);
|
||||
if output.is_some() {
|
||||
return output;
|
||||
}
|
||||
|
||||
// Then check "show variables like ...".
|
||||
let output = check_show_variables(query);
|
||||
if output.is_some() {
|
||||
return output;
|
||||
}
|
||||
|
||||
// Last check.
|
||||
check_others(query)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_check() {
|
||||
let query = "select 1";
|
||||
let result = check(query);
|
||||
assert!(result.is_none());
|
||||
|
||||
let query = "select versiona";
|
||||
let output = check(query);
|
||||
assert!(output.is_none());
|
||||
|
||||
fn test(query: &str, expected: Vec<&str>) {
|
||||
let output = check(query);
|
||||
match output.unwrap() {
|
||||
Output::RecordBatches(r) => {
|
||||
assert_eq!(r.pretty_print().lines().collect::<Vec<_>>(), expected)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
let query = "select version()";
|
||||
let expected = vec![
|
||||
"+-----------+",
|
||||
"| version() |",
|
||||
"+-----------+",
|
||||
"| 8.0.26 |",
|
||||
"+-----------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
let query = "SELECT @@version_comment LIMIT 1";
|
||||
let expected = vec![
|
||||
"+-------------------+",
|
||||
"| @@version_comment |",
|
||||
"+-------------------+",
|
||||
"| Greptime |",
|
||||
"+-------------------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
// variables
|
||||
let query = "select @@tx_isolation, @@session.tx_isolation";
|
||||
let expected = vec![
|
||||
"+-----------------+------------------------+",
|
||||
"| @@tx_isolation | @@session.tx_isolation |",
|
||||
"+-----------------+------------------------+",
|
||||
"| REPEATABLE-READ | REPEATABLE-READ |",
|
||||
"+-----------------+------------------------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
// complex variables
|
||||
let query = "/* mysql-connector-java-8.0.17 (Revision: 16a712ddb3f826a1933ab42b0039f7fb9eebc6ec) */SELECT @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_connection, @@character_set_results AS character_set_results, @@character_set_server AS character_set_server, @@collation_server AS collation_server, @@collation_connection AS collation_connection, @@init_connect AS init_connect, @@interactive_timeout AS interactive_timeout, @@license AS license, @@lower_case_table_names AS lower_case_table_names, @@max_allowed_packet AS max_allowed_packet, @@net_write_timeout AS net_write_timeout, @@performance_schema AS performance_schema, @@sql_mode AS sql_mode, @@system_time_zone AS system_time_zone, @@time_zone AS time_zone, @@transaction_isolation AS transaction_isolation, @@wait_timeout AS wait_timeout;";
|
||||
let expected = vec![
|
||||
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
|
||||
"| auto_increment_increment | character_set_client | character_set_connection | character_set_results | character_set_server | collation_server | collation_connection | init_connect | interactive_timeout | license | lower_case_table_names | max_allowed_packet | net_write_timeout | performance_schema | sql_mode | system_time_zone | time_zone | transaction_isolation | wait_timeout; |",
|
||||
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
|
||||
"| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 31536000 | 0 | 0 | 134217728 | 31536000 | 0 | 0 | UTC | UTC | REPEATABLE-READ | 31536000 |",
|
||||
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
let query = "show variables";
|
||||
let expected = vec![
|
||||
"+---------------+-------+",
|
||||
"| Variable_name | Value |",
|
||||
"+---------------+-------+",
|
||||
"| | |",
|
||||
"+---------------+-------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
let query = "show variables like 'lower_case_table_names'";
|
||||
let expected = vec![
|
||||
"+------------------------+-------+",
|
||||
"| Variable_name | Value |",
|
||||
"+------------------------+-------+",
|
||||
"| lower_case_table_names | 0 |",
|
||||
"+------------------------+-------+",
|
||||
];
|
||||
test(query, expected);
|
||||
|
||||
let query = "show collation";
|
||||
let expected = vec!["++", "++"]; // empty
|
||||
test(query, expected);
|
||||
|
||||
let query = "SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP())";
|
||||
let expected = vec![
|
||||
"+----------------------------------+",
|
||||
"| TIMEDIFF(NOW(), UTC_TIMESTAMP()) |",
|
||||
"+----------------------------------+",
|
||||
"| 00:00:00 |",
|
||||
"+----------------------------------+",
|
||||
];
|
||||
test(query, expected);
|
||||
}
|
||||
}
|
||||
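The whole module boils down to matching the incoming statement against these case-insensitive, anchored patterns before the query ever reaches the engine. A small, self-contained check of two of the patterns shown above; it only assumes the `regex` and `once_cell` crates that this change adds to the servers crate's Cargo.toml.

use once_cell::sync::Lazy;
use regex::bytes::RegexSet;
use regex::Regex;

static SELECT_VERSION: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?i)^(SELECT VERSION\(\s*\))").unwrap());

static NOT_SUPPORTED: Lazy<RegexSet> =
    Lazy::new(|| RegexSet::new(&["(?i)^(SET NAMES(.*))", "(?i)^(ROLLBACK(.*))"]).unwrap());

fn main() {
    // Matched: the server fakes a one-row "version()" result.
    assert!(SELECT_VERSION.is_match("select version()"));
    // Matched: the server answers with an empty record batch.
    assert!(NOT_SUPPORTED.is_match(b"SET NAMES utf8mb4"));
    // Not matched by any federated pattern: falls through to the query engine.
    assert!(!SELECT_VERSION.is_match("select 1"));
}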
@@ -63,7 +63,14 @@ impl<W: io::Write + Send + Sync> AsyncMysqlShim<W> for MysqlInstanceShim {
|
||||
query: &'a str,
|
||||
writer: QueryResultWriter<'a, W>,
|
||||
) -> Result<()> {
|
||||
let output = self.query_handler.do_query(query).await;
|
||||
// TODO(LFC): Find a better way:
|
||||
// `check` uses regex to filter out unsupported statements emitted by MySQL's federated
|
||||
// components; this is quick and dirty, and there must be a better way to do it.
|
||||
let output = if let Some(output) = crate::mysql::federated::check(query) {
|
||||
Ok(output)
|
||||
} else {
|
||||
self.query_handler.do_query(query).await
|
||||
};
|
||||
|
||||
let mut writer = MysqlResultWriter::new(writer);
|
||||
writer.write(output).await
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
mod federated;
|
||||
pub mod handler;
|
||||
pub mod server;
|
||||
pub mod writer;
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use axum::extract::{Json, Query, State};
|
||||
use axum::body::Body;
|
||||
use axum::extract::{Json, Query, RawBody, State};
|
||||
use common_telemetry::metric;
|
||||
use metrics::counter;
|
||||
use servers::http::handler as http_handler;
|
||||
use servers::http::handler::ScriptExecution;
|
||||
use servers::http::JsonOutput;
|
||||
use table::test_util::MemTable;
|
||||
|
||||
@@ -58,27 +58,38 @@ async fn test_metrics() {
|
||||
async fn test_scripts() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
|
||||
let exec = create_script_payload();
|
||||
let query_handler = create_testing_sql_query_handler(MemTable::default_numbers_table());
|
||||
let script = r#"
|
||||
@copr(sql='select uint32s as number from numbers', args=['number'], returns=['n'])
|
||||
def test(n):
|
||||
return n;
|
||||
"#
|
||||
.to_string();
|
||||
|
||||
let Json(json) = http_handler::scripts(State(query_handler), exec).await;
|
||||
let query_handler = create_testing_sql_query_handler(MemTable::default_numbers_table());
|
||||
let body = RawBody(Body::from(script.clone()));
|
||||
let invalid_query = create_invalid_script_query();
|
||||
let Json(json) = http_handler::scripts(State(query_handler.clone()), invalid_query, body).await;
|
||||
assert!(!json.success(), "{:?}", json);
|
||||
assert_eq!(json.error().unwrap(), "Invalid argument: invalid name");
|
||||
|
||||
let body = RawBody(Body::from(script));
|
||||
let exec = create_script_query();
|
||||
let Json(json) = http_handler::scripts(State(query_handler), exec, body).await;
|
||||
assert!(json.success(), "{:?}", json);
|
||||
assert!(json.error().is_none());
|
||||
assert!(json.output().is_none());
|
||||
}
|
||||
|
||||
fn create_script_payload() -> Json<ScriptExecution> {
|
||||
Json(ScriptExecution {
|
||||
name: "test".to_string(),
|
||||
script: r#"
|
||||
@copr(sql='select uint32s as number from numbers', args=['number'], returns=['n'])
|
||||
def test(n):
|
||||
return n;
|
||||
"#
|
||||
.to_string(),
|
||||
fn create_script_query() -> Query<http_handler::ScriptQuery> {
|
||||
Query(http_handler::ScriptQuery {
|
||||
name: Some("test".to_string()),
|
||||
})
|
||||
}
|
||||
|
||||
fn create_invalid_script_query() -> Query<http_handler::ScriptQuery> {
|
||||
Query(http_handler::ScriptQuery { name: None })
|
||||
}
|
||||
|
||||
fn create_query() -> Query<http_handler::SqlQuery> {
|
||||
Query(http_handler::SqlQuery {
|
||||
sql: Some("select sum(uint32s) from numbers limit 20".to_string()),
|
||||
|
||||
@@ -83,6 +83,9 @@ pub enum Error {
|
||||
#[snafu(display("Invalid database name: {}", name))]
|
||||
InvalidDatabaseName { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Invalid table name: {}", name))]
|
||||
InvalidTableName { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Invalid default constraint, column: {}, source: {}", column, source))]
|
||||
InvalidDefault {
|
||||
column: String,
|
||||
@@ -106,7 +109,9 @@ impl ErrorExt for Error {
|
||||
| SqlTypeNotSupported { .. }
|
||||
| InvalidDefault { .. } => StatusCode::InvalidSyntax,
|
||||
|
||||
InvalidDatabaseName { .. } | ColumnTypeMismatch { .. } => StatusCode::InvalidArguments,
|
||||
InvalidDatabaseName { .. } | ColumnTypeMismatch { .. } | InvalidTableName { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,8 +5,10 @@ use sqlparser::parser::Parser;
|
||||
use sqlparser::parser::ParserError;
|
||||
use sqlparser::tokenizer::{Token, Tokenizer};
|
||||
|
||||
use crate::error::{self, InvalidDatabaseNameSnafu, Result, SyntaxSnafu, TokenizerSnafu};
|
||||
use crate::statements::show::{ShowDatabases, ShowKind, ShowTables};
|
||||
use crate::error::{
|
||||
self, InvalidDatabaseNameSnafu, InvalidTableNameSnafu, Result, SyntaxSnafu, TokenizerSnafu,
|
||||
};
|
||||
use crate::statements::show::{ShowCreateTable, ShowDatabases, ShowKind, ShowTables};
|
||||
use crate::statements::statement::Statement;
|
||||
|
||||
/// GrepTime SQL parser context, a simple wrapper for Datafusion SQL parser.
|
||||
@@ -102,11 +104,38 @@ impl<'a> ParserContext<'a> {
|
||||
} else if self.matches_keyword(Keyword::TABLES) {
|
||||
self.parser.next_token();
|
||||
self.parse_show_tables()
|
||||
} else if self.consume_token("CREATE") {
|
||||
if self.consume_token("TABLE") {
|
||||
self.parse_show_create_table()
|
||||
} else {
|
||||
self.unsupported(self.peek_token_as_string())
|
||||
}
|
||||
} else {
|
||||
self.unsupported(self.peek_token_as_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse SHOW CREATE TABLE statement
|
||||
fn parse_show_create_table(&mut self) -> Result<Statement> {
|
||||
let table_name =
|
||||
self.parser
|
||||
.parse_object_name()
|
||||
.with_context(|_| error::UnexpectedSnafu {
|
||||
sql: self.sql,
|
||||
expected: "a table name",
|
||||
actual: self.peek_token_as_string(),
|
||||
})?;
|
||||
ensure!(
|
||||
!table_name.0.is_empty(),
|
||||
InvalidTableNameSnafu {
|
||||
name: table_name.to_string(),
|
||||
}
|
||||
);
|
||||
Ok(Statement::ShowCreateTable(ShowCreateTable {
|
||||
table_name: table_name.to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn parse_show_tables(&mut self) -> Result<Statement> {
|
||||
let database = match self.parser.peek_token() {
|
||||
Token::EOF | Token::SemiColon => {
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use sqlparser::ast::{SetExpr, Statement, Values};
|
||||
use sqlparser::ast::{SetExpr, Statement, UnaryOperator, Values};
|
||||
use sqlparser::parser::ParserError;
|
||||
|
||||
use crate::ast::{Expr, Value};
|
||||
use crate::error::{self, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Insert {
|
||||
@@ -27,34 +28,59 @@ impl Insert {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn values(&self) -> Vec<Vec<Value>> {
|
||||
match &self.inner {
|
||||
pub fn values(&self) -> Result<Vec<Vec<Value>>> {
|
||||
let values = match &self.inner {
|
||||
Statement::Insert { source, .. } => match &source.body {
|
||||
SetExpr::Values(Values(values)) => values
|
||||
.iter()
|
||||
.map(|v| {
|
||||
v.iter()
|
||||
.map(|expr| match expr {
|
||||
Expr::Value(v) => v.clone(),
|
||||
Expr::Identifier(ident) => {
|
||||
Value::SingleQuotedString(ident.value.clone())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
})
|
||||
.collect::<Vec<Value>>()
|
||||
})
|
||||
.collect(),
|
||||
SetExpr::Values(Values(exprs)) => sql_exprs_to_values(exprs)?,
|
||||
_ => unreachable!(),
|
||||
},
|
||||
_ => unreachable!(),
|
||||
}
|
||||
};
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
|
||||
fn sql_exprs_to_values(exprs: &Vec<Vec<Expr>>) -> Result<Vec<Vec<Value>>> {
|
||||
let mut values = Vec::with_capacity(exprs.len());
|
||||
for es in exprs.iter() {
|
||||
let mut vs = Vec::with_capacity(es.len());
|
||||
for expr in es.iter() {
|
||||
vs.push(match expr {
|
||||
Expr::Value(v) => v.clone(),
|
||||
Expr::Identifier(ident) => Value::SingleQuotedString(ident.value.clone()),
|
||||
Expr::UnaryOp { op, expr }
|
||||
if matches!(op, UnaryOperator::Minus | UnaryOperator::Plus) =>
|
||||
{
|
||||
if let Expr::Value(Value::Number(s, b)) = &**expr {
|
||||
match op {
|
||||
UnaryOperator::Minus => Value::Number(format!("-{}", s), *b),
|
||||
UnaryOperator::Plus => Value::Number(s.to_string(), *b),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
} else {
|
||||
return error::ParseSqlValueSnafu {
|
||||
msg: format!("{:?}", expr),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return error::ParseSqlValueSnafu {
|
||||
msg: format!("{:?}", expr),
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
});
|
||||
}
|
||||
values.push(vs);
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
impl TryFrom<Statement> for Insert {
|
||||
type Error = ParserError;
|
||||
|
||||
fn try_from(value: Statement) -> Result<Self, Self::Error> {
|
||||
fn try_from(value: Statement) -> std::result::Result<Self, Self::Error> {
|
||||
match value {
|
||||
Statement::Insert { .. } => Ok(Insert { inner: value }),
|
||||
unexp => Err(ParserError::ParserError(format!(
|
||||
@@ -78,7 +104,37 @@ mod tests {
|
||||
let mut stmts = ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
|
||||
assert_eq!(1, stmts.len());
|
||||
let insert = stmts.pop().unwrap();
|
||||
let r: Result<Statement, ParserError> = insert.try_into();
|
||||
r.unwrap();
|
||||
let _stmt: Statement = insert.try_into().unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_value_with_unary_op() {
|
||||
use crate::statements::statement::Statement;
|
||||
|
||||
// insert "-1"
|
||||
let sql = "INSERT INTO my_table VALUES(-1)";
|
||||
let stmt = ParserContext::create_with_dialect(sql, &GenericDialect {})
|
||||
.unwrap()
|
||||
.remove(0);
|
||||
match stmt {
|
||||
Statement::Insert(insert) => {
|
||||
let values = insert.values().unwrap();
|
||||
assert_eq!(values, vec![vec![Value::Number("-1".to_string(), false)]]);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
// insert "+1"
|
||||
let sql = "INSERT INTO my_table VALUES(+1)";
|
||||
let stmt = ParserContext::create_with_dialect(sql, &GenericDialect {})
|
||||
.unwrap()
|
||||
.remove(0);
|
||||
match stmt {
|
||||
Statement::Insert(insert) => {
|
||||
let values = insert.values().unwrap();
|
||||
assert_eq!(values, vec![vec![Value::Number("1".to_string(), false)]]);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
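The new `sql_exprs_to_values` folds a leading `+`/`-` unary operator into the numeric literal itself, so `VALUES(-1)` becomes the number `-1` rather than an unsupported expression. A tiny stand-alone sketch of that folding step, using plain strings instead of the sqlparser `Expr`/`Value` types (the `fold_sign` name is hypothetical).

// Hypothetical miniature of the unary-op folding above: `op` is the parsed
// unary operator and `literal` is the numeric literal it applies to.
fn fold_sign(op: char, literal: &str) -> Result<String, String> {
    match op {
        '-' => Ok(format!("-{}", literal)),
        '+' => Ok(literal.to_string()),
        other => Err(format!("unsupported unary operator: {}", other)),
    }
}

fn main() {
    assert_eq!(fold_sign('-', "1").unwrap(), "-1");
    assert_eq!(fold_sign('+', "1").unwrap(), "1");
    assert!(fold_sign('!', "1").is_err());
}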
@@ -40,6 +40,12 @@ pub struct ShowTables {
|
||||
pub database: Option<String>,
|
||||
}
|
||||
|
||||
/// SQL structure for `SHOW CREATE TABLE`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ShowCreateTable {
|
||||
pub table_name: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
@@ -94,4 +100,27 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_show_create_table() {
|
||||
let sql = "SHOW CREATE TABLE test";
|
||||
let stmts: Vec<Statement> =
|
||||
ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
|
||||
assert_eq!(1, stmts.len());
|
||||
assert_matches!(&stmts[0], Statement::ShowCreateTable { .. });
|
||||
match &stmts[0] {
|
||||
Statement::ShowCreateTable(show) => {
|
||||
let table_name = show.table_name.as_str();
|
||||
assert_eq!(table_name, "test");
|
||||
}
|
||||
_ => {
|
||||
unreachable!();
|
||||
}
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
pub fn test_show_create_missing_table_name() {
|
||||
let sql = "SHOW CREATE TABLE";
|
||||
ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap_err();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ use crate::statements::alter::AlterTable;
|
||||
use crate::statements::create_table::CreateTable;
|
||||
use crate::statements::insert::Insert;
|
||||
use crate::statements::query::Query;
|
||||
use crate::statements::show::{ShowDatabases, ShowTables};
|
||||
use crate::statements::show::{ShowCreateTable, ShowDatabases, ShowTables};
|
||||
|
||||
/// Tokens parsed by `DFParser` are converted into these values.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -22,6 +22,8 @@ pub enum Statement {
|
||||
ShowDatabases(ShowDatabases),
|
||||
// SHOW TABLES
|
||||
ShowTables(ShowTables),
|
||||
// SHOW CREATE TABLE
|
||||
ShowCreateTable(ShowCreateTable),
|
||||
}
|
||||
|
||||
/// Converts Statement to sqlparser statement
|
||||
@@ -36,6 +38,9 @@ impl TryFrom<Statement> for SpStatement {
|
||||
Statement::ShowTables(_) => Err(ParserError::ParserError(
|
||||
"sqlparser does not support SHOW TABLES query.".to_string(),
|
||||
)),
|
||||
Statement::ShowCreateTable(_) => Err(ParserError::ParserError(
|
||||
"sqlparser does not support SHOW CREATE TABLE query.".to_string(),
|
||||
)),
|
||||
Statement::Query(s) => Ok(SpStatement::Query(Box::new(s.inner))),
|
||||
Statement::Insert(i) => Ok(i.inner),
|
||||
Statement::Create(_) | Statement::Alter(_) => unimplemented!(),
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.