Compare commits


23 Commits

Author SHA1 Message Date
Lei, Huang
2c9bcbe885 refactor: catalog crate (#415)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write splitter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initiation of catalog

* feat: add region id to create table request and add heartbeat task to datanode instance

* fix: fix auto reconnect for heartbeat task

* chore: change TableValue::region_numbers to Vec<u32>.

* fix: some tests

* fix: avoid concurrently starting Heartbeat task by using compare_exchange

* feat: refactor catalog key and values, separate table info into two kinds of keys

* feat: bump table id from metasrv

* fix: compare and set table id

* chore: merge develop

* fix: use integer serialization instead of string serialization

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-08 20:40:16 +08:00
Lei, Huang
dfd4b10493 feat: add shutdown mechanism for HeartbeatTask (#424) 2022-11-08 19:23:02 +08:00
dennis zhuang
dd488e8d21 feat: adds from_unixtime function (#420) 2022-11-08 18:22:00 +08:00
fys
857054f70d feat: impl insert for DistTable (#406)
* feat: impl insert for dist_table in frontend

* add the logic of decode region id in datanode.
2022-11-08 17:19:17 +08:00
Lei, Huang
a41aec0a86 fix: use same tmp data location as default config (#422) 2022-11-08 16:58:48 +08:00
Sheng hui
cff8fe4e0e feat: Allow sql parser to parse show-create-table statement (#347)
* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement
2022-11-08 16:35:56 +08:00
Lei, Huang
a2f9b788f1 fix: datanode start in standalone mode by default (#418)
* fix: datanode start in standalone mode by default

* fix: detect misconfig on startup

* fix: some CR comments and add tests
2022-11-08 16:18:13 +08:00
shuiyisong
43f9c40f43 feat: add context to query_handler (#417)
This PR merely creates the Context struct; the fields within Context are not stable yet. Feel free to modify them at will.
2022-11-08 13:29:32 +08:00
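
The Context added in #417 is deliberately skeletal. As a hedged illustration only (the field names below are assumptions, not the PR's actual definition), a request-scoped context passed along with a query might look like:

use std::collections::HashMap;

/// Hypothetical request-scoped context handed to query handlers.
/// Field names are illustrative; the PR states the fields are not stable yet.
#[derive(Debug, Default, Clone)]
pub struct Context {
    /// Catalog and schema the request targets, if known.
    pub catalog: Option<String>,
    pub schema: Option<String>,
    /// Free-form key/value metadata, e.g. client info or trace ids.
    pub extensions: HashMap<String, String>,
}

impl Context {
    pub fn with_extension(mut self, key: &str, value: &str) -> Self {
        self.extensions.insert(key.to_string(), value.to_string());
        self
    }
}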
Ruihang Xia
af1df2066c perf: enlarge write row group size (#413)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-08 11:23:10 +08:00
LFC
f34a99ff5a feat: use regex to filter out not supported MySQL stmt (#396)
* feat: use regex to filter out not supported MySQL stmt

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-08 11:09:46 +08:00
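
A minimal sketch of regex-based statement filtering of the kind #396 describes; the patterns below are purely illustrative assumptions, not the actual set of MySQL statements the PR rejects:

use regex::RegexSet;

// Illustrative only: statement prefixes a server might choose to reject.
// Real code would build this set once (e.g. behind a lazy static) instead of per call.
fn unsupported_stmt_filter() -> RegexSet {
    RegexSet::new([
        r"(?i)^\s*LOCK\s+TABLES",
        r"(?i)^\s*SET\s+NAMES",
        r"(?i)^\s*START\s+TRANSACTION",
    ])
    .expect("invalid regex")
}

fn is_unsupported(sql: &str) -> bool {
    unsupported_stmt_filter().is_match(sql)
}

A RegexSet compiles the patterns together and matches them in a single pass over the incoming SQL text.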
Ruihang Xia
89a3b39728 perf: improve table scan performance (#407)
* refactor: improve table scan performance

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use BufReader to avoid pre-loading all content

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-07 17:28:53 +08:00
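
The BufReader note in #407 is about streaming a file instead of loading it entirely up front. A generic standard-library sketch of that pattern (unrelated to the engine's actual reader types):

use std::fs::File;
use std::io::{BufRead, BufReader, Result};

// Wrap the file in a BufReader and consume it incrementally,
// rather than reading the whole content into memory first.
fn count_lines(path: &str) -> Result<usize> {
    let reader = BufReader::new(File::open(path)?);
    let mut count = 0;
    for line in reader.lines() {
        line?; // propagate I/O errors; the line itself is discarded
        count += 1;
    }
    Ok(count)
}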
Lei, Huang
2137587091 feat: datanode heartbeat (#377)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write splitter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initiation of catalog

* feat: add region id to create table request and add heartbeat task to datanode instance

* fix: fix auto reconnect for heartbeat task

* chore: change TableValue::region_numbers to Vec<u32>.

* fix: some tests

* fix: avoid concurrently starting Heartbeat task by using compare_exchange

* fix: some cr comments

* fix: fix unit tests

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-07 17:10:43 +08:00
Jiachun Feng
172c9a1e21 chore: minor refactor with meta_client (#393)
* chore: minor refactor

* feat: support none expect value on CAS
2022-11-07 17:03:31 +08:00
Ruihang Xia
ae147c2a74 chore: refine some unnecessary log (#410)
* remove some unnecessary information from logs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* further cleaning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-07 16:36:27 +08:00
fys
c2e1b0857c refactor: optimize channel_manager (#401)
* refactor: use dashmap in channel manager

* add benchmark for channel manager

* access field in channel use AtomicUsize

* cr
2022-11-07 16:09:01 +08:00
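
The bullets in #401 describe moving the channel manager onto DashMap with an AtomicUsize access counter. A hedged, self-contained sketch of that pattern, using placeholder types rather than the crate's real API:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use dashmap::DashMap;

// Stand-in for the real gRPC channel type; the actual crate wraps a tonic channel.
type Channel = String;

struct ChannelEntry {
    channel: Channel,
    // Lock-free access counter, as the "AtomicUsize" bullet above suggests.
    access: AtomicUsize,
}

#[derive(Default)]
struct ChannelManager {
    channels: DashMap<String, Arc<ChannelEntry>>,
}

impl ChannelManager {
    fn get(&self, addr: &str) -> Channel {
        // DashMap shards its locks, so concurrent callers on different keys do not contend.
        let entry = Arc::clone(&self.channels.entry(addr.to_string()).or_insert_with(|| {
            Arc::new(ChannelEntry {
                channel: format!("channel-to-{addr}"),
                access: AtomicUsize::new(0),
            })
        }));
        entry.access.fetch_add(1, Ordering::Relaxed);
        entry.channel.clone()
    }
}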
dennis zhuang
6e99bb8490 fix: /metrics endpoint (#404) 2022-11-07 10:34:13 +08:00
Ning Sun
eef20887cc ci: use lld linker for ci (#398)
* ci: use lld linker for ci

* ci: do a disk cleanup before test

* ci: add llvm cache to speedup installation

* ci: use lld linker for coverage as well

* feat: use lld for release too
2022-11-07 10:28:58 +08:00
LFC
16500b045b feat: distribute table in frontend (#328)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-05 10:41:59 +08:00
discord9
3d195ff858 feat: bind Greptime's own UDF&UDAF into Python Coprocessor Module (#335)
* feat: port own UDF&UDAF into py copr(untest yet)

* refactor: move UDF&UDAF to greptime_builtins

* feat: support List in val2py_obj

* test: some testcases for newly added UDFs

* test: complete test for all added gpdb's own UDF

* refactor: add underscore for long func name

* feat: better error message

* fix: typo
2022-11-04 15:49:41 +08:00
zyy17
bc701d3e7f ci: push image to dockerhub (#394) 2022-11-04 15:07:12 +08:00
LFC
6373bb04f9 fix: insert negative values (#383)
* fix: insert negative values

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-04 14:22:31 +08:00
Ruihang Xia
bfcd74fd16 feat: benchmark suite based on nyc taxi dataset (#384)
* solve dep conflict

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: nyc taxi dataset writer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix some literals

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add some queries

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix progress bar

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* able to skip write or read

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename to nyc-taxi.rs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove main.rs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adapt new client api

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* allow stdout output in this cli

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* some default values

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-04 14:13:17 +08:00
dennis zhuang
fc6d73b06b feat: improve /scripts API (#390)
* feat: improve /scripts API

* chore: json_err macro

* chore: json_err macro and refactor code

* fix: test
2022-11-04 14:09:07 +08:00
111 changed files with 4664 additions and 694 deletions


@@ -18,7 +18,17 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Cache LLVM and Clang
id: cache-llvm
uses: actions/cache@v3
with:
path: ./llvm
key: llvm
- uses: arduino/setup-protoc@v1
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache-llvm.outputs.cache-hit }}
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
@@ -30,7 +40,7 @@ jobs:
- name: Cleanup disk
uses: curoky/cleanup-disk-action@v2.0
with:
retain: 'rust'
retain: 'rust,llvm'
- name: Execute tests
uses: actions-rs/cargo@v1
with:
@@ -39,7 +49,7 @@ jobs:
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests -Clink-arg=-fuse-ld=lld"
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}


@@ -47,7 +47,17 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Cache LLVM and Clang
id: cache-llvm
uses: actions/cache@v3
with:
path: ./llvm
key: llvm
- uses: arduino/setup-protoc@v1
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache-llvm.outputs.cache-hit }}
- uses: actions-rs/toolchain@v1
with:
profile: minimal
@@ -55,11 +65,16 @@ jobs:
override: true
- name: Rust Cache
uses: Swatinem/rust-cache@v2.0.0
- name: Cleanup disk
uses: curoky/cleanup-disk-action@v2.0
with:
retain: 'rust,llvm'
- uses: actions-rs/cargo@v1
with:
command: test
args: --workspace
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}


@@ -33,9 +33,11 @@ jobs:
uses: actions/checkout@v3
- name: Cache cargo assets
id: cache
uses: actions/cache@v3
with:
path: |
./llvm
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
@@ -51,6 +53,11 @@ jobs:
sudo cp protoc/bin/protoc /usr/local/bin/
sudo cp -r protoc/include/google /usr/local/include/
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache.outputs.cache-hit }}
- name: Install Protoc for macos
if: contains(matrix.arch, 'darwin')
run: |
@@ -78,6 +85,8 @@ jobs:
with:
command: build
args: ${{ matrix.opts }} --release --locked --target ${{ matrix.arch }}
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
- name: Calculate checksum and rename binary
shell: bash

.gitignore

@@ -28,3 +28,6 @@ logs/
# cpython's generated python byte code
**/__pycache__/
# Benchmark dataset
benchmarks/data


@@ -9,7 +9,7 @@ repos:
rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
hooks:
- id: cargo-sort
args: ["--workspace"]
args: ["--workspace", "--print"]
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0

Cargo.lock (generated)

@@ -93,6 +93,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "ansi_term"
version = "0.12.1"
@@ -112,6 +118,8 @@ checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
name = "api"
version = "0.1.0"
dependencies = [
"common-base",
"common-time",
"datatypes",
"prost 0.11.0",
"snafu",
@@ -158,6 +166,30 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
version = "10.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1328dbc6d5d76a08b13df3ac630f61a6a31276d9e9d08eb813e98efa624c2382"
dependencies = [
"bitflags",
"chrono",
"csv",
"flatbuffers",
"half",
"hex",
"indexmap",
"lazy_static",
"lexical-core",
"multiversion",
"num",
"rand 0.8.5",
"regex",
"serde",
"serde_derive",
"serde_json",
]
[[package]]
name = "arrow-format"
version = "0.4.0"
@@ -428,6 +460,20 @@ dependencies = [
"tower-service",
]
[[package]]
name = "backoff"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1"
dependencies = [
"futures-core",
"getrandom",
"instant",
"pin-project-lite",
"rand 0.8.5",
"tokio",
]
[[package]]
name = "backon"
version = "0.1.0"
@@ -461,6 +507,19 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "benchmarks"
version = "0.1.0"
dependencies = [
"arrow",
"clap 4.0.18",
"client",
"indicatif",
"itertools",
"parquet",
"tokio",
]
[[package]]
name = "bigdecimal"
version = "0.3.0"
@@ -663,6 +722,7 @@ dependencies = [
"arc-swap",
"async-stream",
"async-trait",
"backoff",
"chrono",
"common-catalog",
"common-error",
@@ -760,6 +820,33 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
[[package]]
name = "ciborium"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369"
[[package]]
name = "ciborium-ll"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clang-sys"
version = "1.4.0"
@@ -794,8 +881,8 @@ checksum = "86447ad904c7fb335a790c9d7fe3d0d971dc523b8ccd1561a520de9a85302750"
dependencies = [
"atty",
"bitflags",
"clap_derive",
"clap_lex",
"clap_derive 3.2.18",
"clap_lex 0.2.4",
"indexmap",
"once_cell",
"strsim 0.10.0",
@@ -803,6 +890,21 @@ dependencies = [
"textwrap 0.15.1",
]
[[package]]
name = "clap"
version = "4.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335867764ed2de42325fafe6d18b8af74ba97ee0c590fa016f157535b42ab04b"
dependencies = [
"atty",
"bitflags",
"clap_derive 4.0.18",
"clap_lex 0.3.0",
"once_cell",
"strsim 0.10.0",
"termcolor",
]
[[package]]
name = "clap_derive"
version = "3.2.18"
@@ -816,6 +918,19 @@ dependencies = [
"syn",
]
[[package]]
name = "clap_derive"
version = "4.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3"
dependencies = [
"heck 0.4.0",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.2.4"
@@ -825,6 +940,15 @@ dependencies = [
"os_str_bytes",
]
[[package]]
name = "clap_lex"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "client"
version = "0.1.0"
@@ -950,6 +1074,7 @@ dependencies = [
"common-error",
"common-function-macro",
"common-query",
"common-time",
"datafusion-common",
"datatypes",
"libc",
@@ -986,7 +1111,10 @@ dependencies = [
"common-base",
"common-error",
"common-runtime",
"criterion 0.4.0",
"dashmap",
"datafusion",
"rand 0.8.5",
"snafu",
"tokio",
"tonic",
@@ -1197,7 +1325,7 @@ dependencies = [
"atty",
"cast",
"clap 2.34.0",
"criterion-plot",
"criterion-plot 0.4.5",
"csv",
"itertools",
"lazy_static",
@@ -1214,6 +1342,32 @@ dependencies = [
"walkdir",
]
[[package]]
name = "criterion"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
dependencies = [
"anes",
"atty",
"cast",
"ciborium",
"clap 3.2.22",
"criterion-plot 0.5.0",
"itertools",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.4.5"
@@ -1224,6 +1378,16 @@ dependencies = [
"itertools",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools",
]
[[package]]
name = "crossbeam"
version = "0.8.2"
@@ -1366,6 +1530,19 @@ dependencies = [
"syn",
]
[[package]]
name = "dashmap"
version = "5.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
dependencies = [
"cfg-if",
"hashbrown",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]]
name = "datafusion"
version = "7.0.0"
@@ -1469,6 +1646,8 @@ dependencies = [
"futures",
"hyper",
"log-store",
"meta-client",
"meta-srv",
"metrics",
"object-store",
"query",
@@ -1764,6 +1943,17 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499"
[[package]]
name = "flatbuffers"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea97b4fe4b84e2f2765449bcea21cbdb3ee28cecb88afbf38a0c2e1639f5eb5"
dependencies = [
"bitflags",
"smallvec",
"thiserror",
]
[[package]]
name = "flate2"
version = "1.0.24"
@@ -1807,6 +1997,7 @@ dependencies = [
"arrow2",
"async-stream",
"async-trait",
"catalog",
"client",
"common-base",
"common-error",
@@ -1822,8 +2013,10 @@ dependencies = [
"datanode",
"datatypes",
"futures",
"itertools",
"openmetrics-parser",
"prost 0.11.0",
"query",
"serde",
"servers",
"snafu",
@@ -2291,6 +2484,17 @@ dependencies = [
"serde",
]
[[package]]
name = "indicatif"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfddc9561e8baf264e0e45e197fd7696320026eb10a8180340debc27b18f535b"
dependencies = [
"console",
"number_prefix",
"unicode-width",
]
[[package]]
name = "influxdb_line_protocol"
version = "0.1.0"
@@ -2311,6 +2515,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "integer-encoding"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f"
[[package]]
name = "integer-encoding"
version = "3.0.4"
@@ -3129,6 +3339,12 @@ dependencies = [
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "object"
version = "0.29.0"
@@ -3152,9 +3368,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.15.0"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "oorandom"
@@ -3256,7 +3472,7 @@ dependencies = [
"opentelemetry",
"opentelemetry-semantic-conventions",
"thiserror",
"thrift",
"thrift 0.15.0",
"tokio",
]
@@ -3345,6 +3561,37 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "parquet"
version = "10.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53e9c8fc20af9b92d85d42ec86e5217b2eaf1340fbba75c4b4296de764ea7921"
dependencies = [
"arrow",
"base64",
"brotli",
"byteorder",
"chrono",
"flate2",
"lz4",
"num",
"num-bigint",
"parquet-format",
"rand 0.8.5",
"snap",
"thrift 0.13.0",
"zstd",
]
[[package]]
name = "parquet-format"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5"
dependencies = [
"thrift 0.13.0",
]
[[package]]
name = "parquet-format-async-temp"
version = "0.2.0"
@@ -3354,7 +3601,7 @@ dependencies = [
"async-trait",
"byteorder",
"futures",
"integer-encoding",
"integer-encoding 3.0.4",
"ordered-float 1.1.1",
]
@@ -4801,12 +5048,14 @@ dependencies = [
"metrics",
"mysql_async",
"num_cpus",
"once_cell",
"openmetrics-parser",
"opensrv-mysql",
"pgwire",
"prost 0.11.0",
"query",
"rand 0.8.5",
"regex",
"schemars",
"script",
"serde",
@@ -5073,7 +5322,7 @@ dependencies = [
"common-runtime",
"common-telemetry",
"common-time",
"criterion",
"criterion 0.3.6",
"datatypes",
"futures",
"futures-util",
@@ -5454,6 +5703,19 @@ dependencies = [
"num_cpus",
]
[[package]]
name = "thrift"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b"
dependencies = [
"byteorder",
"integer-encoding 1.1.7",
"log",
"ordered-float 1.1.1",
"threadpool",
]
[[package]]
name = "thrift"
version = "0.15.0"
@@ -5461,7 +5723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b82ca8f46f95b3ce96081fe3dd89160fdea970c254bb72925255d1b62aae692e"
dependencies = [
"byteorder",
"integer-encoding",
"integer-encoding 3.0.4",
"log",
"ordered-float 1.1.1",
"threadpool",


@@ -1,5 +1,6 @@
[workspace]
members = [
"benchmarks",
"src/api",
"src/catalog",
"src/client",
@@ -32,3 +33,6 @@ members = [
"src/table",
"src/table-engine",
]
[profile.release]
debug = true

benchmarks/Cargo.toml (new file)

@@ -0,0 +1,14 @@
[package]
name = "benchmarks"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
arrow = "10"
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
parquet = { version = "*" }
tokio = { version = "1.21", features = ["full"] }


@@ -0,0 +1,439 @@
//! Use the taxi trip records from New York City dataset to bench. You can download the dataset from
//! [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
#![feature(once_cell)]
#![allow(clippy::print_stdout)]
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
time::Instant,
};
use arrow::{
array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray},
datatypes::{DataType, Float64Type, Int64Type},
record_batch::RecordBatch,
};
use clap::Parser;
use client::{
admin::Admin,
api::v1::{
codec::InsertBatch, column::Values, insert_expr, Column, ColumnDataType, ColumnDef,
CreateExpr, InsertExpr,
},
Client, Database, Select,
};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::{
arrow::{ArrowReader, ParquetFileArrowReader},
file::{reader::FileReader, serialized_reader::SerializedFileReader},
};
use tokio::task::JoinSet;
const DATABASE_NAME: &str = "greptime";
const CATALOG_NAME: &str = "greptime";
const SCHEMA_NAME: &str = "public";
const TABLE_NAME: &str = "nyc_taxi";
#[derive(Parser)]
#[command(name = "NYC benchmark runner")]
struct Args {
/// Path to the dataset
#[arg(short, long)]
path: Option<String>,
/// Batch size of insert request.
#[arg(short = 's', long = "batch-size", default_value_t = 4096)]
batch_size: usize,
/// Number of client threads on write (parallel on file level)
#[arg(short = 't', long = "thread-num", default_value_t = 4)]
thread_num: usize,
/// Number of query iteration
#[arg(short = 'i', long = "iter-num", default_value_t = 3)]
iter_num: usize,
#[arg(long = "skip-write")]
skip_write: bool,
#[arg(long = "skip-read")]
skip_read: bool,
#[arg(short, long, default_value_t = String::from("127.0.0.1:3001"))]
endpoint: String,
}
fn get_file_list<P: AsRef<Path>>(path: P) -> Vec<PathBuf> {
std::fs::read_dir(path)
.unwrap()
.map(|dir| dir.unwrap().path().canonicalize().unwrap())
.collect()
}
async fn write_data(
batch_size: usize,
db: &Database,
path: PathBuf,
mpb: MultiProgress,
pb_style: ProgressStyle,
) -> u128 {
let file = std::fs::File::open(&path).unwrap();
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
let row_num = file_reader.metadata().file_metadata().num_rows();
let record_batch_reader = ParquetFileArrowReader::new(file_reader)
.get_record_reader(batch_size)
.unwrap();
let progress_bar = mpb.add(ProgressBar::new(row_num as _));
progress_bar.set_style(pb_style);
progress_bar.set_message(format!("{:?}", path));
let mut total_rpc_elapsed_ms = 0;
for record_batch in record_batch_reader {
let record_batch = record_batch.unwrap();
let row_count = record_batch.num_rows();
let insert_batch = convert_record_batch(record_batch).into();
let insert_expr = InsertExpr {
table_name: TABLE_NAME.to_string(),
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: vec![insert_batch],
})),
options: HashMap::default(),
};
let now = Instant::now();
db.insert(insert_expr).await.unwrap();
let elapsed = now.elapsed();
total_rpc_elapsed_ms += elapsed.as_millis();
progress_bar.inc(row_count as _);
}
progress_bar.finish_with_message(format!(
"file {:?} done in {}ms",
path, total_rpc_elapsed_ms
));
total_rpc_elapsed_ms
}
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
let schema = record_batch.schema();
let fields = schema.fields();
let row_count = record_batch.num_rows();
let mut columns = vec![];
for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
let values = build_values(array);
let column = Column {
column_name: field.name().to_owned(),
values: Some(values),
null_mask: vec![],
// datatype and semantic_type are set to default
..Default::default()
};
columns.push(column);
}
InsertBatch {
columns,
row_count: row_count as _,
}
}
fn build_values(column: &ArrayRef) -> Values {
match column.data_type() {
DataType::Int64 => {
let array = column
.as_any()
.downcast_ref::<PrimitiveArray<Int64Type>>()
.unwrap();
let values = array.values();
Values {
i64_values: values.to_vec(),
..Default::default()
}
}
DataType::Float64 => {
let array = column
.as_any()
.downcast_ref::<PrimitiveArray<Float64Type>>()
.unwrap();
let values = array.values();
Values {
f64_values: values.to_vec(),
..Default::default()
}
}
DataType::Timestamp(_, _) => {
let array = column
.as_any()
.downcast_ref::<TimestampNanosecondArray>()
.unwrap();
let values = array.values();
Values {
i64_values: values.to_vec(),
..Default::default()
}
}
DataType::Utf8 => {
let array = column.as_any().downcast_ref::<StringArray>().unwrap();
let values = array.iter().filter_map(|s| s.map(String::from)).collect();
Values {
string_values: values,
..Default::default()
}
}
DataType::Null
| DataType::Boolean
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float16
| DataType::Float32
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::Duration(_)
| DataType::Interval(_)
| DataType::Binary
| DataType::FixedSizeBinary(_)
| DataType::LargeBinary
| DataType::LargeUtf8
| DataType::List(_)
| DataType::FixedSizeList(_, _)
| DataType::LargeList(_)
| DataType::Struct(_)
| DataType::Union(_, _)
| DataType::Dictionary(_, _)
| DataType::Decimal(_, _)
| DataType::Map(_, _) => todo!(),
}
}
fn create_table_expr() -> CreateExpr {
CreateExpr {
catalog_name: Some(CATALOG_NAME.to_string()),
schema_name: Some(SCHEMA_NAME.to_string()),
table_name: TABLE_NAME.to_string(),
desc: None,
column_defs: vec![
ColumnDef {
name: "VendorID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tpep_pickup_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tpep_dropoff_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "passenger_count".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "trip_distance".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "RatecodeID".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "store_and_fwd_flag".to_string(),
datatype: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "PULocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "DOLocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "payment_type".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "fare_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "extra".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "mta_tax".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tip_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tolls_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "improvement_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "total_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "congestion_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "airport_fee".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
],
time_index: "tpep_pickup_datetime".to_string(),
primary_keys: vec!["VendorID".to_string()],
create_if_not_exists: false,
table_options: Default::default(),
}
}
fn query_set() -> HashMap<String, String> {
let mut ret = HashMap::new();
ret.insert(
"count_all".to_string(),
format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
);
ret.insert(
"fare_amt_by_passenger".to_string(),
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
);
ret
}
async fn do_write(args: &Args, client: &Client) {
let admin = Admin::new("admin", client.clone());
let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
let mut write_jobs = JoinSet::new();
let create_table_result = admin.create(create_table_expr()).await;
println!("Create table result: {:?}", create_table_result);
let progress_bar_style = ProgressStyle::with_template(
"[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
)
.unwrap()
.progress_chars("##-");
let multi_progress_bar = MultiProgress::new();
let file_progress = multi_progress_bar.add(ProgressBar::new(file_list.len() as _));
file_progress.inc(0);
let batch_size = args.batch_size;
for _ in 0..args.thread_num {
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
}
}
while write_jobs.join_next().await.is_some() {
file_progress.inc(1);
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
}
}
}
async fn do_query(num_iter: usize, db: &Database) {
for (query_name, query) in query_set() {
println!("Running query: {}", query);
for i in 0..num_iter {
let now = Instant::now();
let _res = db.select(Select::Sql(query.clone())).await.unwrap();
let elapsed = now.elapsed();
println!(
"query {}, iteration {}: {}ms",
query_name,
i,
elapsed.as_millis()
);
}
}
}
fn main() {
let args = Args::parse();
tokio::runtime::Builder::new_multi_thread()
.worker_threads(args.thread_num)
.enable_all()
.build()
.unwrap()
.block_on(async {
let client = Client::with_urls(vec![&args.endpoint]);
if !args.skip_write {
do_write(&args, &client).await;
}
if !args.skip_read {
let db = Database::new(DATABASE_NAME, client.clone());
do_query(args.iter_num, &db).await;
}
})
}


@@ -1,8 +1,9 @@
node_id = 42
http_addr = '0.0.0.0:3000'
rpc_addr = '0.0.0.0:3001'
wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mode = "standalone"
mysql_addr = '0.0.0.0:3306'
mysql_runtime_size = 4
@@ -13,3 +14,9 @@ postgres_runtime_size = 4
[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'
[meta_client_opts]
metasrv_addr = "1.1.1.1:3002"
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = true


@@ -5,6 +5,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common-base = { path = "../common/base" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
prost = "0.11"
snafu = { version = "0.7", features = ["backtraces"] }


@@ -8,3 +8,7 @@ message InsertBatch {
repeated Column columns = 1;
uint32 row_count = 2;
}
message RegionId {
uint64 id = 1;
}


@@ -1,8 +1,13 @@
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;
use crate::error::{self, Result};
use crate::v1::column::Values;
use crate::v1::Column;
use crate::v1::ColumnDataType;
#[derive(Debug, PartialEq, Eq)]
@@ -143,8 +148,47 @@ impl Values {
}
}
impl Column {
// The type of vals must be same.
pub fn push_vals(&mut self, origin_count: usize, vector: VectorRef) {
let values = self.values.get_or_insert_with(Values::default);
let mut null_mask = BitVec::from_slice(&self.null_mask);
let len = vector.len();
null_mask.reserve_exact(origin_count + len);
null_mask.extend(BitVec::repeat(false, len));
(0..len).into_iter().for_each(|idx| match vector.get(idx) {
Value::Null => null_mask.set(idx + origin_count, true),
Value::Boolean(val) => values.bool_values.push(val),
Value::UInt8(val) => values.u8_values.push(val.into()),
Value::UInt16(val) => values.u16_values.push(val.into()),
Value::UInt32(val) => values.u32_values.push(val),
Value::UInt64(val) => values.u64_values.push(val),
Value::Int8(val) => values.i8_values.push(val.into()),
Value::Int16(val) => values.i16_values.push(val.into()),
Value::Int32(val) => values.i32_values.push(val),
Value::Int64(val) => values.i64_values.push(val),
Value::Float32(val) => values.f32_values.push(*val),
Value::Float64(val) => values.f64_values.push(*val),
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
Value::Binary(val) => values.binary_values.push(val.to_vec()),
Value::Date(val) => values.date_values.push(val.val()),
Value::DateTime(val) => values.datetime_values.push(val.val()),
Value::Timestamp(val) => values
.ts_millis_values
.push(val.convert_to(TimeUnit::Millisecond)),
Value::List(_) => unreachable!(),
});
self.null_mask = null_mask.into_vec();
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::BooleanVector;
use super::*;
#[test]
@@ -358,4 +402,29 @@ mod tests {
"Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
);
}
#[test]
fn test_column_put_vector() {
use crate::v1::column::SemanticType;
// Some(false), None, Some(true), Some(true)
let mut column = Column {
column_name: "test".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(Values {
bool_values: vec![false, true, true],
..Default::default()
}),
null_mask: vec![2],
datatype: ColumnDataType::Boolean as i32,
};
let row_count = 4;
let vector = Arc::new(BooleanVector::from(vec![Some(true), None, Some(false)]));
column.push_vals(row_count, vector);
// Some(false), None, Some(true), Some(true), Some(true), None, Some(false)
let bool_values = column.values.unwrap().bool_values;
assert_eq!(vec![false, true, true, true, false], bool_values);
let null_mask = column.null_mask;
assert_eq!(34, null_mask[0]);
}
}


@@ -1,7 +1,7 @@
pub use prost::DecodeError;
use prost::Message;
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, SelectResult};
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionId, SelectResult};
macro_rules! impl_convert_with_bytes {
($data_type: ty) => {
@@ -24,6 +24,7 @@ macro_rules! impl_convert_with_bytes {
impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionId);
#[cfg(test)]
mod tests {
@@ -127,6 +128,16 @@ mod tests {
);
}
#[test]
fn test_convert_region_id() {
let region_id = RegionId { id: 12 };
let bytes: Vec<u8> = region_id.into();
let region_id: RegionId = bytes.deref().try_into().unwrap();
assert_eq!(12, region_id.id);
}
fn mock_insert_batch() -> InsertBatch {
let values = column::Values {
i32_values: vec![2, 3, 4, 5, 6, 7, 8],


@@ -9,6 +9,7 @@ api = { path = "../api" }
arc-swap = "1.0"
async-stream = "0.3"
async-trait = "0.1"
backoff = { version = "0.4", features = ["tokio"] }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }

View File

@@ -4,6 +4,7 @@ use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::arrow;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
@@ -110,6 +111,19 @@ pub enum Error {
source: table::error::Error,
},
#[snafu(display(
"Invalid table schema in catalog entry, table:{}, schema: {:?}, source: {}",
table_info,
schema,
source
))]
InvalidTableSchema {
table_info: String,
schema: RawSchema,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec {
#[snafu(backtrace)]
@@ -135,6 +149,12 @@ pub enum Error {
#[snafu(backtrace)]
source: meta_client::error::Error,
},
#[snafu(display("Failed to bump table id"))]
BumpTableId { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to parse table id from metasrv, data: {:?}", data))]
ParseTableId { data: String, backtrace: Backtrace },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -170,6 +190,10 @@ impl ErrorExt for Error {
Error::MetaSrv { source, .. } => source.status_code(),
Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
Error::InvalidTableSchema { source, .. } => source.status_code(),
Error::BumpTableId { .. } | Error::ParseTableId { .. } => {
StatusCode::StorageUnavailable
}
}
}


@@ -67,32 +67,27 @@ pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
#[async_trait::async_trait]
pub trait CatalogManager: CatalogList {
/// Starts a catalog manager.
async fn start(&self) -> error::Result<()>;
async fn start(&self) -> Result<()>;
/// Returns next available table id.
fn next_table_id(&self) -> TableId;
async fn next_table_id(&self) -> Result<TableId>;
/// Registers a table given given catalog/schema to catalog manager,
/// returns table registered.
async fn register_table(&self, request: RegisterTableRequest) -> error::Result<usize>;
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
/// Register a system table, should be called before starting the manager.
async fn register_system_table(&self, request: RegisterSystemTableRequest)
-> error::Result<()>;
/// Returns the table by catalog, schema and table name.
fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> error::Result<Option<TableRef>>;
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
/// Hook called after system table opening.
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> error::Result<()> + Send + Sync>;
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> Result<()> + Send + Sync>;
/// Register system table request:
/// - When system table is already created and registered, the hook will be called


@@ -7,7 +7,7 @@ use common_catalog::consts::{
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::RecordBatch;
use common_telemetry::{debug, info};
use common_telemetry::info;
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -183,7 +183,6 @@ impl LocalCatalogManager {
info!("Registered schema: {:?}", s);
}
Entry::Table(t) => {
debug!("t: {:?}", t);
self.open_and_register_table(&t).await?;
info!("Registered table: {:?}", t);
max_table_id = max_table_id.max(t.table_id);
@@ -273,8 +272,8 @@ impl CatalogManager for LocalCatalogManager {
}
#[inline]
fn next_table_id(&self) -> TableId {
self.next_table_id.fetch_add(1, Ordering::Relaxed)
async fn next_table_id(&self) -> Result<TableId> {
Ok(self.next_table_id.fetch_add(1, Ordering::Relaxed))
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {


@@ -19,13 +19,27 @@ pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a
#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, crate::error::Error>
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b;
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), crate::error::Error>;
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), crate::error::Error>;
/// Compare and set value of key. `expect` is the expected value, if backend's current value associated
/// with key is the same as `expect`, the value will be updated to `val`.
///
/// - If the compare-and-set operation successfully updated value, this method will return an `Ok(Ok())`
/// - If associated value is not the same as `expect`, no value will be updated and an `Ok(Err(Vec<u8>))`
/// will be returned, the `Err(Vec<u8>)` indicates the current associated value of key.
/// - If any error happens during operation, an `Err(Error)` will be returned.
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error>;
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;
async fn delete(&self, key: &[u8]) -> Result<(), Error> {
self.delete_range(key, &[]).await
@@ -74,6 +88,15 @@ mod tests {
unimplemented!()
}
async fn compare_and_set(
&self,
_key: &[u8],
_expect: &[u8],
_val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
unimplemented!()
}
async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
unimplemented!()
}
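
Given the compare_and_set contract documented in this trait (Ok(Ok(())) when the value is updated, Ok(Err(current)) when the expected value does not match), a caller could handle the nested result roughly as follows; update_once is a hypothetical helper written as if inside the same crate, not part of this change:

// Try to replace `expect` with `new_val` exactly once, returning the backend's
// current value if another writer won the race (None if the key is absent).
async fn update_once(
    backend: &dyn KvBackend,
    key: &[u8],
    expect: &[u8],
    new_val: &[u8],
) -> Result<Option<Vec<u8>>, Error> {
    match backend.compare_and_set(key, expect, new_val).await? {
        // CAS succeeded: the key now holds `new_val`.
        Ok(()) => Ok(None),
        // Someone else changed the key first; `current` is its live value, if any.
        Err(current) => Ok(current),
    }
}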


@@ -3,7 +3,7 @@ use std::fmt::Debug;
use async_stream::stream;
use common_telemetry::info;
use meta_client::client::MetaClient;
use meta_client::rpc::{DeleteRangeRequest, PutRequest, RangeRequest};
use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
use snafu::ResultExt;
use crate::error::{Error, MetaSrvSnafu};
@@ -68,4 +68,26 @@ impl KvBackend for MetaKvBackend {
Ok(())
}
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
let request = CompareAndPutRequest::new()
.with_key(key.to_vec())
.with_expect(expect.to_vec())
.with_value(val.to_vec());
let mut response = self
.client
.compare_and_put(request)
.await
.context(MetaSrvSnafu)?;
if response.is_success() {
Ok(Ok(()))
} else {
Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
}
}
}


@@ -1,32 +1,34 @@
use std::any::Any;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwap;
use async_stream::stream;
use backoff::exponential::ExponentialBackoffBuilder;
use backoff::ExponentialBackoff;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableKey, TableValue,
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, info};
use datatypes::schema::Schema;
use common_telemetry::{debug, error, info};
use futures::Stream;
use futures_util::StreamExt;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableVersion};
use table::metadata::TableId;
use table::requests::{CreateTableRequest, OpenTableRequest};
use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex;
use crate::error::Result;
use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, OpenTableSnafu,
SchemaNotFoundSnafu, TableExistsSnafu,
BumpTableIdSnafu, CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu,
OpenTableSnafu, ParseTableIdSnafu, SchemaNotFoundSnafu, TableExistsSnafu,
};
use crate::error::{InvalidTableSchemaSnafu, Result};
use crate::remote::{Kv, KvBackendRef};
use crate::{
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
@@ -38,7 +40,6 @@ pub struct RemoteCatalogManager {
node_id: u64,
backend: KvBackendRef,
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
next_table_id: Arc<AtomicU32>,
engine: TableEngineRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
mutex: Arc<Mutex<()>>,
@@ -51,7 +52,6 @@ impl RemoteCatalogManager {
node_id,
backend,
catalogs: Default::default(),
next_table_id: Default::default(),
system_table_requests: Default::default(),
mutex: Default::default(),
}
@@ -60,14 +60,12 @@ impl RemoteCatalogManager {
fn build_catalog_key(&self, catalog_name: impl AsRef<str>) -> CatalogKey {
CatalogKey {
catalog_name: catalog_name.as_ref().to_string(),
node_id: self.node_id,
}
}
fn new_catalog_provider(&self, catalog_name: &str) -> CatalogProviderRef {
Arc::new(RemoteCatalogProvider {
catalog_name: catalog_name.to_string(),
node_id: self.node_id,
backend: self.backend.clone(),
schemas: Default::default(),
mutex: Default::default(),
@@ -100,9 +98,7 @@ impl RemoteCatalogManager {
}
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
if key.node_id == self.node_id {
yield Ok(key)
}
yield Ok(key)
}
}))
}
@@ -124,10 +120,7 @@ impl RemoteCatalogManager {
let schema_key = SchemaKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
if schema_key.node_id == self.node_id {
yield Ok(schema_key)
}
yield Ok(schema_key)
}
}))
}
@@ -139,8 +132,8 @@ impl RemoteCatalogManager {
&self,
catalog_name: &str,
schema_name: &str,
) -> Pin<Box<dyn Stream<Item = Result<(TableKey, TableValue)>> + Send + '_>> {
let table_prefix = build_table_prefix(catalog_name, schema_name);
) -> Pin<Box<dyn Stream<Item = Result<(TableGlobalKey, TableGlobalValue)>> + Send + '_>> {
let table_prefix = build_table_global_prefix(catalog_name, schema_name);
let mut tables = self.backend.range(table_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = tables.next().await {
@@ -149,12 +142,22 @@ impl RemoteCatalogManager {
debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
continue;
}
let table_key = TableKey::parse(&String::from_utf8_lossy(&k))
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
let table_value = TableValue::parse(&String::from_utf8_lossy(&v))
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
.context(InvalidCatalogValueSnafu)?;
if table_value.node_id == self.node_id {
debug!(
"Found catalog table entry, key: {}, value: {:?}",
table_key, table_value
);
// metasrv has allocated region ids to current datanode
if table_value
.regions_id_map
.get(&self.node_id)
.map(|v| !v.is_empty())
.unwrap_or(false)
{
yield Ok((table_key, table_value))
}
}
@@ -164,7 +167,7 @@ impl RemoteCatalogManager {
/// Fetches catalogs/schemas/tables from the remote catalog manager, along with the max table id allocated.
async fn initiate_catalogs(&self) -> Result<(HashMap<String, CatalogProviderRef>, TableId)> {
let mut res = HashMap::new();
let max_table_id = MIN_USER_TABLE_ID;
let max_table_id = MIN_USER_TABLE_ID - 1;
// initiate default catalog and schema
let default_catalog = self.initiate_default_catalog().await?;
@@ -246,18 +249,17 @@ impl RemoteCatalogManager {
async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())?;
let schema_key = SchemaKey {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id: self.node_id,
}
.to_string();
self.backend
.set(
schema_key.as_bytes(),
&SchemaValue {}
.to_bytes()
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
@@ -265,14 +267,13 @@ impl RemoteCatalogManager {
let catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id: self.node_id,
}
.to_string();
self.backend
.set(
catalog_key.as_bytes(),
&CatalogValue {}
.to_bytes()
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
@@ -282,18 +283,23 @@ impl RemoteCatalogManager {
async fn open_or_create_table(
&self,
table_key: &TableKey,
table_value: &TableValue,
table_key: &TableGlobalKey,
table_value: &TableGlobalValue,
) -> Result<TableRef> {
let context = EngineContext {};
let TableKey {
let TableGlobalKey {
catalog_name,
schema_name,
table_name,
..
} = table_key;
let TableValue { id, meta, .. } = table_value;
let TableGlobalValue {
id,
meta,
regions_id_map,
..
} = table_value;
let request = OpenTableRequest {
catalog_name: catalog_name.clone(),
@@ -310,13 +316,22 @@ impl RemoteCatalogManager {
})? {
Some(table) => Ok(table),
None => {
let schema = meta
.schema
.clone()
.try_into()
.context(InvalidTableSchemaSnafu {
table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
schema: meta.schema.clone(),
})?;
let req = CreateTableRequest {
id: *id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: Arc::new(Schema::new(meta.schema.column_schemas.clone())),
schema: Arc::new(schema),
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because `regions_id_map` is checked in `iter_remote_tables`
primary_key_indices: meta.primary_key_indices.clone(),
create_if_not_exists: true,
table_options: meta.options.clone(),
@@ -345,18 +360,78 @@ impl CatalogManager for RemoteCatalogManager {
catalogs.keys().cloned().collect::<Vec<_>>()
);
self.catalogs.store(Arc::new(catalogs));
self.next_table_id
.store(max_table_id + 1, Ordering::Relaxed);
info!("Max table id allocated: {}", max_table_id);
let mut system_table_requests = self.system_table_requests.lock().await;
handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
info!("All system table opened");
self.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap()
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap()
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
Ok(())
}
fn next_table_id(&self) -> TableId {
self.next_table_id.fetch_add(1, Ordering::Relaxed)
/// Bump table id in a CAS manner with backoff.
async fn next_table_id(&self) -> Result<TableId> {
let key = common_catalog::consts::TABLE_ID_KEY_PREFIX.as_bytes();
let op = || async {
// TODO(hl): optimize this get
let (prev, prev_bytes) = match self.backend.get(key).await? {
None => (MIN_USER_TABLE_ID, vec![]),
Some(kv) => (parse_table_id(&kv.1)?, kv.1),
};
match self
.backend
.compare_and_set(key, &prev_bytes, &(prev + 1).to_le_bytes())
.await
{
Ok(cas_res) => match cas_res {
Ok(_) => Ok(prev),
Err(e) => {
info!("Table id {:?} already occupied", e);
Err(backoff::Error::transient(
BumpTableIdSnafu {
msg: "Table id occupied",
}
.build(),
))
}
},
Err(e) => {
error!(e;"Failed to CAS table id");
Err(backoff::Error::permanent(
BumpTableIdSnafu {
msg: format!("Failed to perform CAS operation: {:?}", e),
}
.build(),
))
}
}
};
let retry_policy: ExponentialBackoff = ExponentialBackoffBuilder::new()
.with_initial_interval(Duration::from_millis(4))
.with_multiplier(2.0)
.with_max_interval(Duration::from_millis(1000))
.with_max_elapsed_time(Some(Duration::from_millis(3000)))
.build();
backoff::future::retry(retry_policy, op).await.map_err(|e| {
BumpTableIdSnafu {
msg: format!(
"Bump table id exceeds max fail times, last error msg: {:?}",
e
),
}
.build()
})
}
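For reference, with the retry policy above a conflicting CAS is retried after roughly 4 ms, then about 8, 16, 32, ... ms (subject to the backoff crate's default jitter), with each interval capped at 1 s, and the whole bump gives up once about 3 s have elapsed in total; `test_next_table_id` further below exercises the happy path and expects `MIN_USER_TABLE_ID` and then `MIN_USER_TABLE_ID + 1` from two consecutive calls.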
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
@@ -427,7 +502,7 @@ impl CatalogList for RemoteCatalogManager {
.set(
key.as_bytes(),
&CatalogValue {}
.to_bytes()
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
@@ -456,17 +531,15 @@ impl CatalogList for RemoteCatalogManager {
pub struct RemoteCatalogProvider {
catalog_name: String,
node_id: u64,
backend: KvBackendRef,
schemas: Arc<ArcSwap<HashMap<String, SchemaProviderRef>>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogProvider {
pub fn new(catalog_name: String, node_id: u64, backend: KvBackendRef) -> Self {
pub fn new(catalog_name: String, backend: KvBackendRef) -> Self {
Self {
catalog_name,
node_id,
backend,
schemas: Default::default(),
mutex: Default::default(),
@@ -477,7 +550,6 @@ impl RemoteCatalogProvider {
SchemaKey {
catalog_name: self.catalog_name.clone(),
schema_name: schema_name.as_ref().to_string(),
node_id: self.node_id,
}
}
}
@@ -508,10 +580,11 @@ impl CatalogProvider for RemoteCatalogProvider {
.set(
key.as_bytes(),
&SchemaValue {}
.to_bytes()
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_schemas = schemas.load();
let mut new_schemas = HashMap::with_capacity(prev_schemas.len() + 1);
new_schemas.clone_from(&prev_schemas);
@@ -529,6 +602,16 @@ impl CatalogProvider for RemoteCatalogProvider {
}
}
/// Parse u8 slice to `TableId`
fn parse_table_id(val: &[u8]) -> Result<TableId> {
Ok(TableId::from_le_bytes(val.try_into().map_err(|_| {
ParseTableIdSnafu {
data: format!("{:?}", val),
}
.build()
})?))
}
pub struct RemoteSchemaProvider {
catalog_name: String,
schema_name: String,
@@ -555,16 +638,11 @@ impl RemoteSchemaProvider {
}
}
fn build_table_key(
&self,
table_name: impl AsRef<str>,
table_version: TableVersion,
) -> TableKey {
TableKey {
fn build_regional_table_key(&self, table_name: impl AsRef<str>) -> TableRegionalKey {
TableRegionalKey {
catalog_name: self.catalog_name.clone(),
schema_name: self.schema_name.clone(),
table_name: table_name.as_ref().to_string(),
version: table_version,
node_id: self.node_id,
}
}
@@ -586,19 +664,14 @@ impl SchemaProvider for RemoteSchemaProvider {
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
let table_info = table.table_info();
let table_version = table_info.ident.version;
let table_value = TableValue {
meta: table_info.meta.clone().into(),
id: table_info.ident.table_id,
node_id: self.node_id,
regions_ids: vec![],
let table_value = TableRegionalValue {
version: table_version,
regions_ids: table.table_info().meta.region_numbers.clone(),
};
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let table_key = self
.build_table_key(name.clone(), table_version)
.to_string();
let table_key = self.build_regional_table_key(&name).to_string();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
@@ -628,18 +701,11 @@ impl SchemaProvider for RemoteSchemaProvider {
}
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
let table_version = match self.tables.load().get(name) {
None => return Ok(None),
Some(t) => t.table_info().ident.version,
};
let table_name = name.to_string();
let table_key = self.build_table_key(&table_name, table_version).to_string();
let table_key = self.build_regional_table_key(&table_name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
let _guard = mutex.lock().await;
@@ -667,3 +733,17 @@ impl SchemaProvider for RemoteSchemaProvider {
Ok(self.tables.load().contains_key(name))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_table_id() {
assert_eq!(12, parse_table_id(&12_i32.to_le_bytes()).unwrap());
let mut data = vec![];
data.extend_from_slice(&12_i32.to_le_bytes());
data.push(0);
assert!(parse_table_id(&data).is_err());
}
}

View File

@@ -96,6 +96,7 @@ impl SystemCatalogTable {
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
desc: Some("System catalog table".to_string()),
schema: schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
create_if_not_exists: true,
table_options: HashMap::new(),

View File

@@ -1,3 +1,4 @@
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
@@ -68,6 +69,34 @@ impl KvBackend for MockKvBackend {
Ok(())
}
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
let mut map = self.map.write().await;
let existing = map.entry(key.to_vec());
match existing {
Entry::Vacant(e) => {
if expect.is_empty() {
e.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(None))
}
}
Entry::Occupied(mut existing) => {
if existing.get() == expect {
existing.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(Some(existing.get().clone())))
}
}
}
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let start = key.to_vec();
let end = end.to_vec();

View File

@@ -12,7 +12,7 @@ mod tests {
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
};
use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use datatypes::schema::Schema;
use futures_util::StreamExt;
@@ -24,19 +24,17 @@ mod tests {
#[tokio::test]
async fn test_backend() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let backend = MockKvBackend::default();
let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id,
}
.to_string();
backend
.set(
default_catalog_key.as_bytes(),
&CatalogValue {}.to_bytes().unwrap(),
&CatalogValue {}.as_bytes().unwrap(),
)
.await
.unwrap();
@@ -44,11 +42,10 @@ mod tests {
let schema_key = SchemaKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
node_id,
}
.to_string();
backend
.set(schema_key.as_bytes(), &SchemaValue {}.to_bytes().unwrap())
.set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
.await
.unwrap();
@@ -59,7 +56,7 @@ mod tests {
res.insert(String::from_utf8_lossy(&kv.0).to_string());
}
assert_eq!(
vec!["__c-greptime-42".to_string()],
vec!["__c-greptime".to_string()],
res.into_iter().collect::<Vec<_>>()
);
}
@@ -114,6 +111,7 @@ mod tests {
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
@@ -154,7 +152,7 @@ mod tests {
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap();
assert_eq!(Vec::<String>::new(), default_schema.table_names().unwrap());
assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());
// register a new table with a nonexistent catalog
let catalog_name = DEFAULT_CATALOG_NAME.to_string();
@@ -173,6 +171,7 @@ mod tests {
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
@@ -188,7 +187,14 @@ mod tests {
table,
};
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(vec![table_name], default_schema.table_names().unwrap());
assert_eq!(
HashSet::from([table_name, "numbers".to_string()]),
default_schema
.table_names()
.unwrap()
.into_iter()
.collect::<HashSet<_>>()
);
}
#[tokio::test]
@@ -200,7 +206,6 @@ mod tests {
let schema_name = "nonexistent_schema".to_string();
let catalog = Arc::new(RemoteCatalogProvider::new(
catalog_name.clone(),
node_id,
backend.clone(),
));
@@ -225,6 +230,7 @@ mod tests {
table_name: "".to_string(),
desc: None,
schema: Arc::new(Schema::new(vec![])),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
@@ -271,4 +277,19 @@ mod tests {
new_catalog.schema_names().unwrap().into_iter().collect()
)
}
#[tokio::test]
async fn test_next_table_id() {
let node_id = 42;
let (_, _, catalog_manager) = prepare_components(node_id).await;
assert_eq!(
MIN_USER_TABLE_ID,
catalog_manager.next_table_id().await.unwrap()
);
assert_eq!(
MIN_USER_TABLE_ID + 1,
catalog_manager.next_table_id().await.unwrap()
);
}
}

View File

@@ -4,6 +4,8 @@ mod database;
mod error;
pub mod load_balance;
pub use api;
pub use self::{
client::Client,
database::{Database, ObjectResult, Select},

View File

@@ -2,6 +2,7 @@
name = "cmd"
version = "0.1.0"
edition = "2021"
default-run = "greptime"
[[bin]]
name = "greptime"
@@ -10,7 +11,9 @@ path = "src/bin/greptime.rs"
[dependencies]
clap = { version = "3.1", features = ["derive"] }
common-error = { path = "../common/error" }
common-telemetry = { path = "../common/telemetry", features = ["deadlock_detection"] }
common-telemetry = { path = "../common/telemetry", features = [
"deadlock_detection",
] }
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"

View File

@@ -1,9 +1,9 @@
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{Datanode, DatanodeOptions};
use datanode::datanode::{Datanode, DatanodeOptions, Mode};
use snafu::ResultExt;
use crate::error::{Error, Result, StartDatanodeSnafu};
use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
use crate::toml_loader;
#[derive(Parser)]
@@ -33,6 +33,8 @@ impl SubCommand {
#[derive(Debug, Parser)]
struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
@@ -41,6 +43,8 @@ struct StartCommand {
mysql_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
metasrv_addr: Option<String>,
#[clap(short, long)]
config_file: Option<String>,
}
@@ -84,6 +88,31 @@ impl TryFrom<StartCommand> for DatanodeOptions {
opts.postgres_addr = addr;
}
match (cmd.metasrv_addr, cmd.node_id) {
(Some(meta_addr), Some(node_id)) => {
// The running mode is set to Distributed only when
// both the metasrv address and the node id are given
// as command-line options.
opts.meta_client_opts.metasrv_addr = meta_addr;
opts.node_id = node_id;
opts.mode = Mode::Distributed;
}
(None, None) => {
opts.mode = Mode::Standalone;
}
(None, Some(_)) => {
return MissingConfigSnafu {
msg: "Missing metasrv address option",
}
.fail();
}
(Some(_), None) => {
return MissingConfigSnafu {
msg: "Missing node id option",
}
.fail();
}
}
Ok(opts)
}
}
@@ -97,10 +126,12 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let cmd = StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
@@ -112,6 +143,13 @@ mod tests {
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
assert_eq!("0.0.0.0:3306".to_string(), options.mysql_addr);
assert_eq!(4, options.mysql_runtime_size);
assert_eq!(
"1.1.1.1:3002".to_string(),
options.meta_client_opts.metasrv_addr
);
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
assert_eq!(3000, options.meta_client_opts.timeout_millis);
assert!(options.meta_client_opts.tcp_nodelay);
assert_eq!("0.0.0.0:5432".to_string(), options.postgres_addr);
assert_eq!(4, options.postgres_runtime_size);
@@ -122,4 +160,58 @@ mod tests {
}
};
}
#[test]
fn test_try_from_cmd() {
assert_eq!(
Mode::Standalone,
DatanodeOptions::try_from(StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: None
})
.unwrap()
.mode
);
assert_eq!(
Mode::Distributed,
DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None
})
.unwrap()
.mode
);
assert!(DatanodeOptions::try_from(StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None,
})
.is_err());
assert!(DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: None,
})
.is_err());
}
}

View File

@@ -35,6 +35,9 @@ pub enum Error {
source: toml::de::Error,
backtrace: Backtrace,
},
#[snafu(display("Missing config, msg: {}", msg))]
MissingConfig { msg: String, backtrace: Backtrace },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -45,7 +48,9 @@ impl ErrorExt for Error {
Error::StartDatanode { source } => source.status_code(),
Error::StartFrontend { source } => source.status_code(),
Error::StartMetaServer { source } => source.status_code(),
Error::ReadConfig { .. } | Error::ParseConfig { .. } => StatusCode::InvalidArguments,
Error::ReadConfig { .. } | Error::ParseConfig { .. } | Error::MissingConfig { .. } => {
StatusCode::InvalidArguments
}
}
}

View File

@@ -37,7 +37,7 @@ impl SubCommand {
}
#[derive(Debug, Parser)]
struct StartCommand {
pub struct StartCommand {
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]

View File

@@ -14,4 +14,6 @@ pub const SCRIPTS_TABLE_ID: u32 = 1;
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_KEY_PREFIX: &str = "__t";
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";

View File

@@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use lazy_static::lazy_static;
use regex::Regex;
@@ -7,29 +7,38 @@ use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableMeta, TableId, TableVersion};
use crate::consts::{CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_KEY_PREFIX};
use crate::consts::{
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
};
use crate::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, ParseNodeIdSnafu,
SerializeCatalogEntryValueSnafu,
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex =
Regex::new(&format!("^{}-([a-zA-Z_]+)-([0-9]+)$", CATALOG_KEY_PREFIX)).unwrap();
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
SCHEMA_KEY_PREFIX
))
.unwrap();
}
lazy_static! {
static ref TABLE_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)-([0-9]+)$",
TABLE_KEY_PREFIX
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)$",
TABLE_GLOBAL_KEY_PREFIX
))
.unwrap();
}
lazy_static! {
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
TABLE_REGIONAL_KEY_PREFIX
))
.unwrap();
}
@@ -42,26 +51,92 @@ pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
}
pub fn build_table_prefix(catalog_name: impl AsRef<str>, schema_name: impl AsRef<str>) -> String {
pub fn build_table_global_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_KEY_PREFIX,
TABLE_GLOBAL_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
pub struct TableKey {
pub fn build_table_regional_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_REGIONAL_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
/// Table global info has only one key across all datanodes, so it does not have a `node_id` field.
pub struct TableGlobalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
}
impl Display for TableGlobalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)
}
}
impl TableGlobalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_GLOBAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
})
}
}
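For illustration (not part of the diff), a global key now renders and parses without any node id; the names below are made up but match the `[a-zA-Z_]+` pattern used by the regex:

// Illustrative: the global key format is "__tg-<catalog>-<schema>-<table>".
let key = TableGlobalKey {
    catalog_name: "greptime".to_string(),
    schema_name: "public".to_string(),
    table_name: "metrics".to_string(),
};
assert_eq!("__tg-greptime-public-metrics", key.to_string());
assert_eq!("metrics", TableGlobalKey::parse("__tg-greptime-public-metrics").unwrap().table_name);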
/// Table global info contains the info necessary for a datanode to create table regions, including
/// the table id, the table meta (schema, ...), and the region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableGlobalValue {
/// Table id is the same across all datanodes.
pub id: TableId,
/// Id of the datanode that created the global table info kv; only for debugging.
pub node_id: u64,
/// Allocation of region ids across all datanodes (node id -> region ids).
pub regions_id_map: HashMap<u64, Vec<u32>>,
pub meta: RawTableMeta,
/// Partition rules for table
pub partition_rules: String,
}
/// Table regional info varies between datanodes, so its key contains a `node_id` field.
pub struct TableRegionalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
pub version: TableVersion,
pub node_id: u64,
}
impl Display for TableKey {
impl Display for TableRegionalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_KEY_PREFIX)?;
f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
@@ -69,68 +144,47 @@ impl Display for TableKey {
f.write_str("-")?;
f.write_str(&self.table_name)?;
f.write_str("-")?;
f.serialize_u64(self.version)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl TableKey {
impl TableRegionalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_KEY_PATTERN
let captures = TABLE_REGIONAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 6, InvalidCatalogSnafu { key });
let version =
u64::from_str(&captures[4]).map_err(|_| InvalidCatalogSnafu { key }.build())?;
let node_id_str = captures[5].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
let node_id = captures[4]
.to_string()
.parse()
.map_err(|_| InvalidCatalogSnafu { key }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
version,
node_id,
})
}
}
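Correspondingly, a regional key keeps the node id as its last segment (again an illustrative sketch with made-up names, not part of the diff):

// Illustrative: the regional key format is "__tr-<catalog>-<schema>-<table>-<node_id>".
let key = TableRegionalKey {
    catalog_name: "greptime".to_string(),
    schema_name: "public".to_string(),
    table_name: "metrics".to_string(),
    node_id: 42,
};
assert_eq!("__tr-greptime-public-metrics-42", key.to_string());
assert_eq!(42, TableRegionalKey::parse("__tr-greptime-public-metrics-42").unwrap().node_id);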
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableValue {
pub id: TableId,
pub node_id: u64,
pub regions_ids: Vec<u64>,
pub meta: RawTableMeta,
}
impl TableValue {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
serde_json::from_str(s.as_ref())
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
/// Regional table info of a specific datanode, including the table version on that datanode and
/// the region ids allocated by metasrv.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TableRegionalValue {
pub version: TableVersion,
pub regions_ids: Vec<u32>,
}
pub struct CatalogKey {
pub catalog_name: String,
pub node_id: u64,
}
impl Display for CatalogKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(CATALOG_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
f.write_str(&self.catalog_name)
}
}
@@ -140,15 +194,9 @@ impl CatalogKey {
let captures = CATALOG_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
let node_id_str = captures[2].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
node_id,
})
}
}
@@ -156,18 +204,9 @@ impl CatalogKey {
#[derive(Debug, Serialize, Deserialize)]
pub struct CatalogValue;
impl CatalogValue {
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
pub struct SchemaKey {
pub catalog_name: String,
pub schema_name: String,
pub node_id: u64,
}
impl Display for SchemaKey {
@@ -176,9 +215,7 @@ impl Display for SchemaKey {
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
f.write_str(&self.schema_name)
}
}
@@ -188,16 +225,10 @@ impl SchemaKey {
let captures = SCHEMA_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
let node_id_str = captures[3].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
node_id,
})
}
}
@@ -205,14 +236,32 @@ impl SchemaKey {
#[derive(Debug, Serialize, Deserialize)]
pub struct SchemaValue;
impl SchemaValue {
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
macro_rules! define_catalog_value {
( $($val_ty: ty), *) => {
$(
impl $val_ty {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
serde_json::from_str(s.as_ref())
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
)*
}
}
define_catalog_value!(
TableRegionalValue,
TableGlobalValue,
CatalogValue,
SchemaValue
);
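A quick sketch of the round trip the macro generates for each value type (illustrative only, not part of the diff):

// Illustrative: every listed value type gains JSON (de)serialization helpers.
let value = TableRegionalValue { version: 1, regions_ids: vec![0, 1] };
let bytes = value.as_bytes().unwrap(); // JSON text as bytes
let parsed = TableRegionalValue::parse(String::from_utf8(bytes).unwrap()).unwrap();
assert_eq!(vec![0, 1], parsed.regions_ids);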
#[cfg(test)]
mod tests {
use datatypes::prelude::ConcreteDataType;
@@ -222,32 +271,28 @@ mod tests {
#[test]
fn test_parse_catalog_key() {
let key = "__c-C-2";
let key = "__c-C";
let catalog_key = CatalogKey::parse(key).unwrap();
assert_eq!("C", catalog_key.catalog_name);
assert_eq!(2, catalog_key.node_id);
assert_eq!(key, catalog_key.to_string());
}
#[test]
fn test_parse_schema_key() {
let key = "__s-C-S-3";
let key = "__s-C-S";
let schema_key = SchemaKey::parse(key).unwrap();
assert_eq!("C", schema_key.catalog_name);
assert_eq!("S", schema_key.schema_name);
assert_eq!(3, schema_key.node_id);
assert_eq!(key, schema_key.to_string());
}
#[test]
fn test_parse_table_key() {
let key = "__t-C-S-T-42-1";
let entry = TableKey::parse(key).unwrap();
let key = "__tg-C-S-T";
let entry = TableGlobalKey::parse(key).unwrap();
assert_eq!("C", entry.catalog_name);
assert_eq!("S", entry.schema_name);
assert_eq!("T", entry.table_name);
assert_eq!(1, entry.node_id);
assert_eq!(42, entry.version);
assert_eq!(key, &entry.to_string());
}
@@ -256,8 +301,8 @@ mod tests {
assert_eq!("__c-", build_catalog_prefix());
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
assert_eq!(
"__t-CATALOG-SCHEMA-",
build_table_prefix("CATALOG", "SCHEMA")
"__tg-CATALOG-SCHEMA-",
build_table_global_prefix("CATALOG", "SCHEMA")
);
}
@@ -278,16 +323,18 @@ mod tests {
engine_options: Default::default(),
value_indices: vec![2, 3],
options: Default::default(),
region_numbers: vec![1],
};
let value = TableValue {
let value = TableGlobalValue {
id: 42,
node_id: 32,
regions_ids: vec![1, 2, 3],
node_id: 0,
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
meta,
partition_rules: "{}".to_string(),
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableValue::parse(&serialized).unwrap();
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
assert_eq!(value, deserialized);
}
}

View File

@@ -3,6 +3,7 @@ pub mod error;
mod helper;
pub use helper::{
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableKey, TableValue,
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
TableGlobalValue, TableRegionalKey, TableRegionalValue,
};

View File

@@ -8,6 +8,7 @@ arc-swap = "1.0"
chrono-tz = "0.6"
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-time = { path = "../time" }
common-query = { path = "../query" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../../datatypes" }

View File

@@ -6,6 +6,7 @@ pub mod math;
pub mod numpy;
#[cfg(test)]
pub(crate) mod test;
mod timestamp;
pub mod udf;
pub use aggregate::MedianAccumulatorCreator;

View File

@@ -9,6 +9,7 @@ use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::function::FunctionRef;
use crate::scalars::math::MathFunction;
use crate::scalars::numpy::NumpyFunction;
use crate::scalars::timestamp::TimestampFunction;
#[derive(Default)]
pub struct FunctionRegistry {
@@ -31,6 +32,10 @@ impl FunctionRegistry {
.insert(func.name(), func);
}
pub fn get_aggr_function(&self, name: &str) -> Option<AggregateFunctionMetaRef> {
self.aggregate_functions.read().unwrap().get(name).cloned()
}
pub fn get_function(&self, name: &str) -> Option<FunctionRef> {
self.functions.read().unwrap().get(name).cloned()
}
@@ -54,6 +59,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
MathFunction::register(&function_registry);
NumpyFunction::register(&function_registry);
TimestampFunction::register(&function_registry);
AggregateFunctions::register(&function_registry);

View File

@@ -0,0 +1,116 @@
//! `from_unixtime` function.
//! TODO(dennis): it can be removed after we upgrade datafusion.
use std::fmt;
use std::sync::Arc;
use arrow::compute::arithmetics;
use arrow::datatypes::DataType as ArrowDatatype;
use arrow::scalar::PrimitiveScalar;
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::TimestampVector;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;
use crate::error::Result;
use crate::scalars::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub struct FromUnixtimeFunction;
const NAME: &str = "from_unixtime";
impl Function for FromUnixtimeFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::timestamp_millis_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(
1,
vec![ConcreteDataType::int64_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
match columns[0].data_type() {
ConcreteDataType::Int64(_) => {
let array = columns[0].to_arrow_array();
// Our timestamp vector's time unit is milliseconds, so scale the input seconds by 1000
let array = arithmetics::mul_scalar(
&*array,
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
);
Ok(Arc::new(
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
data_type: ArrowDatatype::Int64,
})?,
))
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail()
.map_err(|e| e.into()),
}
}
}
impl fmt::Display for FromUnixtimeFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "FROM_UNIXTIME")
}
}
#[cfg(test)]
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::value::Value;
use datatypes::vectors::Int64Vector;
use super::*;
#[test]
fn test_from_unixtime() {
let f = FromUnixtimeFunction::default();
assert_eq!("from_unixtime", f.name());
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::int64_datatype()]
));
let times = vec![Some(1494410783), None, Some(1494410983)];
let args: Vec<VectorRef> = vec![Arc::new(Int64Vector::from(times.clone()))];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(3, vector.len());
for (i, t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Timestamp(ts) => {
assert_eq!(ts.value(), t.unwrap() * 1000);
}
_ => unreachable!(),
}
}
}
}

View File

@@ -0,0 +1,14 @@
use std::sync::Arc;
mod from_unixtime;
use from_unixtime::FromUnixtimeFunction;
use crate::scalars::function_registry::FunctionRegistry;
pub(crate) struct TimestampFunction;
impl TimestampFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(FromUnixtimeFunction::default()));
}
}

View File

@@ -9,6 +9,7 @@ async-trait = "0.1"
common-base = { path = "../base" }
common-error = { path = "../error" }
common-runtime = { path = "../runtime" }
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.0", features = ["full"] }
@@ -19,3 +20,11 @@ tower = "0.4"
package = "arrow2"
version = "0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
[dev-dependencies]
criterion = "0.4"
rand = "0.8"
[[bench]]
name = "bench_main"
harness = false

View File

@@ -0,0 +1,7 @@
use criterion::criterion_main;
mod channel_manager;
criterion_main! {
channel_manager::benches
}

View File

@@ -0,0 +1,34 @@
use common_grpc::channel_manager::ChannelManager;
use criterion::{criterion_group, criterion_main, Criterion};
#[tokio::main]
async fn do_bench_channel_manager() {
let m = ChannelManager::new();
let task_count = 8;
let mut joins = Vec::with_capacity(task_count);
for _ in 0..task_count {
let m_clone = m.clone();
let join = tokio::spawn(async move {
for _ in 0..10000 {
let idx = rand::random::<usize>() % 100;
let ret = m_clone.get(format!("{}", idx));
assert!(ret.is_ok());
}
});
joins.push(join);
}
for join in joins {
let _ = join.await;
}
}
fn bench_channel_manager(c: &mut Criterion) {
c.bench_function("bench channel manager", |b| {
b.iter(do_bench_channel_manager);
});
}
criterion_group!(benches, bench_channel_manager);
criterion_main!(benches);

View File

@@ -1,8 +1,10 @@
use std::collections::HashMap;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::sync::Mutex;
use std::time::Duration;
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use snafu::ResultExt;
use tonic::transport::Channel as InnerChannel;
use tonic::transport::Endpoint;
@@ -17,7 +19,7 @@ const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
#[derive(Clone, Debug)]
pub struct ChannelManager {
config: ChannelConfig,
pool: Arc<Mutex<Pool>>,
pool: Arc<Pool>,
}
impl Default for ChannelManager {
@@ -32,17 +34,14 @@ impl ChannelManager {
}
pub fn with_config(config: ChannelConfig) -> Self {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let pool = Arc::new(Pool::default());
let cloned_pool = pool.clone();
common_runtime::spawn_bg(async move {
common_runtime::spawn_bg(async {
recycle_channel_in_loop(cloned_pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
});
Self { pool, config }
Self { config, pool }
}
pub fn config(&self) -> &ChannelConfig {
@@ -51,23 +50,30 @@ impl ChannelManager {
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
let addr = addr.as_ref();
let mut pool = self.pool.lock().unwrap();
if let Some(ch) = pool.get_mut(addr) {
ch.access += 1;
return Ok(ch.channel.clone());
// Fast path: only acquires a shard read lock.
if let Some(inner_ch) = self.pool.get(addr) {
return Ok(inner_ch);
}
let endpoint = self.build_endpoint(addr)?;
// Slow path: acquires a shard write lock via the entry API.
let entry = match self.pool.entry(addr.to_string()) {
Entry::Occupied(entry) => {
entry.get().increase_access();
entry.into_ref()
}
Entry::Vacant(entry) => {
let endpoint = self.build_endpoint(addr)?;
let inner_channel = endpoint.connect_lazy();
let inner_channel = endpoint.connect_lazy();
let channel = Channel {
channel: inner_channel.clone(),
access: 1,
use_default_connector: true,
let channel = Channel {
channel: inner_channel,
access: AtomicUsize::new(1),
use_default_connector: true,
};
entry.insert(channel)
}
};
pool.put(addr, channel);
Ok(inner_channel)
Ok(entry.channel.clone())
}
pub fn reset_with_connector<C>(
@@ -86,11 +92,10 @@ impl ChannelManager {
let inner_channel = endpoint.connect_with_connector_lazy(connector);
let channel = Channel {
channel: inner_channel.clone(),
access: 1,
access: AtomicUsize::new(1),
use_default_connector: false,
};
let mut pool = self.pool.lock().unwrap();
pool.put(addr, channel);
self.pool.put(addr, channel);
Ok(inner_channel)
}
@@ -99,8 +104,7 @@ impl ChannelManager {
where
F: FnMut(&String, &mut Channel) -> bool,
{
let mut pool = self.pool.lock().unwrap();
pool.retain_channel(f);
self.pool.retain_channel(f);
}
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
@@ -297,39 +301,56 @@ impl ChannelConfig {
#[derive(Debug)]
pub struct Channel {
channel: InnerChannel,
access: usize,
access: AtomicUsize,
use_default_connector: bool,
}
impl Channel {
#[inline]
pub fn access(&self) -> usize {
self.access
self.access.load(Ordering::Relaxed)
}
#[inline]
pub fn use_default_connector(&self) -> bool {
self.use_default_connector
}
#[inline]
pub fn increase_access(&self) {
self.access.fetch_add(1, Ordering::Relaxed);
}
}
#[derive(Debug)]
#[derive(Debug, Default)]
struct Pool {
channels: HashMap<String, Channel>,
channels: DashMap<String, Channel>,
}
impl Pool {
#[inline]
fn get_mut(&mut self, addr: &str) -> Option<&mut Channel> {
self.channels.get_mut(addr)
fn get(&self, addr: &str) -> Option<InnerChannel> {
let channel = self.channels.get(addr);
channel.map(|ch| {
ch.increase_access();
ch.channel.clone()
})
}
#[inline]
fn put(&mut self, addr: &str, channel: Channel) {
fn entry(&self, addr: String) -> Entry<String, Channel> {
self.channels.entry(addr)
}
#[cfg(test)]
fn get_access(&self, addr: &str) -> Option<usize> {
let channel = self.channels.get(addr);
channel.map(|ch| ch.access())
}
fn put(&self, addr: &str, channel: Channel) {
self.channels.insert(addr.to_string(), channel);
}
#[inline]
fn retain_channel<F>(&mut self, f: F)
fn retain_channel<F>(&self, f: F)
where
F: FnMut(&String, &mut Channel) -> bool,
{
@@ -337,20 +358,12 @@ impl Pool {
}
}
async fn recycle_channel_in_loop(pool: Arc<Mutex<Pool>>, interval_secs: u64) {
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
loop {
interval.tick().await;
let mut pool = pool.lock().unwrap();
pool.retain_channel(|_, c| {
if c.access == 0 {
false
} else {
c.access = 0;
true
}
})
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
}
}
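The sweep above tests and resets the access counter in a single step; a standalone illustration of why `swap(0, ..) != 0` is sufficient (not part of the diff):

use std::sync::atomic::{AtomicUsize, Ordering};

fn main() {
    // A channel that saw 3 accesses since the last sweep:
    let access = AtomicUsize::new(3);
    assert!(access.swap(0, Ordering::Relaxed) != 0); // first sweep: keep it, counter reset to 0
    assert!(access.swap(0, Ordering::Relaxed) == 0); // no get() in between: next sweep drops it
}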
@@ -363,10 +376,7 @@ mod tests {
#[should_panic]
#[test]
fn test_invalid_addr() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let pool = Arc::new(Pool::default());
let mgr = ChannelManager {
pool,
..Default::default()
@@ -378,36 +388,31 @@ mod tests {
#[tokio::test]
async fn test_access_count() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let pool = Arc::new(Pool::default());
let config = ChannelConfig::new();
let mgr = ChannelManager { pool, config };
let mgr = Arc::new(ChannelManager { pool, config });
let addr = "test_uri";
for i in 0..10 {
{
let _ = mgr.get(addr).unwrap();
let mut pool = mgr.pool.lock().unwrap();
assert_eq!(i + 1, pool.get_mut(addr).unwrap().access);
}
let mut joins = Vec::with_capacity(10);
for _ in 0..10 {
let mgr_clone = mgr.clone();
let join = tokio::spawn(async move {
for _ in 0..100 {
let _ = mgr_clone.get(addr);
}
});
joins.push(join);
}
for join in joins {
join.await.unwrap();
}
let mut pool = mgr.pool.lock().unwrap();
assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
assert_eq!(10, pool.get_mut(addr).unwrap().access);
mgr.pool
.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);
pool.retain_channel(|_, c| {
if c.access == 0 {
false
} else {
c.access = 0;
true
}
});
assert_eq!(0, pool.get_mut(addr).unwrap().access);
assert_eq!(0, mgr.pool.get_access(addr).unwrap());
}
#[test]
@@ -466,10 +471,7 @@ mod tests {
#[test]
fn test_build_endpoint() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let pool = Arc::new(Pool::default());
let config = ChannelConfig::new()
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
@@ -493,9 +495,11 @@ mod tests {
#[tokio::test]
async fn test_channel_with_connector() {
let pool = Pool {
channels: HashMap::default(),
channels: DashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let pool = Arc::new(pool);
let config = ChannelConfig::new();
let mgr = ChannelManager { pool, config };

View File

@@ -4,6 +4,7 @@ use arrow::datatypes::DataType as ArrowDatatype;
use common_error::prelude::*;
use datafusion_common::DataFusionError;
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use statrs::StatsError;
common_error::define_opaque_error!(Error);
@@ -17,6 +18,13 @@ pub enum InnerError {
backtrace: Backtrace,
},
#[snafu(display("Unsupported input datatypes {:?} in function {}", datatypes, function))]
UnsupportedInputDataType {
function: String,
datatypes: Vec<ConcreteDataType>,
backtrace: Backtrace,
},
#[snafu(display("Fail to generate function, source: {}", source))]
GenerateFunction {
source: StatsError,
@@ -116,6 +124,8 @@ impl ErrorExt for InnerError {
| InnerError::GeneralDataFusion { .. }
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
}
}

View File

@@ -4,9 +4,12 @@ mod recordbatch;
pub mod util;
use std::pin::Pin;
use std::sync::Arc;
use datafusion::arrow_print;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::schema::SchemaRef;
use datatypes::prelude::VectorRef;
use datatypes::schema::{Schema, SchemaRef};
use error::Result;
use futures::task::{Context, Poll};
use futures::Stream;
@@ -54,6 +57,35 @@ pub struct RecordBatches {
}
impl RecordBatches {
pub fn try_from_columns<I: IntoIterator<Item = VectorRef>>(
schema: SchemaRef,
columns: I,
) -> Result<Self> {
let batches = vec![RecordBatch::new(schema.clone(), columns)?];
Ok(Self { schema, batches })
}
#[inline]
pub fn empty() -> Self {
Self {
schema: Arc::new(Schema::new(vec![])),
batches: vec![],
}
}
pub fn iter(&self) -> impl Iterator<Item = &RecordBatch> {
self.batches.iter()
}
pub fn pretty_print(&self) -> String {
arrow_print::write(
&self
.iter()
.map(|x| x.df_recordbatch.clone())
.collect::<Vec<_>>(),
)
}
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
for batch in batches.iter() {
ensure!(
@@ -124,7 +156,26 @@ mod tests {
use super::*;
#[test]
fn test_recordbatches() {
fn test_recordbatches_try_from_columns() {
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"a",
ConcreteDataType::int32_datatype(),
false,
)]));
let result = RecordBatches::try_from_columns(
schema.clone(),
vec![Arc::new(StringVector::from(vec!["hello", "world"])) as _],
);
assert!(result.is_err());
let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
assert_eq!(r.take(), expected);
}
#[test]
fn test_recordbatches_try_new() {
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
@@ -150,6 +201,15 @@ mod tests {
);
let batches = RecordBatches::try_new(schema1.clone(), vec![batch1.clone()]).unwrap();
let expected = "\
+---+-------+
| a | b |
+---+-------+
| 1 | hello |
| 2 | world |
+---+-------+";
assert_eq!(batches.pretty_print(), expected);
assert_eq!(schema1, batches.schema());
assert_eq!(vec![batch1], batches.take());
}

View File

@@ -377,6 +377,7 @@ mod test {
table_name: table_name.to_string(),
desc: None,
schema: Arc::new(Schema::new(supported_types())),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: true,
table_options: Default::default(),

View File

@@ -74,7 +74,7 @@ pub trait BucketAligned {
impl<T: Into<i64>> BucketAligned for T {
fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis> {
assert!(bucket_duration > 0);
assert!(bucket_duration > 0, "{}", bucket_duration);
self.into()
.checked_div_euclid(bucket_duration)
.and_then(|val| val.checked_mul(bucket_duration))

View File

@@ -29,6 +29,8 @@ datatypes = { path = "../datatypes" }
futures = "0.3"
hyper = { version = "0.14", features = ["full"] }
log-store = { path = "../log-store" }
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv", features = ["mock"] }
metrics = "0.20"
object-store = { path = "../object-store" }
query = { path = "../query" }

View File

@@ -20,8 +20,16 @@ impl Default for ObjectStoreConfig {
}
}
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Mode {
Standalone,
Distributed,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DatanodeOptions {
pub node_id: u64,
pub http_addr: String,
pub rpc_addr: String,
pub rpc_runtime_size: usize,
@@ -29,13 +37,16 @@ pub struct DatanodeOptions {
pub mysql_runtime_size: usize,
pub postgres_addr: String,
pub postgres_runtime_size: usize,
pub meta_client_opts: MetaClientOpts,
pub wal_dir: String,
pub storage: ObjectStoreConfig,
pub mode: Mode,
}
impl Default for DatanodeOptions {
fn default() -> Self {
Self {
node_id: 0,
http_addr: "0.0.0.0:3000".to_string(),
rpc_addr: "0.0.0.0:3001".to_string(),
rpc_runtime_size: 8,
@@ -43,8 +54,10 @@ impl Default for DatanodeOptions {
mysql_runtime_size: 2,
postgres_addr: "0.0.0.0:5432".to_string(),
postgres_runtime_size: 2,
meta_client_opts: MetaClientOpts::default(),
wal_dir: "/tmp/greptimedb/wal".to_string(),
storage: ObjectStoreConfig::default(),
mode: Mode::Standalone,
}
}
}
@@ -72,3 +85,23 @@ impl Datanode {
self.services.start(&self.opts).await
}
}
/// Options for the meta client in a datanode instance.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MetaClientOpts {
pub metasrv_addr: String,
pub timeout_millis: u64,
pub connect_timeout_millis: u64,
pub tcp_nodelay: bool,
}
impl Default for MetaClientOpts {
fn default() -> Self {
Self {
metasrv_addr: "127.0.0.1:3002".to_string(),
timeout_millis: 3_000u64,
connect_timeout_millis: 5_000u64,
tcp_nodelay: true,
}
}
}
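Since the struct is `Clone` and provides a `Default`, callers can override a single field and keep the rest (hypothetical snippet, the address value is illustrative):

// Illustrative: override only the metasrv address, keep the other defaults.
let meta_opts = MetaClientOpts {
    metasrv_addr: "10.0.0.5:3002".to_string(),
    ..MetaClientOpts::default()
};
assert_eq!(3_000, meta_opts.timeout_millis);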

View File

@@ -279,6 +279,12 @@ pub enum Error {
table_name: String,
source: catalog::error::Error,
},
#[snafu(display("Failed to initialize meta client, source: {}", source))]
MetaClientInit {
#[snafu(backtrace)]
source: meta_client::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -346,6 +352,7 @@ impl ErrorExt for Error {
| Error::CollectRecordBatches { source } => source.status_code(),
Error::ArrowComputation { .. } => StatusCode::Unexpected,
Error::MetaClientInit { source, .. } => source.status_code(),
}
}

View File

@@ -0,0 +1,109 @@
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use api::v1::meta::{HeartbeatRequest, HeartbeatResponse, Peer};
use common_telemetry::{error, info, warn};
use meta_client::client::{HeartbeatSender, MetaClient};
use snafu::ResultExt;
use crate::error::{MetaClientInitSnafu, Result};
#[derive(Debug, Clone, Default)]
pub struct HeartbeatTask {
node_id: u64,
server_addr: String,
running: Arc<AtomicBool>,
meta_client: MetaClient,
interval: u64,
}
impl Drop for HeartbeatTask {
fn drop(&mut self) {
self.running.store(false, Ordering::Release);
}
}
impl HeartbeatTask {
/// Create a new heartbeat task instance.
pub fn new(node_id: u64, server_addr: String, meta_client: MetaClient) -> Self {
Self {
node_id,
server_addr,
running: Arc::new(AtomicBool::new(false)),
meta_client,
interval: 5_000, // default interval is set to 5 secs
}
}
pub async fn create_streams(
meta_client: &MetaClient,
running: Arc<AtomicBool>,
) -> Result<HeartbeatSender> {
let (tx, mut rx) = meta_client.heartbeat().await.context(MetaClientInitSnafu)?;
common_runtime::spawn_bg(async move {
while let Some(res) = match rx.message().await {
Ok(m) => m,
Err(e) => {
error!(e; "Error while reading heartbeat response");
None
}
} {
Self::handle_response(res).await;
if !running.load(Ordering::Acquire) {
info!("Heartbeat task shutdown");
}
}
info!("Heartbeat handling loop exit.")
});
Ok(tx)
}
async fn handle_response(resp: HeartbeatResponse) {
info!("heartbeat response: {:?}", resp);
}
/// Starts the heartbeat task by spawning a background task.
pub async fn start(&self) -> Result<()> {
let running = self.running.clone();
if running
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Heartbeat task started multiple times");
return Ok(());
}
let interval = self.interval;
let node_id = self.node_id;
let server_addr = self.server_addr.clone();
let meta_client = self.meta_client.clone();
let mut tx = Self::create_streams(&meta_client, running.clone()).await?;
common_runtime::spawn_bg(async move {
while running.load(Ordering::Acquire) {
let req = HeartbeatRequest {
peer: Some(Peer {
id: node_id,
addr: server_addr.clone(),
}),
..Default::default()
};
if let Err(e) = tx.send(req).await {
error!("Failed to send heartbeat to metasrv, error: {:?}", e);
match Self::create_streams(&meta_client, running.clone()).await {
Ok(new_tx) => {
info!("Reconnected to metasrv");
tx = new_tx;
}
Err(e) => {
error!(e;"Failed to reconnect to metasrv!");
}
}
}
tokio::time::sleep(Duration::from_millis(interval)).await;
}
});
Ok(())
}
}

View File

@@ -1,8 +1,12 @@
use std::time::Duration;
use std::{fs, path, sync::Arc};
use catalog::remote::MetaKvBackend;
use catalog::CatalogManagerRef;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_telemetry::logging::info;
use log_store::fs::{config::LogConfig, log::LocalFileLogStore};
use meta_client::client::{MetaClient, MetaClientBuilder};
use object_store::{services::fs::Builder, util, ObjectStore};
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
use snafu::prelude::*;
@@ -10,8 +14,9 @@ use storage::{config::EngineConfig as StorageEngineConfig, EngineImpl};
use table_engine::config::EngineConfig as TableEngineConfig;
use table_engine::engine::MitoEngine;
use crate::datanode::{DatanodeOptions, ObjectStoreConfig};
use crate::error::{self, NewCatalogSnafu, Result};
use crate::datanode::{DatanodeOptions, MetaClientOpts, Mode, ObjectStoreConfig};
use crate::error::{self, CatalogSnafu, MetaClientInitSnafu, NewCatalogSnafu, Result};
use crate::heartbeat::HeartbeatTask;
use crate::script::ScriptExecutor;
use crate::server::grpc::plan::PhysicalPlanner;
use crate::sql::SqlHandler;
@@ -19,15 +24,18 @@ use crate::sql::SqlHandler;
mod grpc;
mod sql;
type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
pub(crate) type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
// An abstraction to read/write services.
pub struct Instance {
query_engine: QueryEngineRef,
sql_handler: SqlHandler,
catalog_manager: CatalogManagerRef,
physical_planner: PhysicalPlanner,
script_executor: ScriptExecutor,
pub(crate) query_engine: QueryEngineRef,
pub(crate) sql_handler: SqlHandler,
pub(crate) catalog_manager: CatalogManagerRef,
pub(crate) physical_planner: PhysicalPlanner,
pub(crate) script_executor: ScriptExecutor,
#[allow(unused)]
pub(crate) meta_client: Option<MetaClient>,
pub(crate) heartbeat_task: Option<HeartbeatTask>,
}
pub type InstanceRef = Arc<Instance>;
@@ -37,6 +45,13 @@ impl Instance {
let object_store = new_object_store(&opts.storage).await?;
let log_store = create_local_file_log_store(opts).await?;
let meta_client = match opts.mode {
Mode::Standalone => None,
Mode::Distributed => {
Some(new_metasrv_client(opts.node_id, &opts.meta_client_opts).await?)
}
};
let table_engine = Arc::new(DefaultEngine::new(
TableEngineConfig::default(),
EngineImpl::new(
@@ -46,22 +61,52 @@ impl Instance {
),
object_store,
));
let catalog_manager = Arc::new(
catalog::local::LocalCatalogManager::try_new(table_engine.clone())
.await
.context(NewCatalogSnafu)?,
);
let factory = QueryEngineFactory::new(catalog_manager.clone());
// create the catalog manager: local in standalone mode, remote (backed by metasrv) in distributed mode
let (catalog_manager, factory) = match opts.mode {
Mode::Standalone => {
let catalog = Arc::new(
catalog::local::LocalCatalogManager::try_new(table_engine.clone())
.await
.context(CatalogSnafu)?,
);
let factory = QueryEngineFactory::new(catalog.clone());
(catalog as CatalogManagerRef, factory)
}
Mode::Distributed => {
let catalog = Arc::new(catalog::remote::RemoteCatalogManager::new(
table_engine.clone(),
opts.node_id,
Arc::new(MetaKvBackend {
client: meta_client.as_ref().unwrap().clone(),
}),
));
let factory = QueryEngineFactory::new(catalog.clone());
(catalog as CatalogManagerRef, factory)
}
};
let query_engine = factory.query_engine().clone();
let script_executor =
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
let heartbeat_task = match opts.mode {
Mode::Standalone => None,
Mode::Distributed => Some(HeartbeatTask::new(
opts.node_id, /*node id not set*/
opts.rpc_addr.clone(),
meta_client.as_ref().unwrap().clone(),
)),
};
Ok(Self {
query_engine: query_engine.clone(),
sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
catalog_manager,
physical_planner: PhysicalPlanner::new(query_engine),
script_executor,
meta_client,
heartbeat_task,
})
}
@@ -70,6 +115,9 @@ impl Instance {
.start()
.await
.context(NewCatalogSnafu)?;
if let Some(task) = &self.heartbeat_task {
task.start().await?;
}
Ok(())
}
@@ -80,47 +128,9 @@ impl Instance {
pub fn catalog_manager(&self) -> &CatalogManagerRef {
&self.catalog_manager
}
// This method is used in other crate's testing codes, so move it out of "cfg(test)".
// TODO(LFC): Delete it when callers no longer need it.
pub async fn new_mock() -> Result<Self> {
use table_engine::table::test_util::new_test_object_store;
use table_engine::table::test_util::MockEngine;
use table_engine::table::test_util::MockMitoEngine;
let (_dir, object_store) = new_test_object_store("setup_mock_engine_and_table").await;
let mock_engine = Arc::new(MockMitoEngine::new(
TableEngineConfig::default(),
MockEngine::default(),
object_store,
));
let catalog_manager = Arc::new(
catalog::local::manager::LocalCatalogManager::try_new(mock_engine.clone())
.await
.unwrap(),
);
let factory = QueryEngineFactory::new(catalog_manager.clone());
let query_engine = factory.query_engine().clone();
let sql_handler = SqlHandler::new(mock_engine.clone(), catalog_manager.clone());
let physical_planner = PhysicalPlanner::new(query_engine.clone());
let script_executor = ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
.await
.unwrap();
Ok(Self {
query_engine,
sql_handler,
catalog_manager,
physical_planner,
script_executor,
})
}
}
async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
// TODO(dennis): support other backends
let data_dir = util::normalize_dir(match store_config {
ObjectStoreConfig::File { data_dir } => data_dir,
@@ -139,7 +149,38 @@ async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStor
Ok(ObjectStore::new(accessor))
}
async fn create_local_file_log_store(opts: &DatanodeOptions) -> Result<LocalFileLogStore> {
/// Create metasrv client instance and spawn heartbeat loop.
async fn new_metasrv_client(node_id: u64, meta_config: &MetaClientOpts) -> Result<MetaClient> {
let cluster_id = 0; // TODO(hl): read from config
let member_id = node_id;
let config = ChannelConfig::new()
.timeout(Duration::from_millis(meta_config.timeout_millis))
.connect_timeout(Duration::from_millis(meta_config.connect_timeout_millis))
.tcp_nodelay(meta_config.tcp_nodelay);
let channel_manager = ChannelManager::with_config(config);
let mut meta_client = MetaClientBuilder::new(cluster_id, member_id)
.enable_heartbeat()
.enable_router()
.enable_store()
.channel_manager(channel_manager)
.build();
meta_client
.start(&[&meta_config.metasrv_addr])
.await
.context(MetaClientInitSnafu)?;
// required only when the heartbeat_client is enabled
meta_client
.ask_leader()
.await
.context(MetaClientInitSnafu)?;
Ok(meta_client)
}
pub(crate) async fn create_local_file_log_store(
opts: &DatanodeOptions,
) -> Result<LocalFileLogStore> {
// create WAL directory
fs::create_dir_all(path::Path::new(&opts.wal_dir))
.context(error::CreateDirSnafu { dir: &opts.wal_dir })?;

View File

@@ -1,3 +1,6 @@
use std::ops::Deref;
use api::v1::codec::RegionId;
use api::v1::{
admin_expr, codec::InsertBatch, insert_expr, object_expr, select_expr, AdminExpr, AdminResult,
ObjectExpr, ObjectResult, SelectExpr,
@@ -62,7 +65,11 @@ impl Instance {
insert_batches: &[InsertBatch],
) -> Result<()> {
// Create table automatically, build schema from data.
let table_id = self.catalog_manager.next_table_id();
let table_id = self
.catalog_manager
.next_table_id()
.await
.context(CatalogSnafu)?;
let create_table_request = insert::build_create_table_request(
catalog_name,
schema_name,
@@ -200,6 +207,18 @@ impl GrpcQueryHandler for Instance {
.context(servers::error::InvalidQuerySnafu {
reason: "missing `expr` in `InsertExpr`",
})?;
// TODO(fys): _region_id is for later use.
let _region_id: Option<RegionId> = insert_expr
.options
.get("region_id")
.map(|id| {
id.deref()
.try_into()
.context(servers::error::DecodeRegionIdSnafu)
})
.transpose()?;
match expr {
insert_expr::Expr::Values(values) => {
self.handle_insert(table_name, values).await

View File

@@ -10,7 +10,7 @@ use servers::query_handler::SqlQueryHandler;
use snafu::prelude::*;
use sql::statements::statement::Statement;
use crate::error::{ExecuteSqlSnafu, Result};
use crate::error::{CatalogSnafu, ExecuteSqlSnafu, Result};
use crate::instance::Instance;
use crate::metric;
use crate::sql::SqlRequest;
@@ -49,7 +49,11 @@ impl Instance {
}
Statement::Create(c) => {
let table_id = self.catalog_manager.next_table_id();
let table_id = self
.catalog_manager
.next_table_id()
.await
.context(CatalogSnafu)?;
let _engine_name = c.engine.clone();
// TODO(hl): Select table engine by engine_name
@@ -77,6 +81,9 @@ impl Instance {
Statement::ShowTables(stmt) => {
self.sql_handler.execute(SqlRequest::ShowTables(stmt)).await
}
Statement::ShowCreateTable(_stmt) => {
unimplemented!("SHOW CREATE TABLE is not implemented yet");
}
}
}
}

View File

@@ -2,8 +2,10 @@
pub mod datanode;
pub mod error;
mod heartbeat;
pub mod instance;
mod metric;
mod mock;
mod script;
pub mod server;
mod sql;

126
src/datanode/src/mock.rs Normal file
View File

@@ -0,0 +1,126 @@
use std::sync::Arc;
use catalog::remote::MetaKvBackend;
use meta_client::client::{MetaClient, MetaClientBuilder};
use query::QueryEngineFactory;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use table_engine::config::EngineConfig as TableEngineConfig;
use crate::datanode::DatanodeOptions;
use crate::error::Result;
use crate::heartbeat::HeartbeatTask;
use crate::instance::{create_local_file_log_store, new_object_store, DefaultEngine, Instance};
use crate::script::ScriptExecutor;
use crate::server::grpc::plan::PhysicalPlanner;
use crate::sql::SqlHandler;
impl Instance {
// This method is used in other crates' testing code, so it is moved out of "cfg(test)".
// TODO(LFC): Delete it when callers no longer need it.
pub async fn new_mock() -> Result<Self> {
use table_engine::table::test_util::new_test_object_store;
use table_engine::table::test_util::MockEngine;
use table_engine::table::test_util::MockMitoEngine;
let meta_client = Some(mock_meta_client().await);
let (_dir, object_store) = new_test_object_store("setup_mock_engine_and_table").await;
let mock_engine = Arc::new(MockMitoEngine::new(
TableEngineConfig::default(),
MockEngine::default(),
object_store,
));
let catalog_manager = Arc::new(
catalog::local::manager::LocalCatalogManager::try_new(mock_engine.clone())
.await
.unwrap(),
);
let factory = QueryEngineFactory::new(catalog_manager.clone());
let query_engine = factory.query_engine().clone();
let sql_handler = SqlHandler::new(mock_engine.clone(), catalog_manager.clone());
let physical_planner = PhysicalPlanner::new(query_engine.clone());
let script_executor = ScriptExecutor::new(catalog_manager.clone(), query_engine.clone())
.await
.unwrap();
let heartbeat_task = Some(HeartbeatTask::new(
0,
"127.0.0.1:3302".to_string(),
meta_client.as_ref().unwrap().clone(),
));
Ok(Self {
query_engine,
sql_handler,
catalog_manager,
physical_planner,
script_executor,
meta_client,
heartbeat_task,
})
}
pub async fn with_mock_meta_client(opts: &DatanodeOptions) -> Result<Self> {
let object_store = new_object_store(&opts.storage).await?;
let log_store = create_local_file_log_store(opts).await?;
let meta_client = mock_meta_client().await;
let table_engine = Arc::new(DefaultEngine::new(
TableEngineConfig::default(),
EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(log_store),
object_store.clone(),
),
object_store,
));
// create remote catalog manager
let catalog_manager = Arc::new(catalog::remote::RemoteCatalogManager::new(
table_engine.clone(),
opts.node_id,
Arc::new(MetaKvBackend {
client: meta_client.clone(),
}),
));
let factory = QueryEngineFactory::new(catalog_manager.clone());
let query_engine = factory.query_engine().clone();
let script_executor =
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
let heartbeat_task =
HeartbeatTask::new(opts.node_id, opts.rpc_addr.clone(), meta_client.clone());
Ok(Self {
query_engine: query_engine.clone(),
sql_handler: SqlHandler::new(table_engine, catalog_manager.clone()),
catalog_manager,
physical_planner: PhysicalPlanner::new(query_engine),
script_executor,
meta_client: Some(meta_client),
heartbeat_task: Some(heartbeat_task),
})
}
}
async fn mock_meta_client() -> MetaClient {
let mock_info = meta_srv::mocks::mock_with_memstore().await;
let meta_srv::mocks::MockInfo {
server_addr,
channel_manager,
} = mock_info;
let id = (1000u64, 2000u64);
let mut meta_client = MetaClientBuilder::new(id.0, id.1)
.enable_heartbeat()
.enable_router()
.enable_store()
.channel_manager(channel_manager)
.build();
meta_client.start(&[&server_addr]).await.unwrap();
// required only when the heartbeat_client is enabled
meta_client.ask_leader().await.unwrap();
meta_client
}

View File

@@ -11,14 +11,14 @@ use futures::TryFutureExt;
use snafu::prelude::*;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
use crate::error::{self, ColumnDefaultConstraintSnafu, MissingFieldSnafu, Result};
use crate::error::{self, CatalogSnafu, ColumnDefaultConstraintSnafu, MissingFieldSnafu, Result};
use crate::instance::Instance;
use crate::server::grpc::handler::AdminResultBuilder;
use crate::sql::SqlRequest;
impl Instance {
pub(crate) async fn handle_create(&self, expr: CreateExpr) -> AdminResult {
let request = self.create_expr_to_request(expr);
let request = self.create_expr_to_request(expr).await;
let result = futures::future::ready(request)
.and_then(|request| self.sql_handler().execute(SqlRequest::Create(request)))
.await;
@@ -63,7 +63,7 @@ impl Instance {
}
}
fn create_expr_to_request(&self, expr: CreateExpr) -> Result<CreateTableRequest> {
async fn create_expr_to_request(&self, expr: CreateExpr) -> Result<CreateTableRequest> {
let schema = create_table_schema(&expr)?;
let primary_key_indices = expr
@@ -76,14 +76,26 @@ impl Instance {
})
.collect::<Result<Vec<usize>>>()?;
let table_id = self.catalog_manager().next_table_id();
let catalog_name = expr
.catalog_name
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = expr
.schema_name
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
let table_id = self
.catalog_manager()
.next_table_id()
.await
.context(CatalogSnafu)?;
let region_id = expr
.table_options
.get(&"region_id".to_string())
.unwrap()
.parse::<u32>()
.unwrap();
Ok(CreateTableRequest {
id: table_id,
catalog_name,
@@ -91,6 +103,7 @@ impl Instance {
table_name: expr.table_name,
desc: expr.desc,
schema,
region_numbers: vec![region_id],
primary_key_indices,
create_if_not_exists: expr.create_if_not_exists,
table_options: expr.table_options,
@@ -179,14 +192,15 @@ mod tests {
use super::*;
use crate::tests::test_util;
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_create_expr_to_request() {
common_telemetry::init_default_ut_logging();
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("create_expr_to_request");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
let expr = testing_create_expr();
let request = instance.create_expr_to_request(expr).unwrap();
let request = instance.create_expr_to_request(expr).await.unwrap();
assert_eq!(request.id, common_catalog::consts::MIN_USER_TABLE_ID);
assert_eq!(request.catalog_name, "greptime".to_string());
assert_eq!(request.schema_name, "public".to_string());
@@ -198,7 +212,7 @@ mod tests {
let mut expr = testing_create_expr();
expr.primary_keys = vec!["host".to_string(), "not-exist-column".to_string()];
let result = instance.create_expr_to_request(expr);
let result = instance.create_expr_to_request(expr).await;
assert!(result.is_err());
assert!(result
.unwrap_err()
@@ -291,6 +305,9 @@ mod tests {
default_constraint: None,
},
];
let table_options = [("region_id".to_string(), "0".to_string())]
.into_iter()
.collect::<HashMap<_, _>>();
CreateExpr {
catalog_name: None,
schema_name: None,
@@ -300,7 +317,7 @@ mod tests {
time_index: "ts".to_string(),
primary_keys: vec!["ts".to_string(), "host".to_string()],
create_if_not_exists: true,
table_options: HashMap::new(),
table_options,
}
}

View File

@@ -168,6 +168,7 @@ pub fn build_create_table_request(
create_if_not_exists: true,
primary_key_indices,
table_options: HashMap::new(),
region_numbers: vec![0],
});
}

View File

@@ -155,6 +155,7 @@ impl SqlHandler {
table_name,
desc: None,
schema,
region_numbers: vec![0],
primary_key_indices: primary_keys,
create_if_not_exists: stmt.if_not_exists,
table_options: HashMap::new(),

View File

@@ -34,7 +34,7 @@ impl SqlHandler {
stmt: Insert,
) -> Result<SqlRequest> {
let columns = stmt.columns();
let values = stmt.values();
let values = stmt.values().context(ParseSqlValueSnafu)?;
//TODO(dennis): table name may be in the form of `catalog.schema.table`,
// but we don't process it right now.
let table_name = stmt.table_name();

View File

@@ -24,7 +24,7 @@ async fn setup_grpc_server(name: &str, port: usize) -> (String, TestGuard, Arc<G
let (mut opts, guard) = test_util::create_tmp_dir_and_datanode_opts(name);
let addr = format!("127.0.0.1:{}", port);
opts.rpc_addr = addr.clone();
let instance = Arc::new(Instance::new(&opts).await.unwrap());
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
instance.start().await.unwrap();
let addr_cloned = addr.clone();
@@ -50,7 +50,7 @@ async fn setup_grpc_server(name: &str, port: usize) -> (String, TestGuard, Arc<G
(addr, guard, grpc_server)
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_auto_create_table() {
let (addr, _guard, grpc_server) = setup_grpc_server("auto_create_table", 3991).await;
@@ -116,8 +116,9 @@ fn expect_data() -> (Column, Column, Column, Column) {
)
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_insert_and_select() {
common_telemetry::init_default_ut_logging();
let (addr, _guard, grpc_server) = setup_grpc_server("insert_and_select", 3990).await;
let grpc_client = Client::with_urls(vec![addr]);
@@ -247,6 +248,6 @@ fn testing_create_expr() -> CreateExpr {
time_index: "ts".to_string(),
primary_keys: vec!["ts".to_string(), "host".to_string()],
create_if_not_exists: true,
table_options: HashMap::new(),
table_options: HashMap::from([("region_id".to_string(), "0".to_string())]),
}
}

View File

@@ -5,7 +5,6 @@ use axum::http::StatusCode;
use axum::Router;
use axum_test_helper::TestClient;
use datatypes::prelude::ConcreteDataType;
use servers::http::handler::ScriptExecution;
use servers::http::HttpServer;
use servers::server::Server;
use test_util::TestGuard;
@@ -15,7 +14,7 @@ use crate::tests::test_util;
async fn make_test_app(name: &str) -> (Router, TestGuard) {
let (opts, guard) = test_util::create_tmp_dir_and_datanode_opts(name);
let instance = Arc::new(Instance::new(&opts).await.unwrap());
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
instance.start().await.unwrap();
test_util::create_test_table(&instance, ConcreteDataType::timestamp_millis_datatype())
.await
@@ -24,7 +23,7 @@ async fn make_test_app(name: &str) -> (Router, TestGuard) {
(http_server.make_app(), guard)
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_sql_api() {
common_telemetry::init_default_ut_logging();
let (app, _guard) = make_test_app("sql_api").await;
@@ -84,7 +83,7 @@ async fn test_sql_api() {
);
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_metrics_api() {
common_telemetry::init_default_ut_logging();
common_telemetry::init_default_metrics_recorder();
@@ -99,28 +98,26 @@ async fn test_metrics_api() {
assert_eq!(res.status(), StatusCode::OK);
// Call metrics api
let res = client.get("/v1/metrics").send().await;
let res = client.get("/metrics").send().await;
assert_eq!(res.status(), StatusCode::OK);
let body = res.text().await;
assert!(body.contains("datanode_handle_sql_elapsed"));
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_scripts_api() {
common_telemetry::init_default_ut_logging();
let (app, _guard) = make_test_app("scripts_api").await;
let client = TestClient::new(app);
let res = client
.post("/v1/scripts")
.json(&ScriptExecution {
name: "test".to_string(),
script: r#"
.post("/v1/scripts?name=test")
.body(
r#"
@copr(sql='select number from numbers limit 10', args=['number'], returns=['n'])
def test(n):
return n + 1;
"#
.to_string(),
})
"#,
)
.send()
.await;
assert_eq!(res.status(), StatusCode::OK);

View File

@@ -1,20 +1,18 @@
use arrow::array::{Int64Array, UInt64Array};
use common_query::Output;
use common_recordbatch::util;
use datafusion::arrow_print;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow_array::StringArray;
use datatypes::prelude::ConcreteDataType;
use crate::instance::Instance;
use crate::tests::test_util;
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert() {
common_telemetry::init_default_ut_logging();
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_insert");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
test_util::create_test_table(&instance, ConcreteDataType::timestamp_millis_datatype())
@@ -33,12 +31,12 @@ async fn test_execute_insert() {
assert!(matches!(output, Output::AffectedRows(2)));
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_insert_query_with_i64_timestamp() {
common_telemetry::init_default_ut_logging();
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("insert_query_i64_timestamp");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
test_util::create_test_table(&instance, ConcreteDataType::int64_datatype())
@@ -72,10 +70,10 @@ async fn test_execute_insert_query_with_i64_timestamp() {
}
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_query() {
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_query");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
let output = instance
@@ -98,11 +96,11 @@ async fn test_execute_query() {
}
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_show_databases_tables() {
let (opts, _guard) =
test_util::create_tmp_dir_and_datanode_opts("execute_show_databases_tables");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
let output = instance.execute_sql("show databases").await.unwrap();
@@ -188,12 +186,12 @@ async fn test_execute_show_databases_tables() {
}
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
pub async fn test_execute_create() {
common_telemetry::init_default_ut_logging();
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("execute_create");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
let output = instance
@@ -212,13 +210,13 @@ pub async fn test_execute_create() {
assert!(matches!(output, Output::AffectedRows(1)));
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread")]
pub async fn test_create_table_illegal_timestamp_type() {
common_telemetry::init_default_ut_logging();
let (opts, _guard) =
test_util::create_tmp_dir_and_datanode_opts("create_table_illegal_timestamp_type");
let instance = Instance::new(&opts).await.unwrap();
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
let output = instance
@@ -244,6 +242,8 @@ pub async fn test_create_table_illegal_timestamp_type() {
#[tokio::test]
async fn test_alter_table() {
use datafusion::arrow_print;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
// TODO(LFC) Use real Mito engine when we can alter its region schema,
// and delete the `new_mock` method.
let instance = Instance::new_mock().await.unwrap();

View File

@@ -72,6 +72,7 @@ pub async fn create_test_table(instance: &Instance, ts_type: ConcreteDataType) -
create_if_not_exists: true,
primary_key_indices: vec![3, 0], // "host" and "ts" are primary keys
table_options: HashMap::new(),
region_numbers: vec![0],
},
)
.await

View File

@@ -7,6 +7,7 @@ edition = "2021"
api = { path = "../api" }
async-stream = "0.3"
async-trait = "0.1"
catalog = { path = "../catalog" }
client = { path = "../client" }
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
@@ -16,10 +17,14 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../datatypes" }
itertools = "0.10"
openmetrics-parser = "0.4"
prost = "0.11"
query = { path = "../query" }
serde = "1.0"
servers = { path = "../servers" }
snafu = { version = "0.7", features = ["backtraces"] }
@@ -34,8 +39,6 @@ version = "0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
[dev-dependencies]
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datanode = { path = "../datanode" }
futures = "0.3"
tempdir = "0.3"

View File

@@ -1,6 +1,11 @@
use std::any::Any;
use common_error::prelude::*;
use common_query::logical_plan::Expr;
use datafusion_common::ScalarValue;
use store_api::storage::RegionId;
use crate::mock::DatanodeId;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
@@ -83,6 +88,17 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display(
"Failed to convert DataFusion's ScalarValue: {:?}, source: {}",
value,
source
))]
ConvertScalarValue {
value: ScalarValue,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Failed to find partition column: {}", column_name))]
FindPartitionColumn {
column_name: String,
@@ -95,6 +111,24 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Failed to find regions by filters: {:?}", filters))]
FindRegions {
filters: Vec<Expr>,
backtrace: Backtrace,
},
#[snafu(display("Failed to find Datanode by region: {:?}", region))]
FindDatanode {
region: RegionId,
backtrace: Backtrace,
},
#[snafu(display("Failed to get Datanode instance: {:?}", datanode))]
DatanodeInstance {
datanode: DatanodeId,
backtrace: Backtrace,
},
#[snafu(display("Invaild InsertRequest, reason: {}", reason))]
InvalidInsertRequest {
reason: String,
@@ -107,6 +141,12 @@ pub enum Error {
actual: usize,
backtrace: Backtrace,
},
#[snafu(display("Failed to join task, source: {}", source))]
JoinTask {
source: common_runtime::JoinError,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -118,19 +158,31 @@ impl ErrorExt for Error {
| Error::ParseAddr { .. }
| Error::InvalidSql { .. }
| Error::FindRegion { .. }
| Error::FindRegions { .. }
| Error::InvalidInsertRequest { .. }
| Error::FindPartitionColumn { .. }
| Error::RegionKeysSize { .. } => StatusCode::InvalidArguments,
Error::RuntimeResource { source, .. } => source.status_code(),
Error::StartServer { source, .. } => source.status_code(),
Error::ParseSql { source } => source.status_code(),
Error::ConvertColumnDefaultConstraint { source, .. } => source.status_code(),
Error::ConvertColumnDefaultConstraint { source, .. }
| Error::ConvertScalarValue { source, .. } => source.status_code(),
Error::RequestDatanode { source } => source.status_code(),
Error::ColumnDataType { .. } => StatusCode::Internal,
Error::ColumnDataType { .. }
| Error::FindDatanode { .. }
| Error::DatanodeInstance { .. } => StatusCode::Internal,
Error::IllegalFrontendState { .. } | Error::IncompleteGrpcResult { .. } => {
StatusCode::Unexpected
}
Error::ExecOpentsdbPut { .. } => StatusCode::Internal,
Error::JoinTask { .. } => StatusCode::Unexpected,
}
}

View File

@@ -161,7 +161,10 @@ fn create_to_expr(create: CreateTable) -> Result<CreateExpr> {
primary_keys: find_primary_keys(&create.constraints)?,
create_if_not_exists: create.if_not_exists,
// TODO(LFC): Fill in other table options.
table_options: HashMap::from([("engine".to_string(), create.engine)]),
table_options: HashMap::from([
("engine".to_string(), create.engine),
("region_id".to_string(), "0".to_string()),
]),
..Default::default()
};
Ok(expr)
@@ -282,8 +285,6 @@ mod tests {
admin_expr, admin_result, column, column::SemanticType, object_expr, object_result,
select_expr, Column, ExprHeader, MutateResult, SelectExpr,
};
use datafusion::arrow_print;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::value::Value;
@@ -324,12 +325,7 @@ mod tests {
let output = SqlQueryHandler::do_query(&*instance, sql).await.unwrap();
match output {
Output::RecordBatches(recordbatches) => {
let recordbatches = recordbatches
.take()
.into_iter()
.map(|r| r.df_recordbatch)
.collect::<Vec<DfRecordBatch>>();
let pretty_print = arrow_print::write(&recordbatches);
let pretty_print = recordbatches.pretty_print();
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
let expected = vec![
"+----------------+---------------------+-----+--------+-----------+",
@@ -349,12 +345,7 @@ mod tests {
let output = SqlQueryHandler::do_query(&*instance, sql).await.unwrap();
match output {
Output::RecordBatches(recordbatches) => {
let recordbatches = recordbatches
.take()
.into_iter()
.map(|r| r.df_recordbatch)
.collect::<Vec<DfRecordBatch>>();
let pretty_print = arrow_print::write(&recordbatches);
let pretty_print = recordbatches.pretty_print();
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
let expected = vec![
"+----------------+---------------------+-----+--------+-----------+",
@@ -550,12 +541,15 @@ mod tests {
default_constraint: None,
},
];
let mut table_options = HashMap::with_capacity(1);
table_options.insert("region_id".to_string(), "0".to_string());
CreateExpr {
table_name: "demo".to_string(),
column_defs,
time_index: "ts".to_string(),
primary_keys: vec!["ts".to_string(), "host".to_string()],
create_if_not_exists: true,
table_options,
..Default::default()
}
}

View File

@@ -5,6 +5,7 @@ pub mod frontend;
pub mod grpc;
pub mod influxdb;
pub mod instance;
pub(crate) mod mock;
pub mod mysql;
pub mod opentsdb;
pub mod partitioning;
@@ -12,5 +13,6 @@ pub mod postgres;
pub mod prometheus;
mod server;
pub mod spliter;
mod table;
#[cfg(test)]
mod tests;

175
src/frontend/src/mock.rs Normal file
View File

@@ -0,0 +1,175 @@
// FIXME(LFC): no mock
use std::fmt::Formatter;
use std::sync::Arc;
use api::v1::InsertExpr;
use catalog::CatalogManagerRef;
use client::ObjectResult;
use client::{Database, Select};
use common_query::prelude::Expr;
use common_query::Output;
use common_recordbatch::util;
use common_recordbatch::RecordBatches;
use datafusion::logical_plan::{LogicalPlan as DfLogicPlan, LogicalPlanBuilder};
use datafusion_expr::Expr as DfExpr;
use datatypes::prelude::Value;
use datatypes::schema::SchemaRef;
use query::plan::LogicalPlan;
use table::table::adapter::DfTableProviderAdapter;
pub(crate) type DatanodeId = u64;
#[derive(Clone)]
pub(crate) struct DatanodeInstance {
pub(crate) datanode_id: DatanodeId,
catalog_manager: CatalogManagerRef,
db: Database,
}
impl std::fmt::Debug for DatanodeInstance {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str("DatanodeInstance")
}
}
impl DatanodeInstance {
#[allow(dead_code)]
pub(crate) fn new(
datanode_id: DatanodeId,
catalog_manager: CatalogManagerRef,
db: Database,
) -> Self {
Self {
datanode_id,
catalog_manager,
db,
}
}
pub(crate) async fn grpc_insert(&self, request: InsertExpr) -> client::Result<ObjectResult> {
self.db.insert(request).await
}
#[allow(clippy::print_stdout)]
pub(crate) async fn grpc_table_scan(&self, plan: TableScanPlan) -> RecordBatches {
let logical_plan = self.build_logical_plan(&plan);
// TODO(LFC): Directly pass in logical plan to GRPC interface when our substrait codec supports filter.
let sql = to_sql(logical_plan);
println!("executing sql \"{}\" in datanode {}", sql, self.datanode_id);
let result = self.db.select(Select::Sql(sql)).await.unwrap();
let output: Output = result.try_into().unwrap();
let recordbatches = match output {
Output::Stream(stream) => util::collect(stream).await.unwrap(),
Output::RecordBatches(x) => x.take(),
_ => unreachable!(),
};
let schema = recordbatches.first().unwrap().schema.clone();
RecordBatches::try_new(schema, recordbatches).unwrap()
}
fn build_logical_plan(&self, table_scan: &TableScanPlan) -> LogicalPlan {
let catalog = self.catalog_manager.catalog("greptime").unwrap().unwrap();
let schema = catalog.schema("public").unwrap().unwrap();
let table = schema.table(&table_scan.table_name).unwrap().unwrap();
let table_provider = Arc::new(DfTableProviderAdapter::new(table.clone()));
let mut builder = LogicalPlanBuilder::scan_with_filters(
table_scan.table_name.clone(),
table_provider,
table_scan.projection.clone(),
table_scan
.filters
.iter()
.map(|x| x.df_expr().clone())
.collect::<Vec<_>>(),
)
.unwrap();
if let Some(limit) = table_scan.limit {
builder = builder.limit(limit).unwrap();
}
let plan = builder.build().unwrap();
LogicalPlan::DfPlan(plan)
}
}
#[derive(Debug)]
pub(crate) struct TableScanPlan {
pub table_name: String,
pub projection: Option<Vec<usize>>,
pub filters: Vec<Expr>,
pub limit: Option<usize>,
}
fn to_sql(plan: LogicalPlan) -> String {
let LogicalPlan::DfPlan(plan) = plan;
let table_scan = match plan {
DfLogicPlan::TableScan(table_scan) => table_scan,
_ => unreachable!("unknown plan: {:?}", plan),
};
let schema: SchemaRef = Arc::new(table_scan.source.schema().try_into().unwrap());
let projection = table_scan
.projection
.map(|x| {
x.iter()
.map(|i| schema.column_name_by_index(*i).to_string())
.collect::<Vec<String>>()
})
.unwrap_or_else(|| {
schema
.column_schemas()
.iter()
.map(|x| x.name.clone())
.collect::<Vec<String>>()
})
.join(", ");
let mut sql = format!("select {} from {}", projection, &table_scan.table_name);
let filters = table_scan
.filters
.iter()
.map(expr_to_sql)
.collect::<Vec<String>>()
.join(" AND ");
if !filters.is_empty() {
sql.push_str(" where ");
sql.push_str(&filters);
}
if let Some(limit) = table_scan.limit {
sql.push_str(" limit ");
sql.push_str(&limit.to_string());
}
sql
}
fn expr_to_sql(expr: &DfExpr) -> String {
match expr {
DfExpr::BinaryExpr {
ref left,
ref right,
ref op,
} => format!(
"{} {} {}",
expr_to_sql(left.as_ref()),
op,
expr_to_sql(right.as_ref())
),
DfExpr::Column(c) => c.name.clone(),
DfExpr::Literal(sv) => {
let v: Value = Value::try_from(sv.clone()).unwrap();
if v.data_type().is_string() {
format!("'{}'", sv)
} else {
format!("{}", sv)
}
}
_ => unimplemented!("not implemented for {:?}", expr),
}
}
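
For illustration only (not part of the change): `to_sql` renders the pruned table scan as plain SQL text, so a scan of the `dist_numbers` test table used later in this compare view, with projection `[0, 1]`, the single filter `a < 10` and a limit of 5, comes out as one `select` statement. The helper below just mirrors that string assembly for those assumed inputs; it is not the real code path.

// Illustrative mirror of the string assembly in `to_sql` above, for one assumed input:
// projection Some([0, 1]) over columns ["a", "row_id"], filter `a < 10`, limit 5.
fn example_sql() -> String {
    let projection = ["a", "row_id"].join(", ");
    let mut sql = format!("select {} from {}", projection, "dist_numbers");
    sql.push_str(" where ");
    sql.push_str("a < 10");
    sql.push_str(" limit ");
    sql.push_str("5");
    sql // "select a, row_id from dist_numbers where a < 10 limit 5"
}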

View File

@@ -1,13 +1,16 @@
mod columns;
mod range;
pub(crate) mod range;
use std::fmt::Debug;
use std::sync::Arc;
pub use datafusion_expr::Operator;
use datatypes::prelude::Value;
use store_api::storage::RegionId;
pub trait PartitionRule {
pub(crate) type PartitionRuleRef<E> = Arc<dyn PartitionRule<Error = E>>;
pub trait PartitionRule: Sync + Send {
type Error: Debug;
fn partition_columns(&self) -> Vec<String>;
@@ -36,6 +39,14 @@ pub struct PartitionExpr {
}
impl PartitionExpr {
pub(crate) fn new(column: impl Into<String>, op: Operator, value: Value) -> Self {
Self {
column: column.into(),
op,
value,
}
}
pub fn value(&self) -> &Value {
&self.value
}

View File

@@ -67,6 +67,19 @@ impl RangeColumnsPartitionRule {
value_lists: Vec<Vec<PartitionBound>>,
regions: Vec<RegionId>,
) -> Self {
// An example range columns partition rule to calculate the first column bounds and regions:
// SQL:
// PARTITION p1 VALUES LESS THAN (10, 'c'),
// PARTITION p2 VALUES LESS THAN (20, 'h'),
// PARTITION p3 VALUES LESS THAN (20, 'm'),
// PARTITION p4 VALUES LESS THAN (50, 'p'),
// PARTITION p5 VALUES LESS THAN (MAXVALUE, 'x'),
// PARTITION p6 VALUES LESS THAN (MAXVALUE, MAXVALUE),
// first column bounds:
// [10, 20, 50, MAXVALUE]
// first column regions:
// [[1], [2, 3], [4], [5, 6]]
let first_column_bounds = value_lists
.iter()
.map(|x| &x[0])
@@ -136,16 +149,6 @@ impl PartitionRule for RangeColumnsPartitionRule {
// "unwrap" is safe because we have checked that "self.column_list" contains all columns in "exprs"
.unwrap();
// an example of bounds and regions:
// SQL:
// PARTITION p1 VALUES LESS THAN (10, 'c'),
// PARTITION p2 VALUES LESS THAN (20, 'h'),
// PARTITION p3 VALUES LESS THAN (20, 'm'),
// PARTITION p4 VALUES LESS THAN (50, 'p'),
// PARTITION p5 VALUES LESS THAN (MAXVALUE, 'x'),
// PARTITION p6 VALUES LESS THAN (MAXVALUE, MAXVALUE),
// bounds: [10, 20, 50, MAXVALUE]
// regions: [[1], [2, 3], [4], [5, 6]]
let regions = &self.first_column_regions;
match self
.first_column_bounds

View File

@@ -41,7 +41,7 @@ use crate::partitioning::{Operator, PartitionExpr, PartitionRule, RegionId};
///
// TODO(LFC): Further clarify "partition" and "region".
// Could be creating an extra layer between partition and region.
struct RangePartitionRule {
pub(crate) struct RangePartitionRule {
column_name: String,
// Does not store the last "MAXVALUE" bound, because this way our binary search for finding
// partitions is easier (besides, it's hard to represent "MAXVALUE" in our `Value`).
@@ -51,6 +51,20 @@ struct RangePartitionRule {
}
impl RangePartitionRule {
// FIXME(LFC): no allow, for clippy temporarily
#[allow(dead_code)]
pub(crate) fn new(
column_name: impl Into<String>,
bounds: Vec<Value>,
regions: Vec<RegionId>,
) -> Self {
Self {
column_name: column_name.into(),
bounds,
regions,
}
}
fn column_name(&self) -> &String {
&self.column_name
}
@@ -72,6 +86,9 @@ impl PartitionRule for RangePartitionRule {
}
fn find_regions(&self, exprs: &[PartitionExpr]) -> Result<Vec<RegionId>, Self::Error> {
if exprs.is_empty() {
return Ok(self.regions.clone());
}
debug_assert_eq!(
exprs.len(),
1,
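
The hunk is cut off above, so as a side note (illustrative only, this is not the hidden implementation): dropping the trailing "MAXVALUE" bound means a value can be mapped to its region with a plain binary search over `bounds`, with `regions` holding exactly one more entry than `bounds`. A sketch using the bounds and regions from the tests further down in this compare view:

// Illustrative only: map a single value to its region under
// PARTITION BY RANGE (a) with upper bounds [10, 20, 50] and regions [1, 2, 3, 4].
// Any value >= 50 falls into the implicit "MAXVALUE" slot, i.e. the last region.
fn region_for_value(bounds: &[i32], regions: &[u64], v: i32) -> u64 {
    // index of the first bound that is strictly greater than `v`
    let idx = bounds.partition_point(|b| *b <= v);
    regions[idx]
}
// region_for_value(&[10, 20, 50], &[1, 2, 3, 4], 5)  == 1
// region_for_value(&[10, 20, 50], &[1, 2, 3, 4], 45) == 3
// region_for_value(&[10, 20, 50], &[1, 2, 3, 4], 99) == 4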

View File

@@ -8,23 +8,21 @@ use snafu::OptionExt;
use store_api::storage::RegionId;
use table::requests::InsertRequest;
use crate::error::Error;
use crate::error::FindPartitionColumnSnafu;
use crate::error::FindRegionSnafu;
use crate::error::InvalidInsertRequestSnafu;
use crate::error::Result;
use crate::partitioning::PartitionRule;
use crate::partitioning::PartitionRuleRef;
pub type DistInsertRequest = HashMap<RegionId, InsertRequest>;
pub struct WriteSpliter<'a, P> {
partition_rule: &'a P,
pub struct WriteSpliter {
partition_rule: PartitionRuleRef<Error>,
}
impl<'a, P> WriteSpliter<'a, P>
where
P: PartitionRule,
{
pub fn with_patition_rule(rule: &'a P) -> Self {
impl WriteSpliter {
pub fn with_patition_rule(rule: PartitionRuleRef<Error>) -> Self {
Self {
partition_rule: rule,
}
@@ -156,7 +154,7 @@ fn partition_insert_request(
#[cfg(test)]
mod tests {
use std::{collections::HashMap, result::Result};
use std::{collections::HashMap, result::Result, sync::Arc};
use datatypes::{
data_type::ConcreteDataType,
@@ -167,10 +165,13 @@ mod tests {
use table::requests::InsertRequest;
use super::{
check_req, find_partitioning_values, partition_insert_request, partition_values,
PartitionRule, RegionId, WriteSpliter,
check_req, find_partitioning_values, partition_insert_request, partition_values, RegionId,
WriteSpliter,
};
use crate::{
error::Error,
partitioning::{PartitionExpr, PartitionRule, PartitionRuleRef},
};
use crate::partitioning::PartitionExpr;
#[test]
fn test_insert_req_check() {
@@ -186,7 +187,8 @@ mod tests {
#[test]
fn test_writer_spliter() {
let insert = mock_insert_request();
let spliter = WriteSpliter::with_patition_rule(&MockPartitionRule);
let rule = Arc::new(MockPartitionRule) as PartitionRuleRef<Error>;
let spliter = WriteSpliter::with_patition_rule(rule);
let ret = spliter.split(insert).unwrap();
assert_eq!(2, ret.len());
@@ -406,7 +408,7 @@ mod tests {
// PARTITION r1 VALUES IN(2, 3),
// );
impl PartitionRule for MockPartitionRule {
type Error = String;
type Error = Error;
fn partition_columns(&self) -> Vec<String> {
vec!["id".to_string()]

622
src/frontend/src/table.rs Normal file
View File

@@ -0,0 +1,622 @@
mod insert;
use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use async_trait::async_trait;
use common_query::error::Result as QueryResult;
use common_query::logical_plan::Expr;
use common_query::physical_plan::{PhysicalPlan, PhysicalPlanRef};
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::logical_plan::Expr as DfExpr;
use datafusion::physical_plan::Partitioning;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use snafu::prelude::*;
use store_api::storage::RegionId;
use table::error::Error as TableError;
use table::metadata::{FilterPushDownType, TableInfoRef};
use table::requests::InsertRequest;
use table::Table;
use tokio::sync::RwLock;
use crate::error::{self, Error, Result};
use crate::mock::{DatanodeId, DatanodeInstance, TableScanPlan};
use crate::partitioning::{Operator, PartitionExpr, PartitionRuleRef};
use crate::spliter::WriteSpliter;
struct DistTable {
table_name: String,
schema: SchemaRef,
partition_rule: PartitionRuleRef<Error>,
region_dist_map: HashMap<RegionId, DatanodeId>,
datanode_instances: HashMap<DatanodeId, DatanodeInstance>,
}
#[async_trait]
impl Table for DistTable {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn table_info(&self) -> TableInfoRef {
unimplemented!()
}
async fn insert(&self, request: InsertRequest) -> table::Result<usize> {
let spliter = WriteSpliter::with_patition_rule(self.partition_rule.clone());
let inserts = spliter.split(request).map_err(TableError::new)?;
let result = match self.dist_insert(inserts).await.map_err(TableError::new)? {
client::ObjectResult::Select(_) => unreachable!(),
client::ObjectResult::Mutate(result) => result,
};
Ok(result.success as usize)
}
async fn scan(
&self,
projection: &Option<Vec<usize>>,
filters: &[Expr],
limit: Option<usize>,
) -> table::Result<PhysicalPlanRef> {
let regions = self.find_regions(filters).map_err(TableError::new)?;
let datanodes = self.find_datanodes(regions).map_err(TableError::new)?;
let partition_execs = datanodes
.iter()
.map(|(datanode, _regions)| {
let datanode_instance = self
.datanode_instances
.get(datanode)
.context(error::DatanodeInstanceSnafu {
datanode: *datanode,
})?
.clone();
// TODO(LFC): Pass in "regions" when Datanode supports multi regions for a table.
Ok(PartitionExec {
table_name: self.table_name.clone(),
datanode_instance,
projection: projection.clone(),
filters: filters.to_vec(),
limit,
batches: Arc::new(RwLock::new(None)),
})
})
.collect::<Result<Vec<PartitionExec>>>()
.map_err(TableError::new)?;
let dist_scan = DistTableScan {
schema: project_schema(self.schema(), projection),
partition_execs,
};
Ok(Arc::new(dist_scan))
}
fn supports_filter_pushdown(&self, _filter: &Expr) -> table::Result<FilterPushDownType> {
Ok(FilterPushDownType::Inexact)
}
}
impl DistTable {
// TODO(LFC): Finding regions is currently not very efficient; it should be looked into further.
fn find_regions(&self, filters: &[Expr]) -> Result<Vec<RegionId>> {
let regions = if let Some((first, rest)) = filters.split_first() {
let mut target = self.find_regions0(first)?;
for filter in rest {
let regions = self.find_regions0(filter)?;
// When all filters are provided together, it implicitly means "all filters must be
// satisfied", so we intersect the per-filter results here.
target.retain(|x| regions.contains(x));
// Fail fast: an empty set intersected with anything is still empty.
if target.is_empty() {
break;
}
}
target.into_iter().collect::<Vec<RegionId>>()
} else {
self.partition_rule.find_regions(&[])?
};
ensure!(
!regions.is_empty(),
error::FindRegionsSnafu {
filters: filters.to_vec()
}
);
Ok(regions)
}
// TODO(LFC): Support other types of filter expr:
// - BETWEEN and IN (maybe more)
// - expr with arithmetic like "a + 1 < 10" (should have been optimized in logic plan?)
// - exprs that are neither comparisons nor "AND"/"OR" operations, for example, "a LIKE x"
fn find_regions0(&self, filter: &Expr) -> Result<HashSet<RegionId>> {
let expr = filter.df_expr();
match expr {
DfExpr::BinaryExpr { left, op, right } if is_compare_op(op) => {
let column_op_value = match (left.as_ref(), right.as_ref()) {
(DfExpr::Column(c), DfExpr::Literal(v)) => Some((&c.name, *op, v)),
(DfExpr::Literal(v), DfExpr::Column(c)) => {
Some((&c.name, reverse_operator(op), v))
}
_ => None,
};
if let Some((column, op, sv)) = column_op_value {
let value = sv
.clone()
.try_into()
.with_context(|_| error::ConvertScalarValueSnafu { value: sv.clone() })?;
return Ok(self
.partition_rule
.find_regions(&[PartitionExpr::new(column, op, value)])?
.into_iter()
.collect::<HashSet<RegionId>>());
}
}
DfExpr::BinaryExpr { left, op, right }
if matches!(op, Operator::And | Operator::Or) =>
{
let left_regions = self.find_regions0(&(*left.clone()).into())?;
let right_regions = self.find_regions0(&(*right.clone()).into())?;
let regions = match op {
Operator::And => left_regions
.intersection(&right_regions)
.cloned()
.collect::<HashSet<RegionId>>(),
Operator::Or => left_regions
.union(&right_regions)
.cloned()
.collect::<HashSet<RegionId>>(),
_ => unreachable!(),
};
return Ok(regions);
}
_ => (),
}
// Returns all regions for unsupported partition exprs, as a safety hatch.
Ok(self
.partition_rule
.find_regions(&[])?
.into_iter()
.collect::<HashSet<RegionId>>())
}
fn find_datanodes(&self, regions: Vec<RegionId>) -> Result<HashMap<DatanodeId, Vec<RegionId>>> {
let mut datanodes = HashMap::new();
for region in regions.iter() {
let datanode = *self
.region_dist_map
.get(region)
.context(error::FindDatanodeSnafu { region: *region })?;
datanodes
.entry(datanode)
.or_insert_with(Vec::new)
.push(*region);
}
Ok(datanodes)
}
}
fn project_schema(table_schema: SchemaRef, projection: &Option<Vec<usize>>) -> SchemaRef {
if let Some(projection) = &projection {
let columns = table_schema.column_schemas();
let projected = projection
.iter()
.map(|x| columns[*x].clone())
.collect::<Vec<ColumnSchema>>();
Arc::new(Schema::new(projected))
} else {
table_schema
}
}
fn is_compare_op(op: &Operator) -> bool {
matches!(
*op,
Operator::Eq
| Operator::NotEq
| Operator::Lt
| Operator::LtEq
| Operator::Gt
| Operator::GtEq
)
}
fn reverse_operator(op: &Operator) -> Operator {
match *op {
Operator::Lt => Operator::Gt,
Operator::Gt => Operator::Lt,
Operator::LtEq => Operator::GtEq,
Operator::GtEq => Operator::LtEq,
_ => *op,
}
}
#[derive(Debug)]
struct DistTableScan {
schema: SchemaRef,
partition_execs: Vec<PartitionExec>,
}
#[async_trait]
impl PhysicalPlan for DistTableScan {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_partitioning(&self) -> Partitioning {
Partitioning::UnknownPartitioning(self.partition_execs.len())
}
fn children(&self) -> Vec<PhysicalPlanRef> {
vec![]
}
fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> QueryResult<PhysicalPlanRef> {
unimplemented!()
}
async fn execute(
&self,
partition: usize,
_runtime: Arc<RuntimeEnv>,
) -> QueryResult<SendableRecordBatchStream> {
let exec = &self.partition_execs[partition];
exec.maybe_init().await;
Ok(exec.as_stream().await)
}
}
#[derive(Debug)]
struct PartitionExec {
table_name: String,
datanode_instance: DatanodeInstance,
projection: Option<Vec<usize>>,
filters: Vec<Expr>,
limit: Option<usize>,
batches: Arc<RwLock<Option<RecordBatches>>>,
}
impl PartitionExec {
async fn maybe_init(&self) {
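// Double-checked initialization: take the cheap read lock first; only when the
// batches are still missing do we take the write lock and re-check under it, so
// concurrent callers trigger the datanode scan at most once.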
if self.batches.read().await.is_some() {
return;
}
let mut batches = self.batches.write().await;
if batches.is_some() {
return;
}
let plan = TableScanPlan {
table_name: self.table_name.clone(),
projection: self.projection.clone(),
filters: self.filters.clone(),
limit: self.limit,
};
let result = self.datanode_instance.grpc_table_scan(plan).await;
let _ = batches.insert(result);
}
async fn as_stream(&self) -> SendableRecordBatchStream {
let batches = self.batches.read().await;
batches
.as_ref()
.expect("should have been initialized in \"maybe_init\"")
.as_stream()
}
}
// FIXME(LFC): no allow, for clippy temporarily
#[allow(clippy::print_stdout)]
#[cfg(test)]
mod test {
use catalog::RegisterTableRequest;
use client::Database;
use common_recordbatch::{util, RecordBatch};
use datafusion::arrow_print;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datafusion_expr::expr_fn::col;
use datafusion_expr::expr_fn::{and, binary_expr, or};
use datafusion_expr::lit;
use datanode::datanode::{DatanodeOptions, ObjectStoreConfig};
use datanode::instance::Instance;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::{Int32Vector, UInt32Vector};
use table::test_util::MemTable;
use table::TableRef;
use tempdir::TempDir;
use super::*;
use crate::partitioning::range::RangePartitionRule;
#[tokio::test(flavor = "multi_thread")]
async fn test_dist_table_scan() {
let table = Arc::new(new_dist_table().await);
// should scan all regions
// select * from numbers
let projection = None;
let filters = vec![];
exec_table_scan(table.clone(), projection, filters, None).await;
println!();
// should scan only region 1
// select a, row_id from numbers where a < 10
let projection = Some(vec![0, 1]);
let filters = vec![binary_expr(col("a"), Operator::Lt, lit(10)).into()];
exec_table_scan(table.clone(), projection, filters, None).await;
println!();
// should scan region 1 and 2
// select a, row_id from numbers where a < 15
let projection = Some(vec![0, 1]);
let filters = vec![binary_expr(col("a"), Operator::Lt, lit(15)).into()];
exec_table_scan(table.clone(), projection, filters, None).await;
println!();
// should scan region 2 and 3
// select a, row_id from numbers where a < 40 and a >= 10
let projection = Some(vec![0, 1]);
let filters = vec![and(
binary_expr(col("a"), Operator::Lt, lit(40)),
binary_expr(col("a"), Operator::GtEq, lit(10)),
)
.into()];
exec_table_scan(table.clone(), projection, filters, None).await;
println!();
// should scan all regions
// select a, row_id from numbers where a < 1000 and row_id == 1
let projection = Some(vec![0, 1]);
let filters = vec![and(
binary_expr(col("a"), Operator::Lt, lit(1000)),
binary_expr(col("row_id"), Operator::Eq, lit(1)),
)
.into()];
exec_table_scan(table.clone(), projection, filters, None).await;
}
async fn exec_table_scan(
table: TableRef,
projection: Option<Vec<usize>>,
filters: Vec<Expr>,
limit: Option<usize>,
) {
let table_scan = table
.scan(&projection, filters.as_slice(), limit)
.await
.unwrap();
for partition in 0..table_scan.output_partitioning().partition_count() {
let result = table_scan
.execute(partition, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let recordbatches = util::collect(result).await.unwrap();
let df_recordbatch = recordbatches
.into_iter()
.map(|r| r.df_recordbatch)
.collect::<Vec<DfRecordBatch>>();
println!("DataFusion partition {}:", partition);
let pretty_print = arrow_print::write(&df_recordbatch);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
pretty_print.iter().for_each(|x| println!("{}", x));
}
}
async fn new_dist_table() -> DistTable {
let schema = Arc::new(Schema::new(vec![
ColumnSchema::new("a", ConcreteDataType::int32_datatype(), true),
ColumnSchema::new("row_id", ConcreteDataType::uint32_datatype(), true),
]));
// PARTITION BY RANGE (a) (
// PARTITION r1 VALUES LESS THAN (10),
// PARTITION r2 VALUES LESS THAN (20),
// PARTITION r3 VALUES LESS THAN (50),
// PARTITION r4 VALUES LESS THAN (MAXVALUE),
// )
let partition_rule = RangePartitionRule::new(
"a",
vec![10_i32.into(), 20_i32.into(), 50_i32.into()],
vec![1_u64, 2, 3, 4],
);
let table1 = new_memtable(schema.clone(), (0..5).collect::<Vec<i32>>());
let table2 = new_memtable(schema.clone(), (10..15).collect::<Vec<i32>>());
let table3 = new_memtable(schema.clone(), (30..35).collect::<Vec<i32>>());
let table4 = new_memtable(schema.clone(), (100..105).collect::<Vec<i32>>());
let instance1 = create_datanode_instance(1, table1).await;
let instance2 = create_datanode_instance(2, table2).await;
let instance3 = create_datanode_instance(3, table3).await;
let instance4 = create_datanode_instance(4, table4).await;
let datanode_instances = HashMap::from([
(instance1.datanode_id, instance1),
(instance2.datanode_id, instance2),
(instance3.datanode_id, instance3),
(instance4.datanode_id, instance4),
]);
DistTable {
table_name: "dist_numbers".to_string(),
schema,
partition_rule: Arc::new(partition_rule),
region_dist_map: HashMap::from([(1_u64, 1), (2_u64, 2), (3_u64, 3), (4_u64, 4)]),
datanode_instances,
}
}
fn new_memtable(schema: SchemaRef, data: Vec<i32>) -> MemTable {
let rows = data.len() as u32;
let columns: Vec<VectorRef> = vec![
// column "a"
Arc::new(Int32Vector::from_slice(data)),
// column "row_id"
Arc::new(UInt32Vector::from_slice((1..=rows).collect::<Vec<u32>>())),
];
let recordbatch = RecordBatch::new(schema, columns).unwrap();
MemTable::new("dist_numbers", recordbatch)
}
async fn create_datanode_instance(
datanode_id: DatanodeId,
table: MemTable,
) -> DatanodeInstance {
let wal_tmp_dir = TempDir::new_in("/tmp", "gt_wal_dist_table_test").unwrap();
let data_tmp_dir = TempDir::new_in("/tmp", "gt_data_dist_table_test").unwrap();
let opts = DatanodeOptions {
wal_dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
storage: ObjectStoreConfig::File {
data_dir: data_tmp_dir.path().to_str().unwrap().to_string(),
},
..Default::default()
};
let instance = Arc::new(Instance::with_mock_meta_client(&opts).await.unwrap());
instance.start().await.unwrap();
let catalog_manager = instance.catalog_manager().clone();
catalog_manager
.register_table(RegisterTableRequest {
catalog: "greptime".to_string(),
schema: "public".to_string(),
table_name: table.table_name().to_string(),
table_id: 1234,
table: Arc::new(table),
})
.await
.unwrap();
let client = crate::tests::create_datanode_client(instance).await;
DatanodeInstance::new(
datanode_id,
catalog_manager,
Database::new("greptime", client),
)
}
#[tokio::test(flavor = "multi_thread")]
async fn test_find_regions() {
let table = new_dist_table().await;
let test = |filters: Vec<Expr>, expect_regions: Vec<u64>| {
let mut regions = table.find_regions(filters.as_slice()).unwrap();
regions.sort();
assert_eq!(regions, expect_regions);
};
// test simple filter
test(
vec![binary_expr(col("a"), Operator::Lt, lit(10)).into()], // a < 10
vec![1],
);
test(
vec![binary_expr(col("a"), Operator::LtEq, lit(10)).into()], // a <= 10
vec![1, 2],
);
test(
vec![binary_expr(lit(20), Operator::Gt, col("a")).into()], // 20 > a
vec![1, 2],
);
test(
vec![binary_expr(lit(20), Operator::GtEq, col("a")).into()], // 20 >= a
vec![1, 2, 3],
);
test(
vec![binary_expr(lit(45), Operator::Eq, col("a")).into()], // 45 == a
vec![3],
);
test(
vec![binary_expr(col("a"), Operator::NotEq, lit(45)).into()], // a != 45
vec![1, 2, 3, 4],
);
test(
vec![binary_expr(col("a"), Operator::Gt, lit(50)).into()], // a > 50
vec![4],
);
// test multiple filters
test(
vec![
binary_expr(col("a"), Operator::Gt, lit(10)).into(),
binary_expr(col("a"), Operator::Gt, lit(50)).into(),
], // [a > 10, a > 50]
vec![4],
);
// test finding all regions when provided with not supported filters or not partition column
test(
vec![binary_expr(col("row_id"), Operator::LtEq, lit(123)).into()], // row_id <= 123
vec![1, 2, 3, 4],
);
test(
vec![binary_expr(col("b"), Operator::Like, lit("foo%")).into()], // b LIKE 'foo%'
vec![1, 2, 3, 4],
);
test(
vec![binary_expr(col("c"), Operator::Gt, lit(123)).into()], // c > 789
vec![1, 2, 3, 4],
);
// test complex "AND" or "OR" filters
test(
vec![and(
binary_expr(col("row_id"), Operator::Lt, lit(1)),
or(
binary_expr(col("row_id"), Operator::Lt, lit(1)),
binary_expr(col("a"), Operator::Lt, lit(1)),
),
)
.into()], // row_id < 1 AND (row_id < 1 OR a < 1)
vec![1, 2, 3, 4],
);
test(
vec![or(
binary_expr(col("a"), Operator::Lt, lit(20)),
binary_expr(col("a"), Operator::GtEq, lit(20)),
)
.into()], // a < 20 OR a >= 20
vec![1, 2, 3, 4],
);
test(
vec![and(
binary_expr(col("a"), Operator::Lt, lit(20)),
binary_expr(col("a"), Operator::Lt, lit(50)),
)
.into()], // a < 20 AND a < 50
vec![1, 2],
);
// test failed to find regions by contradictory filters
let regions = table.find_regions(
vec![and(
binary_expr(col("a"), Operator::Lt, lit(20)),
binary_expr(col("a"), Operator::GtEq, lit(20)),
)
.into()]
.as_slice(),
); // a < 20 AND a >= 20
assert!(matches!(
regions.unwrap_err(),
error::Error::FindRegions { .. }
));
}
}

View File

@@ -0,0 +1,202 @@
use std::collections::HashMap;
use api::helper::ColumnDataTypeWrapper;
use api::v1::codec;
use api::v1::insert_expr;
use api::v1::insert_expr::Expr;
use api::v1::Column;
use api::v1::InsertExpr;
use api::v1::MutateResult;
use client::ObjectResult;
use snafu::ensure;
use snafu::OptionExt;
use snafu::ResultExt;
use store_api::storage::RegionId;
use table::requests::InsertRequest;
use super::DistTable;
use crate::error;
use crate::error::Result;
impl DistTable {
pub async fn dist_insert(
&self,
inserts: HashMap<RegionId, InsertRequest>,
) -> Result<ObjectResult> {
let mut joins = Vec::with_capacity(inserts.len());
for (region_id, insert) in inserts {
let db = self
.region_dist_map
.get(&region_id)
.context(error::FindDatanodeSnafu { region: region_id })?;
let instance = self
.datanode_instances
.get(db)
.context(error::DatanodeInstanceSnafu { datanode: *db })?;
let instance = instance.clone();
// TODO(fys): a separate runtime should be used here.
let join = tokio::spawn(async move {
instance
.grpc_insert(to_insert_expr(region_id, insert)?)
.await
.context(error::RequestDatanodeSnafu)
});
joins.push(join);
}
let mut success = 0;
let mut failure = 0;
for join in joins {
let object_result = join.await.context(error::JoinTaskSnafu)??;
let result = match object_result {
client::ObjectResult::Select(_) => unreachable!(),
client::ObjectResult::Mutate(result) => result,
};
success += result.success;
failure += result.failure;
}
Ok(ObjectResult::Mutate(MutateResult { success, failure }))
}
}
fn to_insert_expr(region_id: RegionId, insert: InsertRequest) -> Result<InsertExpr> {
let mut row_count = None;
let columns = insert
.columns_values
.into_iter()
.map(|(column_name, vector)| {
match row_count {
Some(rows) => ensure!(
rows == vector.len(),
error::InvalidInsertRequestSnafu {
reason: "The row count of columns is not the same."
}
),
None => row_count = Some(vector.len()),
}
let datatype: ColumnDataTypeWrapper = vector
.data_type()
.try_into()
.context(error::ColumnDataTypeSnafu)?;
let mut column = Column {
column_name,
datatype: datatype.datatype() as i32,
..Default::default()
};
column.push_vals(0, vector);
Ok(column)
})
.collect::<Result<Vec<_>>>()?;
let insert_batch = codec::InsertBatch {
columns,
row_count: row_count.map(|rows| rows as u32).unwrap_or(0),
};
let mut options = HashMap::with_capacity(1);
options.insert(
// TODO(fys): temporarily hard-coded here
"region_id".to_string(),
codec::RegionId { id: region_id }.into(),
);
Ok(InsertExpr {
table_name: insert.table_name,
options,
expr: Some(Expr::Values(insert_expr::Values {
values: vec![insert_batch.into()],
})),
})
}
#[cfg(test)]
mod tests {
use std::{collections::HashMap, ops::Deref};
use api::v1::{
codec::{self, InsertBatch},
insert_expr::Expr,
ColumnDataType, InsertExpr,
};
use datatypes::{prelude::ConcreteDataType, types::StringType, vectors::VectorBuilder};
use table::requests::InsertRequest;
use super::to_insert_expr;
#[test]
fn test_to_insert_expr() {
let insert_request = mock_insert_request();
let insert_expr = to_insert_expr(12, insert_request).unwrap();
verify_insert_expr(insert_expr);
}
fn mock_insert_request() -> InsertRequest {
let mut columns_values = HashMap::with_capacity(4);
let mut builder = VectorBuilder::new(ConcreteDataType::String(StringType));
builder.push(&"host1".into());
builder.push_null();
builder.push(&"host3".into());
columns_values.insert("host".to_string(), builder.finish());
let mut builder = VectorBuilder::new(ConcreteDataType::int16_datatype());
builder.push(&1_i16.into());
builder.push(&2_i16.into());
builder.push(&3_i16.into());
columns_values.insert("id".to_string(), builder.finish());
InsertRequest {
table_name: "demo".to_string(),
columns_values,
}
}
fn verify_insert_expr(insert_expr: InsertExpr) {
let table_name = insert_expr.table_name;
assert_eq!("demo", table_name);
let expr = insert_expr.expr.as_ref().unwrap();
let vals = match expr {
Expr::Values(vals) => vals,
Expr::Sql(_) => unreachable!(),
};
let batch: &[u8] = vals.values[0].as_ref();
let vals: InsertBatch = batch.try_into().unwrap();
for column in vals.columns {
let name = column.column_name;
if name == "id" {
assert_eq!(0, column.null_mask[0]);
assert_eq!(ColumnDataType::Int16 as i32, column.datatype);
assert_eq!(vec![1, 2, 3], column.values.as_ref().unwrap().i16_values);
}
if name == "host" {
assert_eq!(2, column.null_mask[0]);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(
vec!["host1", "host3"],
column.values.as_ref().unwrap().string_values
);
}
}
let bytes = insert_expr.options.get("region_id").unwrap();
let region_id: codec::RegionId = bytes.deref().try_into().unwrap();
assert_eq!(12, region_id.id);
}
}
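The null_mask assertions in the test above (0 for the fully populated "id" column, 2 for "host" with a null at index 1) assume that `push_vals` packs per-row nulls into a little-endian bitmap. A small standalone sketch of that packing (the helper name is made up for illustration):

// Hypothetical helper: bit i of byte i / 8 is set when row i is null.
fn null_bitmap(is_null: &[bool]) -> Vec<u8> {
    let mut mask = vec![0u8; (is_null.len() + 7) / 8];
    for (i, null) in is_null.iter().enumerate() {
        if *null {
            mask[i / 8] |= 1 << (i % 8);
        }
    }
    mask
}

fn main() {
    // "host": ["host1", NULL, "host3"] -> bit 1 set -> 0b0000_0010 == 2
    assert_eq!(null_bitmap(&[false, true, false]), vec![2]);
    // "id": [1, 2, 3] -> no nulls -> 0
    assert_eq!(null_bitmap(&[false, false, false]), vec![0]);
}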

View File

@@ -20,7 +20,11 @@ async fn create_datanode_instance() -> Arc<DatanodeInstance> {
pub(crate) async fn create_frontend_instance() -> Arc<Instance> {
let datanode_instance = create_datanode_instance().await;
let client = create_datanode_client(datanode_instance).await;
Arc::new(Instance::with_client(client))
}
pub(crate) async fn create_datanode_client(datanode_instance: Arc<DatanodeInstance>) -> Client {
let (client, server) = tokio::io::duplex(1024);
let runtime = Arc::new(
@@ -67,6 +71,5 @@ pub(crate) async fn create_frontend_instance() -> Arc<Instance> {
}),
)
.unwrap();
let client = Client::with_manager_and_urls(channel_manager, vec![addr]);
Arc::new(Instance::with_client(client))
Client::with_manager_and_urls(channel_manager, vec![addr])
}

View File

@@ -9,8 +9,8 @@ use byteorder::ByteOrder;
use byteorder::LittleEndian;
use bytes::{Bytes, BytesMut};
use common_error::ext::BoxedError;
use common_telemetry::debug;
use common_telemetry::logging::{error, info};
use common_telemetry::{debug, trace};
use futures::Stream;
use futures_util::StreamExt;
use snafu::ResultExt;
@@ -380,7 +380,7 @@ impl LogFile {
}
}
}
debug!("Yield batch size: {}", batch.len());
trace!("Yield batch size: {}", batch.len());
yield Ok(batch);
}
});

View File

@@ -11,8 +11,8 @@ use router::Client as RouterClient;
use snafu::OptionExt;
use store::Client as StoreClient;
use self::heartbeat::HeartbeatSender;
use self::heartbeat::HeartbeatStream;
pub use self::heartbeat::HeartbeatSender;
pub use self::heartbeat::HeartbeatStream;
use crate::error;
use crate::error::Result;
use crate::rpc::BatchPutRequest;
@@ -92,13 +92,13 @@ impl MetaClientBuilder {
let mgr = client.channel_manager.clone();
if self.enable_heartbeat {
client.heartbeat_client = Some(HeartbeatClient::new(self.id, mgr.clone()));
client.heartbeat = Some(HeartbeatClient::new(self.id, mgr.clone()));
}
if self.enable_router {
client.router_client = Some(RouterClient::new(self.id, mgr.clone()));
client.router = Some(RouterClient::new(self.id, mgr.clone()));
}
if self.enable_store {
client.store_client = Some(StoreClient::new(self.id, mgr));
client.store = Some(StoreClient::new(self.id, mgr));
}
client
@@ -109,9 +109,9 @@ impl MetaClientBuilder {
pub struct MetaClient {
id: Id,
channel_manager: ChannelManager,
heartbeat_client: Option<HeartbeatClient>,
router_client: Option<RouterClient>,
store_client: Option<StoreClient>,
heartbeat: Option<HeartbeatClient>,
router: Option<RouterClient>,
store: Option<StoreClient>,
}
impl MetaClient {
@@ -137,52 +137,46 @@ impl MetaClient {
{
info!("MetaClient channel config: {:?}", self.channel_config());
if let Some(heartbeat_client) = &mut self.heartbeat_client {
heartbeat_client.start(urls.clone()).await?;
if let Some(client) = &mut self.heartbeat {
client.start(urls.clone()).await?;
info!("Heartbeat client started");
}
if let Some(router_client) = &mut self.router_client {
router_client.start(urls.clone()).await?;
if let Some(client) = &mut self.router {
client.start(urls.clone()).await?;
info!("Router client started");
}
if let Some(store_client) = &mut self.store_client {
store_client.start(urls).await?;
if let Some(client) = &mut self.store {
client.start(urls).await?;
info!("Store client started");
}
Ok(())
}
/// Ask for the leader address of `metasrv`; the heartbeat component
/// needs it to create a bidirectional stream to the leader.
pub async fn ask_leader(&self) -> Result<()> {
self.heartbeat_client()
.context(error::NotStartedSnafu {
name: "heartbeat_client",
})?
.ask_leader()
.await
}
pub async fn refresh_members(&mut self) {
todo!()
self.heartbeat_client()?.ask_leader().await
}
/// Returns a bidirectional heartbeat stream as a (sender, receiver) pair; the
/// other end is the leader of `metasrv`.
///
/// The `datanode` uses the sender to continuously send heartbeat
/// packets (some self-state data), and reads responses from `metasrv`
/// (which may contain scheduling instructions) on the receiver.
pub async fn heartbeat(&self) -> Result<(HeartbeatSender, HeartbeatStream)> {
self.heartbeat_client()
.context(error::NotStartedSnafu {
name: "heartbeat_client",
})?
.heartbeat()
.await
self.heartbeat_client()?.heartbeat().await
}
/// Provides routing information for distributed create table requests.
///
/// When a distributed create table request is received, this method returns
/// a list of `datanode` addresses generated from the partition
/// information contained in the request, using placement policies
/// such as load-based ones.
pub async fn create_route(&self, req: CreateRequest) -> Result<RouteResponse> {
self.router_client()
.context(error::NotStartedSnafu {
name: "route_client",
})?
.create(req.into())
.await?
.try_into()
self.router_client()?.create(req.into()).await?.try_into()
}
/// Fetch routing information for tables. The smallest unit is the complete
@@ -205,46 +199,22 @@ impl MetaClient {
/// ```
///
pub async fn route(&self, req: RouteRequest) -> Result<RouteResponse> {
self.router_client()
.context(error::NotStartedSnafu {
name: "route_client",
})?
.route(req.into())
.await?
.try_into()
self.router_client()?.route(req.into()).await?.try_into()
}
/// Range gets the keys in the range from the key-value store.
pub async fn range(&self, req: RangeRequest) -> Result<RangeResponse> {
self.store_client()
.context(error::NotStartedSnafu {
name: "store_client",
})?
.range(req.into())
.await?
.try_into()
self.store_client()?.range(req.into()).await?.try_into()
}
/// Put puts the given key into the key-value store.
pub async fn put(&self, req: PutRequest) -> Result<PutResponse> {
self.store_client()
.context(error::NotStartedSnafu {
name: "store_client",
})?
.put(req.into())
.await?
.try_into()
self.store_client()?.put(req.into()).await?.try_into()
}
/// BatchPut atomically puts the given keys into the key-value store.
pub async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
self.store_client()
.context(error::NotStartedSnafu {
name: "store_client",
})?
.batch_put(req.into())
.await?
.try_into()
self.store_client()?.batch_put(req.into()).await?.try_into()
}
/// CompareAndPut atomically puts the value to the given updated
@@ -253,10 +223,7 @@ impl MetaClient {
&self,
req: CompareAndPutRequest,
) -> Result<CompareAndPutResponse> {
self.store_client()
.context(error::NotStartedSnafu {
name: "store_client",
})?
self.store_client()?
.compare_and_put(req.into())
.await?
.try_into()
@@ -264,28 +231,31 @@ impl MetaClient {
/// DeleteRange deletes the given range from the key-value store.
pub async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
self.store_client()
.context(error::NotStartedSnafu {
name: "store_client",
})?
self.store_client()?
.delete_range(req.into())
.await?
.try_into()
}
#[inline]
pub fn heartbeat_client(&self) -> Option<HeartbeatClient> {
self.heartbeat_client.clone()
pub fn heartbeat_client(&self) -> Result<HeartbeatClient> {
self.heartbeat.clone().context(error::NotStartedSnafu {
name: "heartbeat_client",
})
}
#[inline]
pub fn router_client(&self) -> Option<RouterClient> {
self.router_client.clone()
pub fn router_client(&self) -> Result<RouterClient> {
self.router.clone().context(error::NotStartedSnafu {
name: "store_client",
})
}
#[inline]
pub fn store_client(&self) -> Option<StoreClient> {
self.store_client.clone()
pub fn store_client(&self) -> Result<StoreClient> {
self.store.clone().context(error::NotStartedSnafu {
name: "store_client",
})
}
#[inline]
@@ -320,23 +290,23 @@ mod tests {
let urls = &["127.0.0.1:3001", "127.0.0.1:3002"];
let mut meta_client = MetaClientBuilder::new(0, 0).enable_heartbeat().build();
assert!(meta_client.heartbeat_client().is_some());
assert!(meta_client.router_client().is_none());
assert!(meta_client.store_client().is_none());
assert!(meta_client.heartbeat_client().is_ok());
assert!(meta_client.router_client().is_err());
assert!(meta_client.store_client().is_err());
meta_client.start(urls).await.unwrap();
assert!(meta_client.heartbeat_client().unwrap().is_started().await);
let mut meta_client = MetaClientBuilder::new(0, 0).enable_router().build();
assert!(meta_client.heartbeat_client().is_none());
assert!(meta_client.router_client().is_some());
assert!(meta_client.store_client().is_none());
assert!(meta_client.heartbeat_client().is_err());
assert!(meta_client.router_client().is_ok());
assert!(meta_client.store_client().is_err());
meta_client.start(urls).await.unwrap();
assert!(meta_client.router_client().unwrap().is_started().await);
let mut meta_client = MetaClientBuilder::new(0, 0).enable_store().build();
assert!(meta_client.heartbeat_client().is_none());
assert!(meta_client.router_client().is_none());
assert!(meta_client.store_client().is_some());
assert!(meta_client.heartbeat_client().is_err());
assert!(meta_client.router_client().is_err());
assert!(meta_client.store_client().is_ok());
meta_client.start(urls).await.unwrap();
assert!(meta_client.store_client().unwrap().is_started().await);
@@ -347,9 +317,9 @@ mod tests {
.build();
assert_eq!(1, meta_client.id().0);
assert_eq!(2, meta_client.id().1);
assert!(meta_client.heartbeat_client().is_some());
assert!(meta_client.router_client().is_some());
assert!(meta_client.store_client().is_some());
assert!(meta_client.heartbeat_client().is_ok());
assert!(meta_client.router_client().is_ok());
assert!(meta_client.store_client().is_ok());
meta_client.start(urls).await.unwrap();
assert!(meta_client.heartbeat_client().unwrap().is_started().await);
assert!(meta_client.router_client().unwrap().is_started().await);
@@ -648,23 +618,26 @@ mod tests {
let res = client.compare_and_put(req).await;
assert!(!res.unwrap().is_success());
// empty expect key is not allowed
// create if absent
let req = CompareAndPutRequest::new()
.with_key(b"key".to_vec())
.with_value(b"value".to_vec());
let res = client.compare_and_put(req).await;
let mut res = res.unwrap();
assert!(!res.is_success());
let mut kv = res.take_prev_kv().unwrap();
assert_eq!(b"key".to_vec(), kv.take_key());
assert!(kv.take_value().is_empty());
assert!(res.is_success());
assert!(res.take_prev_kv().is_none());
let req = PutRequest::new()
// compare and put fail
let req = CompareAndPutRequest::new()
.with_key(b"key".to_vec())
.with_value(b"value".to_vec());
let res = client.put(req).await;
assert!(res.is_ok());
.with_expect(b"not_eq".to_vec())
.with_value(b"value2".to_vec());
let res = client.compare_and_put(req).await;
let mut res = res.unwrap();
assert!(!res.is_success());
assert_eq!(b"value".to_vec(), res.take_prev_kv().unwrap().take_value());
// compare and put success
let req = CompareAndPutRequest::new()
.with_key(b"key".to_vec())
.with_expect(b"value".to_vec())

View File

@@ -29,7 +29,7 @@ pub struct HeartbeatSender {
impl HeartbeatSender {
#[inline]
const fn new(id: Id, sender: mpsc::Sender<HeartbeatRequest>) -> Self {
fn new(id: Id, sender: mpsc::Sender<HeartbeatRequest>) -> Self {
Self { id, sender }
}
@@ -58,7 +58,7 @@ pub struct HeartbeatStream {
impl HeartbeatStream {
#[inline]
const fn new(id: Id, stream: Streaming<HeartbeatResponse>) -> Self {
fn new(id: Id, stream: Streaming<HeartbeatResponse>) -> Self {
Self { id, stream }
}
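A hedged usage sketch of the heartbeat flow enabled by the `MetaClient` changes above (the import paths are assumptions; the builder and method names are the ones shown in this diff):

// Assumed module paths; adjust to the crate's actual layout.
use meta_client::client::MetaClientBuilder;
use meta_client::error::Result;

async fn heartbeat_demo() -> Result<()> {
    let urls = &["127.0.0.1:3002"];
    // Only the heartbeat component is enabled, so the router/store accessors would return Err.
    let mut client = MetaClientBuilder::new(0, 0).enable_heartbeat().build();
    client.start(urls).await?;

    // Ask for the current metasrv leader; the heartbeat stream is opened against it.
    client.ask_leader().await?;

    // (sender, receiver): the datanode pushes HeartbeatRequest packets through the sender
    // and reads HeartbeatResponse (possibly carrying scheduling instructions) from the receiver.
    let (_sender, _receiver) = client.heartbeat().await?;
    Ok(())
}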

View File

@@ -1,6 +1,6 @@
mod router;
mod store;
mod util;
pub mod util;
use api::v1::meta::KeyValue as PbKeyValue;
use api::v1::meta::Peer as PbPeer;

View File

@@ -143,14 +143,17 @@ impl KvStore for EtcdStore {
options,
} = req.try_into()?;
let txn = Txn::new()
.when(vec![Compare::value(
key.clone(),
CompareOp::Equal,
expect.clone(),
)])
.and_then(vec![TxnOp::put(key.clone(), value, options)])
.or_else(vec![TxnOp::get(key.clone(), None)]);
let put_op = vec![TxnOp::put(key.clone(), value, options)];
let get_op = vec![TxnOp::get(key.clone(), None)];
let mut txn = if expect.is_empty() {
// create if absent
// a create revision of 0 means the key does not exist
Txn::new().when(vec![Compare::create_revision(key, CompareOp::Equal, 0)])
} else {
// compare and put
Txn::new().when(vec![Compare::value(key, CompareOp::Equal, expect)])
};
txn = txn.and_then(put_op).or_else(get_op);
let txn_res = self
.client
@@ -158,6 +161,7 @@ impl KvStore for EtcdStore {
.txn(txn)
.await
.context(error::EtcdFailedSnafu)?;
let success = txn_res.succeeded();
let op_res = txn_res
.op_responses()
@@ -165,26 +169,26 @@ impl KvStore for EtcdStore {
.context(error::InvalidTxnResultSnafu {
err_msg: "empty response",
})?;
let prev_kv = if success {
Some(KeyValue { key, value: expect })
} else {
match op_res {
TxnOpResponse::Get(get_res) => {
if get_res.count() == 0 {
// does not exist
Some(KeyValue { key, value: vec![] })
} else {
ensure!(
get_res.count() == 1,
error::InvalidTxnResultSnafu {
err_msg: format!("expect 1 response, actual {}", get_res.count())
}
);
Some(KeyValue::from(KvPair::new(&get_res.kvs()[0])))
}
}
_ => unreachable!(), // never get here
let prev_kv = match op_res {
TxnOpResponse::Put(put_res) => {
put_res.prev_key().map(|kv| KeyValue::from(KvPair::new(kv)))
}
TxnOpResponse::Get(get_res) => {
if get_res.count() == 0 {
// does not exist
None
} else {
ensure!(
get_res.count() == 1,
error::InvalidTxnResultSnafu {
err_msg: format!("expect 1 response, actual {}", get_res.count())
}
);
Some(KeyValue::from(KvPair::new(&get_res.kvs()[0])))
}
}
_ => unreachable!(), // never get here
};
let header = Some(ResponseHeader::success(cluster_id));

View File

@@ -145,27 +145,16 @@ impl KvStore for MemStore {
} = req;
let mut memory = self.inner.write();
let (success, prev_kv) = if expect.is_empty() {
(
false,
Some(KeyValue {
key: key.clone(),
value: vec![],
}),
)
} else {
let prev_val = memory.get(&key);
let success = prev_val
.map(|v| expect.cmp(v) == Ordering::Equal)
.unwrap_or(false);
(
success,
prev_val.map(|v| KeyValue {
key: key.clone(),
value: v.clone(),
}),
)
};
let prev_val = memory.get(&key);
let success = prev_val
.map(|v| expect.cmp(v) == Ordering::Equal)
.unwrap_or(expect.is_empty());
let prev_kv = prev_val.map(|v| KeyValue {
key: key.clone(),
value: v.clone(),
});
if success {
memory.insert(key, value);
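Both KvStore backends in this diff (EtcdStore above and MemStore here) now share the same compare-and-put contract: an empty `expect` means "create if absent". A minimal standalone sketch of that decision, independent of either backend:

// success means the put is applied; `current` is the value already stored, if any.
fn cas_succeeds(current: Option<&[u8]>, expect: &[u8]) -> bool {
    match current {
        None => expect.is_empty(),        // key absent: only create-if-absent succeeds
        Some(stored) => stored == expect, // key present: stored value must equal `expect`
    }
}

fn main() {
    assert!(cas_succeeds(None, b""));                              // create if absent
    assert!(!cas_succeeds(Some(b"value".as_slice()), b""));        // key already exists
    assert!(!cas_succeeds(Some(b"value".as_slice()), b"not_eq"));  // expect mismatch
    assert!(cas_succeeds(Some(b"value".as_slice()), b"value"));    // expect matches
}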

View File

@@ -48,6 +48,7 @@ where
Statement::Query(qb) => self.query_to_plan(qb),
Statement::ShowTables(_)
| Statement::ShowDatabases(_)
| Statement::ShowCreateTable(_)
| Statement::Create(_)
| Statement::Alter(_)
| Statement::Insert(_) => unreachable!(),

View File

@@ -270,31 +270,45 @@ pub(crate) mod greptime_builtin {
// P.S.: not extracted to a separate file because a non-inlined proc macro attribute is *unstable*
use std::sync::Arc;
use common_function::scalars::math::PowFunction;
use common_function::scalars::{function::FunctionContext, Function};
use datafusion::arrow::compute::comparison::{gt_eq_scalar, lt_eq_scalar};
use datafusion::arrow::datatypes::DataType;
use datafusion::arrow::error::ArrowError;
use datafusion::arrow::scalar::{PrimitiveScalar, Scalar};
use datafusion::physical_plan::expressions;
use common_function::scalars::{
function::FunctionContext, math::PowFunction, Function, FunctionRef, FUNCTION_REGISTRY,
};
use datafusion::{
arrow::{
compute::comparison::{gt_eq_scalar, lt_eq_scalar},
datatypes::DataType,
error::ArrowError,
scalar::{PrimitiveScalar, Scalar},
},
physical_plan::expressions,
};
use datafusion_expr::ColumnarValue as DFColValue;
use datafusion_physical_expr::math_expressions;
use datatypes::arrow;
use datatypes::arrow::array::{ArrayRef, NullArray};
use datatypes::arrow::compute;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, Int64Vector};
use datatypes::{
arrow::{
self,
array::{ArrayRef, NullArray},
compute,
},
vectors::VectorRef,
};
use paste::paste;
use rustpython_vm::builtins::{PyFloat, PyFunction, PyInt, PyStr};
use rustpython_vm::function::{FuncArgs, KwArgs, OptionalArg};
use rustpython_vm::{AsObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine};
use rustpython_vm::{
builtins::{PyFloat, PyFunction, PyInt, PyStr},
function::{FuncArgs, KwArgs, OptionalArg},
AsObject, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
};
use crate::python::builtins::{
all_to_f64, eval_aggr_fn, from_df_err, try_into_columnar_value, try_into_py_obj,
type_cast_error,
};
use crate::python::utils::PyVectorRef;
use crate::python::utils::{is_instance, py_vec_obj_to_array};
use crate::python::PyVector;
use crate::python::{
utils::{is_instance, py_vec_obj_to_array, PyVectorRef},
vector::val_to_pyobj,
PyVector,
};
#[pyfunction]
fn vector(args: OptionalArg<PyObjectRef>, vm: &VirtualMachine) -> PyResult<PyVector> {
@@ -303,10 +317,135 @@ pub(crate) mod greptime_builtin {
// the main binding code; due to proc macro quirks, it can't directly use a simpler macro
// because pyfunction is not an attr?
// ------
// GrepTime DB's own UDF&UDAF
// ------
fn eval_func(name: &str, v: &[PyVectorRef], vm: &VirtualMachine) -> PyResult<PyVector> {
let v: Vec<VectorRef> = v.iter().map(|v| v.as_vector_ref()).collect();
let func: Option<FunctionRef> = FUNCTION_REGISTRY.get_function(name);
let res = match func {
Some(f) => f.eval(Default::default(), &v),
None => return Err(vm.new_type_error(format!("Can't find function {}", name))),
};
match res {
Ok(v) => Ok(v.into()),
Err(err) => {
Err(vm.new_runtime_error(format!("Fail to evaluate the function,: {}", err)))
}
}
}
fn eval_aggr_func(
name: &str,
args: &[PyVectorRef],
vm: &VirtualMachine,
) -> PyResult<PyObjectRef> {
let v: Vec<VectorRef> = args.iter().map(|v| v.as_vector_ref()).collect();
let func = FUNCTION_REGISTRY.get_aggr_function(name);
let f = match func {
Some(f) => f.create().creator(),
None => return Err(vm.new_type_error(format!("Can't find function {}", name))),
};
let types: Vec<_> = v.iter().map(|v| v.data_type()).collect();
let acc = f(&types);
let mut acc = match acc {
Ok(acc) => acc,
Err(err) => {
return Err(vm.new_runtime_error(format!("Failed to create accumulator: {}", err)))
}
};
match acc.update_batch(&v) {
Ok(_) => (),
Err(err) => {
return Err(vm.new_runtime_error(format!("Failed to update batch: {}", err)))
}
};
let res = match acc.evaluate() {
Ok(r) => r,
Err(err) => {
return Err(vm.new_runtime_error(format!("Failed to evaluate accumulator: {}", err)))
}
};
let res = val_to_pyobj(res, vm);
Ok(res)
}
/// GrepTime's own implementation of the pow function
#[pyfunction]
fn pow_gp(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyVector> {
eval_func("pow", &[v0, v1], vm)
}
#[pyfunction]
fn clip(
v0: PyVectorRef,
v1: PyVectorRef,
v2: PyVectorRef,
vm: &VirtualMachine,
) -> PyResult<PyVector> {
eval_func("clip", &[v0, v1, v2], vm)
}
#[pyfunction]
fn median(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("median", &[v], vm)
}
#[pyfunction]
fn diff(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("diff", &[v], vm)
}
#[pyfunction]
fn mean(v: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("mean", &[v], vm)
}
#[pyfunction]
fn polyval(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("polyval", &[v0, v1], vm)
}
#[pyfunction]
fn argmax(v0: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("argmax", &[v0], vm)
}
#[pyfunction]
fn argmin(v0: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("argmin", &[v0], vm)
}
#[pyfunction]
fn percentile(v0: PyVectorRef, v1: PyVectorRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {
eval_aggr_func("percentile", &[v0, v1], vm)
}
#[pyfunction]
fn scipy_stats_norm_cdf(
v0: PyVectorRef,
v1: PyVectorRef,
vm: &VirtualMachine,
) -> PyResult<PyObjectRef> {
eval_aggr_func("scipystatsnormcdf", &[v0, v1], vm)
}
#[pyfunction]
fn scipy_stats_norm_pdf(
v0: PyVectorRef,
v1: PyVectorRef,
vm: &VirtualMachine,
) -> PyResult<PyObjectRef> {
eval_aggr_func("scipystatsnormpdf", &[v0, v1], vm)
}
// The math functions return a general PyObjectRef,
// so they can return either a PyVector or a scalar PyInt/Float/Bool
// ------
// DataFusion's UDF&UDAF
// ------
/// Simple math function; the backing implementation is datafusion's `sqrt` math function
#[pyfunction]
fn sqrt(val: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyObjectRef> {

View File

@@ -924,5 +924,198 @@ sum(prev(values))"#,
ty: Float64,
value: Float(3.0)
))
),
TestCase(
input: {
"values": Var(
ty: Float64,
value: FloatVec([1.0, 2.0, 3.0])
),
"pows": Var(
ty: Float64,
value: FloatVec([1.0, 2.0, 3.0])
),
},
script: r#"
from greptime import *
pow_gp(values, pows)"#,
expect: Ok((
ty: Float64,
value: FloatVec([1.0, 4.0, 27.0])
))
),
TestCase(
input: {
"values": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 0.5])
),
"lower": Var(
ty: Float64,
value: FloatVec([0.0, 0.0, 0.0])
),
"upper": Var(
ty: Float64,
value: FloatVec([1.0, 1.0, 1.0])
),
},
script: r#"
from greptime import *
clip(values, lower, upper)"#,
expect: Ok((
ty: Float64,
value: FloatVec([0.0, 1.0, 0.5])
))
),
TestCase(
input: {
"values": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 2.0, 0.5])
)
},
script: r#"
from greptime import *
median(values)"#,
expect: Ok((
ty: Float64,
value: Float(1.25)
))
),
TestCase(
input: {
"values": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 2.0, 0.5])
)
},
script: r#"
from greptime import *
diff(values)"#,
expect: Ok((
ty: Float64,
value: FloatVec([3.0, 0.0, -1.5])
))
),
TestCase(
input: {
"values": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 2.0, 0.0])
)
},
script: r#"
from greptime import *
mean(values)"#,
expect: Ok((
ty: Float64,
value: Float(0.75)
))
),
TestCase(
input: {
"p": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0])
),
"x": Var(
ty: Int64,
value: IntVec([1, 1])
)
},
script: r#"
from greptime import *
polyval(p, x)"#,
expect: Ok((
ty: Float64,
value: Float(1.0)
))
),
TestCase(
input: {
"p": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 3.0])
)
},
script: r#"
from greptime import *
argmax(p)"#,
expect: Ok((
ty: Int64,
value: Int(2)
))
),
TestCase(
input: {
"p": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 3.0])
)
},
script: r#"
from greptime import *
argmin(p)"#,
expect: Ok((
ty: Int64,
value: Int(0)
))
),
TestCase(
input: {
"x": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 3.0])
),
"p": Var(
ty: Float64,
value: FloatVec([0.5, 0.5, 0.5])
)
},
script: r#"
from greptime import *
percentile(x, p)"#,
expect: Ok((
ty: Float64,
value: Float(-0.97)
))
),
TestCase(
input: {
"x": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 3.0])
),
"p": Var(
ty: Float64,
value: FloatVec([0.5, 0.5, 0.5])
)
},
script: r#"
from greptime import *
scipy_stats_norm_cdf(x, p)"#,
expect: Ok((
ty: Float64,
value: Float(0.3444602779022303)
))
),
TestCase(
input: {
"x": Var(
ty: Float64,
value: FloatVec([-1.0, 2.0, 3.0])
),
"p": Var(
ty: Float64,
value: FloatVec([0.5, 0.5, 0.5])
)
},
script: r#"
from greptime import *
scipy_stats_norm_pdf(x, p)"#,
expect: Ok((
ty: Float64,
value: Float(0.1768885735289059)
))
)
]

View File

@@ -939,7 +939,16 @@ pub fn val_to_pyobj(val: value::Value, vm: &VirtualMachine) -> PyObjectRef {
value::Value::DateTime(v) => vm.ctx.new_int(v.val()).into(),
// FIXME(dennis): lose the timestamp unit here
Value::Timestamp(v) => vm.ctx.new_int(v.value()).into(),
value::Value::List(_) => unreachable!(),
value::Value::List(list) => {
let list = list.items().as_ref();
match list {
Some(list) => {
let list: Vec<_> = list.iter().map(|v| val_to_pyobj(v.clone(), vm)).collect();
vm.ctx.new_list(list).into()
}
None => vm.ctx.new_list(Vec::new()).into(),
}
}
}
}

View File

@@ -47,6 +47,7 @@ impl ScriptsTable {
desc: Some("Scripts table".to_string()),
schema,
// name and timestamp as primary key
region_numbers: vec![0],
primary_key_indices: vec![0, 3],
create_if_not_exists: true,
table_options: HashMap::default(),

View File

@@ -26,10 +26,12 @@ hyper = { version = "0.14", features = ["full"] }
influxdb_line_protocol = { git = "https://github.com/evenyag/influxdb_iox", branch = "feat/line-protocol" }
metrics = "0.20"
num_cpus = "1.13"
once_cell = "1.16"
openmetrics-parser = "0.4"
opensrv-mysql = "0.1"
pgwire = { version = "0.4" }
prost = "0.11"
regex = "1.6"
schemars = "0.8"
serde = "1.0"
serde_json = "1.0"

src/servers/src/context.rs
View File

@@ -0,0 +1,166 @@
use std::collections::HashMap;
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use crate::context::AuthMethod::Token;
use crate::context::Channel::HTTP;
type CtxFnRef = Arc<dyn Fn(&Context) -> bool + Send + Sync>;
#[derive(Default, Serialize, Deserialize)]
pub struct Context {
pub exec_info: ExecInfo,
pub client_info: ClientInfo,
pub user_info: UserInfo,
pub quota: Quota,
#[serde(skip)]
pub predicates: Vec<CtxFnRef>,
}
impl Context {
pub fn new() -> Self {
Context::default()
}
pub fn add_predicate(&mut self, predicate: CtxFnRef) {
self.predicates.push(predicate);
}
}
#[derive(Default, Serialize, Deserialize)]
pub struct ExecInfo {
pub catalog: Option<String>,
pub schema: Option<String>,
// should opts be thread-safe?
pub extra_opts: HashMap<String, String>,
pub trace_id: Option<String>,
}
#[derive(Default, Serialize, Deserialize)]
pub struct ClientInfo {
pub client_host: Option<String>,
}
impl ClientInfo {
pub fn new(host: Option<String>) -> Self {
ClientInfo { client_host: host }
}
}
#[derive(Default, Serialize, Deserialize)]
pub struct UserInfo {
pub username: Option<String>,
pub from_channel: Option<Channel>,
pub auth_method: Option<AuthMethod>,
}
impl UserInfo {
pub fn with_http_token(token: String) -> Self {
UserInfo {
username: None,
from_channel: Some(HTTP),
auth_method: Some(Token(token)),
}
}
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum Channel {
GRPC,
HTTP,
MYSQL,
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AuthMethod {
None,
Password {
hash_method: AuthHashMethod,
hashed_value: Vec<u8>,
},
Token(String),
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum AuthHashMethod {
DoubleSha1,
Sha256,
}
#[derive(Default, Serialize, Deserialize)]
pub struct Quota {
pub total: u64,
pub consumed: u64,
pub estimated: u64,
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::sync::Arc;
use crate::context::AuthMethod::Token;
use crate::context::Channel::HTTP;
use crate::context::{ClientInfo, Context, ExecInfo, Quota, UserInfo};
#[test]
fn test_predicate() {
let mut ctx = Context::default();
ctx.add_predicate(Arc::new(|ctx: &Context| {
ctx.quota.total > ctx.quota.consumed
}));
ctx.quota.total = 10;
ctx.quota.consumed = 5;
let predicates = ctx.predicates.clone();
let mut re = true;
for predicate in predicates {
re &= predicate(&ctx);
}
assert!(re);
}
#[test]
fn test_build() {
let ctx = Context {
exec_info: ExecInfo {
catalog: Some(String::from("greptime")),
schema: Some(String::from("public")),
extra_opts: HashMap::new(),
trace_id: None,
},
client_info: ClientInfo::new(Some(String::from("127.0.0.1:4001"))),
user_info: UserInfo::with_http_token(String::from("HELLO")),
quota: Quota {
total: 10,
consumed: 5,
estimated: 2,
},
predicates: vec![],
};
assert_eq!(ctx.exec_info.catalog.unwrap(), String::from("greptime"));
assert_eq!(ctx.exec_info.schema.unwrap(), String::from("public"));
assert_eq!(ctx.exec_info.extra_opts.capacity(), 0);
assert_eq!(ctx.exec_info.trace_id, None);
assert_eq!(
ctx.client_info.client_host.unwrap(),
String::from("127.0.0.1:4001")
);
assert_eq!(ctx.user_info.username, None);
assert_eq!(ctx.user_info.from_channel.unwrap(), HTTP);
assert_eq!(
ctx.user_info.auth_method.unwrap(),
Token(String::from("HELLO"))
);
assert!(ctx.quota.total > 0);
assert!(ctx.quota.consumed > 0);
assert!(ctx.quota.estimated > 0);
assert_eq!(ctx.predicates.capacity(), 0);
}
}

View File

@@ -152,6 +152,9 @@ pub enum Error {
#[snafu(display("Invalid prometheus remote read query result, msg: {}", msg))]
InvalidPromRemoteReadQueryResult { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to decode region id, source: {}", source))]
DecodeRegionId { source: api::DecodeError },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -186,6 +189,7 @@ impl ErrorExt for Error {
| DecodePromRemoteRequest { .. }
| DecompressPromRemoteRequest { .. }
| InvalidPromRemoteRequest { .. }
| DecodeRegionId { .. }
| TimePrecision { .. } => StatusCode::InvalidArguments,
InfluxdbLinesWrite { source, .. } => source.status_code(),

View File

@@ -304,8 +304,7 @@ impl HttpServer {
router = router.nest(&format!("/{}/prometheus", HTTP_API_VERSION), prom_router);
}
let metrics_router = Router::new().route("/", routing::get(handler::metrics));
router = router.nest(&format!("/{}/metrics", HTTP_API_VERSION), metrics_router);
router = router.route("/metrics", routing::get(handler::metrics));
router
// middlewares

View File

@@ -1,7 +1,7 @@
use std::collections::HashMap;
use aide::transform::TransformOperation;
use axum::extract::{Json, Query, State};
use axum::extract::{Json, Query, RawBody, State};
use common_error::prelude::ErrorExt;
use common_error::status_code::StatusCode;
use common_telemetry::metric;
@@ -47,54 +47,67 @@ pub async fn metrics(Query(_params): Query<HashMap<String, String>>) -> String {
}
}
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
pub struct ScriptExecution {
pub name: String,
pub script: String,
macro_rules! json_err {
($e: expr) => {{
return Json(JsonResponse::with_error(
format!("Invalid argument: {}", $e),
common_error::status_code::StatusCode::InvalidArguments,
));
}};
($msg: expr, $code: expr) => {{
return Json(JsonResponse::with_error($msg.to_string(), $code));
}};
}
macro_rules! unwrap_or_json_err {
($result: expr) => {
match $result {
Ok(result) => result,
Err(e) => json_err!(e),
}
};
}
/// Handler to insert and compile script
#[axum_macros::debug_handler]
pub async fn scripts(
State(query_handler): State<SqlQueryHandlerRef>,
Json(payload): Json<ScriptExecution>,
Query(params): Query<ScriptQuery>,
RawBody(body): RawBody,
) -> Json<JsonResponse> {
if payload.name.is_empty() || payload.script.is_empty() {
return Json(JsonResponse::with_error(
"Invalid name or script".to_string(),
StatusCode::InvalidArguments,
));
}
let name = params.name.as_ref();
let body = match query_handler
.insert_script(&payload.name, &payload.script)
.await
{
if name.is_none() || name.unwrap().is_empty() {
json_err!("invalid name");
}
let bytes = unwrap_or_json_err!(hyper::body::to_bytes(body).await);
let script = unwrap_or_json_err!(String::from_utf8(bytes.to_vec()));
let body = match query_handler.insert_script(name.unwrap(), &script).await {
Ok(()) => JsonResponse::with_output(None),
Err(e) => JsonResponse::with_error(format!("Insert script error: {}", e), e.status_code()),
Err(e) => json_err!(format!("Insert script error: {}", e), e.status_code()),
};
Json(body)
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct RunScriptQuery {
name: Option<String>,
pub struct ScriptQuery {
pub name: Option<String>,
}
/// Handler to execute script
#[axum_macros::debug_handler]
pub async fn run_script(
State(query_handler): State<SqlQueryHandlerRef>,
Query(params): Query<RunScriptQuery>,
Query(params): Query<ScriptQuery>,
) -> Json<JsonResponse> {
let name = params.name.as_ref();
if name.is_none() || name.unwrap().is_empty() {
return Json(JsonResponse::with_error(
"Invalid name".to_string(),
StatusCode::InvalidArguments,
));
json_err!("invalid name");
}
let output = query_handler.execute_script(name.unwrap()).await;

View File

@@ -1,5 +1,6 @@
#![feature(assert_matches)]
pub mod context;
pub mod error;
pub mod grpc;
pub mod http;

View File

@@ -0,0 +1,374 @@
//! Use regexes to filter out statements emitted by some MySQL federated components.
//! Inspired by Databend's "[mysql_federated.rs](https://github.com/datafuselabs/databend/blob/ac706bf65845e6895141c96c0a10bad6fdc2d367/src/query/service/src/servers/mysql/mysql_federated.rs)".
use std::collections::HashMap;
use std::sync::Arc;
use common_query::Output;
use common_recordbatch::RecordBatches;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use once_cell::sync::Lazy;
use regex::bytes::RegexSet;
use regex::Regex;
// TODO(LFC): Include GreptimeDB's version and git commit tag etc.
const MYSQL_VERSION: &str = "8.0.26";
static SELECT_VAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new("(?i)^(SELECT @@(.*))").unwrap());
static MYSQL_CONN_JAVA_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(/\\* mysql-connector-java(.*))").unwrap());
static SHOW_LOWER_CASE_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'lower_case_table_names'(.*))").unwrap());
static SHOW_COLLATION_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(show collation where(.*))").unwrap());
static SHOW_VARIABLES_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES(.*))").unwrap());
static SELECT_VERSION_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?i)^(SELECT VERSION\(\s*\))").unwrap());
// SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP());
static SELECT_TIME_DIFF_FUNC_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(SELECT TIMEDIFF\\(NOW\\(\\), UTC_TIMESTAMP\\(\\)\\))").unwrap());
// sqlalchemy < 1.4.30
static SHOW_SQL_MODE_PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new("(?i)^(SHOW VARIABLES LIKE 'sql_mode'(.*))").unwrap());
static OTHER_NOT_SUPPORTED_STMT: Lazy<RegexSet> = Lazy::new(|| {
RegexSet::new(&[
// Txn.
"(?i)^(ROLLBACK(.*))",
"(?i)^(COMMIT(.*))",
"(?i)^(START(.*))",
// Set.
"(?i)^(SET NAMES(.*))",
"(?i)^(SET character_set_results(.*))",
"(?i)^(SET net_write_timeout(.*))",
"(?i)^(SET FOREIGN_KEY_CHECKS(.*))",
"(?i)^(SET AUTOCOMMIT(.*))",
"(?i)^(SET SQL_LOG_BIN(.*))",
"(?i)^(SET sql_mode(.*))",
"(?i)^(SET SQL_SELECT_LIMIT(.*))",
"(?i)^(SET @@(.*))",
"(?i)^(SHOW COLLATION)",
"(?i)^(SHOW CHARSET)",
// mysqldump.
"(?i)^(SET SESSION(.*))",
"(?i)^(SET SQL_QUOTE_SHOW_CREATE(.*))",
"(?i)^(LOCK TABLES(.*))",
"(?i)^(UNLOCK TABLES(.*))",
"(?i)^(SELECT LOGFILE_GROUP_NAME, FILE_NAME, TOTAL_EXTENTS, INITIAL_SIZE, ENGINE, EXTRA FROM INFORMATION_SCHEMA.FILES(.*))",
// mydumper.
"(?i)^(/\\*!80003 SET(.*) \\*/)$",
"(?i)^(SHOW MASTER STATUS)",
"(?i)^(SHOW ALL SLAVES STATUS)",
"(?i)^(LOCK BINLOG FOR BACKUP)",
"(?i)^(LOCK TABLES FOR BACKUP)",
"(?i)^(UNLOCK BINLOG(.*))",
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
// DBeaver.
"(?i)^(SHOW WARNINGS)",
"(?i)^(/\\* ApplicationName=(.*)SHOW WARNINGS)",
"(?i)^(/\\* ApplicationName=(.*)SHOW PLUGINS)",
"(?i)^(/\\* ApplicationName=(.*)SHOW COLLATION)",
"(?i)^(/\\* ApplicationName=(.*)SHOW CHARSET)",
"(?i)^(/\\* ApplicationName=(.*)SHOW ENGINES)",
"(?i)^(/\\* ApplicationName=(.*)SELECT @@(.*))",
"(?i)^(/\\* ApplicationName=(.*)SHOW @@(.*))",
"(?i)^(/\\* ApplicationName=(.*)SET net_write_timeout(.*))",
"(?i)^(/\\* ApplicationName=(.*)SET SQL_SELECT_LIMIT(.*))",
"(?i)^(/\\* ApplicationName=(.*)SHOW VARIABLES(.*))",
// pt-toolkit
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
// mysqldump 5.7.16
"(?i)^(/\\*!40100 SET(.*) \\*/)$",
"(?i)^(/\\*!40103 SET(.*) \\*/)$",
"(?i)^(/\\*!40111 SET(.*) \\*/)$",
"(?i)^(/\\*!40101 SET(.*) \\*/)$",
"(?i)^(/\\*!40014 SET(.*) \\*/)$",
"(?i)^(/\\*!40000 SET(.*) \\*/)$",
]).unwrap()
});
static VAR_VALUES: Lazy<HashMap<&str, &str>> = Lazy::new(|| {
HashMap::from([
("tx_isolation", "REPEATABLE-READ"),
("session.tx_isolation", "REPEATABLE-READ"),
("transaction_isolation", "REPEATABLE-READ"),
("session.transaction_isolation", "REPEATABLE-READ"),
("session.transaction_read_only", "0"),
("time_zone", "UTC"),
("system_time_zone", "UTC"),
("max_allowed_packet", "134217728"),
("interactive_timeout", "31536000"),
("wait_timeout", "31536000"),
("net_write_timeout", "31536000"),
("version_comment", "Greptime"),
])
});
// Recordbatches for select function.
// Format:
// |function_name|
// |value|
fn select_function(name: &str, value: &str) -> RecordBatches {
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
name,
ConcreteDataType::string_datatype(),
true,
)]));
let columns = vec![Arc::new(StringVector::from(vec![value])) as _];
RecordBatches::try_from_columns(schema, columns)
// unwrap is safe: the schema and data always form a valid recordbatch since all columns are string type
.unwrap()
}
// Recordbatches for show variable statement.
// Format is:
// | Variable_name | Value |
// | xx | yy |
fn show_variables(name: &str, value: &str) -> RecordBatches {
let schema = Arc::new(Schema::new(vec![
ColumnSchema::new("Variable_name", ConcreteDataType::string_datatype(), true),
ColumnSchema::new("Value", ConcreteDataType::string_datatype(), true),
]));
let columns = vec![
Arc::new(StringVector::from(vec![name])) as _,
Arc::new(StringVector::from(vec![value])) as _,
];
RecordBatches::try_from_columns(schema, columns)
// unwrap is safe: the schema and data always form a valid recordbatch since all columns are string type
.unwrap()
}
fn select_variable(query: &str) -> Option<Output> {
let mut fields = vec![];
let mut values = vec![];
// query like "SELECT @@aa, @@bb as cc, @dd..."
let query = query.to_lowercase();
let vars: Vec<&str> = query.split("@@").collect();
if vars.len() <= 1 {
return None;
}
// skip the first "select"
for var in vars.iter().skip(1) {
let var = var.trim_matches(|c| c == ' ' || c == ',');
let var_as: Vec<&str> = var
.split(" as ")
.map(|x| {
x.trim_matches(|c| c == ' ')
.split_whitespace()
.next()
.unwrap_or("")
})
.collect();
match var_as.len() {
1 => {
// @@aa
let value = VAR_VALUES.get(var_as[0]).unwrap_or(&"0");
values.push(Arc::new(StringVector::from(vec![*value])) as _);
// field is '@@aa'
fields.push(ColumnSchema::new(
&format!("@@{}", var_as[0]),
ConcreteDataType::string_datatype(),
true,
));
}
2 => {
// @@bb as cc:
// var is 'bb'.
let value = VAR_VALUES.get(var_as[0]).unwrap_or(&"0");
values.push(Arc::new(StringVector::from(vec![*value])) as _);
// field is 'cc'.
fields.push(ColumnSchema::new(
var_as[1],
ConcreteDataType::string_datatype(),
true,
));
}
_ => return None,
}
}
let schema = Arc::new(Schema::new(fields));
// unwrap is safe: the schema and data always form a valid recordbatch since all columns are string type
let batches = RecordBatches::try_from_columns(schema, values).unwrap();
Some(Output::RecordBatches(batches))
}
fn check_select_variable(query: &str) -> Option<Output> {
if vec![&SELECT_VAR_PATTERN, &MYSQL_CONN_JAVA_PATTERN]
.iter()
.any(|r| r.is_match(query))
{
select_variable(query)
} else {
None
}
}
fn check_show_variables(query: &str) -> Option<Output> {
let recordbatches = if SHOW_SQL_MODE_PATTERN.is_match(query) {
Some(show_variables("sql_mode", "ONLY_FULL_GROUP_BY STRICT_TRANS_TABLES NO_ZERO_IN_DATE NO_ZERO_DATE ERROR_FOR_DIVISION_BY_ZERO NO_ENGINE_SUBSTITUTION"))
} else if SHOW_LOWER_CASE_PATTERN.is_match(query) {
Some(show_variables("lower_case_table_names", "0"))
} else if SHOW_COLLATION_PATTERN.is_match(query) || SHOW_VARIABLES_PATTERN.is_match(query) {
Some(show_variables("", ""))
} else {
None
};
recordbatches.map(Output::RecordBatches)
}
// Check for SET or other queries; this is the final check for a federated query.
fn check_others(query: &str) -> Option<Output> {
if OTHER_NOT_SUPPORTED_STMT.is_match(query.as_bytes()) {
return Some(Output::RecordBatches(RecordBatches::empty()));
}
let recordbatches = if SELECT_VERSION_PATTERN.is_match(query) {
Some(select_function("version()", MYSQL_VERSION))
} else if SELECT_TIME_DIFF_FUNC_PATTERN.is_match(query) {
Some(select_function(
"TIMEDIFF(NOW(), UTC_TIMESTAMP())",
"00:00:00",
))
} else {
None
};
recordbatches.map(Output::RecordBatches)
}
// Check whether the query is a federated or driver setup command,
// and return some faked results if there are any.
pub fn check(query: &str) -> Option<Output> {
// First to check the query is like "select @@variables".
let output = check_select_variable(query);
if output.is_some() {
return output;
}
// Then to check "show variables like ...".
let output = check_show_variables(query);
if output.is_some() {
return output;
}
// Last check.
check_others(query)
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_check() {
let query = "select 1";
let result = check(query);
assert!(result.is_none());
let query = "select versiona";
let output = check(query);
assert!(output.is_none());
fn test(query: &str, expected: Vec<&str>) {
let output = check(query);
match output.unwrap() {
Output::RecordBatches(r) => {
assert_eq!(r.pretty_print().lines().collect::<Vec<_>>(), expected)
}
_ => unreachable!(),
}
}
let query = "select version()";
let expected = vec![
"+-----------+",
"| version() |",
"+-----------+",
"| 8.0.26 |",
"+-----------+",
];
test(query, expected);
let query = "SELECT @@version_comment LIMIT 1";
let expected = vec![
"+-------------------+",
"| @@version_comment |",
"+-------------------+",
"| Greptime |",
"+-------------------+",
];
test(query, expected);
// variables
let query = "select @@tx_isolation, @@session.tx_isolation";
let expected = vec![
"+-----------------+------------------------+",
"| @@tx_isolation | @@session.tx_isolation |",
"+-----------------+------------------------+",
"| REPEATABLE-READ | REPEATABLE-READ |",
"+-----------------+------------------------+",
];
test(query, expected);
// complex variables
let query = "/* mysql-connector-java-8.0.17 (Revision: 16a712ddb3f826a1933ab42b0039f7fb9eebc6ec) */SELECT @@session.auto_increment_increment AS auto_increment_increment, @@character_set_client AS character_set_client, @@character_set_connection AS character_set_connection, @@character_set_results AS character_set_results, @@character_set_server AS character_set_server, @@collation_server AS collation_server, @@collation_connection AS collation_connection, @@init_connect AS init_connect, @@interactive_timeout AS interactive_timeout, @@license AS license, @@lower_case_table_names AS lower_case_table_names, @@max_allowed_packet AS max_allowed_packet, @@net_write_timeout AS net_write_timeout, @@performance_schema AS performance_schema, @@sql_mode AS sql_mode, @@system_time_zone AS system_time_zone, @@time_zone AS time_zone, @@transaction_isolation AS transaction_isolation, @@wait_timeout AS wait_timeout;";
let expected = vec![
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
"| auto_increment_increment | character_set_client | character_set_connection | character_set_results | character_set_server | collation_server | collation_connection | init_connect | interactive_timeout | license | lower_case_table_names | max_allowed_packet | net_write_timeout | performance_schema | sql_mode | system_time_zone | time_zone | transaction_isolation | wait_timeout; |",
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
"| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 31536000 | 0 | 0 | 134217728 | 31536000 | 0 | 0 | UTC | UTC | REPEATABLE-READ | 31536000 |",
"+--------------------------+----------------------+--------------------------+-----------------------+----------------------+------------------+----------------------+--------------+---------------------+---------+------------------------+--------------------+-------------------+--------------------+----------+------------------+-----------+-----------------------+---------------+",
];
test(query, expected);
let query = "show variables";
let expected = vec![
"+---------------+-------+",
"| Variable_name | Value |",
"+---------------+-------+",
"| | |",
"+---------------+-------+",
];
test(query, expected);
let query = "show variables like 'lower_case_table_names'";
let expected = vec![
"+------------------------+-------+",
"| Variable_name | Value |",
"+------------------------+-------+",
"| lower_case_table_names | 0 |",
"+------------------------+-------+",
];
test(query, expected);
let query = "show collation";
let expected = vec!["++", "++"]; // empty
test(query, expected);
let query = "SELECT TIMEDIFF(NOW(), UTC_TIMESTAMP())";
let expected = vec![
"+----------------------------------+",
"| TIMEDIFF(NOW(), UTC_TIMESTAMP()) |",
"+----------------------------------+",
"| 00:00:00 |",
"+----------------------------------+",
];
test(query, expected);
}
}

View File

@@ -63,7 +63,14 @@ impl<W: io::Write + Send + Sync> AsyncMysqlShim<W> for MysqlInstanceShim {
query: &'a str,
writer: QueryResultWriter<'a, W>,
) -> Result<()> {
let output = self.query_handler.do_query(query).await;
// TODO(LFC): Find a better way:
// `check` uses regex to filter out unsupported statements emitted by MySQL's federated
// components; this is quick and dirty, and there must be a better way to do it.
let output = if let Some(output) = crate::mysql::federated::check(query) {
Ok(output)
} else {
self.query_handler.do_query(query).await
};
let mut writer = MysqlResultWriter::new(writer);
writer.write(output).await

View File

@@ -1,3 +1,4 @@
mod federated;
pub mod handler;
pub mod server;
pub mod writer;

View File

@@ -1,10 +1,10 @@
use std::collections::HashMap;
use axum::extract::{Json, Query, State};
use axum::body::Body;
use axum::extract::{Json, Query, RawBody, State};
use common_telemetry::metric;
use metrics::counter;
use servers::http::handler as http_handler;
use servers::http::handler::ScriptExecution;
use servers::http::JsonOutput;
use table::test_util::MemTable;
@@ -58,27 +58,38 @@ async fn test_metrics() {
async fn test_scripts() {
common_telemetry::init_default_ut_logging();
let exec = create_script_payload();
let query_handler = create_testing_sql_query_handler(MemTable::default_numbers_table());
let script = r#"
@copr(sql='select uint32s as number from numbers', args=['number'], returns=['n'])
def test(n):
return n;
"#
.to_string();
let Json(json) = http_handler::scripts(State(query_handler), exec).await;
let query_handler = create_testing_sql_query_handler(MemTable::default_numbers_table());
let body = RawBody(Body::from(script.clone()));
let invalid_query = create_invalid_script_query();
let Json(json) = http_handler::scripts(State(query_handler.clone()), invalid_query, body).await;
assert!(!json.success(), "{:?}", json);
assert_eq!(json.error().unwrap(), "Invalid argument: invalid name");
let body = RawBody(Body::from(script));
let exec = create_script_query();
let Json(json) = http_handler::scripts(State(query_handler), exec, body).await;
assert!(json.success(), "{:?}", json);
assert!(json.error().is_none());
assert!(json.output().is_none());
}
fn create_script_payload() -> Json<ScriptExecution> {
Json(ScriptExecution {
name: "test".to_string(),
script: r#"
@copr(sql='select uint32s as number from numbers', args=['number'], returns=['n'])
def test(n):
return n;
"#
.to_string(),
fn create_script_query() -> Query<http_handler::ScriptQuery> {
Query(http_handler::ScriptQuery {
name: Some("test".to_string()),
})
}
fn create_invalid_script_query() -> Query<http_handler::ScriptQuery> {
Query(http_handler::ScriptQuery { name: None })
}
fn create_query() -> Query<http_handler::SqlQuery> {
Query(http_handler::SqlQuery {
sql: Some("select sum(uint32s) from numbers limit 20".to_string()),

View File

@@ -83,6 +83,9 @@ pub enum Error {
#[snafu(display("Invalid database name: {}", name))]
InvalidDatabaseName { name: String, backtrace: Backtrace },
#[snafu(display("Invalid table name: {}", name))]
InvalidTableName { name: String, backtrace: Backtrace },
#[snafu(display("Invalid default constraint, column: {}, source: {}", column, source))]
InvalidDefault {
column: String,
@@ -106,7 +109,9 @@ impl ErrorExt for Error {
| SqlTypeNotSupported { .. }
| InvalidDefault { .. } => StatusCode::InvalidSyntax,
InvalidDatabaseName { .. } | ColumnTypeMismatch { .. } => StatusCode::InvalidArguments,
InvalidDatabaseName { .. } | ColumnTypeMismatch { .. } | InvalidTableName { .. } => {
StatusCode::InvalidArguments
}
}
}

View File

@@ -5,8 +5,10 @@ use sqlparser::parser::Parser;
use sqlparser::parser::ParserError;
use sqlparser::tokenizer::{Token, Tokenizer};
use crate::error::{self, InvalidDatabaseNameSnafu, Result, SyntaxSnafu, TokenizerSnafu};
use crate::statements::show::{ShowDatabases, ShowKind, ShowTables};
use crate::error::{
self, InvalidDatabaseNameSnafu, InvalidTableNameSnafu, Result, SyntaxSnafu, TokenizerSnafu,
};
use crate::statements::show::{ShowCreateTable, ShowDatabases, ShowKind, ShowTables};
use crate::statements::statement::Statement;
/// GrepTime SQL parser context, a simple wrapper for Datafusion SQL parser.
@@ -102,11 +104,38 @@ impl<'a> ParserContext<'a> {
} else if self.matches_keyword(Keyword::TABLES) {
self.parser.next_token();
self.parse_show_tables()
} else if self.consume_token("CREATE") {
if self.consume_token("TABLE") {
self.parse_show_create_table()
} else {
self.unsupported(self.peek_token_as_string())
}
} else {
self.unsupported(self.peek_token_as_string())
}
}
/// Parse SHOW CREATE TABLE statement
fn parse_show_create_table(&mut self) -> Result<Statement> {
let table_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
ensure!(
!table_name.0.is_empty(),
InvalidTableNameSnafu {
name: table_name.to_string(),
}
);
Ok(Statement::ShowCreateTable(ShowCreateTable {
table_name: table_name.to_string(),
}))
}
fn parse_show_tables(&mut self) -> Result<Statement> {
let database = match self.parser.peek_token() {
Token::EOF | Token::SemiColon => {

View File

@@ -1,7 +1,8 @@
use sqlparser::ast::{SetExpr, Statement, Values};
use sqlparser::ast::{SetExpr, Statement, UnaryOperator, Values};
use sqlparser::parser::ParserError;
use crate::ast::{Expr, Value};
use crate::error::{self, Result};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Insert {
@@ -27,34 +28,59 @@ impl Insert {
}
}
pub fn values(&self) -> Vec<Vec<Value>> {
match &self.inner {
pub fn values(&self) -> Result<Vec<Vec<Value>>> {
let values = match &self.inner {
Statement::Insert { source, .. } => match &source.body {
SetExpr::Values(Values(values)) => values
.iter()
.map(|v| {
v.iter()
.map(|expr| match expr {
Expr::Value(v) => v.clone(),
Expr::Identifier(ident) => {
Value::SingleQuotedString(ident.value.clone())
}
_ => unreachable!(),
})
.collect::<Vec<Value>>()
})
.collect(),
SetExpr::Values(Values(exprs)) => sql_exprs_to_values(exprs)?,
_ => unreachable!(),
},
_ => unreachable!(),
}
};
Ok(values)
}
}
fn sql_exprs_to_values(exprs: &Vec<Vec<Expr>>) -> Result<Vec<Vec<Value>>> {
let mut values = Vec::with_capacity(exprs.len());
for es in exprs.iter() {
let mut vs = Vec::with_capacity(es.len());
for expr in es.iter() {
vs.push(match expr {
Expr::Value(v) => v.clone(),
Expr::Identifier(ident) => Value::SingleQuotedString(ident.value.clone()),
Expr::UnaryOp { op, expr }
if matches!(op, UnaryOperator::Minus | UnaryOperator::Plus) =>
{
if let Expr::Value(Value::Number(s, b)) = &**expr {
match op {
UnaryOperator::Minus => Value::Number(format!("-{}", s), *b),
UnaryOperator::Plus => Value::Number(s.to_string(), *b),
_ => unreachable!(),
}
} else {
return error::ParseSqlValueSnafu {
msg: format!("{:?}", expr),
}
.fail();
}
}
_ => {
return error::ParseSqlValueSnafu {
msg: format!("{:?}", expr),
}
.fail()
}
});
}
values.push(vs);
}
Ok(values)
}
impl TryFrom<Statement> for Insert {
type Error = ParserError;
fn try_from(value: Statement) -> Result<Self, Self::Error> {
fn try_from(value: Statement) -> std::result::Result<Self, Self::Error> {
match value {
Statement::Insert { .. } => Ok(Insert { inner: value }),
unexp => Err(ParserError::ParserError(format!(
@@ -78,7 +104,37 @@ mod tests {
let mut stmts = ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
assert_eq!(1, stmts.len());
let insert = stmts.pop().unwrap();
let r: Result<Statement, ParserError> = insert.try_into();
r.unwrap();
let _stmt: Statement = insert.try_into().unwrap();
}
#[test]
fn test_insert_value_with_unary_op() {
use crate::statements::statement::Statement;
// insert "-1"
let sql = "INSERT INTO my_table VALUES(-1)";
let stmt = ParserContext::create_with_dialect(sql, &GenericDialect {})
.unwrap()
.remove(0);
match stmt {
Statement::Insert(insert) => {
let values = insert.values().unwrap();
assert_eq!(values, vec![vec![Value::Number("-1".to_string(), false)]]);
}
_ => unreachable!(),
}
// insert "+1"
let sql = "INSERT INTO my_table VALUES(+1)";
let stmt = ParserContext::create_with_dialect(sql, &GenericDialect {})
.unwrap()
.remove(0);
match stmt {
Statement::Insert(insert) => {
let values = insert.values().unwrap();
assert_eq!(values, vec![vec![Value::Number("1".to_string(), false)]]);
}
_ => unreachable!(),
}
}
}

View File

@@ -40,6 +40,12 @@ pub struct ShowTables {
pub database: Option<String>,
}
/// SQL structure for `SHOW CREATE TABLE`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShowCreateTable {
pub table_name: String,
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
@@ -94,4 +100,27 @@ mod tests {
}
}
}
#[test]
pub fn test_show_create_table() {
let sql = "SHOW CREATE TABLE test";
let stmts: Vec<Statement> =
ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
assert_eq!(1, stmts.len());
assert_matches!(&stmts[0], Statement::ShowCreateTable { .. });
match &stmts[0] {
Statement::ShowCreateTable(show) => {
let table_name = show.table_name.as_str();
assert_eq!(table_name, "test");
}
_ => {
unreachable!();
}
}
}
#[test]
pub fn test_show_create_missing_table_name() {
let sql = "SHOW CREATE TABLE";
ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap_err();
}
}

View File

@@ -5,7 +5,7 @@ use crate::statements::alter::AlterTable;
use crate::statements::create_table::CreateTable;
use crate::statements::insert::Insert;
use crate::statements::query::Query;
use crate::statements::show::{ShowDatabases, ShowTables};
use crate::statements::show::{ShowCreateTable, ShowDatabases, ShowTables};
/// Tokens parsed by `DFParser` are converted into these values.
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -22,6 +22,8 @@ pub enum Statement {
ShowDatabases(ShowDatabases),
// SHOW TABLES
ShowTables(ShowTables),
// SHOW CREATE TABLE
ShowCreateTable(ShowCreateTable),
}
/// Converts Statement to sqlparser statement
@@ -36,6 +38,9 @@ impl TryFrom<Statement> for SpStatement {
Statement::ShowTables(_) => Err(ParserError::ParserError(
"sqlparser does not support SHOW TABLES query.".to_string(),
)),
Statement::ShowCreateTable(_) => Err(ParserError::ParserError(
"sqlparser does not support SHOW CREATE TABLE query.".to_string(),
)),
Statement::Query(s) => Ok(SpStatement::Query(Box::new(s.inner))),
Statement::Insert(i) => Ok(i.inner),
Statement::Create(_) | Statement::Alter(_) => unimplemented!(),

Some files were not shown because too many files have changed in this diff.