Compare commits

...

78 Commits

Author SHA1 Message Date
Lei, Huang
2c9bcbe885 refactor: catalog crate (#415)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write spliter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initiation of catalog

* feat: add region id to create table request and add heartbeat task to datanode instance

* fix: fix auto reconnect for heartbeat task

* chore: change TableValue::region_numbers to vec<u32>.

* fix: some tests

* fix: avoid concurrently start Heartbeat task by compare_exchange

* feat: refactor catalog key and values, separate table info into two kinds of keys

* feat: bump table id from metasrv

* fix: compare and set table id

* chore: merge develop

* fix: use integer serialization instead of string serialization

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-08 20:40:16 +08:00
Lei, Huang
dfd4b10493 feat: add shutdown mechanism for HeartbeatTask (#424) 2022-11-08 19:23:02 +08:00
dennis zhuang
dd488e8d21 feat: adds from_unixtime function (#420) 2022-11-08 18:22:00 +08:00
fys
857054f70d feat: impl insert for DistTable (#406)
* feat: impl insert for dist_table in frontend

* add the logic of decode region id in datanode.
2022-11-08 17:19:17 +08:00
Lei, Huang
a41aec0a86 fix: use same tmp data location as default config (#422) 2022-11-08 16:58:48 +08:00
Sheng hui
cff8fe4e0e feat: Allow sql parser to parse show-create-table statement (#347)
* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement

* feat: Add ShowCreateTable to Statement
2022-11-08 16:35:56 +08:00
Lei, Huang
a2f9b788f1 fix: datanode start in standalone mode by default (#418)
* fix: datanode start in standalone mode by default

* fix: detect misconfig on startup

* fix: some CR comments and add tests
2022-11-08 16:18:13 +08:00
shuiyisong
43f9c40f43 feat: add context to query_handler (#417)
This PR merely creates the Context struct; the fields within Context are not stable yet. Feel free to modify at will.
2022-11-08 13:29:32 +08:00
Ruihang Xia
af1df2066c perf: enlarge write row group size (#413)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-08 11:23:10 +08:00
LFC
f34a99ff5a feat: use regex to filter out not supported MySQL stmt (#396)
* feat: use regex to filter out not supported MySQL stmt

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-08 11:09:46 +08:00
Ruihang Xia
89a3b39728 perf: improve table scan performance (#407)
* refactor: improve table scan performance

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use BufReader to avoid pre-loading all content

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-07 17:28:53 +08:00
Lei, Huang
2137587091 feat: datanode heartbeat (#377)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write spliter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initiation of catalog

* feat: add region id to create table request and add heartbeat task to datanode instance

* fix: fix auto reconnect for heartbeat task

* chore: change TableValue::region_numbers to vec<u32>.

* fix: some tests

* fix: avoid concurrently start Heartbeat task by compare_exchange

* fix: some cr comments

* fix: fix unit tests

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-07 17:10:43 +08:00
Jiachun Feng
172c9a1e21 chore: minor refactor with meta_client (#393)
* chore: minor refactor

* feat: support none expect value on CAS
2022-11-07 17:03:31 +08:00
Ruihang Xia
ae147c2a74 chore: refine some unnecessary log (#410)
* remove some unnecessary information from logs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* further cleaning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-07 16:36:27 +08:00
fys
c2e1b0857c refactor: optimize channel_manager (#401)
* refactor: use dashmap in channel manager

* add benchmark for channel manager

* access field in channel use AtomicUsize

* cr
2022-11-07 16:09:01 +08:00
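The #401 bullets above describe swapping a lock-guarded map for dashmap and counting accesses with an atomic in the channel manager. A rough sketch of that shape, assuming the dashmap crate; Channel here is a stand-in, not the real tonic channel:

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use dashmap::DashMap;

// Stand-in for the real channel: keeps its peer address and an atomic
// access counter instead of a mutex-protected field.
#[derive(Clone)]
struct Channel {
    addr: String,
    access: Arc<AtomicUsize>,
}

#[derive(Default)]
struct ChannelManagerSketch {
    // One cached channel per peer address, shared concurrently.
    pool: DashMap<String, Channel>,
}

impl ChannelManagerSketch {
    fn get(&self, addr: &str) -> Channel {
        let entry = self.pool.entry(addr.to_string()).or_insert_with(|| Channel {
            addr: addr.to_string(),
            access: Arc::new(AtomicUsize::new(0)),
        });
        // Counting with an atomic avoids taking any extra lock.
        entry.access.fetch_add(1, Ordering::Relaxed);
        entry.clone()
    }
}

fn main() {
    let mgr = ChannelManagerSketch::default();
    let c1 = mgr.get("127.0.0.1:4001");
    let c2 = mgr.get("127.0.0.1:4001");
    assert_eq!(c1.addr, c2.addr);
    assert_eq!(c2.access.load(Ordering::Relaxed), 2);
}
```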
dennis zhuang
6e99bb8490 fix: /metrics endpoint (#404) 2022-11-07 10:34:13 +08:00
Ning Sun
eef20887cc ci: use lld linker for ci (#398)
* ci: use lld linker for ci

* ci: do a disk cleanup before test

* ci: add llvm cache to speedup installation

* ci: use lld linker for coverage as well

* feat: use lld for release too
2022-11-07 10:28:58 +08:00
LFC
16500b045b feat: distribute table in frontend (#328)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-05 10:41:59 +08:00
discord9
3d195ff858 feat: bind Greptime's own UDF&UDAF into Python Coprocessor Module (#335)
* feat: port own UDF&UDAF into py copr (untested yet)

* refactor: move UDF&UDAF to greptime_builtins

* feat: support List in val2py_obj

* test: some testcases for newly added UDFs

* test: complete test for all added gpdb's own UDF

* refactor: add underscore for long func name

* feat: better error message

* fix: typo
2022-11-04 15:49:41 +08:00
zyy17
bc701d3e7f ci: push image to dockerhub (#394) 2022-11-04 15:07:12 +08:00
LFC
6373bb04f9 fix: insert negative values (#383)
* fix: insert negative values

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-04 14:22:31 +08:00
Ruihang Xia
bfcd74fd16 feat: benchmark suit based on nyc taxi dataset (#384)
* solve dep conflict

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: nyc taxi dataset writer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix some literals

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add some queries

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix progress bar

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* able to skip write or read

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename to nyc-taxi.rs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove main.rs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adapt new client api

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* allow stdout output in this cli

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* some default values

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-04 14:13:17 +08:00
dennis zhuang
fc6d73b06b feat: improve /scripts API (#390)
* feat: improve /scripts API

* chore: json_err macro

* chore: json_err macro and refactor code

* fix: test
2022-11-04 14:09:07 +08:00
Lei, Huang
db2b577628 feat: remote catalog (#315)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write spliter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initiation of catalog

* fix: next range request start key

* fix: mock delete range

* refactor: simplify range response handling

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-04 11:43:31 +08:00
Yingwen
cba611b9f5 refactor: Serialize RawSchema/RawTableMeta/RawTableInfo (#382)
* refactor: Serialize Schema/TableMeta/TableInfo to raw structs

* test: Add tests for raw struct conversion

* style: Fix clippy

* refactor: SchemaBuilder::timestamp_index takes Option<usize>

So the caller can chain the timestamp_index method call even when there is no timestamp index.

* style(datatypes): Chains SchemaBuilder method calls
2022-11-04 11:25:17 +08:00
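The point of the Option<usize> change in #382 above is that a builder chain stays uniform whether or not a timestamp column exists. A minimal sketch of that builder pattern with a hypothetical SchemaBuilder, not the real datatypes API:

```rust
#[derive(Debug, Default)]
struct SchemaBuilder {
    columns: Vec<String>,
    timestamp_index: Option<usize>,
}

impl SchemaBuilder {
    fn column(mut self, name: &str) -> Self {
        self.columns.push(name.to_string());
        self
    }

    // Accepting Option<usize> lets callers keep chaining even when there is
    // no timestamp column at all.
    fn timestamp_index(mut self, index: Option<usize>) -> Self {
        self.timestamp_index = index;
        self
    }

    fn build(self) -> (Vec<String>, Option<usize>) {
        (self.columns, self.timestamp_index)
    }
}

fn main() {
    // With a timestamp column at index 0.
    let with_ts = SchemaBuilder::default()
        .column("ts")
        .column("value")
        .timestamp_index(Some(0))
        .build();

    // Without one: the same chain still type-checks.
    let without_ts = SchemaBuilder::default()
        .column("value")
        .timestamp_index(None)
        .build();

    println!("{:?} {:?}", with_ts, without_ts);
}
```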
zyy17
6aec1b4f90 ci: add workflow of artifacts release (#389)
Signed-off-by: zyy17 <zyylsxm@gmail.com>

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-04 10:55:41 +08:00
Ruihang Xia
6d1dd5e7af fix: also run CI in develop branch (#387)
* fix: also run CI in develop branch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add develop branch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-03 18:35:30 +08:00
Jiachun Feng
e19b63f4f5 chore: meta mock test (#379)
* chore: meta mock

* chore: refactor datanode selector

* chore: create route mock test

* chore: add mock module

* chore: memory store for test

* chore: mock meta for test

* chore: ensure memory store has the same behaviour as etcd

* chore: replace tokio lock to parking_lot
2022-11-03 18:33:29 +08:00
shuiyisong
750310c648 feat: frontend start with instance param (#385)
* chore: fix conflict

* chore: remove unused import
2022-11-03 18:05:01 +08:00
Ruihang Xia
9fd2d4e8db fix: detach grpc tasks to another runtime (#376)
* fix: detach grpc tasks to another runtime

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add runtime size options

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* group an obj-req into one task

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* make nitpicking CRer happy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-03 17:24:15 +08:00
元波
77233c20e1 fix: remove unnecessary protocol (#386) 2022-11-03 17:14:08 +08:00
fys
1fad67cf4d feat: grpc client support multi peers (#380)
* feat: grpc client use channel manager

* cr
2022-11-03 11:55:22 +08:00
LFC
5abff7a536 feat: range columns partitioning rule (#374)
* feat: parse partition syntax in "create table"

* feat: partition rule

* fix: rebase develop

* feat: range partitioning rule

* fix: resolve PR comments

* feat: range columns partitioning rule

* refactor: remove unused codes

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-02 22:36:32 +08:00
Yingwen
6f1f697bfc feat: Implements shutdown for GrpcServer and HttpServer (#372)
* fix: Fix TestGuard being dropped before grpc test starts

* feat: Let start and shutdown takes immutable reference to self

Also implement shutdown for GrpcServer

* feat: Implement shutdown for HttpServer

* style: Fix clippy

* chore: Add name to AlreadyStarted error
2022-11-02 18:10:41 +08:00
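Taking &self in start and shutdown (#372 above) matters because a server handle usually has to be shared, for example stored behind an Arc while another task calls shutdown. A rough sketch of that design, assuming the tokio runtime and a simplified stand-in for the real Server trait:

```rust
use std::sync::Arc;

use tokio::sync::Notify;

// Simplified stand-in: both methods take an immutable reference, so the
// handle can be shared behind an Arc without extra locking.
struct GrpcServerSketch {
    shutdown: Notify,
}

impl GrpcServerSketch {
    fn new() -> Self {
        Self { shutdown: Notify::new() }
    }

    async fn start(&self) {
        // Serve until a shutdown is requested.
        self.shutdown.notified().await;
        println!("server stopped");
    }

    fn shutdown(&self) {
        // Interior mutability (Notify) is what makes &self possible here.
        self.shutdown.notify_one();
    }
}

#[tokio::main]
async fn main() {
    let server = Arc::new(GrpcServerSketch::new());

    let runner = {
        let server = server.clone();
        tokio::spawn(async move { server.start().await })
    };

    server.shutdown();
    runner.await.unwrap();
}
```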
Jiachun Feng
2d4a44414d feat: refactor for test (#375)
* chore: add set_header macro & remove some unnecessary code

* chore: channel_manager with connector
2022-11-01 17:34:54 +08:00
LFC
ea2ebc0e87 feat: range partition rule (#304)
* feat: range partitioning rule

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-01 16:09:23 +08:00
Jiachun Feng
dacfd12b8f feat: router impl (#363)
* feat: heartbeat lease & route api

* feat: batchput&cas

* chore: demo&ut

* chore: by cr

* chore: datanode selector

* chore: rename with_key_range to with_range

* chore: ut
2022-11-01 11:45:05 +08:00
Ning Sun
518b665f1e feat: Improve http sql api and attempt to add openapi docs (#361)
This patch changes the output of our HTTP SQL API and prepares it for our SQL editor development. Changes include:

- includes aide for OAS 3.1 openapi documents, available at /v1/private/api.json
- simplified some of the http handlers' return types, using string or json directly
- created a new HttpRecordsOutput type to hide the internals of RecordBatch from the end user; it also tunes the data structure to be friendly for applications to consume
- updated the response struct to use a code for success or a detailed error code

Residual issue #366

* feat: allow http post for our sql http api

* feat: update our http api and attempt to add openapi spec support

* test: correct test against new handler apis

* refactor: rename rows to records

* refactor: removed HttpResponse completely

* feat: add information to our openapi docs

* feat: add docs for sql interface response

* refactor: use struct to represent query so we can doc it via aide

* refactor: use arc wrapped api

* feat: add redoc UI support

* Update src/servers/src/http.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* Update src/servers/src/http.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* fix: address review comments

* test: update integration tests for new api output

* refactor: make prometheus http apis compatible with recent changes

* refactor: get schema from stream

* test: add test for recordbatch to json serialization

* test: add todo for a test to be fixed later

* Revert "test: add todo for a test to be fixed later"

This reverts commit a5a50c7afb.

* fix: Revert "refactor: get schema from stream"

This reverts commit 945b685556.

* chore: add todo for pending issue #366

* chore: remove fixed server url in openapi docs

* feat: include error_code in json response

* refactor: use code over success field in json response

Co-authored-by: LFC <bayinamine@gmail.com>
2022-10-31 16:20:03 +08:00
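The #361 description above boils down to two response-shape decisions: a HttpRecordsOutput wrapper instead of exposing RecordBatch, and a numeric code field instead of a bare success flag. A hedged sketch of what such response types could look like, assuming serde/serde_json; the field names are illustrative, not the exact wire format:

```rust
use serde::Serialize;

// Hypothetical shape only: hides RecordBatch internals behind plain
// column names plus row values.
#[derive(Serialize)]
struct HttpRecordsOutputSketch {
    schema: Vec<String>,
    rows: Vec<Vec<serde_json::Value>>,
}

#[derive(Serialize)]
struct JsonResponseSketch {
    // A numeric code replaces a bare success flag; 0 means success and
    // non-zero values carry a detailed error code.
    code: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    output: Option<HttpRecordsOutputSketch>,
}

fn main() {
    let ok = JsonResponseSketch {
        code: 0,
        error: None,
        output: Some(HttpRecordsOutputSketch {
            schema: vec!["host".to_string(), "cpu".to_string()],
            rows: vec![vec!["web01".into(), 0.5.into()]],
        }),
    };
    println!("{}", serde_json::to_string_pretty(&ok).unwrap());
}
```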
Ruihang Xia
e2c28fe374 feat: support data type and schema conversion (#351)
* feat: type and schema transformer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test schema codec

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* support projection and schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* project schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typos

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/substrait/src/df_logical.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* more document about type variations

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-31 15:16:13 +08:00
Yingwen
f4e22282a4 feat: Region supports reading data with different schema (#342)
* feat(storage): Implement skeleton of ReadResolver

ReadResolver is used to resolve differences between schemas

* feat(storage): Add user_column_end to ReadResolver

* feat(storage): Implement Batch::batch_from_parts

Used to construct a Batch from parts according to the schema that the user expects to read.

* feat(storage): Compat memtable schema

* feat(storage): Compat parquet file schema

* fix(storage): ReadResolver supports projection under same schema version

Now ReadResolver takes ProjectedSchemaRef as dest schema, and checks
whether a value column is needed by the schema after projection.

* feat(storage): Check whether columns are same columns

is_source_column_readable() takes ColumnMetadata instead of ColumnSchema, and compares their column ids to check whether they are the same column.

* refactor(storage): Use row_key_end/user_column_end in source_schema

Rename ReadResolver::is_needed to ReadResolver::is_source_needed, and
remove row_key_end/user_column_end from ReadResolver, since they should
be the same as source_schema's

* chore(storage): Remove unused codes

* test(storage): Add tests for the resolver

* feat(storage): Returns error on different source and dest column names

* style(storage): Fix clippy

* refactor: Rename ReadResolver to ReadAdapter

* chore(table): Removed unused comment

* refactor: rename to is_source_column_compatible
2022-10-31 11:42:07 +08:00
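The #342 commit above is about reading data written under an older schema: the adapter decides, per source column, whether the projected destination schema still needs it, matching columns by column id as the bullets describe. A much-simplified sketch of that bookkeeping, not the real storage API:

```rust
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct ColumnId(u32);

struct SchemaSketch {
    columns: Vec<ColumnId>,
}

struct ReadAdapterSketch {
    // For each source column: does the dest (projected) schema need it?
    source_needed: Vec<bool>,
}

impl ReadAdapterSketch {
    fn new(source: &SchemaSketch, dest: &SchemaSketch) -> Self {
        let source_needed = source
            .columns
            .iter()
            .map(|id| dest.columns.contains(id))
            .collect();
        Self { source_needed }
    }

    fn is_source_needed(&self, source_index: usize) -> bool {
        self.source_needed[source_index]
    }
}

fn main() {
    let source = SchemaSketch { columns: vec![ColumnId(1), ColumnId(2), ColumnId(3)] };
    // The reader projects only columns 1 and 3.
    let dest = SchemaSketch { columns: vec![ColumnId(1), ColumnId(3)] };

    let adapter = ReadAdapterSketch::new(&source, &dest);
    assert!(adapter.is_source_needed(0));
    assert!(!adapter.is_source_needed(1));
    assert!(adapter.is_source_needed(2));
}
```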
dennis zhuang
0604eb7509 feat: prometheus remote write and read (#346)
* feat: scaffold for prometheus protocol handler

* feat: impl remote write and read for prometheus

* chore: make label matchers work in remote reading

* chore: case-sensitive regexp matching for labels and tweak the RESTful API

* test: prometheus test

* test: adds test for prometheus handler and http server

* fix: typo in comment

* refactor: move snappy_compress and snappy_decompress

* fix: by code review

* fix: collect_timeseries_ids

* fix: timestamp and value column's value may be null
2022-10-28 18:47:16 +08:00
Lei, Huang
81716d622e feat: timestamp column support i64 (#325)
* feat: align_bucket support i64 and timestamp values

* feat: add Int64 to timestamp

* feat: support query i64 timestamp vector

* test: fix failing tests

* refactor: simplify some code

* fix: CR comments and add insert and query test for i64 timestamp column
2022-10-28 18:39:11 +08:00
Ruihang Xia
3e8d9b421c chore: set CI timeout (#358)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-28 11:01:12 +08:00
fys
6d4c0ad5a3 feat: add writespliter (#345)
* Add writespliter

* Partition_rule use reference, not Arc
2022-10-27 10:57:34 +08:00
Jiachun Feng
00966cad69 feat: meta refactor (#339)
* feat: heartbeat handler

* chore: heartbeat handlers lock refactor

* chore: store rpc req/res wrapper

* chore: router rpc/res wrapper

* chore: const method(request_header)

* chore: rm unnecessary const fn & refactor HeartbeatHandler

* chore: refactor CreateRequest

* chore: HeartbeatAccumulator

* chore: improve router req/res convert

* fix: register race condition
2022-10-26 11:26:40 +08:00
Lei, Huang
932b30d299 refactor: catalog crate (#331)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop
2022-10-26 10:50:39 +08:00
Ruihang Xia
7fe39e9187 feat: support quering with logical plan in gRPC layer (#344)
* impl logical exec & example

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test on upper api

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add todo to prost dep

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sign the TODO

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-25 16:05:53 +08:00
LFC
2ca667cbdf refactor: make table scan return physical plan (#326)
* refactor: return PhysicalPlan in Table trait's scan method, to support partitioned execution in Frontend's distribute read

* refactor: pub use necessary DataFusion types

* refactor: replace old "PhysicalPlan" and its adapters

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-25 11:34:53 +08:00
Yingwen
64dac51e83 feat: Holds ColumnMetadata in StoreSchema (#333)
* chore: Update StoreSchema comment

* feat: Add metadata to ColumnSchema

* feat: Impl conversion between ColumnMetadata and ColumnSchema

We could use this feature to store the ColumnMetadata as arrow's
Schema, since the ColumnSchema could be further converted to an arrow
schema. Then we could use ColumnMetadata in StoreSchema, which contains
more information, especially the column id.

* feat(storage): Merge schema::Error to metadata::Error

To avoid cyclic dependency of two Errors

* feat(storage): Store ColumnMetadata in StoreSchema

* feat(storage): Use StoreSchemaRef to avoid cloning the whole StoreSchema struct

* test(storage): Fix test_store_schema

* feat(datatypes): Return error on duplicate meta key

* chore: Address CR comments
2022-10-25 11:06:22 +08:00
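The conversion described in #333 above hinges on one trick: stash the extra information (especially the column id) in the column schema's metadata map so it survives the trip through an arrow-style schema. A hypothetical round-trip sketch; the key name and struct shapes are assumptions, not the real datatypes API:

```rust
use std::collections::HashMap;

// Hypothetical metadata key used to carry the column id.
const COLUMN_ID_KEY: &str = "greptime:column_id";

#[derive(Debug, PartialEq)]
struct ColumnMetadataSketch {
    name: String,
    id: u32,
}

struct ColumnSchemaSketch {
    name: String,
    metadata: HashMap<String, String>,
}

fn to_column_schema(meta: &ColumnMetadataSketch) -> ColumnSchemaSketch {
    // Encode the id into the metadata map so it can ride along with the schema.
    let metadata = HashMap::from([(COLUMN_ID_KEY.to_string(), meta.id.to_string())]);
    ColumnSchemaSketch { name: meta.name.clone(), metadata }
}

fn from_column_schema(schema: &ColumnSchemaSketch) -> Result<ColumnMetadataSketch, String> {
    let id = schema
        .metadata
        .get(COLUMN_ID_KEY)
        .ok_or_else(|| format!("missing {COLUMN_ID_KEY}"))?
        .parse::<u32>()
        .map_err(|e| e.to_string())?;
    Ok(ColumnMetadataSketch { name: schema.name.clone(), id })
}

fn main() {
    let meta = ColumnMetadataSketch { name: "host".to_string(), id: 7 };
    let schema = to_column_schema(&meta);
    assert_eq!(from_column_schema(&schema).unwrap(), meta);
}
```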
xiaomin tang
edad6f89b5 docs: Add code_of_conduct adapted from the Contributor Covenant (#340) 2022-10-24 19:04:55 +08:00
Ruihang Xia
8ab43b65ea feat: serialize/deserialize logical and execution plan via substrait (#317)
* fix: change Utf8Array indice type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: remove unused sub-crate

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: impl for both Logical and Execution plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: move test-util subcrate into table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test: table scan logical plan round trip

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* drop support of physical plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix warnings

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename trait fns to encode/decode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address review comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-24 15:29:33 +08:00
Lei, Huang
6fc45e31e0 fix: put type rewrite optimizer rule at first (#337) 2022-10-24 15:05:59 +08:00
Yingwen
a457c49d99 refactor: Remove column_null_mask in MutationExtra (#314)
* refactor: Remove column_null_mask in MutationExtra

MutationExtra::column_null_mask is no longer needed, as we can now ensure there are no missing columns in WriteBatch.

* feat(storage): Remove MutationExtra

Just stores MutationType in the WalHeader, no longer needs MutationExtra
2022-10-24 14:53:35 +08:00
Jiachun Feng
b650656ae3 chore: refactor meta protocol (#332)
* chore: refactor channel_config

* chore: refactor grpc protocol

* feat: heartbeat streams
2022-10-21 20:30:57 +08:00
Ruihang Xia
bc9a2df9bf refactor: move test-util subcrate into table (#334)
* refactor: move test-util subcrate into table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: clean comment

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* move MockTableEngine into test-util

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-21 14:39:40 +08:00
LFC
6b0c5281d4 feat: try from DataFusion's ScalarValue for our Value (#329)
* feat: try from DataFusion's ScalarValue for our Value

* Update src/datatypes/src/value.rs

Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>

* fix: resolve CR comments

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
2022-10-20 20:22:40 +08:00
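Commit #329 above is a fallible conversion: not every DataFusion ScalarValue has a Greptime Value counterpart, which is exactly what TryFrom expresses. A generic sketch with toy enums standing in for the real ScalarValue and Value types:

```rust
// Toy stand-ins: the real types live in datafusion and datatypes.
#[derive(Debug)]
enum ScalarValueSketch {
    Int64(Option<i64>),
    Utf8(Option<String>),
    // Variants without a Greptime counterpart land in the Err arm.
    Unsupported,
}

#[derive(Debug, PartialEq)]
enum ValueSketch {
    Null,
    Int64(i64),
    String(String),
}

impl TryFrom<ScalarValueSketch> for ValueSketch {
    type Error = String;

    fn try_from(v: ScalarValueSketch) -> Result<Self, Self::Error> {
        Ok(match v {
            ScalarValueSketch::Int64(Some(i)) => ValueSketch::Int64(i),
            ScalarValueSketch::Utf8(Some(s)) => ValueSketch::String(s),
            // A None scalar of a supported type becomes a null value.
            ScalarValueSketch::Int64(None) | ScalarValueSketch::Utf8(None) => ValueSketch::Null,
            ScalarValueSketch::Unsupported => {
                return Err("unsupported scalar type".to_string())
            }
        })
    }
}

fn main() {
    assert_eq!(
        ValueSketch::try_from(ScalarValueSketch::Int64(Some(3))).unwrap(),
        ValueSketch::Int64(3)
    );
    assert!(ValueSketch::try_from(ScalarValueSketch::Unsupported).is_err());
}
```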
fys
fad8f442ef feat: modify proto for distribute insert (#327) 2022-10-20 12:41:15 +08:00
Lei, Huang
2d52f19662 feat: add table info (#323)
* refactor: add table_info method for Table trait

* feat: add table_info method to Table trait

* test: add more unit test

* fix: impl table_info for SystemTable

* test: fix failing test
2022-10-20 12:23:44 +08:00
LFC
d5800d0b60 feat: parse partition syntax in "create table" (#298)
* feat: parse partition syntax in "create table"

* Update src/sql/src/parsers/create_parser.rs

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
2022-10-20 10:43:15 +08:00
Ruihang Xia
fbea07ea83 chore: remove unused dependencies (#319)
* chore: remove unused dependencies

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: recover some dev-deps

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-19 14:08:54 +08:00
Yingwen
87130adf54 docs: Move contributing parts from README to CONTRIBUTING.md (#321) 2022-10-19 14:00:31 +08:00
Yingwen
c147657275 ci: Use docs instead of doc (#322) 2022-10-19 11:56:49 +08:00
Jiachun Feng
d5b34f8917 feat: metasrv (#300)
* meta: meta api&client

* meta: heartbeat server init

* feat: kv store

* chore: grpc server

* chore: meta server bootstrap

* feat: heartbeat client

* feat: route for create table

* chore: a channel pool manager

* feat: route client

* feat: store client

* chore: meta_client example

* chore: change schema

* chore: unit test & by cr

* chore: refactor meta client

* chore: add unit test
2022-10-19 11:02:58 +08:00
Yingwen
4d08ee6fbb fix: Fix broken wal and memtable benchmarks (#320) 2022-10-19 10:54:01 +08:00
dennis zhuang
94b263c261 refactor: datanode instance (#316)
* refactor: datanode Instance

* fix: resolve todo
2022-10-19 10:51:45 +08:00
Yingwen
c6d91edb83 refactor(storage): Split schema mod into multiple sub-mods (#318) 2022-10-18 18:56:52 +08:00
Yingwen
cdf3280fcf feat: Region supports write requests with old schema (#297)
* feat: Adds ColumnDefaultConstraint::create_default_vector

ColumnDefaultConstraint::create_default_vector is ported from
MitoTable::try_get_column_default_constraint_vector.

* refactor: Replace try_get_column_default_constraint_vector by create_default_vector

* style: Remove unnecessary map_err in MitoTable::insert

* feat: Adds compat_write

For each column in `dest_schema` but not in `write_batch`, this method inserts a vector filled with the default value into the `write_batch`. If there are columns not in `dest_schema`, an error is returned.

* chore: Add info log to RegionInner::alter

* feat(storage): RegionImpl::write support request with old version

* feat: Add nullable check when creating default value

* feat: Validate nullable and default value

* chore: Modify PutOperation comments

* chore: Make ColumnDescriptor::is_nullable readonly and validate name

* feat: Use CompatWrite trait to replace compat::compat_write method

Adds a CompatWrite trait to support padding columns to WriteBatch:
- The WriteBatch and PutData implements this trait
- Fix the issue that WriteBatch::schema is not updated to the
  schema after compat
- Also validate the created column when adding to PutData

The WriteBatch would also pad default values to missing columns in PutData, so the memtable inserter doesn't need to manually check whether the column is nullable and then insert a NullVector. Every WriteBatch is ensured to have all columns defined by the schema in its PutData.

* feat: Validate constraint by ColumnDefaultConstraint::validate()

The ColumnDefaultConstraint::validate() would also ensure the default
value has the same data type as the column's.

* feat: Use NullVector for null columns

* fix: Fix BinaryType returns wrong logical_type_id

* fix: Fix tests and revert NullVector for null columns

NullVector doesn't support a custom logical type, which makes it hard to encode/decode and also causes the arrow/protobuf codec of the write batch to fail.

* fix: create_default_vector use replicate to create vector with default value

This would fix the test_codec_with_none_column_protobuf test, as we need
to downcast the vector to construct the protobuf values.

* test: add tests for column default constraints

* test: Add tests for CompatWrite trait impl

* test: Test write region with old schema

* fix(storage): Fix replay() applies metadata too early

The committed sequence of the RegionChange action is the sequence of the
last entry that use the old metadata (schema). During replay, we should
apply the new metadata after we see an entry whose sequence is greater than (not equal to) the `RegionChange::committed_sequence`.

Also remove duplicate `set_committed_sequence()` call in
persist_manifest_version()

* chore: Removes some unreachable codes

Also add more comments to document codes in these files

* refactor: Refactor MitoTable::insert

Return error if we could not create a default vector for given column,
instead of ignoring the error

* chore: Fix incorrect comments

* chore: Fix typo in error message
2022-10-18 10:47:24 +08:00
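The compat_write behaviour described in #297 above (pad columns that the destination schema defines but the write batch lacks with their default values, and reject columns the destination schema does not know) can be sketched roughly as follows; the types are stand-ins, not the real storage API:

```rust
use std::collections::HashMap;

#[derive(Clone, Debug, PartialEq)]
enum Value {
    Int64(i64),
    Null,
}

// Destination schema: column name -> default value to pad with.
struct DestSchema {
    defaults: HashMap<String, Value>,
}

// A write batch is just column name -> column values here.
type WriteBatch = HashMap<String, Vec<Value>>;

fn compat_write(dest: &DestSchema, batch: &mut WriteBatch, rows: usize) -> Result<(), String> {
    // Reject columns the destination schema does not know about.
    for name in batch.keys() {
        if !dest.defaults.contains_key(name) {
            return Err(format!("unknown column {name}"));
        }
    }
    // Pad every missing column with a vector of its default value.
    for (name, default) in &dest.defaults {
        batch
            .entry(name.clone())
            .or_insert_with(|| vec![default.clone(); rows]);
    }
    Ok(())
}

fn main() {
    let dest = DestSchema {
        defaults: HashMap::from([
            ("ts".to_string(), Value::Int64(0)),
            ("memory".to_string(), Value::Null),
        ]),
    };
    let mut batch: WriteBatch =
        HashMap::from([("ts".to_string(), vec![Value::Int64(1), Value::Int64(2)])]);

    compat_write(&dest, &mut batch, 2).unwrap();
    assert_eq!(batch["memory"], vec![Value::Null, Value::Null]);
}
```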
Ning Sun
f243649971 refactor: Removed openssl from build requirement (#308)
* refactor: replace another axum-test-helper branch

* refactor: upgrade opendal version

* refactor: use cursor for file buffer

* refactor: remove native-tls in mysql_async

* refactor: use async block and pipeline for newer opendal api

* chore: update Cargo.lock

* chore: update dependencies

* docs: removed openssl from build requirement

* fix: call close on pipe writer to flush reader for parquet streamer

* refactor: remove redundant return

* chore: use pinned revision for our forked mysql_async

* style: avoid wild-card import in test code

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* style: use chained call for builder

Co-authored-by: liangxingjian <965662709@qq.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-17 19:29:17 +08:00
evenyag
69ba4581b7 test(servers): Fix OpenTSDB shutdown test occasionally fails (#311)
* test(servers): OpenTSDB shutdown test cover error branch

Create connections continuously to cover some branches of error handling
in OpentsdbServer

* test(servers): Add more tests for opentsdb server

Add a test to ensure we cannot connect to the server after shutdown, and a test to check existing connection usage after shutdown.
2022-10-17 14:00:44 +08:00
evenyag
f942b53ed0 style(table-engine): Remove unnecessary TableError::from (#312)
The usage of TableError::from could be replaced by `?`, which is more
concise
2022-10-17 11:49:21 +08:00
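The #312 note above is the standard Rust idiom: once From<SourceError> for TableError exists, an explicit map_err(TableError::from) can be replaced by `?`. A minimal sketch with hypothetical error types, not the crate's real ones:

```rust
use std::fmt;

#[derive(Debug)]
struct StorageError(String);

#[derive(Debug)]
struct TableError(String);

impl fmt::Display for TableError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "table error: {}", self.0)
    }
}

// With this impl in place, `?` performs the conversion automatically.
impl From<StorageError> for TableError {
    fn from(e: StorageError) -> Self {
        TableError(e.0)
    }
}

fn read_region() -> Result<u32, StorageError> {
    Ok(42)
}

// Before: explicit conversion.
fn scan_verbose() -> Result<u32, TableError> {
    read_region().map_err(TableError::from)
}

// After: `?` converts via the `From` impl, which is more concise.
fn scan_concise() -> Result<u32, TableError> {
    Ok(read_region()?)
}

fn main() {
    assert_eq!(scan_verbose().unwrap(), scan_concise().unwrap());
}
```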
dennis zhuang
25a16875b6 feat: create table and add new columns automatically in gRPC (#310)
* fix: readme

* feat: change Column's datatype in protobuf from optional to required

* feat: supports creating table and adding new columns automatically in gRPC, #279, #283

* fix: test

* refactor: execute_grpc_insert

* refactor: clean code and add test

* fix: test after rebasing develop branch

* test: test grpc server with different ports

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: minor changes

* chore: build_alter_table_request

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-17 10:34:52 +08:00
dennis zhuang
494a93c4f2 feat: manifest improvements (#303)
* feat: adds committed_sequence to RegionChange action, #281

* refactor: saving protocol action when writer version is changed

* feat: recover all region metadata in the manifest and replay them when replaying WAL, #282

* refactor: minor change and test recovering metadata after altering table schema

* fix: write wrong min_reader_version into manifest for region

* refactor: move up DataRow

* refactor: by CR comments

* test: assert recovered metadata

* refactor: by CR comments

* fix: comment
2022-10-13 15:43:35 +08:00
Ruihang Xia
b61d5989b7 fix: flaky parquet predicate suits (#307)
* fix: flaky parquet predicate suits

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: change ParquetWriter::write_rows as well

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-13 14:00:42 +08:00
evenyag
a8a6426abf fix: Fix replicate_primitive doesn't consider null values (#306) 2022-10-12 16:52:09 +08:00
Ruihang Xia
e99668092c refactor: relax memory ordering of accessing VersionControl::submmitted_sequence (#305)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-12 11:52:43 +08:00
Ruihang Xia
0c829a9712 chore: ignore vscode config directory in git (#299)
* chore: ignore vscode config directory in git

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: correct gitignore file

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-10 15:08:26 +08:00
fys
752be8dc41 feat: batch grpc insert for influxdb write (#295) 2022-10-09 10:49:27 +08:00
evenyag
2e1ab050a7 feat: Implements RegionWriter::alter (#292)
* fix(storage): Failure of writing manifest version won't abort applying edit

* feat(storage): Adds RegionMetadata::validate_alter to validate AlterRequest

* fix(storage): Protect write and apply region edit by version mutex

The region meta action needs the previous manifest version, so we need to hold the version mutex to prevent other threads from updating the manifest version while writing the action to the manifest.

* feat(storage): Implement RegionWriter::alter

RegionWriter::alter() would
1. acquire write lock first
2. then validate the alter request
3. build the new metadata by RegionMetadata::alter()
4. acquire the version lock
5. write the metadata to the manifest, which also bump the manifest
   version
6. freeze mutable memtables and apply the new metadata to Version
7. write the manifest version to wal

* test(storage): Add tests for Region::alter()

* test(storage): Add tests for RegionMetadata::validate_alter

* chore(storage): Modify InvalidAlterRequest error msg

* chore: Adjust comment
2022-10-08 20:41:04 +08:00
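The seven alter steps listed in #292 above are essentially a lock-ordering protocol: take the write lock before validating, and hold the version mutex only around the manifest write and the in-memory metadata switch. A much-simplified sketch of that ordering with placeholder types, not the real region writer:

```rust
use std::sync::Mutex;

struct Metadata {
    schema_version: u32,
}

struct AlterRequest {
    new_schema_version: u32,
}

// Placeholder region writer: `writer` serializes writes, `version` guards
// the in-memory metadata switch, `manifest_version` stands in for the
// persisted manifest state.
struct RegionWriterSketch {
    writer: Mutex<()>,
    version: Mutex<Metadata>,
    manifest_version: Mutex<u64>,
}

impl RegionWriterSketch {
    fn alter(&self, req: AlterRequest) -> Result<(), String> {
        // 1. Acquire the write lock first so no new writes interleave.
        let _write_guard = self.writer.lock().unwrap();

        // 2-3. Validate the request and build the new metadata.
        let current = self.version.lock().unwrap().schema_version;
        if req.new_schema_version <= current {
            return Err("stale alter request".to_string());
        }
        let new_meta = Metadata { schema_version: req.new_schema_version };

        // 4. Acquire the version lock so nobody bumps the manifest version
        //    between steps 5 and 6.
        let mut version = self.version.lock().unwrap();

        // 5. Persist the change to the manifest (bumps manifest version).
        *self.manifest_version.lock().unwrap() += 1;

        // 6. Freeze memtables and apply the new metadata in memory.
        *version = new_meta;

        // 7. Record the manifest version in the WAL (elided here).
        Ok(())
    }
}

fn main() {
    let writer = RegionWriterSketch {
        writer: Mutex::new(()),
        version: Mutex::new(Metadata { schema_version: 1 }),
        manifest_version: Mutex::new(0),
    };
    assert!(writer.alter(AlterRequest { new_schema_version: 2 }).is_ok());
}
```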
337 changed files with 27079 additions and 4981 deletions

.cargo/config.toml (new file)

@@ -0,0 +1,2 @@
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"


@@ -4,7 +4,7 @@
"color": "B60205"
},
"CHECKS": {
"regexp": "^(feat|fix|test|refactor|chore|style|doc|perf|build|ci|revert)(\\(.*\\))?:.*",
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?:.*",
"ignoreLabels" : ["ignore-title"]
}
}


@@ -15,9 +15,20 @@ jobs:
grcov:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Cache LLVM and Clang
id: cache-llvm
uses: actions/cache@v3
with:
path: ./llvm
key: llvm
- uses: arduino/setup-protoc@v1
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache-llvm.outputs.cache-hit }}
- name: Install toolchain
uses: actions-rs/toolchain@v1
with:
@@ -29,7 +40,7 @@ jobs:
- name: Cleanup disk
uses: curoky/cleanup-disk-action@v2.0
with:
retain: 'rust'
retain: 'rust,llvm'
- name: Execute tests
uses: actions-rs/cargo@v1
with:
@@ -38,7 +49,7 @@ jobs:
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests -Clink-arg=-fuse-ld=lld"
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}


@@ -1,6 +1,18 @@
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
- 'config/**'
- '.github/**'
- '**.md'
- '**.yml'
- '.dockerignore'
- 'docker/**'
name: Continuous integration for developing
@@ -12,6 +24,7 @@ jobs:
name: Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1
@@ -31,9 +44,20 @@ jobs:
name: Test Suite
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Cache LLVM and Clang
id: cache-llvm
uses: actions/cache@v3
with:
path: ./llvm
key: llvm
- uses: arduino/setup-protoc@v1
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache-llvm.outputs.cache-hit }}
- uses: actions-rs/toolchain@v1
with:
profile: minimal
@@ -41,11 +65,16 @@ jobs:
override: true
- name: Rust Cache
uses: Swatinem/rust-cache@v2.0.0
- name: Cleanup disk
uses: curoky/cleanup-disk-action@v2.0
with:
retain: 'rust,llvm'
- uses: actions-rs/cargo@v1
with:
command: test
args: --workspace
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
@@ -56,6 +85,7 @@ jobs:
name: Rustfmt
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1
@@ -76,6 +106,7 @@ jobs:
name: Clippy
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1


@@ -11,6 +11,7 @@ on:
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: thehanimo/pr-title-checker@v1.3.4
with:

.github/workflows/release.yml (new file)

@@ -0,0 +1,188 @@
on:
push:
tags:
- "v*.*.*"
name: Release
env:
RUST_TOOLCHAIN: nightly-2022-07-14
jobs:
build:
name: Build binary
strategy:
matrix:
# The file format is greptime-<tag>.<os>-<arch>
include:
- arch: x86_64-unknown-linux-gnu
os: ubuntu-latest
file: greptime-${{ github.ref_name }}.linux-amd64
- arch: aarch64-unknown-linux-gnu
os: ubuntu-latest
file: greptime-${{ github.ref_name }}.linux-arm64
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-${{ github.ref_name }}.darwin-arm64
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-${{ github.ref_name }}.darwin-amd64
runs-on: ${{ matrix.os }}
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Cache cargo assets
id: cache
uses: actions/cache@v3
with:
path: |
./llvm
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Install Protoc for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: | # Make sure the protoc is >= 3.15
wget https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip
unzip protoc-21.9-linux-x86_64.zip -d protoc
sudo cp protoc/bin/protoc /usr/local/bin/
sudo cp -r protoc/include/google /usr/local/include/
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
cached: ${{ steps.cache.outputs.cache-hit }}
- name: Install Protoc for macos
if: contains(matrix.arch, 'darwin')
run: |
brew install protobuf
- name: Install dependencies for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: |
sudo apt-get -y update
sudo apt-get -y install libssl-dev pkg-config g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
- name: Install stable toolchain
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ env.RUST_TOOLCHAIN }}
override: true
target: ${{ matrix.arch }}
- name: Output package versions
run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version
- name: Run cargo build
uses: actions-rs/cargo@v1
with:
command: build
args: ${{ matrix.opts }} --release --locked --target ${{ matrix.arch }}
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
- name: Calculate checksum and rename binary
shell: bash
run: |
cd target/${{ matrix.arch }}/release
cp greptime ${{ matrix.file }}
echo $(shasum -a 256 greptime | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}
path: target/${{ matrix.arch }}/release/${{ matrix.file }}
- name: Upload checksum of artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/release/${{ matrix.file }}.sha256sum
release:
name: Release artifacts
needs: [build]
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Download artifacts
uses: actions/download-artifact@v3
- name: Publish release
uses: softprops/action-gh-release@v1
with:
name: "Release ${{ github.ref_name }}"
files: |
**/greptime-${{ github.ref_name }}.*
docker:
name: Build docker image
needs: [build]
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v2
- name: Download amd64 binary
uses: actions/download-artifact@v3
with:
name: greptime-${{ github.ref_name }}.linux-amd64
path: amd64
- name: Rename amd64 binary
run: |
mv amd64/greptime-${{ github.ref_name }}.linux-amd64 amd64/greptime
- name: Download arm64 binary
uses: actions/download-artifact@v3
with:
name: greptime-${{ github.ref_name }}.linux-arm64
path: arm64
- name: Rename arm64 binary
run: |
mv arm64/greptime-${{ github.ref_name }}.linux-arm64 arm64/greptime
- name: Set file permissions
shell: bash
run: |
chmod +x amd64/greptime arm64/greptime
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up buildx
uses: docker/setup-buildx-action@v2
- name: Build and push
uses: docker/build-push-action@v3
with:
context: .
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
ghcr.io/greptimeteam/greptimedb:${{ github.ref_name }}
greptime/greptimedb:${{ github.ref_name }}

9
.gitignore vendored

@@ -19,12 +19,15 @@ debug/
# JetBrains IDE config directory
.idea/
# VSCode IDE config directory
.vscode/
# Logs
**/__unittest_logs
logs/
.DS_store
.gitignore
# cpython's generated python byte code
**/__pycache__/
# Benchmark dataset
benchmarks/data


@@ -9,7 +9,7 @@ repos:
rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
hooks:
- id: cargo-sort
args: ["--workspace"]
args: ["--workspace", "--print"]
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0

132
CODE_OF_CONDUCT.md Normal file

@@ -0,0 +1,132 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
info@greptime.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


@@ -10,6 +10,34 @@ To learn about the design of GreptimeDB, please refer to the [design docs](https
- Make sure all unit tests are passed.
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
#### `pre-commit` Hooks
You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
1. Install `pre-commit`
```
$ pip install pre-commit
```
or
```
$ brew install pre-commit
```
2. Install the `pre-commit` hooks
```
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-push
```
Now `pre-commit` will run automatically on `git commit`.
### Title
The titles of pull requests should be prefixed with a category name listed in the [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0)
@@ -32,4 +60,4 @@ of what you were trying to do and what went wrong. You can also reach for help i
## Bug report
To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).

1314
Cargo.lock generated

File diff suppressed because it is too large


@@ -1,10 +1,12 @@
[workspace]
members = [
"benchmarks",
"src/api",
"src/catalog",
"src/client",
"src/cmd",
"src/common/base",
"src/common/catalog",
"src/common/error",
"src/common/function",
"src/common/function-macro",
@@ -12,13 +14,15 @@ members = [
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/substrait",
"src/common/telemetry",
"src/common/time",
"src/datanode",
"src/datatypes",
"src/frontend",
"src/log-store",
"src/logical-plans",
"src/meta-client",
"src/meta-srv",
"src/object-store",
"src/query",
"src/script",
@@ -28,5 +32,7 @@ members = [
"src/store-api",
"src/table",
"src/table-engine",
"test-util",
]
[profile.release]
debug = true


@@ -11,7 +11,6 @@ GreptimeDB: the next-generation hybrid timeseries/analytics processing database
To compile GreptimeDB from source, you'll need the following:
- Rust
- Protobuf
- OpenSSL
#### Rust
@@ -23,23 +22,6 @@ The easiest way to install Rust is to use [`rustup`](https://rustup.rs/), which
major package manager on macOS and Linux distributions. You can find
installation instructions [here](https://grpc.io/docs/protoc-installation/).
#### OpenSSL
For Ubuntu:
```bash
sudo apt install libssl-dev
```
For RedHat-based: Fedora, Oracle Linux, etc:
```bash
sudo dnf install openssl-devel
```
For macOS:
```bash
brew install openssl
```
### Build the Docker Image
```
@@ -125,7 +107,7 @@ cargo run -- --log-dir=logs --log-level=debug frontend start -c ./config/fronten
cpu DOUBLE DEFAULT 0,
memory DOUBLE,
TIME INDEX (ts),
PRIMARY KEY(ts,host)) ENGINE=mito WITH(regions=1);
PRIMARY KEY(host)) ENGINE=mito WITH(regions=1);
```
3. Insert data:
@@ -151,33 +133,6 @@ cargo run -- --log-dir=logs --log-level=debug frontend start -c ./config/fronten
```
You can delete your data by removing `/tmp/greptimedb`.
## Contribute
## Contributing
1. [Install rust](https://www.rust-lang.org/tools/install)
2. [Install `pre-commit`](https://pre-commit.com/#plugins) to run hooks such as `cargo fmt` automatically on every commit.
```
$ pip install pre-commit
or
$ brew install pre-commit
$
```
3. Install the git hook scripts:
```
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-push
```
now `pre-commit` will run automatically on `git commit`.
4. Check out branch from `develop` and make your contribution. Follow the [style guide](https://github.com/GreptimeTeam/docs/blob/main/style-guide/zh.md). Create a PR when you are ready, feel free and have fun!
Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.

14
benchmarks/Cargo.toml Normal file

@@ -0,0 +1,14 @@
[package]
name = "benchmarks"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
arrow = "10"
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
parquet = { version = "*" }
tokio = { version = "1.21", features = ["full"] }


@@ -0,0 +1,439 @@
//! Benchmark using the New York City taxi trip records dataset. You can download the dataset from
//! [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
#![feature(once_cell)]
#![allow(clippy::print_stdout)]
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
time::Instant,
};
use arrow::{
array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray},
datatypes::{DataType, Float64Type, Int64Type},
record_batch::RecordBatch,
};
use clap::Parser;
use client::{
admin::Admin,
api::v1::{
codec::InsertBatch, column::Values, insert_expr, Column, ColumnDataType, ColumnDef,
CreateExpr, InsertExpr,
},
Client, Database, Select,
};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::{
arrow::{ArrowReader, ParquetFileArrowReader},
file::{reader::FileReader, serialized_reader::SerializedFileReader},
};
use tokio::task::JoinSet;
const DATABASE_NAME: &str = "greptime";
const CATALOG_NAME: &str = "greptime";
const SCHEMA_NAME: &str = "public";
const TABLE_NAME: &str = "nyc_taxi";
#[derive(Parser)]
#[command(name = "NYC benchmark runner")]
struct Args {
/// Path to the dataset
#[arg(short, long)]
path: Option<String>,
/// Batch size of insert request.
#[arg(short = 's', long = "batch-size", default_value_t = 4096)]
batch_size: usize,
/// Number of client threads on write (parallel on file level)
#[arg(short = 't', long = "thread-num", default_value_t = 4)]
thread_num: usize,
/// Number of query iterations
#[arg(short = 'i', long = "iter-num", default_value_t = 3)]
iter_num: usize,
#[arg(long = "skip-write")]
skip_write: bool,
#[arg(long = "skip-read")]
skip_read: bool,
#[arg(short, long, default_value_t = String::from("127.0.0.1:3001"))]
endpoint: String,
}
fn get_file_list<P: AsRef<Path>>(path: P) -> Vec<PathBuf> {
std::fs::read_dir(path)
.unwrap()
.map(|dir| dir.unwrap().path().canonicalize().unwrap())
.collect()
}
async fn write_data(
batch_size: usize,
db: &Database,
path: PathBuf,
mpb: MultiProgress,
pb_style: ProgressStyle,
) -> u128 {
let file = std::fs::File::open(&path).unwrap();
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
let row_num = file_reader.metadata().file_metadata().num_rows();
let record_batch_reader = ParquetFileArrowReader::new(file_reader)
.get_record_reader(batch_size)
.unwrap();
let progress_bar = mpb.add(ProgressBar::new(row_num as _));
progress_bar.set_style(pb_style);
progress_bar.set_message(format!("{:?}", path));
let mut total_rpc_elapsed_ms = 0;
for record_batch in record_batch_reader {
let record_batch = record_batch.unwrap();
let row_count = record_batch.num_rows();
let insert_batch = convert_record_batch(record_batch).into();
let insert_expr = InsertExpr {
table_name: TABLE_NAME.to_string(),
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: vec![insert_batch],
})),
options: HashMap::default(),
};
let now = Instant::now();
db.insert(insert_expr).await.unwrap();
let elapsed = now.elapsed();
total_rpc_elapsed_ms += elapsed.as_millis();
progress_bar.inc(row_count as _);
}
progress_bar.finish_with_message(format!(
"file {:?} done in {}ms",
path, total_rpc_elapsed_ms
));
total_rpc_elapsed_ms
}
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
let schema = record_batch.schema();
let fields = schema.fields();
let row_count = record_batch.num_rows();
let mut columns = vec![];
for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
let values = build_values(array);
let column = Column {
column_name: field.name().to_owned(),
values: Some(values),
null_mask: vec![],
// datatype and semantic_type are set to default
..Default::default()
};
columns.push(column);
}
InsertBatch {
columns,
row_count: row_count as _,
}
}
fn build_values(column: &ArrayRef) -> Values {
match column.data_type() {
DataType::Int64 => {
let array = column
.as_any()
.downcast_ref::<PrimitiveArray<Int64Type>>()
.unwrap();
let values = array.values();
Values {
i64_values: values.to_vec(),
..Default::default()
}
}
DataType::Float64 => {
let array = column
.as_any()
.downcast_ref::<PrimitiveArray<Float64Type>>()
.unwrap();
let values = array.values();
Values {
f64_values: values.to_vec(),
..Default::default()
}
}
DataType::Timestamp(_, _) => {
let array = column
.as_any()
.downcast_ref::<TimestampNanosecondArray>()
.unwrap();
let values = array.values();
Values {
i64_values: values.to_vec(),
..Default::default()
}
}
DataType::Utf8 => {
let array = column.as_any().downcast_ref::<StringArray>().unwrap();
let values = array.iter().filter_map(|s| s.map(String::from)).collect();
Values {
string_values: values,
..Default::default()
}
}
DataType::Null
| DataType::Boolean
| DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float16
| DataType::Float32
| DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_)
| DataType::Duration(_)
| DataType::Interval(_)
| DataType::Binary
| DataType::FixedSizeBinary(_)
| DataType::LargeBinary
| DataType::LargeUtf8
| DataType::List(_)
| DataType::FixedSizeList(_, _)
| DataType::LargeList(_)
| DataType::Struct(_)
| DataType::Union(_, _)
| DataType::Dictionary(_, _)
| DataType::Decimal(_, _)
| DataType::Map(_, _) => todo!(),
}
}
fn create_table_expr() -> CreateExpr {
CreateExpr {
catalog_name: Some(CATALOG_NAME.to_string()),
schema_name: Some(SCHEMA_NAME.to_string()),
table_name: TABLE_NAME.to_string(),
desc: None,
column_defs: vec![
ColumnDef {
name: "VendorID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tpep_pickup_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tpep_dropoff_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "passenger_count".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "trip_distance".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "RatecodeID".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "store_and_fwd_flag".to_string(),
datatype: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "PULocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "DOLocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "payment_type".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "fare_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "extra".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "mta_tax".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tip_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "tolls_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "improvement_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "total_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "congestion_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
ColumnDef {
name: "airport_fee".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
},
],
time_index: "tpep_pickup_datetime".to_string(),
primary_keys: vec!["VendorID".to_string()],
create_if_not_exists: false,
table_options: Default::default(),
}
}
fn query_set() -> HashMap<String, String> {
let mut ret = HashMap::new();
ret.insert(
"count_all".to_string(),
format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
);
ret.insert(
"fare_amt_by_passenger".to_string(),
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
);
ret
}
async fn do_write(args: &Args, client: &Client) {
let admin = Admin::new("admin", client.clone());
let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
let mut write_jobs = JoinSet::new();
let create_table_result = admin.create(create_table_expr()).await;
println!("Create table result: {:?}", create_table_result);
let progress_bar_style = ProgressStyle::with_template(
"[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
)
.unwrap()
.progress_chars("##-");
let multi_progress_bar = MultiProgress::new();
let file_progress = multi_progress_bar.add(ProgressBar::new(file_list.len() as _));
file_progress.inc(0);
let batch_size = args.batch_size;
for _ in 0..args.thread_num {
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
}
}
while write_jobs.join_next().await.is_some() {
file_progress.inc(1);
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
}
}
}
async fn do_query(num_iter: usize, db: &Database) {
for (query_name, query) in query_set() {
println!("Running query: {}", query);
for i in 0..num_iter {
let now = Instant::now();
let _res = db.select(Select::Sql(query.clone())).await.unwrap();
let elapsed = now.elapsed();
println!(
"query {}, iteration {}: {}ms",
query_name,
i,
elapsed.as_millis()
);
}
}
}
fn main() {
let args = Args::parse();
tokio::runtime::Builder::new_multi_thread()
.worker_threads(args.thread_num)
.enable_all()
.build()
.unwrap()
.block_on(async {
let client = Client::with_urls(vec![&args.endpoint]);
if !args.skip_write {
do_write(&args, &client).await;
}
if !args.skip_read {
let db = Database::new(DATABASE_NAME, client.clone());
do_query(args.iter_num, &db).await;
}
})
}


@@ -1,7 +1,9 @@
node_id = 42
http_addr = '0.0.0.0:3000'
rpc_addr = '0.0.0.0:3001'
wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mode = "standalone"
mysql_addr = '0.0.0.0:3306'
mysql_runtime_size = 4
@@ -12,3 +14,9 @@ postgres_runtime_size = 4
[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'
[meta_client_opts]
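# Connection settings for the metasrv; timeouts are in milliseconds.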
metasrv_addr = "1.1.1.1:3002"
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = true


@@ -0,0 +1,4 @@
bind_addr = '127.0.0.1:3002'
server_addr = '0.0.0.0:3002'
store_addr = '127.0.0.1:2380'
datanode_lease_secs = 30


@@ -24,9 +24,8 @@ RUN cargo build --release
# TODO(zyy17): Maybe we should use a more secure container image.
FROM ubuntu:22.04 as base
WORKDIR /greptimedb
COPY --from=builder /greptimedb/target/release/greptime /greptimedb/bin/
ENV PATH /greptimedb/bin/:$PATH
WORKDIR /greptime
COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT [ "greptime" ]
CMD [ "datanode", "start"]
ENTRYPOINT ["greptime"]

9
docker/ci/Dockerfile Normal file

@@ -0,0 +1,9 @@
FROM ubuntu:22.04
ARG TARGETARCH
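# TARGETARCH is set by buildx for each platform (amd64/arm64); the release workflow
# stages the matching binary under a directory of the same name.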
ADD $TARGETARCH/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]


@@ -5,6 +5,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common-base = { path = "../common/base" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
prost = "0.11"
snafu = { version = "0.7", features = ["backtraces"] }


@@ -6,6 +6,11 @@ fn main() {
"greptime/v1/select.proto",
"greptime/v1/physical_plan.proto",
"greptime/v1/greptime.proto",
"greptime/v1/meta/common.proto",
"greptime/v1/meta/heartbeat.proto",
"greptime/v1/meta/route.proto",
"greptime/v1/meta/store.proto",
"prometheus/remote/remote.proto",
],
&["."],
)


@@ -49,7 +49,7 @@ message Column {
bytes null_mask = 4;
// Helpful in creating vector from column.
optional ColumnDataType datatype = 5;
ColumnDataType datatype = 5;
}
message ColumnDef {


@@ -2,6 +2,10 @@ syntax = "proto3";
package greptime.v1;
message RequestHeader {
string tenant = 1;
}
message ExprHeader {
uint32 version = 1;
}


@@ -27,6 +27,7 @@ message ObjectExpr {
message SelectExpr {
oneof expr {
string sql = 1;
bytes logical_plan = 2;
PhysicalPlan physical_plan = 15;
}
}
@@ -55,6 +56,8 @@ message InsertExpr {
// The "sql" field is meant to be removed in the future.
string sql = 3;
}
map<string, bytes> options = 4;
}
// TODO(jiachun)


@@ -3,6 +3,7 @@ syntax = "proto3";
package greptime.v1;
import "greptime/v1/admin.proto";
import "greptime/v1/common.proto";
import "greptime/v1/database.proto";
service Greptime {
@@ -10,8 +11,9 @@ service Greptime {
}
message BatchRequest {
repeated AdminRequest admins = 1;
repeated DatabaseRequest databases = 2;
RequestHeader header = 1;
repeated AdminRequest admins = 2;
repeated DatabaseRequest databases = 3;
}
message BatchResponse {


@@ -8,3 +8,7 @@ message InsertBatch {
repeated Column columns = 1;
uint32 row_count = 2;
}
message RegionId {
uint64 id = 1;
}


@@ -0,0 +1,48 @@
syntax = "proto3";
package greptime.v1.meta;
message RequestHeader {
uint64 protocol_version = 1;
// cluster_id is the ID of the cluster the request is sent to.
uint64 cluster_id = 2;
// member_id is the ID of the sender server.
uint64 member_id = 3;
}
message ResponseHeader {
uint64 protocol_version = 1;
// cluster_id is the ID of the cluster which sent the response.
uint64 cluster_id = 2;
Error error = 3;
}
message Error {
int32 code = 1;
string err_msg = 2;
}
message Peer {
uint64 id = 1;
string addr = 2;
}
message TableName {
string catalog_name = 1;
string schema_name = 2;
string table_name = 3;
}
message TimeInterval {
// The unix timestamp in millis of the start of this period.
uint64 start_timestamp_millis = 1;
// The unix timestamp in millis of the end of this period.
uint64 end_timestamp_millis = 2;
}
message KeyValue {
// key is the key in bytes. An empty key is not allowed.
bytes key = 1;
// value is the value held by the key, in bytes.
bytes value = 2;
}


@@ -0,0 +1,92 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Heartbeat {
// Heartbeat. A heartbeat may carry many kinds of content, such as:
// 1. Metadata to be registered to meta server and discoverable by other nodes.
// 2. Some performance metrics, such as Load, CPU usage, etc.
// 3. The number of computing tasks being executed.
rpc Heartbeat(stream HeartbeatRequest) returns (stream HeartbeatResponse) {}
// Ask for the leader's endpoint.
rpc AskLeader(AskLeaderRequest) returns (AskLeaderResponse) {}
}
message HeartbeatRequest {
RequestHeader header = 1;
// Self peer
Peer peer = 2;
// Leader node
bool is_leader = 3;
// Actually reported time interval
TimeInterval report_interval = 4;
// Node stat
NodeStat node_stat = 5;
// Region stats in this node
repeated RegionStat region_stats = 6;
// Follower nodes and stats, empty on follower nodes
repeated ReplicaStat replica_stats = 7;
}
message NodeStat {
// The read capacity units during this period
uint64 rcus = 1;
// The write capacity units during this period
uint64 wcus = 2;
// Table number in this node
uint64 table_num = 3;
// Region number in this node
uint64 region_num = 4;
double cpu_usage = 5;
double load = 6;
// Read disk I/O in the node
double read_io_rate = 7;
// Write disk I/O in the node
double write_io_rate = 8;
// Others
map<string, string> attrs = 100;
}
message RegionStat {
uint64 region_id = 1;
TableName table_name = 2;
// The read capacity units during this period
uint64 rcus = 3;
// The write capacity units during this period
uint64 wcus = 4;
// Approximate region size
uint64 approximate_size = 5;
// Approximate number of rows
uint64 approximate_rows = 6;
// Others
map<string, string> attrs = 100;
}
message ReplicaStat {
Peer peer = 1;
bool in_sync = 2;
bool is_learner = 3;
}
message HeartbeatResponse {
ResponseHeader header = 1;
repeated bytes payload = 2;
}
message AskLeaderRequest {
RequestHeader header = 1;
}
message AskLeaderResponse {
ResponseHeader header = 1;
Peer leader = 2;
}
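A hedged sketch may help here (it assumes the prost-generated Rust types that `src/api` exposes under `api::v1::meta`; all numbers are placeholders): building a minimal heartbeat for a single datanode.

```rust
use api::v1::meta::{HeartbeatRequest, NodeStat, Peer, RequestHeader};

/// Build a minimal heartbeat for one datanode reporting its own stats.
/// Real values would come from the node's runtime metrics.
fn sample_heartbeat(cluster_id: u64, node_id: u64, addr: &str) -> HeartbeatRequest {
    HeartbeatRequest {
        header: Some(RequestHeader::new((cluster_id, node_id))),
        peer: Some(Peer {
            id: node_id,
            addr: addr.to_string(),
        }),
        node_stat: Some(NodeStat {
            table_num: 12,
            region_num: 48,
            cpu_usage: 0.35,
            load: 1.2,
            ..Default::default()
        }),
        ..Default::default()
    }
}
```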


@@ -0,0 +1,82 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Router {
// Fetch routing information for tables. The smallest unit is the complete
// routing information (all regions) of a table.
//
// ```text
// table_1
// table_name
// table_schema
// regions
// region_1
// leader_peer
// follower_peer_1, follower_peer_2
// region_2
// leader_peer
// follower_peer_1, follower_peer_2, follower_peer_3
// region_xxx
// table_2
// ...
// ```
//
rpc Route(RouteRequest) returns (RouteResponse) {}
rpc Create(CreateRequest) returns (RouteResponse) {}
}
message RouteRequest {
RequestHeader header = 1;
repeated TableName table_names = 2;
}
message RouteResponse {
ResponseHeader header = 1;
repeated Peer peers = 2;
repeated TableRoute table_routes = 3;
}
message CreateRequest {
RequestHeader header = 1;
TableName table_name = 2;
repeated Partition partitions = 3;
}
message TableRoute {
Table table = 1;
repeated RegionRoute region_routes = 2;
}
message RegionRoute {
Region region = 1;
// single leader node for write task
uint64 leader_peer_index = 2;
// multiple follower nodes for read task
repeated uint64 follower_peer_indexes = 3;
}
message Table {
TableName table_name = 1;
bytes table_schema = 2;
}
message Region {
uint64 id = 1;
string name = 2;
Partition partition = 3;
map<string, string> attrs = 100;
}
// PARTITION `region_name` VALUES LESS THAN (value_list)
message Partition {
repeated bytes column_list = 1;
repeated bytes value_list = 2;
}
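The peer indirection above is worth spelling out: a `RegionRoute` references entries in the response's shared `peers` list by index instead of embedding a `Peer` per region. A hedged sketch (assuming the prost-generated types under `api::v1::meta`) of resolving each region's leader:

```rust
use api::v1::meta::{Peer, RouteResponse};

/// Map every region in the response to its leader peer, if any.
/// `leader_peer_index` indexes into `RouteResponse::peers`.
fn region_leaders(resp: &RouteResponse) -> Vec<(u64, Option<&Peer>)> {
    resp.table_routes
        .iter()
        .flat_map(|table_route| table_route.region_routes.iter())
        .map(|region_route| {
            let region_id = region_route
                .region
                .as_ref()
                .map(|region| region.id)
                .unwrap_or_default();
            (region_id, resp.peers.get(region_route.leader_peer_index as usize))
        })
        .collect()
}
```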


@@ -0,0 +1,138 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Store {
// Range gets the keys in the range from the key-value store.
rpc Range(RangeRequest) returns (RangeResponse);
// Put puts the given key into the key-value store.
rpc Put(PutRequest) returns (PutResponse);
// BatchPut atomically puts the given keys into the key-value store.
rpc BatchPut(BatchPutRequest) returns (BatchPutResponse);
// CompareAndPut atomically updates the key to the given value
// if its current value == the expected value.
rpc CompareAndPut(CompareAndPutRequest) returns (CompareAndPutResponse);
// DeleteRange deletes the given range from the key-value store.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
}
message RangeRequest {
RequestHeader header = 1;
// key is the first key for the range. If range_end is not given, the
// request only looks up key.
bytes key = 2;
// range_end is the upper bound on the requested range [key, range_end).
// If range_end is '\0', the range is all keys >= key.
// If range_end is key plus one (e.g., "aa"+1 == "ab", "a\xff"+1 == "b"),
// then the range request gets all keys prefixed with key.
// If both key and range_end are '\0', then the range request returns all
// keys.
bytes range_end = 3;
// limit is a limit on the number of keys returned for the request. When
// limit is set to 0, it is treated as no limit.
int64 limit = 4;
// keys_only when set returns only the keys and not the values.
bool keys_only = 5;
}
message RangeResponse {
ResponseHeader header = 1;
// kvs is the list of key-value pairs matched by the range request.
repeated KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
}
message PutRequest {
RequestHeader header = 1;
// key is the key, in bytes, to put into the key-value store.
bytes key = 2;
// value is the value, in bytes, to associate with the key in the
// key-value store.
bytes value = 3;
// If prev_kv is set, gets the previous key-value pair before changing it.
// The previous key-value pair will be returned in the put response.
bool prev_kv = 4;
}
message PutResponse {
ResponseHeader header = 1;
// If prev_kv is set in the request, the previous key-value pair will be
// returned.
KeyValue prev_kv = 2;
}
message BatchPutRequest {
RequestHeader header = 1;
repeated KeyValue kvs = 2;
// If prev_kv is set, gets the previous key-value pairs before changing it.
// The previous key-value pairs will be returned in the batch put response.
bool prev_kv = 3;
}
message BatchPutResponse {
ResponseHeader header = 1;
// If prev_kv is set in the request, the previous key-value pairs will be
// returned.
repeated KeyValue prev_kvs = 2;
}
message CompareAndPutRequest {
RequestHeader header = 1;
// key is the key, in bytes, to put into the key-value store.
bytes key = 2;
// expect is the previous value, in bytes
bytes expect = 3;
// value is the value, in bytes, to associate with the key in the
// key-value store.
bytes value = 4;
}
message CompareAndPutResponse {
ResponseHeader header = 1;
bool success = 2;
KeyValue prev_kv = 3;
}
message DeleteRangeRequest {
RequestHeader header = 1;
// key is the first key to delete in the range.
bytes key = 2;
// range_end is the key following the last key to delete for the range
// [key, range_end).
// If range_end is not given, the range is defined to contain only the key
// argument.
// If range_end is the given key plus one (e.g., "aa"+1 == "ab"), then the range
// is all the keys with the prefix (the given key).
// If range_end is '\0', the range is all keys greater than or equal to the
// key argument.
bytes range_end = 3;
// If prev_kv is set, gets the previous key-value pairs before deleting it.
// The previous key-value pairs will be returned in the delete response.
bool prev_kv = 4;
}
message DeleteRangeResponse {
ResponseHeader header = 1;
// deleted is the number of keys deleted by the delete range request.
int64 deleted = 2;
// If prev_kv is set in the request, the previous key-value pairs will be
// returned.
repeated KeyValue prev_kvs = 3;
}
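The `range_end` rules in `RangeRequest` and `DeleteRangeRequest` follow the usual etcd-style conventions, which are easiest to see in code. A hedged sketch (assuming the prost-generated `RangeRequest` under `api::v1::meta`) that builds a prefix scan:

```rust
use api::v1::meta::RangeRequest;

/// Build a RangeRequest covering every key with the given prefix, following
/// the "key plus one" rule: bump the last non-0xff byte and truncate.
/// An all-0xff prefix degrades to '\0', i.e. all keys >= the prefix.
fn prefix_scan(prefix: &[u8]) -> RangeRequest {
    let mut range_end = prefix.to_vec();
    loop {
        match range_end.pop() {
            Some(byte) if byte < 0xff => {
                range_end.push(byte + 1);
                break;
            }
            // A trailing 0xff cannot be bumped: drop it and carry into the previous byte.
            Some(_) => continue,
            None => {
                range_end.push(0); // '\0': scan all keys >= the prefix
                break;
            }
        }
    }
    RangeRequest {
        key: prefix.to_vec(),
        range_end,
        ..Default::default()
    }
}
```

For example, `prefix_scan(b"aa")` yields `range_end = b"ab"`, matching the `"aa"+1 == "ab"` example in the comments above.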


@@ -0,0 +1,85 @@
// Copyright 2016 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
import "prometheus/remote/types.proto";
message WriteRequest {
repeated prometheus.TimeSeries timeseries = 1;
// Cortex uses this field to determine the source of the write request.
// We reserve it to avoid any compatibility issues.
reserved 2;
repeated prometheus.MetricMetadata metadata = 3;
}
// ReadRequest represents a remote read request.
message ReadRequest {
repeated Query queries = 1;
enum ResponseType {
// Server will return a single ReadResponse message with matched series that includes list of raw samples.
// It's recommended to use streamed response types instead.
//
// Response headers:
// Content-Type: "application/x-protobuf"
// Content-Encoding: "snappy"
SAMPLES = 0;
// Server will stream a delimited ChunkedReadResponse message that contains XOR encoded chunks for a single series.
// Each message is following varint size and fixed size bigendian uint32 for CRC32 Castagnoli checksum.
//
// Response headers:
// Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
// Content-Encoding: ""
STREAMED_XOR_CHUNKS = 1;
}
// accepted_response_types allows negotiating the content type of the response.
//
// Response types are taken from the list in the FIFO order. If no response type in `accepted_response_types` is
// implemented by server, error is returned.
// For request that do not contain `accepted_response_types` field the SAMPLES response type will be used.
repeated ResponseType accepted_response_types = 2;
}
// ReadResponse is a response when response_type equals SAMPLES.
message ReadResponse {
// In same order as the request's queries.
repeated QueryResult results = 1;
}
message Query {
int64 start_timestamp_ms = 1;
int64 end_timestamp_ms = 2;
repeated prometheus.LabelMatcher matchers = 3;
prometheus.ReadHints hints = 4;
}
message QueryResult {
// Samples within a time series must be ordered by time.
repeated prometheus.TimeSeries timeseries = 1;
}
// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS.
// We strictly stream full series after series, optionally split by time. This means that a single frame can contain
// partition of the single series, but once a new series is started to be streamed it means that no more chunks will
// be sent for previous one. Series are returned sorted in the same way TSDB block are internally.
message ChunkedReadResponse {
repeated prometheus.ChunkedSeries chunked_series = 1;
// query_index represents an index of the query from ReadRequest.queries these chunks relates to.
int64 query_index = 2;
}


@@ -0,0 +1,117 @@
// Copyright 2017 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
message MetricMetadata {
enum MetricType {
UNKNOWN = 0;
COUNTER = 1;
GAUGE = 2;
HISTOGRAM = 3;
GAUGEHISTOGRAM = 4;
SUMMARY = 5;
INFO = 6;
STATESET = 7;
}
// Represents the metric type, these match the set from Prometheus.
// Refer to model/textparse/interface.go for details.
MetricType type = 1;
string metric_family_name = 2;
string help = 4;
string unit = 5;
}
message Sample {
double value = 1;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 2;
}
message Exemplar {
// Optional, can be empty.
repeated Label labels = 1;
double value = 2;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 3;
}
// TimeSeries represents samples and labels for a single time series.
message TimeSeries {
// For a timeseries to be valid, and for the samples and exemplars
// to be ingested by the remote system properly, the labels field is required.
repeated Label labels = 1;
repeated Sample samples = 2;
repeated Exemplar exemplars = 3;
}
message Label {
string name = 1;
string value = 2;
}
message Labels {
repeated Label labels = 1;
}
// Matcher specifies a rule, which can match or set of labels or not.
message LabelMatcher {
enum Type {
EQ = 0;
NEQ = 1;
RE = 2;
NRE = 3;
}
Type type = 1;
string name = 2;
string value = 3;
}
message ReadHints {
int64 step_ms = 1; // Query step size in milliseconds.
string func = 2; // String representation of surrounding function or aggregation.
int64 start_ms = 3; // Start time in milliseconds.
int64 end_ms = 4; // End time in milliseconds.
repeated string grouping = 5; // List of label names used in aggregation.
bool by = 6; // Indicate whether it is without or by.
int64 range_ms = 7; // Range vector selector range in milliseconds.
}
// Chunk represents a TSDB chunk.
// Time range [min, max] is inclusive.
message Chunk {
int64 min_time_ms = 1;
int64 max_time_ms = 2;
// We require this to match chunkenc.Encoding.
enum Encoding {
UNKNOWN = 0;
XOR = 1;
}
Encoding type = 3;
bytes data = 4;
}
// ChunkedSeries represents single, encoded time series.
message ChunkedSeries {
// Labels should be sorted.
repeated Label labels = 1;
// Chunks will be in start time order and may overlap.
repeated Chunk chunks = 2;
}


@@ -1,8 +1,13 @@
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;
use crate::error::{self, Result};
use crate::v1::column::Values;
use crate::v1::Column;
use crate::v1::ColumnDataType;
#[derive(Debug, PartialEq, Eq)]
@@ -143,8 +148,47 @@ impl Values {
}
}
impl Column {
// The value type of `vector` must match the column's datatype.
pub fn push_vals(&mut self, origin_count: usize, vector: VectorRef) {
let values = self.values.get_or_insert_with(Values::default);
let mut null_mask = BitVec::from_slice(&self.null_mask);
let len = vector.len();
null_mask.reserve_exact(origin_count + len);
null_mask.extend(BitVec::repeat(false, len));
(0..len).into_iter().for_each(|idx| match vector.get(idx) {
Value::Null => null_mask.set(idx + origin_count, true),
Value::Boolean(val) => values.bool_values.push(val),
Value::UInt8(val) => values.u8_values.push(val.into()),
Value::UInt16(val) => values.u16_values.push(val.into()),
Value::UInt32(val) => values.u32_values.push(val),
Value::UInt64(val) => values.u64_values.push(val),
Value::Int8(val) => values.i8_values.push(val.into()),
Value::Int16(val) => values.i16_values.push(val.into()),
Value::Int32(val) => values.i32_values.push(val),
Value::Int64(val) => values.i64_values.push(val),
Value::Float32(val) => values.f32_values.push(*val),
Value::Float64(val) => values.f64_values.push(*val),
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
Value::Binary(val) => values.binary_values.push(val.to_vec()),
Value::Date(val) => values.date_values.push(val.val()),
Value::DateTime(val) => values.datetime_values.push(val.val()),
Value::Timestamp(val) => values
.ts_millis_values
.push(val.convert_to(TimeUnit::Millisecond)),
Value::List(_) => unreachable!(),
});
self.null_mask = null_mask.into_vec();
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::BooleanVector;
use super::*;
#[test]
@@ -358,4 +402,29 @@ mod tests {
"Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
);
}
#[test]
fn test_column_put_vector() {
use crate::v1::column::SemanticType;
// Some(false), None, Some(true), Some(true)
let mut column = Column {
column_name: "test".to_string(),
semantic_type: SemanticType::Field as i32,
values: Some(Values {
bool_values: vec![false, true, true],
..Default::default()
}),
null_mask: vec![2],
datatype: ColumnDataType::Boolean as i32,
};
let row_count = 4;
let vector = Arc::new(BooleanVector::from(vec![Some(true), None, Some(false)]));
column.push_vals(row_count, vector);
// Some(false), None, Some(true), Some(true), Some(true), None, Some(false)
let bool_values = column.values.unwrap().bool_values;
assert_eq!(vec![false, true, true, true, false], bool_values);
let null_mask = column.null_mask;
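// 34 == 0b0010_0010: bits 1 and 5 are set, matching the two nulls at indices 1 and 5 above.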
assert_eq!(34, null_mask[0]);
}
}


@@ -1,5 +1,6 @@
pub mod error;
pub mod helper;
pub mod prometheus;
pub mod serde;
pub mod v1;


@@ -0,0 +1,5 @@
#![allow(clippy::derive_partial_eq_without_eq)]
pub mod remote {
tonic::include_proto!("prometheus");
}


@@ -1,7 +1,7 @@
pub use prost::DecodeError;
use prost::Message;
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, SelectResult};
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionId, SelectResult};
macro_rules! impl_convert_with_bytes {
($data_type: ty) => {
@@ -24,6 +24,7 @@ macro_rules! impl_convert_with_bytes {
impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionId);
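// The generated byte conversions for RegionId are exercised in test_convert_region_id below.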
#[cfg(test)]
mod tests {
@@ -127,6 +128,16 @@ mod tests {
);
}
#[test]
fn test_convert_region_id() {
let region_id = RegionId { id: 12 };
let bytes: Vec<u8> = region_id.into();
let region_id: RegionId = bytes.deref().try_into().unwrap();
assert_eq!(12, region_id.id);
}
fn mock_insert_batch() -> InsertBatch {
let values = column::Values {
i32_values: vec![2, 3, 4, 5, 6, 7, 8],


@@ -4,3 +4,5 @@ tonic::include_proto!("greptime.v1");
pub mod codec {
tonic::include_proto!("greptime.v1.codec");
}
pub mod meta;

54
src/api/src/v1/meta.rs Normal file

@@ -0,0 +1,54 @@
tonic::include_proto!("greptime.v1.meta");
pub const PROTOCOL_VERSION: u64 = 1;
impl RequestHeader {
#[inline]
pub fn new((cluster_id, member_id): (u64, u64)) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
member_id,
}
}
}
impl ResponseHeader {
#[inline]
pub fn success(cluster_id: u64) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
..Default::default()
}
}
#[inline]
pub fn failed(cluster_id: u64, error: Error) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
error: Some(error),
}
}
}
macro_rules! gen_set_header {
($req: ty) => {
impl $req {
#[inline]
pub fn set_header(&mut self, (cluster_id, member_id): (u64, u64)) {
self.header = Some(RequestHeader::new((cluster_id, member_id)));
}
}
};
}
gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
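A hedged usage sketch (the key prefix and ids are illustrative only): any of the request types listed above can stamp the standard header in a single call.

```rust
use api::v1::meta::RangeRequest;

/// Build a range scan over a key prefix and attach the standard header.
fn scan_with_header(cluster_id: u64, member_id: u64) -> RangeRequest {
    let mut req = RangeRequest {
        key: b"greptime-".to_vec(),
        range_end: b"greptime.".to_vec(), // '-' + 1 == '.', i.e. a prefix scan
        ..Default::default()
    };
    req.set_header((cluster_id, member_id));
    req
}
```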


@@ -5,21 +5,42 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
api = { path = "../api" }
arc-swap = "1.0"
async-stream = "0.3"
async-trait = "0.1"
backoff = { version = "0.4", features = ["tokio"] }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.17"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }
tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
object-store = { path = "../object-store" }
opendal = "0.17"
storage = { path = "../storage" }
table-engine = { path = "../table-engine" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }


@@ -4,6 +4,7 @@ use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::arrow;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
@@ -103,6 +104,57 @@ pub enum Error {
#[snafu(display("Illegal catalog manager state: {}", msg))]
IllegalManagerState { backtrace: Backtrace, msg: String },
#[snafu(display("Failed to scan system catalog table, source: {}", source))]
SystemCatalogTableScan {
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display(
"Invalid table schema in catalog entry, table:{}, schema: {:?}, source: {}",
table_info,
schema,
source
))]
InvalidTableSchema {
table_info: String,
schema: RawSchema,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec {
#[snafu(backtrace)]
source: common_query::error::Error,
},
#[snafu(display("Cannot parse catalog value, source: {}", source))]
InvalidCatalogValue {
#[snafu(backtrace)]
source: common_catalog::error::Error,
},
#[snafu(display("IO error occurred while fetching catalog info, source: {}", source))]
Io {
backtrace: Backtrace,
source: std::io::Error,
},
#[snafu(display("Local and remote catalog data are inconsistent, msg: {}", msg))]
CatalogStateInconsistent { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to perform metasrv operation, source: {}", source))]
MetaSrv {
#[snafu(backtrace)]
source: meta_client::error::Error,
},
#[snafu(display("Failed to bump table id"))]
BumpTableId { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to parse table id from metasrv, data: {:?}", data))]
ParseTableId { data: String, backtrace: Backtrace },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -115,14 +167,17 @@ impl ErrorExt for Error {
| Error::TableNotFound { .. }
| Error::IllegalManagerState { .. }
| Error::CatalogNotFound { .. }
| Error::InvalidEntryType { .. } => StatusCode::Unexpected,
| Error::InvalidEntryType { .. }
| Error::CatalogStateInconsistent { .. } => StatusCode::Unexpected,
Error::SystemCatalog { .. } | Error::EmptyValue | Error::ValueDeserialize { .. } => {
StatusCode::StorageUnavailable
}
Error::SystemCatalog { .. }
| Error::EmptyValue
| Error::ValueDeserialize { .. }
| Error::Io { .. } => StatusCode::StorageUnavailable,
Error::ReadSystemCatalog { source, .. } => source.status_code(),
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::RegisterTable { .. } => StatusCode::Internal,
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
@@ -132,6 +187,13 @@ impl ErrorExt for Error {
| Error::InsertTableRecord { source, .. }
| Error::OpenTable { source, .. }
| Error::CreateTable { source, .. } => source.status_code(),
Error::MetaSrv { source, .. } => source.status_code(),
Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
Error::InvalidTableSchema { source, .. } => source.status_code(),
Error::BumpTableId { .. } | Error::ParseTableId { .. } => {
StatusCode::StorageUnavailable
}
}
}


@@ -3,20 +3,21 @@
use std::any::Any;
use std::sync::Arc;
use common_telemetry::info;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CreateTableRequest;
use table::TableRef;
pub use crate::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
pub use crate::manager::LocalCatalogManager;
use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
pub mod consts;
pub mod error;
mod manager;
pub mod memory;
pub mod local;
pub mod remote;
pub mod schema;
mod system;
pub mod system;
pub mod tables;
/// Represents a list of named catalogs
@@ -31,13 +32,13 @@ pub trait CatalogList: Sync + Send {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef>;
) -> Result<Option<CatalogProviderRef>>;
/// Retrieves the list of available catalog names
fn catalog_names(&self) -> Vec<String>;
fn catalog_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific catalog by name, provided it exists.
fn catalog(&self, name: &str) -> Option<CatalogProviderRef>;
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>>;
}
/// Represents a catalog, comprising a number of named schemas.
@@ -47,14 +48,17 @@ pub trait CatalogProvider: Sync + Send {
fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available schema names in this catalog.
fn schema_names(&self) -> Vec<String>;
fn schema_names(&self) -> Result<Vec<String>>;
/// Registers schema to this catalog.
fn register_schema(&self, name: String, schema: SchemaProviderRef)
-> Option<SchemaProviderRef>;
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>>;
/// Retrieves a specific schema from the catalog by name, provided it exists.
fn schema(&self, name: &str) -> Option<SchemaProviderRef>;
fn schema(&self, name: &str) -> Result<Option<SchemaProviderRef>>;
}
pub type CatalogListRef = Arc<dyn CatalogList>;
@@ -63,32 +67,27 @@ pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
#[async_trait::async_trait]
pub trait CatalogManager: CatalogList {
/// Starts a catalog manager.
async fn start(&self) -> error::Result<()>;
async fn start(&self) -> Result<()>;
/// Returns next available table id.
fn next_table_id(&self) -> TableId;
async fn next_table_id(&self) -> Result<TableId>;
/// Registers a table in the given catalog/schema to the catalog manager,
/// returns table registered.
async fn register_table(&self, request: RegisterTableRequest) -> error::Result<usize>;
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
/// Registers a system table; should be called before starting the manager.
async fn register_system_table(&self, request: RegisterSystemTableRequest)
-> error::Result<()>;
/// Returns the table by catalog, schema and table name.
fn table(
&self,
catalog: Option<&str>,
schema: Option<&str>,
table_name: &str,
) -> error::Result<Option<TableRef>>;
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
/// Hook called after system table opening.
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> error::Result<()> + Send + Sync>;
pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> Result<()> + Send + Sync>;
/// Register system table request:
/// - When system table is already created and registered, the hook will be called
@@ -99,9 +98,10 @@ pub struct RegisterSystemTableRequest {
pub open_hook: Option<OpenSystemTableHook>,
}
#[derive(Clone)]
pub struct RegisterTableRequest {
pub catalog: Option<String>,
pub schema: Option<String>,
pub catalog: String,
pub schema: String,
pub table_name: String,
pub table_id: TableId,
pub table: TableRef,
@@ -111,3 +111,53 @@ pub struct RegisterTableRequest {
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
format!("{}.{}.{}", catalog, schema, table)
}
pub trait CatalogProviderFactory {
fn create(&self, catalog_name: String) -> CatalogProviderRef;
}
pub trait SchemaProviderFactory {
fn create(&self, catalog_name: String, schema_name: String) -> SchemaProviderRef;
}
pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
manager: &'a M,
engine: TableEngineRef,
sys_table_requests: &'a mut Vec<RegisterSystemTableRequest>,
) -> Result<()> {
for req in sys_table_requests.drain(..) {
let catalog_name = &req.create_table_request.catalog_name;
let schema_name = &req.create_table_request.schema_name;
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
table
} else {
let table = engine
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
catalog_name, schema_name, table_name, table_id,
),
})?;
manager
.register_table(RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: table_name.clone(),
table_id,
table: table.clone(),
})
.await?;
info!("Created and registered system table: {}", table_name);
table
};
if let Some(hook) = req.open_hook {
(hook)(table)?;
}
}
Ok(())
}
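With CatalogList, CatalogProvider, and SchemaProvider all returning Result now, a caller threads `?` through each lookup level instead of chaining Options. A minimal sketch of such a lookup, assuming it lives inside this crate so `Result`, `CatalogListRef`, and `TableRef` resolve to the items above (it is not part of the patch itself):
// Illustrative sketch only: walking the now-fallible catalog traits.
fn lookup_table(
    catalog_list: &CatalogListRef,
    catalog_name: &str,
    schema_name: &str,
    table_name: &str,
) -> Result<Option<TableRef>> {
    // Each level can fail (e.g. a remote backend error), not just be absent.
    let catalog = match catalog_list.catalog(catalog_name)? {
        Some(catalog) => catalog,
        None => return Ok(None),
    };
    let schema = match catalog.schema(schema_name)? {
        Some(schema) => schema,
        None => return Ok(None),
    };
    schema.table(table_name)
}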

src/catalog/src/local.rs (new file, +7 lines)

@@ -0,0 +1,7 @@
pub mod manager;
pub mod memory;
pub use manager::LocalCatalogManager;
pub use memory::{
new_memory_catalog_list, MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider,
};


@@ -2,8 +2,12 @@ use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::RecordBatch;
use common_telemetry::{debug, info};
use common_telemetry::info;
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -15,25 +19,22 @@ use table::requests::OpenTableRequest;
use table::table::numbers::NumbersTable;
use table::TableRef;
use super::error::Result;
use crate::consts::{
INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::Result;
use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, IllegalManagerStateSnafu, OpenTableSnafu,
ReadSystemCatalogSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu, TableExistsSnafu,
TableNotFoundSnafu,
};
use crate::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::local::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
VALUE_INDEX,
};
use crate::tables::SystemCatalog;
use crate::{
format_full_table_name, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
CatalogProvider, CatalogProviderRef, RegisterSystemTableRequest, RegisterTableRequest,
SchemaProvider,
};
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -50,7 +51,7 @@ impl LocalCatalogManager {
/// Create a new [CatalogManager] with given user catalogs and table engine
pub async fn try_new(engine: TableEngineRef) -> Result<Self> {
let table = SystemCatalogTable::new(engine.clone()).await?;
let memory_catalog_list = crate::memory::new_memory_catalog_list()?;
let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
let system_catalog = Arc::new(SystemCatalog::new(
table,
memory_catalog_list.clone(),
@@ -90,49 +91,7 @@ impl LocalCatalogManager {
// Processing system table hooks
let mut sys_table_requests = self.system_table_requests.lock().await;
for req in sys_table_requests.drain(..) {
let catalog_name = &req.create_table_request.catalog_name;
let schema_name = &req.create_table_request.schema_name;
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) =
self.table(catalog_name.as_deref(), schema_name.as_deref(), table_name)?
{
table
} else {
let table = self
.engine
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
catalog_name.as_deref().unwrap_or(DEFAULT_CATALOG_NAME),
schema_name.as_deref().unwrap_or(DEFAULT_SCHEMA_NAME),
table_name,
table_id,
),
})?;
self.register_table(RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: table_name.clone(),
table_id,
table: table.clone(),
})
.await?;
info!("Created and registered system table: {}", table_name);
table
};
if let Some(hook) = req.open_hook {
(hook)(table)?;
}
}
handle_system_table_request(self, self.engine.clone(), &mut sys_table_requests).await?;
Ok(())
}
@@ -143,9 +102,9 @@ impl LocalCatalogManager {
self.system.information_schema.system.clone(),
)?;
let system_catalog = Arc::new(MemoryCatalogProvider::new());
system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema);
system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema)?;
self.catalogs
.register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog);
.register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog)?;
let default_catalog = Arc::new(MemoryCatalogProvider::new());
let default_schema = Arc::new(MemorySchemaProvider::new());
@@ -155,9 +114,9 @@ impl LocalCatalogManager {
let table = Arc::new(NumbersTable::default());
default_schema.register_table("numbers".to_string(), table)?;
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema);
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
self.catalogs
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog)?;
Ok(())
}
@@ -213,18 +172,17 @@ impl LocalCatalogManager {
Entry::Schema(s) => {
let catalog =
self.catalogs
.catalog(&s.catalog_name)
.catalog(&s.catalog_name)?
.context(CatalogNotFoundSnafu {
catalog_name: &s.catalog_name,
})?;
catalog.register_schema(
s.schema_name.clone(),
Arc::new(MemorySchemaProvider::new()),
);
)?;
info!("Registered schema: {:?}", s);
}
Entry::Table(t) => {
debug!("t: {:?}", t);
self.open_and_register_table(&t).await?;
info!("Registered table: {:?}", t);
max_table_id = max_table_id.max(t.table_id);
@@ -237,12 +195,12 @@ impl LocalCatalogManager {
async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
let catalog = self
.catalogs
.catalog(&t.catalog_name)
.catalog(&t.catalog_name)?
.context(CatalogNotFoundSnafu {
catalog_name: &t.catalog_name,
})?;
let schema = catalog
.schema(&t.schema_name)
.schema(&t.schema_name)?
.context(SchemaNotFoundSnafu {
schema_info: format!("{}.{}", &t.catalog_name, &t.schema_name),
})?;
@@ -286,19 +244,19 @@ impl CatalogList for LocalCatalogManager {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<Arc<dyn CatalogProvider>> {
) -> Result<Option<CatalogProviderRef>> {
self.catalogs.register_catalog(name, catalog)
}
fn catalog_names(&self) -> Vec<String> {
let mut res = self.catalogs.catalog_names();
fn catalog_names(&self) -> Result<Vec<String>> {
let mut res = self.catalogs.catalog_names()?;
res.push(SYSTEM_CATALOG_NAME.to_string());
res
Ok(res)
}
fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
if name.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
Some(self.system.clone())
Ok(Some(self.system.clone()))
} else {
self.catalogs.catalog(name)
}
@@ -307,15 +265,15 @@ impl CatalogList for LocalCatalogManager {
#[async_trait::async_trait]
impl CatalogManager for LocalCatalogManager {
/// Start [MemoryCatalogManager] to load all information from system catalog table.
/// Start [LocalCatalogManager] to load all information from system catalog table.
/// Make sure table engine is initialized before starting [MemoryCatalogManager].
async fn start(&self) -> Result<()> {
self.init().await
}
#[inline]
fn next_table_id(&self) -> TableId {
self.next_table_id.fetch_add(1, Ordering::Relaxed)
async fn next_table_id(&self) -> Result<TableId> {
Ok(self.next_table_id.fetch_add(1, Ordering::Relaxed))
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
@@ -328,36 +286,30 @@ impl CatalogManager for LocalCatalogManager {
}
);
let catalog_name = request
.catalog
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = request
.schema
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
let catalog_name = &request.catalog;
let schema_name = &request.schema;
let catalog = self
.catalogs
.catalog(&catalog_name)
.context(CatalogNotFoundSnafu {
catalog_name: &catalog_name,
})?;
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(&schema_name)
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
if schema.table_exist(&request.table_name) {
if schema.table_exist(&request.table_name)? {
return TableExistsSnafu {
table: format_full_table_name(&catalog_name, &schema_name, &request.table_name),
table: format_full_table_name(catalog_name, schema_name, &request.table_name),
}
.fail();
}
self.system
.register_table(
catalog_name,
schema_name,
catalog_name.clone(),
schema_name.clone(),
request.table_name.clone(),
request.table_id,
)
@@ -383,22 +335,19 @@ impl CatalogManager for LocalCatalogManager {
fn table(
&self,
catalog: Option<&str>,
schema: Option<&str>,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog_name = catalog.unwrap_or(DEFAULT_CATALOG_NAME);
let schema_name = schema.unwrap_or(DEFAULT_SCHEMA_NAME);
let catalog = self
.catalogs
.catalog(catalog_name)
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
Ok(schema.table(table_name))
schema.table(table_name)
}
}
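Because `RegisterTableRequest.catalog` and `.schema` are plain `String`s now, and `next_table_id` is async and fallible, registering a table spells out the target catalog and schema explicitly. A hedged sketch, assuming an async, error-propagating context inside this crate with the constants from common_catalog::consts in scope:
// Illustrative sketch only: building a RegisterTableRequest against the new API.
let request = RegisterTableRequest {
    catalog: DEFAULT_CATALOG_NAME.to_string(),   // defaults are no longer filled in implicitly
    schema: DEFAULT_SCHEMA_NAME.to_string(),
    table_name: "numbers".to_string(),
    table_id: catalog_manager.next_table_id().await?, // now async and Result-returning
    table: Arc::new(NumbersTable::default()),
};
catalog_manager.register_table(request).await?;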


@@ -23,7 +23,7 @@ impl MemoryCatalogList {
pub fn register_catalog_if_absent(
&self,
name: String,
catalog: Arc<dyn CatalogProvider>,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef> {
let mut catalogs = self.catalogs.write().unwrap();
let entry = catalogs.entry(name);
@@ -46,19 +46,19 @@ impl CatalogList for MemoryCatalogList {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef> {
) -> Result<Option<CatalogProviderRef>> {
let mut catalogs = self.catalogs.write().unwrap();
catalogs.insert(name, catalog)
Ok(catalogs.insert(name, catalog))
}
fn catalog_names(&self) -> Vec<String> {
fn catalog_names(&self) -> Result<Vec<String>> {
let catalogs = self.catalogs.read().unwrap();
catalogs.keys().map(|s| s.to_string()).collect()
Ok(catalogs.keys().map(|s| s.to_string()).collect())
}
fn catalog(&self, name: &str) -> Option<CatalogProviderRef> {
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
let catalogs = self.catalogs.read().unwrap();
catalogs.get(name).cloned()
Ok(catalogs.get(name).cloned())
}
}
@@ -87,23 +87,23 @@ impl CatalogProvider for MemoryCatalogProvider {
self
}
fn schema_names(&self) -> Vec<String> {
fn schema_names(&self) -> Result<Vec<String>> {
let schemas = self.schemas.read().unwrap();
schemas.keys().cloned().collect()
Ok(schemas.keys().cloned().collect())
}
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Option<SchemaProviderRef> {
) -> Result<Option<SchemaProviderRef>> {
let mut schemas = self.schemas.write().unwrap();
schemas.insert(name, schema)
Ok(schemas.insert(name, schema))
}
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
let schemas = self.schemas.read().unwrap();
schemas.get(name).cloned()
Ok(schemas.get(name).cloned())
}
}
@@ -132,18 +132,18 @@ impl SchemaProvider for MemorySchemaProvider {
self
}
fn table_names(&self) -> Vec<String> {
fn table_names(&self) -> Result<Vec<String>> {
let tables = self.tables.read().unwrap();
tables.keys().cloned().collect()
Ok(tables.keys().cloned().collect())
}
fn table(&self, name: &str) -> Option<TableRef> {
fn table(&self, name: &str) -> Result<Option<TableRef>> {
let tables = self.tables.read().unwrap();
tables.get(name).cloned()
Ok(tables.get(name).cloned())
}
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
if self.table_exist(name.as_str()) {
if self.table_exist(name.as_str())? {
return TableExistsSnafu { table: name }.fail()?;
}
let mut tables = self.tables.write().unwrap();
@@ -155,9 +155,9 @@ impl SchemaProvider for MemorySchemaProvider {
Ok(tables.remove(name))
}
fn table_exist(&self, name: &str) -> bool {
fn table_exist(&self, name: &str) -> Result<bool> {
let tables = self.tables.read().unwrap();
tables.contains_key(name)
Ok(tables.contains_key(name))
}
}
@@ -168,40 +168,50 @@ pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogList>> {
#[cfg(test)]
mod tests {
use common_catalog::consts::*;
use common_error::ext::ErrorExt;
use common_error::prelude::StatusCode;
use table::table::numbers::NumbersTable;
use super::*;
use crate::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
#[test]
fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_list().unwrap();
assert!(catalog_list.catalog(DEFAULT_CATALOG_NAME).is_none());
assert!(catalog_list
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.is_none());
let default_catalog = Arc::new(MemoryCatalogProvider::default());
catalog_list.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone());
catalog_list
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone())
.unwrap();
assert!(default_catalog.schema(DEFAULT_SCHEMA_NAME).is_none());
assert!(default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.is_none());
let default_schema = Arc::new(MemorySchemaProvider::default());
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone());
default_catalog
.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())
.unwrap();
default_schema
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
let table = default_schema.table("numbers");
let table = default_schema.table("numbers").unwrap();
assert!(table.is_some());
assert!(default_schema.table("not_exists").is_none());
assert!(default_schema.table("not_exists").unwrap().is_none());
}
#[tokio::test]
async fn test_mem_provider() {
let provider = MemorySchemaProvider::new();
let table_name = "numbers";
assert!(!provider.table_exist(table_name));
assert!(!provider.table_exist(table_name).unwrap());
assert!(provider.deregister_table(table_name).unwrap().is_none());
let test_table = NumbersTable::default();
// register table successfully
@@ -209,7 +219,7 @@ mod tests {
.register_table(table_name.to_string(), Arc::new(test_table))
.unwrap()
.is_none());
assert!(provider.table_exist(table_name));
assert!(provider.table_exist(table_name).unwrap());
let other_table = NumbersTable::default();
let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
let err = result.err().unwrap();

src/catalog/src/remote.rs (new file, +117 lines)

@@ -0,0 +1,117 @@
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc;
pub use client::MetaKvBackend;
use futures::Stream;
use futures_util::StreamExt;
pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};
use crate::error::Error;
mod client;
mod manager;
#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b;
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;
/// Compares and sets the value of a key. `expect` is the expected current value; if the backend's
/// value associated with the key equals `expect`, it is updated to `val`.
///
/// - If the compare-and-set operation successfully updates the value, this method returns `Ok(Ok(()))`.
/// - If the associated value does not equal `expect`, nothing is updated and `Ok(Err(prev))` is
///   returned, where `prev` is the key's current value, if any.
/// - If any error happens during the operation, an `Err(Error)` is returned.
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error>;
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;
async fn delete(&self, key: &[u8]) -> Result<(), Error> {
self.delete_range(key, &[]).await
}
/// The default `get` implementation is built on top of the `range` method.
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
let mut iter = self.range(key);
while let Some(r) = iter.next().await {
let kv = r?;
if kv.0 == key {
return Ok(Some(kv));
}
}
return Ok(None);
}
}
pub type KvBackendRef = Arc<dyn KvBackend>;
#[cfg(test)]
mod tests {
use async_stream::stream;
use super::*;
struct MockKvBackend {}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
Box::pin(stream!({
for i in 0..3 {
yield Ok(Kv(
i.to_string().as_bytes().to_vec(),
i.to_string().as_bytes().to_vec(),
))
}
}))
}
async fn set(&self, _key: &[u8], _val: &[u8]) -> Result<(), Error> {
unimplemented!()
}
async fn compare_and_set(
&self,
_key: &[u8],
_expect: &[u8],
_val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
unimplemented!()
}
async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
unimplemented!()
}
}
#[tokio::test]
async fn test_get() {
let backend = MockKvBackend {};
let result = backend.get(0.to_string().as_bytes()).await;
assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(1.to_string().as_bytes()).await;
assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(2.to_string().as_bytes()).await;
assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(3.to_string().as_bytes()).await;
assert!(result.unwrap().is_none());
}
}
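The nested result of `compare_and_set` keeps backend failures (`Err(Error)`) separate from lost races (`Ok(Err(prev))`). A minimal sketch of how a caller might interpret it, assuming the `KvBackend` and `Error` types defined in this file:
// Illustrative sketch only: lost CAS races are data, backend failures are errors.
async fn try_update(backend: &dyn KvBackend, key: &[u8], expect: &[u8], val: &[u8]) -> Result<bool, Error> {
    match backend.compare_and_set(key, expect, val).await? {
        Ok(()) => Ok(true),          // the value was swapped to `val`
        Err(_current) => Ok(false),  // another writer won; `_current` holds its value, if any
    }
}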


@@ -0,0 +1,93 @@
use std::fmt::Debug;
use async_stream::stream;
use common_telemetry::info;
use meta_client::client::MetaClient;
use meta_client::rpc::{CompareAndPutRequest, DeleteRangeRequest, PutRequest, RangeRequest};
use snafu::ResultExt;
use crate::error::{Error, MetaSrvSnafu};
use crate::remote::{Kv, KvBackend, ValueIter};
#[derive(Debug)]
pub struct MetaKvBackend {
pub client: MetaClient,
}
/// `MetaKvBackend` implements the `KvBackend` trait directly instead of opendal's `Accessor`, since
/// `MetaClient`'s range method returns both keys and values, which reduces IO overhead compared to
/// `Accessor`'s separate list and get methods.
#[async_trait::async_trait]
impl KvBackend for MetaKvBackend {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
let key = key.to_vec();
Box::pin(stream!({
let mut resp = self
.client
.range(RangeRequest::new().with_prefix(key))
.await
.context(MetaSrvSnafu)?;
let kvs = resp.take_kvs();
for mut kv in kvs.into_iter() {
yield Ok(Kv(kv.take_key(), kv.take_value()))
}
}))
}
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
let mut response = self
.client
.range(RangeRequest::new().with_key(key))
.await
.context(MetaSrvSnafu)?;
Ok(response
.take_kvs()
.get_mut(0)
.map(|kv| Kv(kv.take_key(), kv.take_value())))
}
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
let req = PutRequest::new()
.with_key(key.to_vec())
.with_value(val.to_vec());
let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
Ok(())
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
info!(
"Delete range, key: {}, end: {}, deleted: {}",
String::from_utf8_lossy(key),
String::from_utf8_lossy(end),
resp.deleted()
);
Ok(())
}
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
let request = CompareAndPutRequest::new()
.with_key(key.to_vec())
.with_expect(expect.to_vec())
.with_value(val.to_vec());
let mut response = self
.client
.compare_and_put(request)
.await
.context(MetaSrvSnafu)?;
if response.is_success() {
Ok(Ok(()))
} else {
Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
}
}
}


@@ -0,0 +1,749 @@
use std::any::Any;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwap;
use async_stream::stream;
use backoff::exponential::ExponentialBackoffBuilder;
use backoff::ExponentialBackoff;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, error, info};
use futures::Stream;
use futures_util::StreamExt;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::{CreateTableRequest, OpenTableRequest};
use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex;
use crate::error::{
BumpTableIdSnafu, CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu,
OpenTableSnafu, ParseTableIdSnafu, SchemaNotFoundSnafu, TableExistsSnafu,
};
use crate::error::{InvalidTableSchemaSnafu, Result};
use crate::remote::{Kv, KvBackendRef};
use crate::{
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};
/// Catalog manager based on metasrv.
pub struct RemoteCatalogManager {
node_id: u64,
backend: KvBackendRef,
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
engine: TableEngineRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogManager {
pub fn new(engine: TableEngineRef, node_id: u64, backend: KvBackendRef) -> Self {
Self {
engine,
node_id,
backend,
catalogs: Default::default(),
system_table_requests: Default::default(),
mutex: Default::default(),
}
}
fn build_catalog_key(&self, catalog_name: impl AsRef<str>) -> CatalogKey {
CatalogKey {
catalog_name: catalog_name.as_ref().to_string(),
}
}
fn new_catalog_provider(&self, catalog_name: &str) -> CatalogProviderRef {
Arc::new(RemoteCatalogProvider {
catalog_name: catalog_name.to_string(),
backend: self.backend.clone(),
schemas: Default::default(),
mutex: Default::default(),
}) as _
}
fn new_schema_provider(&self, catalog_name: &str, schema_name: &str) -> SchemaProviderRef {
Arc::new(RemoteSchemaProvider {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
tables: Default::default(),
node_id: self.node_id,
backend: self.backend.clone(),
mutex: Default::default(),
}) as _
}
async fn iter_remote_catalogs(
&self,
) -> Pin<Box<dyn Stream<Item = Result<CatalogKey>> + Send + '_>> {
let catalog_range_prefix = build_catalog_prefix();
info!("catalog_range_prefix: {}", catalog_range_prefix);
let mut catalogs = self.backend.range(catalog_range_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = catalogs.next().await {
let Kv(k, _) = r?;
if !k.starts_with(catalog_range_prefix.as_bytes()) {
debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
continue;
}
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
yield Ok(key)
}
}))
}
async fn iter_remote_schemas(
&self,
catalog_name: &str,
) -> Pin<Box<dyn Stream<Item = Result<SchemaKey>> + Send + '_>> {
let schema_prefix = build_schema_prefix(catalog_name);
let mut schemas = self.backend.range(schema_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = schemas.next().await {
let Kv(k, _) = r?;
if !k.starts_with(schema_prefix.as_bytes()) {
debug!("Ignoring non-schema key: {}", String::from_utf8_lossy(&k));
continue;
}
let schema_key = SchemaKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
yield Ok(schema_key)
}
}))
}
/// Iterates over all table entries on metasrv.
/// TODO(hl): table entries with different versions are not currently considered.
/// Ideally, deprecated table entries should be deleted when the table is deregistered from the catalog.
async fn iter_remote_tables(
&self,
catalog_name: &str,
schema_name: &str,
) -> Pin<Box<dyn Stream<Item = Result<(TableGlobalKey, TableGlobalValue)>> + Send + '_>> {
let table_prefix = build_table_global_prefix(catalog_name, schema_name);
let mut tables = self.backend.range(table_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = tables.next().await {
let Kv(k, v) = r?;
if !k.starts_with(table_prefix.as_bytes()) {
debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
continue;
}
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
.context(InvalidCatalogValueSnafu)?;
debug!(
"Found catalog table entry, key: {}, value: {:?}",
table_key, table_value
);
// metasrv has allocated region ids to current datanode
if table_value
.regions_id_map
.get(&self.node_id)
.map(|v| !v.is_empty())
.unwrap_or(false)
{
yield Ok((table_key, table_value))
}
}
}))
}
/// Fetch catalogs/schemas/tables from remote catalog manager along with max table id allocated.
async fn initiate_catalogs(&self) -> Result<(HashMap<String, CatalogProviderRef>, TableId)> {
let mut res = HashMap::new();
let max_table_id = MIN_USER_TABLE_ID - 1;
// initiate default catalog and schema
let default_catalog = self.initiate_default_catalog().await?;
res.insert(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
info!("Default catalog and schema registered");
let mut catalogs = self.iter_remote_catalogs().await;
while let Some(r) = catalogs.next().await {
let CatalogKey { catalog_name, .. } = r?;
info!("Fetch catalog from metasrv: {}", catalog_name);
let catalog = res
.entry(catalog_name.clone())
.or_insert_with(|| self.new_catalog_provider(&catalog_name))
.clone();
self.initiate_schemas(catalog_name, catalog, max_table_id)
.await?;
}
Ok((res, max_table_id))
}
async fn initiate_schemas(
&self,
catalog_name: String,
catalog: CatalogProviderRef,
max_table_id: TableId,
) -> Result<()> {
let mut schemas = self.iter_remote_schemas(&catalog_name).await;
while let Some(r) = schemas.next().await {
let SchemaKey {
catalog_name,
schema_name,
..
} = r?;
info!("Found schema: {}.{}", catalog_name, schema_name);
let schema = match catalog.schema(&schema_name)? {
None => {
let schema = self.new_schema_provider(&catalog_name, &schema_name);
catalog.register_schema(schema_name.clone(), schema.clone())?;
info!("Registered schema: {}", &schema_name);
schema
}
Some(schema) => schema,
};
info!(
"Fetch schema from metasrv: {}.{}",
&catalog_name, &schema_name
);
self.initiate_tables(&catalog_name, &schema_name, schema, max_table_id)
.await?;
}
Ok(())
}
/// Initiates all tables inside a catalog by fetching data from metasrv.
async fn initiate_tables<'a>(
&'a self,
catalog_name: &'a str,
schema_name: &'a str,
schema: SchemaProviderRef,
mut max_table_id: TableId,
) -> Result<()> {
let mut tables = self.iter_remote_tables(catalog_name, schema_name).await;
while let Some(r) = tables.next().await {
let (table_key, table_value) = r?;
let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
schema.register_table(table_key.table_name.to_string(), table_ref)?;
info!("Registered table {}", &table_key.table_name);
if table_value.id > max_table_id {
info!("Max table id: {} -> {}", max_table_id, table_value.id);
max_table_id = table_value.id;
}
}
Ok(())
}
async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())?;
let schema_key = SchemaKey {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
}
.to_string();
self.backend
.set(
schema_key.as_bytes(),
&SchemaValue {}
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
info!("Registered default schema");
let catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
}
.to_string();
self.backend
.set(
catalog_key.as_bytes(),
&CatalogValue {}
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
info!("Registered default catalog");
Ok(default_catalog)
}
async fn open_or_create_table(
&self,
table_key: &TableGlobalKey,
table_value: &TableGlobalValue,
) -> Result<TableRef> {
let context = EngineContext {};
let TableGlobalKey {
catalog_name,
schema_name,
table_name,
..
} = table_key;
let TableGlobalValue {
id,
meta,
regions_id_map,
..
} = table_value;
let request = OpenTableRequest {
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
table_id: *id,
};
match self
.engine
.open_table(&context, request)
.await
.with_context(|_| OpenTableSnafu {
table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
})? {
Some(table) => Ok(table),
None => {
let schema = meta
.schema
.clone()
.try_into()
.context(InvalidTableSchemaSnafu {
table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
schema: meta.schema.clone(),
})?;
let req = CreateTableRequest {
id: *id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: Arc::new(schema),
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
primary_key_indices: meta.primary_key_indices.clone(),
create_if_not_exists: true,
table_options: meta.options.clone(),
};
self.engine
.create_table(&context, req)
.await
.context(CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id:{}",
&catalog_name, &schema_name, &table_name, id
),
})
}
}
}
}
#[async_trait::async_trait]
impl CatalogManager for RemoteCatalogManager {
async fn start(&self) -> Result<()> {
let (catalogs, max_table_id) = self.initiate_catalogs().await?;
info!(
"Initialized catalogs: {:?}",
catalogs.keys().cloned().collect::<Vec<_>>()
);
self.catalogs.store(Arc::new(catalogs));
info!("Max table id allocated: {}", max_table_id);
let mut system_table_requests = self.system_table_requests.lock().await;
handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
info!("All system table opened");
self.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap()
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap()
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
Ok(())
}
/// Bump table id in a CAS manner with backoff.
async fn next_table_id(&self) -> Result<TableId> {
let key = common_catalog::consts::TABLE_ID_KEY_PREFIX.as_bytes();
let op = || async {
// TODO(hl): optimize this get
let (prev, prev_bytes) = match self.backend.get(key).await? {
None => (MIN_USER_TABLE_ID, vec![]),
Some(kv) => (parse_table_id(&kv.1)?, kv.1),
};
match self
.backend
.compare_and_set(key, &prev_bytes, &(prev + 1).to_le_bytes())
.await
{
Ok(cas_res) => match cas_res {
Ok(_) => Ok(prev),
Err(e) => {
info!("Table id {:?} already occupied", e);
Err(backoff::Error::transient(
BumpTableIdSnafu {
msg: "Table id occupied",
}
.build(),
))
}
},
Err(e) => {
error!(e;"Failed to CAS table id");
Err(backoff::Error::permanent(
BumpTableIdSnafu {
msg: format!("Failed to perform CAS operation: {:?}", e),
}
.build(),
))
}
}
};
let retry_policy: ExponentialBackoff = ExponentialBackoffBuilder::new()
.with_initial_interval(Duration::from_millis(4))
.with_multiplier(2.0)
.with_max_interval(Duration::from_millis(1000))
.with_max_elapsed_time(Some(Duration::from_millis(3000)))
.build();
backoff::future::retry(retry_policy, op).await.map_err(|e| {
BumpTableIdSnafu {
msg: format!(
"Bump table id exceeds max fail times, last error msg: {:?}",
e
),
}
.build()
})
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
let catalog_name = request.catalog;
let schema_name = request.schema;
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
catalog_name: &catalog_name,
})?;
let schema_provider =
catalog_provider
.schema(&schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", &catalog_name, &schema_name),
})?;
if schema_provider.table_exist(&request.table_name)? {
return TableExistsSnafu {
table: format!("{}.{}.{}", &catalog_name, &schema_name, &request.table_name),
}
.fail();
}
schema_provider.register_table(request.table_name, request.table)?;
Ok(1)
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
let mut requests = self.system_table_requests.lock().await;
requests.push(request);
Ok(())
}
fn table(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog = self
.catalog(catalog_name)?
.with_context(|| CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
schema.table(table_name)
}
}
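Stripped of the backoff policy, `next_table_id` above is a read/compare-and-set/retry loop over the KV backend. A simplified sketch under that assumption, reusing `parse_table_id` and `MIN_USER_TABLE_ID` from this file (the real code retries with exponential backoff rather than looping immediately):
// Illustrative sketch only: the CAS core of table-id allocation, without backoff.
async fn bump_table_id(backend: &dyn KvBackend, key: &[u8]) -> Result<TableId> {
    loop {
        let (prev, prev_bytes) = match backend.get(key).await? {
            None => (MIN_USER_TABLE_ID, vec![]),
            Some(kv) => (parse_table_id(&kv.1)?, kv.1),
        };
        match backend
            .compare_and_set(key, &prev_bytes, &(prev + 1).to_le_bytes())
            .await?
        {
            Ok(()) => return Ok(prev), // this call owns `prev`; the stored counter is now prev + 1
            Err(_) => continue,        // lost the race; re-read and try again
        }
    }
}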
impl CatalogList for RemoteCatalogManager {
fn as_any(&self) -> &dyn Any {
self
}
fn register_catalog(
&self,
name: String,
catalog: CatalogProviderRef,
) -> Result<Option<CatalogProviderRef>> {
let key = self.build_catalog_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let catalogs = self.catalogs.clone();
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
&CatalogValue {}
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_catalogs = catalogs.load();
let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
new_catalogs.clone_from(&prev_catalogs);
let prev = new_catalogs.insert(name, catalog);
catalogs.store(Arc::new(new_catalogs));
Ok(prev)
})
})
.join()
.unwrap()
}
/// List all catalogs from metasrv
fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
}
/// Read catalog info of given name from metasrv.
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
Ok(self.catalogs.load().get(name).cloned())
}
}
pub struct RemoteCatalogProvider {
catalog_name: String,
backend: KvBackendRef,
schemas: Arc<ArcSwap<HashMap<String, SchemaProviderRef>>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogProvider {
pub fn new(catalog_name: String, backend: KvBackendRef) -> Self {
Self {
catalog_name,
backend,
schemas: Default::default(),
mutex: Default::default(),
}
}
fn build_schema_key(&self, schema_name: impl AsRef<str>) -> SchemaKey {
SchemaKey {
catalog_name: self.catalog_name.clone(),
schema_name: schema_name.as_ref().to_string(),
}
}
}
impl CatalogProvider for RemoteCatalogProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn schema_names(&self) -> Result<Vec<String>> {
Ok(self.schemas.load().keys().cloned().collect::<Vec<_>>())
}
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>> {
let key = self.build_schema_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let schemas = self.schemas.clone();
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
&SchemaValue {}
.as_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_schemas = schemas.load();
let mut new_schemas = HashMap::with_capacity(prev_schemas.len() + 1);
new_schemas.clone_from(&prev_schemas);
let prev_schema = new_schemas.insert(name, schema);
schemas.store(Arc::new(new_schemas));
Ok(prev_schema)
})
})
.join()
.unwrap()
}
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
Ok(self.schemas.load().get(name).cloned())
}
}
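The synchronous `CatalogProvider` methods above push their async KV writes onto a freshly spawned thread and block there, so the caller's async runtime is never blocked directly. The shape of that bridge, sketched with `futures::executor::block_on` standing in for `common_runtime::block_on_write` (an assumption made for the sketch, not the crate's actual helper):
// Illustrative sketch only: running an async KV write from a sync trait method.
fn set_blocking(backend: KvBackendRef, key: Vec<u8>, val: Vec<u8>) -> Result<()> {
    std::thread::spawn(move || {
        // Blocking happens on the spawned thread, not on an async executor worker.
        futures::executor::block_on(async move { backend.set(&key, &val).await })
    })
    .join()
    .unwrap()
}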
/// Parse u8 slice to `TableId`
fn parse_table_id(val: &[u8]) -> Result<TableId> {
Ok(TableId::from_le_bytes(val.try_into().map_err(|_| {
ParseTableIdSnafu {
data: format!("{:?}", val),
}
.build()
})?))
}
pub struct RemoteSchemaProvider {
catalog_name: String,
schema_name: String,
node_id: u64,
backend: KvBackendRef,
tables: Arc<ArcSwap<HashMap<String, TableRef>>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteSchemaProvider {
pub fn new(
catalog_name: String,
schema_name: String,
node_id: u64,
backend: KvBackendRef,
) -> Self {
Self {
catalog_name,
schema_name,
node_id,
backend,
tables: Default::default(),
mutex: Default::default(),
}
}
fn build_regional_table_key(&self, table_name: impl AsRef<str>) -> TableRegionalKey {
TableRegionalKey {
catalog_name: self.catalog_name.clone(),
schema_name: self.schema_name.clone(),
table_name: table_name.as_ref().to_string(),
node_id: self.node_id,
}
}
}
impl SchemaProvider for RemoteSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn table_names(&self) -> Result<Vec<String>> {
Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
Ok(self.tables.load().get(name).cloned())
}
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
let table_info = table.table_info();
let table_version = table_info.ident.version;
let table_value = TableRegionalValue {
version: table_version,
regions_ids: table.table_info().meta.region_numbers.clone(),
};
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let table_key = self.build_regional_table_key(&name).to_string();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
let _guard = mutex.lock().await;
backend
.set(
table_key.as_bytes(),
&table_value.as_bytes().context(InvalidCatalogValueSnafu)?,
)
.await?;
debug!(
"Successfully set catalog table entry, key: {}, table value: {:?}",
table_key, table_value
);
let prev_tables = tables.load();
let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
new_tables.clone_from(&prev_tables);
let prev = new_tables.insert(name, table);
tables.store(Arc::new(new_tables));
Ok(prev)
})
})
.join()
.unwrap();
prev
}
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
let table_name = name.to_string();
let table_key = self.build_regional_table_key(&table_name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
let _guard = mutex.lock().await;
backend.delete(table_key.as_bytes()).await?;
debug!(
"Successfully deleted catalog table entry, key: {}",
table_key
);
let prev_tables = tables.load();
let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
new_tables.clone_from(&prev_tables);
let prev = new_tables.remove(&table_name);
tables.store(Arc::new(new_tables));
Ok(prev)
})
})
.join()
.unwrap();
prev
}
/// Checks if table exists in schema provider based on locally opened table map.
fn table_exist(&self, name: &str) -> Result<bool> {
Ok(self.tables.load().contains_key(name))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_table_id() {
assert_eq!(12, parse_table_id(&12_i32.to_le_bytes()).unwrap());
let mut data = vec![];
data.extend_from_slice(&12_i32.to_le_bytes());
data.push(0);
assert!(parse_table_id(&data).is_err());
}
}


@@ -12,10 +12,10 @@ pub trait SchemaProvider: Sync + Send {
fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available table names in this schema.
fn table_names(&self) -> Vec<String>;
fn table_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific table from the schema by name, provided it exists.
fn table(&self, name: &str) -> Option<TableRef>;
fn table(&self, name: &str) -> Result<Option<TableRef>>;
/// If supported by the implementation, adds a new table to this schema.
/// If a table of the same name existed before, it returns "Table already exists" error.
@@ -28,7 +28,7 @@ pub trait SchemaProvider: Sync + Send {
/// If supported by the implementation, checks the table exist in the schema provider or not.
/// If no matched table in the schema provider, return false.
/// Otherwise, return true.
fn table_exist(&self, name: &str) -> bool;
fn table_exist(&self, name: &str) -> Result<bool>;
}
pub type SchemaProviderRef = Arc<dyn SchemaProvider>;


@@ -2,7 +2,13 @@ use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::{
INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
SYSTEM_CATALOG_TABLE_NAME,
};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::physical_plan::RuntimeEnv;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::timestamp::Timestamp;
@@ -13,16 +19,12 @@ use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::metadata::{TableId, TableInfoRef};
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest};
use table::{Table, TableRef};
use crate::consts::{
INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::{
CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
};
@@ -32,7 +34,7 @@ pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable {
schema: SchemaRef,
table_info: TableInfoRef,
pub table: TableRef,
}
@@ -43,7 +45,7 @@ impl Table for SystemCatalogTable {
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
self.table_info.meta.schema.clone()
}
async fn scan(
@@ -51,7 +53,7 @@ impl Table for SystemCatalogTable {
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::Result<SendableRecordBatchStream> {
) -> table::Result<PhysicalPlanRef> {
panic!("System catalog table does not support scan!")
}
@@ -59,6 +61,10 @@ impl Table for SystemCatalogTable {
async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
self.table.insert(request).await
}
fn table_info(&self) -> TableInfoRef {
self.table_info.clone()
}
}
impl SystemCatalogTable {
@@ -77,16 +83,20 @@ impl SystemCatalogTable {
.await
.context(OpenSystemCatalogSnafu)?
{
Ok(Self { table, schema })
Ok(Self {
table_info: table.table_info(),
table,
})
} else {
// system catalog table is not yet created, try to create
let request = CreateTableRequest {
id: SYSTEM_CATALOG_TABLE_ID,
catalog_name: Some(SYSTEM_CATALOG_NAME.to_string()),
schema_name: Some(INFORMATION_SCHEMA_NAME.to_string()),
catalog_name: SYSTEM_CATALOG_NAME.to_string(),
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
desc: Some("System catalog table".to_string()),
schema: schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
create_if_not_exists: true,
table_options: HashMap::new(),
@@ -96,14 +106,23 @@ impl SystemCatalogTable {
.create_table(&ctx, request)
.await
.context(CreateSystemCatalogSnafu)?;
Ok(Self { table, schema })
let table_info = table.table_info();
Ok(Self { table, table_info })
}
}
/// Create a stream of all entries inside system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None;
let stream = self.table.scan(&full_projection, &[], None).await.unwrap();
let scan = self
.table
.scan(&full_projection, &[], None)
.await
.context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream)
}
}
@@ -153,7 +172,7 @@ fn build_system_catalog_schema() -> Schema {
// The schema of this table must be valid.
SchemaBuilder::try_from(cols)
.unwrap()
.timestamp_index(2)
.timestamp_index(Some(2))
.build()
.unwrap()
}
@@ -320,6 +339,16 @@ pub struct TableEntryValue {
#[cfg(test)]
mod tests {
use log_store::fs::noop::NoopLogStore;
use object_store::ObjectStore;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use table::metadata::TableType;
use table::metadata::TableType::Base;
use table_engine::config::EngineConfig;
use table_engine::engine::MitoEngine;
use tempdir::TempDir;
use super::*;
#[test]
@@ -391,4 +420,43 @@ mod tests {
assert_eq!(EntryType::Table, EntryType::try_from(3).unwrap());
assert!(EntryType::try_from(4).is_err());
}
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = TempDir::new("system-table-test").unwrap();
let store_dir = dir.path().to_string_lossy();
let accessor = opendal::services::fs::Builder::default()
.root(&store_dir)
.build()
.unwrap();
let object_store = ObjectStore::new(accessor);
let table_engine = Arc::new(MitoEngine::new(
EngineConfig::default(),
EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(NoopLogStore::default()),
object_store.clone(),
),
object_store,
));
(dir, table_engine)
}
#[tokio::test]
async fn test_system_table_type() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
assert_eq!(Base, system_table.table_type());
}
#[tokio::test]
async fn test_system_table_info() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
let info = system_table.table_info();
assert_eq!(TableType::Base, info.table_type);
assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
assert_eq!(SYSTEM_CATALOG_NAME, info.catalog_name);
assert_eq!(INFORMATION_SCHEMA_NAME, info.schema_name);
}
}
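With `Table::scan` now returning a `PhysicalPlanRef`, reading the system catalog is a two-step scan-then-execute, which `records()` above wraps. A hedged sketch of a caller, assuming it sits in this crate with `futures_util::StreamExt` in scope (as in the tests):
// Illustrative sketch only: consuming the system catalog through records().
async fn count_system_catalog_rows(table: &SystemCatalogTable) -> Result<usize> {
    // records() runs scan(&None, &[], None) and then execute(0, RuntimeEnv) internally.
    let mut stream = table.records().await?;
    let mut rows = 0;
    while let Some(batch) = stream.next().await {
        // A real caller would map this error into the crate's error type instead of unwrapping.
        rows += batch.unwrap().df_recordbatch.num_rows();
    }
    Ok(rows)
}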


@@ -6,9 +6,12 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use async_stream::stream;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use common_error::ext::BoxedError;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{RecordBatch, RecordBatchStream};
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
@@ -16,11 +19,12 @@ use datatypes::vectors::VectorRef;
use futures::Stream;
use snafu::ResultExt;
use table::engine::TableEngineRef;
use table::metadata::TableId;
use table::error::TablesRecordBatchSnafu;
use table::metadata::{TableId, TableInfoRef};
use table::table::scan::SimpleTableScan;
use table::{Table, TableRef};
use crate::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use crate::error::InsertTableRecordSnafu;
use crate::error::{Error, InsertTableRecordSnafu};
use crate::system::{build_table_insert_request, SystemCatalogTable};
use crate::{
format_full_table_name, CatalogListRef, CatalogProvider, SchemaProvider, SchemaProviderRef,
@@ -53,23 +57,53 @@ impl Table for Tables {
self.schema.clone()
}
fn table_info(&self) -> TableInfoRef {
unreachable!("Tables does not support table_info method")
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::error::Result<SendableRecordBatchStream> {
) -> table::error::Result<PhysicalPlanRef> {
let catalogs = self.catalogs.clone();
let schema_ref = self.schema.clone();
let engine_name = self.engine_name.clone();
let stream = stream!({
for catalog_name in catalogs.catalog_names() {
let catalog = catalogs.catalog(&catalog_name).unwrap();
for schema_name in catalog.schema_names() {
let mut tables_in_schema = Vec::with_capacity(catalog.schema_names().len());
let schema = catalog.schema(&schema_name).unwrap();
for table_name in schema.table_names() {
for catalog_name in catalogs
.catalog_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
let catalog = catalogs
.catalog(&catalog_name)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.unwrap();
for schema_name in catalog
.schema_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
let mut tables_in_schema = Vec::with_capacity(
catalog
.schema_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.len(),
);
let schema = catalog
.schema(&schema_name)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.unwrap();
for table_name in schema
.table_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
tables_in_schema.push(table_name);
}
@@ -85,10 +119,11 @@ impl Table for Tables {
}
});
Ok(Box::pin(TablesRecordBatchStream {
let stream = Box::pin(TablesRecordBatchStream {
schema: self.schema.clone(),
stream: Box::pin(stream),
}))
});
Ok(Arc::new(SimpleTableScan::new(stream)))
}
}
@@ -152,17 +187,20 @@ impl SchemaProvider for InformationSchema {
self
}
fn table_names(&self) -> Vec<String> {
vec!["tables".to_string(), SYSTEM_CATALOG_TABLE_NAME.to_string()]
fn table_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![
"tables".to_string(),
SYSTEM_CATALOG_TABLE_NAME.to_string(),
])
}
fn table(&self, name: &str) -> Option<TableRef> {
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
if name.eq_ignore_ascii_case("tables") {
Some(self.tables.clone())
Ok(Some(self.tables.clone()))
} else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
Some(self.system.clone())
Ok(Some(self.system.clone()))
} else {
None
Ok(None)
}
}
@@ -178,8 +216,9 @@ impl SchemaProvider for InformationSchema {
panic!("System catalog & schema does not support deregister table")
}
fn table_exist(&self, name: &str) -> bool {
name.eq_ignore_ascii_case("tables") || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME)
fn table_exist(&self, name: &str) -> Result<bool, Error> {
Ok(name.eq_ignore_ascii_case("tables")
|| name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
}
}
@@ -224,23 +263,23 @@ impl CatalogProvider for SystemCatalog {
self
}
fn schema_names(&self) -> Vec<String> {
vec![INFORMATION_SCHEMA_NAME.to_string()]
fn schema_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![INFORMATION_SCHEMA_NAME.to_string()])
}
fn register_schema(
&self,
_name: String,
_schema: SchemaProviderRef,
) -> Option<SchemaProviderRef> {
) -> Result<Option<SchemaProviderRef>, Error> {
panic!("System catalog does not support registering schema!")
}
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>, Error> {
if name.eq_ignore_ascii_case(INFORMATION_SCHEMA_NAME) {
Some(self.information_schema.clone())
Ok(Some(self.information_schema.clone()))
} else {
None
Ok(None)
}
}
}
@@ -273,13 +312,16 @@ fn build_schema_for_tables() -> Schema {
#[cfg(test)]
mod tests {
use common_query::physical_plan::RuntimeEnv;
use datatypes::arrow::array::Utf8Array;
use datatypes::arrow::datatypes::DataType;
use futures_util::StreamExt;
use table::table::numbers::NumbersTable;
use super::*;
use crate::memory::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};
use crate::local::memory::{
new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider,
};
use crate::CatalogList;
#[tokio::test]
@@ -290,11 +332,19 @@ mod tests {
schema
.register_table("test_table".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
catalog_provider.register_schema("test_schema".to_string(), schema);
catalog_list.register_catalog("test_catalog".to_string(), catalog_provider);
catalog_provider
.register_schema("test_schema".to_string(), schema)
.unwrap();
catalog_list
.register_catalog("test_catalog".to_string(), catalog_provider)
.unwrap();
let tables = Tables::new(catalog_list, "test_engine".to_string());
let mut tables_stream = tables.scan(&None, &[], None).await.unwrap();
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
let mut tables_stream = tables_stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
if let Some(t) = tables_stream.next().await {
let batch = t.unwrap().df_recordbatch;
assert_eq!(1, batch.num_rows());
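Each catalog, schema, and table lookup inside `Tables::scan` above repeats the same `.map_err(BoxedError::new).context(TablesRecordBatchSnafu)` conversion into `table::error`. One possible helper (not part of the patch) to centralize that mapping, assuming the imports the file already uses:
// Illustrative sketch only: one place to convert catalog errors for Tables::scan.
fn to_table_err<T>(res: Result<T, Error>) -> table::error::Result<T> {
    res.map_err(BoxedError::new).context(TablesRecordBatchSnafu)
}
// e.g. let catalog = to_table_err(catalogs.catalog(&catalog_name))?.unwrap();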

src/catalog/tests/mock.rs (new file, +192 lines)

@@ -0,0 +1,192 @@
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;
use async_stream::stream;
use catalog::error::Error;
use catalog::remote::{Kv, KvBackend, ValueIter};
use common_recordbatch::RecordBatch;
use common_telemetry::logging::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use serde::Serializer;
use table::engine::{EngineContext, TableEngine};
use table::metadata::TableId;
use table::requests::{AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest};
use table::test_util::MemTable;
use table::TableRef;
use tokio::sync::RwLock;
#[derive(Default)]
pub struct MockKvBackend {
map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
}
impl Display for MockKvBackend {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
futures::executor::block_on(async {
let map = self.map.read().await;
for (k, v) in map.iter() {
f.serialize_str(&String::from_utf8_lossy(k))?;
f.serialize_str(" -> ")?;
f.serialize_str(&String::from_utf8_lossy(v))?;
f.serialize_str("\n")?;
}
Ok(())
})
}
}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
let prefix = key.to_vec();
let prefix_string = String::from_utf8_lossy(&prefix).to_string();
Box::pin(stream!({
let maps = self.map.read().await.clone();
for (k, v) in maps.range(prefix.clone()..) {
let key_string = String::from_utf8_lossy(k).to_string();
let matches = key_string.starts_with(&prefix_string);
if matches {
yield Ok(Kv(k.clone(), v.clone()))
} else {
info!("Stream finished");
return;
}
}
}))
}
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
let mut map = self.map.write().await;
map.insert(key.to_vec(), val.to_vec());
Ok(())
}
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
let mut map = self.map.write().await;
let existing = map.entry(key.to_vec());
match existing {
Entry::Vacant(e) => {
if expect.is_empty() {
e.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(None))
}
}
Entry::Occupied(mut existing) => {
if existing.get() == expect {
existing.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(Some(existing.get().clone())))
}
}
}
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let start = key.to_vec();
let end = end.to_vec();
let range = start..end;
let mut map = self.map.write().await;
map.retain(|k, _| !range.contains(k));
Ok(())
}
}
#[derive(Default)]
pub struct MockTableEngine {
tables: RwLock<HashMap<String, TableRef>>,
}
#[async_trait::async_trait]
impl TableEngine for MockTableEngine {
fn name(&self) -> &str {
"MockTableEngine"
}
/// Create a table with only one column
async fn create_table(
&self,
_ctx: &EngineContext,
request: CreateTableRequest,
) -> table::Result<TableRef> {
let table_name = request.table_name.clone();
let catalog_name = request.catalog_name.clone();
let schema_name = request.schema_name.clone();
let default_table_id = "0".to_owned();
let table_id = TableId::from_str(
request
.table_options
.get("table_id")
.unwrap_or(&default_table_id),
)
.unwrap();
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]));
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap();
let table: TableRef = Arc::new(MemTable::new_with_catalog(
&table_name,
record_batch,
table_id,
catalog_name,
schema_name,
)) as Arc<_>;
let mut tables = self.tables.write().await;
tables.insert(table_name, table.clone() as TableRef);
Ok(table)
}
async fn open_table(
&self,
_ctx: &EngineContext,
request: OpenTableRequest,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().await.get(&request.table_name).cloned())
}
async fn alter_table(
&self,
_ctx: &EngineContext,
_request: AlterTableRequest,
) -> table::Result<TableRef> {
unimplemented!()
}
fn get_table(&self, _ctx: &EngineContext, name: &str) -> table::Result<Option<TableRef>> {
futures::executor::block_on(async { Ok(self.tables.read().await.get(name).cloned()) })
}
fn table_exists(&self, _ctx: &EngineContext, name: &str) -> bool {
futures::executor::block_on(async { self.tables.read().await.contains_key(name) })
}
async fn drop_table(
&self,
_ctx: &EngineContext,
_request: DropTableRequest,
) -> table::Result<()> {
unimplemented!()
}
}
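
A minimal sketch of the compare-and-set contract implemented above, assuming the MockKvBackend from this file and a tokio test runtime: an empty expect value only succeeds when the key is vacant, and a mismatch surfaces the current value through the inner Err.

#[tokio::test]
async fn cas_semantics_sketch() {
    let backend = MockKvBackend::default();
    // Vacant key + empty expectation: the insert succeeds.
    assert!(backend
        .compare_and_set(b"k", b"", b"v1")
        .await
        .unwrap()
        .is_ok());
    // Stale expectation: the inner Err carries the current value.
    assert_eq!(
        Err(Some(b"v1".to_vec())),
        backend.compare_and_set(b"k", b"v0", b"v2").await.unwrap()
    );
    // Matching expectation: the value is replaced.
    assert!(backend
        .compare_and_set(b"k", b"v1", b"v2")
        .await
        .unwrap()
        .is_ok());
}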

View File

@@ -0,0 +1,295 @@
#![feature(assert_matches)]
mod mock;
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::HashSet;
use std::sync::Arc;
use catalog::remote::{
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
};
use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use datatypes::schema::Schema;
use futures_util::StreamExt;
use table::engine::{EngineContext, TableEngineRef};
use table::requests::CreateTableRequest;
use crate::mock::{MockKvBackend, MockTableEngine};
#[tokio::test]
async fn test_backend() {
common_telemetry::init_default_ut_logging();
let backend = MockKvBackend::default();
let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
}
.to_string();
backend
.set(
default_catalog_key.as_bytes(),
&CatalogValue {}.as_bytes().unwrap(),
)
.await
.unwrap();
let schema_key = SchemaKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
}
.to_string();
backend
.set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
.await
.unwrap();
let mut iter = backend.range("__c-".as_bytes());
let mut res = HashSet::new();
while let Some(r) = iter.next().await {
let kv = r.unwrap();
res.insert(String::from_utf8_lossy(&kv.0).to_string());
}
assert_eq!(
vec!["__c-greptime".to_string()],
res.into_iter().collect::<Vec<_>>()
);
}
async fn prepare_components(node_id: u64) -> (KvBackendRef, TableEngineRef, CatalogManagerRef) {
let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
let table_engine = Arc::new(MockTableEngine::default());
let catalog_manager =
RemoteCatalogManager::new(table_engine.clone(), node_id, backend.clone());
catalog_manager.start().await.unwrap();
(backend, table_engine, Arc::new(catalog_manager))
}
#[tokio::test]
async fn test_remote_catalog_default() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let (_, _, catalog_manager) = prepare_components(node_id).await;
assert_eq!(
vec![DEFAULT_CATALOG_NAME.to_string()],
catalog_manager.catalog_names().unwrap()
);
let default_catalog = catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap();
assert_eq!(
vec![DEFAULT_SCHEMA_NAME.to_string()],
default_catalog.schema_names().unwrap()
);
}
#[tokio::test]
async fn test_remote_catalog_register_nonexistent() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
// register a new table with a nonexistent catalog
let catalog_name = "nonexistent_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
let table_name = "fail_table".to_string();
// this schema has no effect
let table_schema = Arc::new(Schema::new(vec![]));
let table = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: 1,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name,
table_id: 1,
table,
};
let res = catalog_manager.register_table(reg_req).await;
// registration fails because nonexistent_catalog does not exist yet.
assert_matches!(
res.err().unwrap(),
catalog::error::Error::CatalogNotFound { .. }
);
}
#[tokio::test]
async fn test_register_table() {
let node_id = 42;
let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
let default_catalog = catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap();
assert_eq!(
vec![DEFAULT_SCHEMA_NAME.to_string()],
default_catalog.schema_names().unwrap()
);
let default_schema = default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap();
assert_eq!(vec!["numbers"], default_schema.table_names().unwrap());
// register a new table in the default catalog and schema
let catalog_name = DEFAULT_CATALOG_NAME.to_string();
let schema_name = DEFAULT_SCHEMA_NAME.to_string();
let table_name = "test_table".to_string();
let table_id = 1;
// this schema has no effect
let table_schema = Arc::new(Schema::new(vec![]));
let table = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: table_id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name: table_name.clone(),
table_id,
table,
};
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(
HashSet::from([table_name, "numbers".to_string()]),
default_schema
.table_names()
.unwrap()
.into_iter()
.collect::<HashSet<_>>()
);
}
#[tokio::test]
async fn test_register_catalog_schema_table() {
let node_id = 42;
let (backend, table_engine, catalog_manager) = prepare_components(node_id).await;
let catalog_name = "test_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
let catalog = Arc::new(RemoteCatalogProvider::new(
catalog_name.clone(),
backend.clone(),
));
// register catalog to catalog manager
catalog_manager
.register_catalog(catalog_name.clone(), catalog)
.unwrap();
assert_eq!(
HashSet::<String>::from_iter(
vec![DEFAULT_CATALOG_NAME.to_string(), catalog_name.clone()].into_iter()
),
HashSet::from_iter(catalog_manager.catalog_names().unwrap().into_iter())
);
let table_to_register = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: 2,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: "".to_string(),
desc: None,
schema: Arc::new(Schema::new(vec![])),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: " fail_table".to_string(),
table_id: 2,
table: table_to_register,
};
// this registration will fail since the schema does not exist yet
assert_matches!(
catalog_manager
.register_table(reg_req.clone())
.await
.unwrap_err(),
catalog::error::Error::SchemaNotFound { .. }
);
let new_catalog = catalog_manager
.catalog(&catalog_name)
.unwrap()
.expect("catalog should exist since it's already registered");
let schema = Arc::new(RemoteSchemaProvider::new(
catalog_name.clone(),
schema_name.clone(),
node_id,
backend.clone(),
));
let prev = new_catalog
.register_schema(schema_name.clone(), schema.clone())
.expect("Register schema should not fail");
assert!(prev.is_none());
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(
HashSet::from([schema_name.clone()]),
new_catalog.schema_names().unwrap().into_iter().collect()
)
}
#[tokio::test]
async fn test_next_table_id() {
let node_id = 42;
let (_, _, catalog_manager) = prepare_components(node_id).await;
assert_eq!(
MIN_USER_TABLE_ID,
catalog_manager.next_table_id().await.unwrap()
);
assert_eq!(
MIN_USER_TABLE_ID + 1,
catalog_manager.next_table_id().await.unwrap()
);
}
}

View File

@@ -7,20 +7,34 @@ edition = "2021"
[dependencies]
api = { path = "../api" }
async-stream = "0.3"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../datatypes" }
enum_dispatch = "0.3"
parking_lot = "0.12"
rand = "0.8"
snafu = { version = "0.7", features = ["backtraces"] }
tonic = "0.8"
[dev-dependencies]
datanode = { path = "../datanode" }
substrait = { path = "../common/substrait" }
tokio = { version = "1.0", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# TODO(ruihang): upgrade to 0.11 once substrait-rs supports it.
[dev-dependencies.prost_09]
package = "prost"
version = "0.9"
[dev-dependencies.substrait_proto]
package = "substrait"
version = "0.2"

View File

@@ -1,3 +1,5 @@
use std::collections::HashMap;
use api::v1::{codec::InsertBatch, *};
use client::{Client, Database};
@@ -10,7 +12,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let expr = InsertExpr {
@@ -18,6 +20,7 @@ async fn run() {
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: insert_batches(),
})),
options: HashMap::default(),
};
db.insert(expr).await.unwrap();
}

View File

@@ -0,0 +1,96 @@
use api::v1::{ColumnDataType, ColumnDef, CreateExpr};
use client::{admin::Admin, Client, Database};
use prost_09::Message;
use substrait_proto::protobuf::{
plan_rel::RelType as PlanRelType,
read_rel::{NamedTable, ReadType},
rel::RelType,
PlanRel, ReadRel, Rel,
};
use tracing::{event, Level};
fn main() {
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
.unwrap();
run();
}
#[tokio::main]
async fn run() {
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let create_table_expr = CreateExpr {
catalog_name: Some("greptime".to_string()),
schema_name: Some("public".to_string()),
table_name: "test_logical_dist_exec".to_string(),
desc: None,
column_defs: vec![
ColumnDef {
name: "timestamp".to_string(),
datatype: ColumnDataType::Timestamp as i32,
is_nullable: false,
default_constraint: None,
},
ColumnDef {
name: "key".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
},
ColumnDef {
name: "value".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
},
],
time_index: "timestamp".to_string(),
primary_keys: vec!["key".to_string()],
create_if_not_exists: false,
table_options: Default::default(),
};
let admin = Admin::new("create table", client.clone());
let result = admin.create(create_table_expr).await.unwrap();
event!(Level::INFO, "create table result: {:#?}", result);
let logical = mock_logical_plan();
event!(Level::INFO, "plan size: {:#?}", logical.len());
let db = Database::new("greptime", client);
let result = db.logical_plan(logical).await.unwrap();
event!(Level::INFO, "result: {:#?}", result);
}
fn mock_logical_plan() -> Vec<u8> {
let catalog_name = "greptime".to_string();
let schema_name = "public".to_string();
let table_name = "test_logical_dist_exec".to_string();
let named_table = NamedTable {
names: vec![catalog_name, schema_name, table_name],
advanced_extension: None,
};
let read_type = ReadType::NamedTable(named_table);
let read_rel = ReadRel {
common: None,
base_schema: None,
filter: None,
projection: None,
advanced_extension: None,
read_type: Some(read_type),
};
let mut buf = vec![];
let rel = Rel {
rel_type: Some(RelType::Read(Box::new(read_rel))),
};
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
plan_rel.encode(&mut buf).unwrap();
buf
}

View File

@@ -16,7 +16,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let physical = mock_physical_plan();

View File

@@ -10,7 +10,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let sql = Select::Sql("select * from demo".to_string());

View File

@@ -22,10 +22,6 @@ impl Admin {
}
}
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
self.client.start(url).await
}
pub async fn create(&self, expr: CreateExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,

View File

@@ -1,47 +1,96 @@
use api::v1::{greptime_client::GreptimeClient, *};
use snafu::{OptionExt, ResultExt};
use std::sync::Arc;
use api::v1::greptime_client::GreptimeClient;
use api::v1::*;
use common_grpc::channel_manager::ChannelManager;
use parking_lot::RwLock;
use snafu::OptionExt;
use snafu::ResultExt;
use tonic::transport::Channel;
use crate::error;
use crate::load_balance::LoadBalance;
use crate::load_balance::Loadbalancer;
use crate::Result;
#[derive(Clone, Debug, Default)]
pub struct Client {
client: Option<GreptimeClient<Channel>>,
inner: Arc<Inner>,
}
#[derive(Debug, Default)]
struct Inner {
channel_manager: ChannelManager,
peers: Arc<RwLock<Vec<String>>>,
load_balance: Loadbalancer,
}
impl Inner {
fn with_manager(channel_manager: ChannelManager) -> Self {
Self {
channel_manager,
..Default::default()
}
}
fn set_peers(&self, peers: Vec<String>) {
let mut guard = self.peers.write();
*guard = peers;
}
fn get_peer(&self) -> Option<String> {
let guard = self.peers.read();
self.load_balance.get_peer(&guard).cloned()
}
}
impl Client {
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
match self.client.as_ref() {
None => {
let url = url.into();
let client = GreptimeClient::connect(url.clone())
.await
.context(error::ConnectFailedSnafu { url })?;
self.client = Some(client);
Ok(())
}
Some(_) => error::IllegalGrpcClientStateSnafu {
err_msg: "already started",
}
.fail(),
}
pub fn new() -> Self {
Default::default()
}
pub fn with_client(client: GreptimeClient<Channel>) -> Self {
pub fn with_manager(channel_manager: ChannelManager) -> Self {
let inner = Arc::new(Inner::with_manager(channel_manager));
Self { inner }
}
pub fn with_urls<U, A>(urls: A) -> Self
where
U: AsRef<str>,
A: AsRef<[U]>,
{
Self::with_manager_and_urls(ChannelManager::new(), urls)
}
pub fn with_manager_and_urls<U, A>(channel_manager: ChannelManager, urls: A) -> Self
where
U: AsRef<str>,
A: AsRef<[U]>,
{
let inner = Inner::with_manager(channel_manager);
let urls: Vec<String> = urls
.as_ref()
.iter()
.map(|peer| peer.as_ref().to_string())
.collect();
inner.set_peers(urls);
Self {
client: Some(client),
inner: Arc::new(inner),
}
}
pub async fn connect(url: impl Into<String>) -> Result<Self> {
let url = url.into();
let client = GreptimeClient::connect(url.clone())
.await
.context(error::ConnectFailedSnafu { url })?;
Ok(Self {
client: Some(client),
})
pub fn start<U, A>(&self, urls: A)
where
U: AsRef<str>,
A: AsRef<[U]>,
{
let urls: Vec<String> = urls
.as_ref()
.iter()
.map(|peer| peer.as_ref().to_string())
.collect();
self.inner.set_peers(urls);
}
pub async fn admin(&self, req: AdminRequest) -> Result<AdminResponse> {
@@ -73,18 +122,59 @@ impl Client {
}
pub async fn batch(&self, req: BatchRequest) -> Result<BatchResponse> {
if let Some(client) = self.client.as_ref() {
let res = client
.clone()
.batch(req)
.await
.context(error::TonicStatusSnafu)?;
Ok(res.into_inner())
} else {
error::IllegalGrpcClientStateSnafu {
err_msg: "not started",
}
.fail()
let peer = self
.inner
.get_peer()
.context(error::IllegalGrpcClientStateSnafu {
err_msg: "No available peer found",
})?;
let mut client = self.make_client(peer)?;
let result = client.batch(req).await.context(error::TonicStatusSnafu)?;
Ok(result.into_inner())
}
fn make_client(&self, addr: impl AsRef<str>) -> Result<GreptimeClient<Channel>> {
let addr = addr.as_ref();
let channel = self
.inner
.channel_manager
.get(addr)
.context(error::CreateChannelSnafu { addr })?;
Ok(GreptimeClient::new(channel))
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::Inner;
use crate::load_balance::Loadbalancer;
fn mock_peers() -> Vec<String> {
vec![
"127.0.0.1:3001".to_string(),
"127.0.0.1:3002".to_string(),
"127.0.0.1:3003".to_string(),
]
}
#[test]
fn test_inner() {
let inner = Inner::default();
assert!(matches!(
inner.load_balance,
Loadbalancer::Random(crate::load_balance::Random)
));
assert!(inner.get_peer().is_none());
let peers = mock_peers();
inner.set_peers(peers.clone());
let all: HashSet<String> = peers.into_iter().collect();
for _ in 0..20 {
assert!(all.contains(&inner.get_peer().unwrap()));
}
}
}
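
An illustrative sketch, not part of the changeset, of how the refactored client is meant to be wired: it holds a peer list instead of a single eager connection, the load balancer picks a peer per call, and start can replace the peers at runtime. The addresses below are placeholders.

use client::{Client, Database};

fn wire_client() -> Database {
    // No eager connection: channels are created lazily per peer on first use.
    let client = Client::with_urls(["127.0.0.1:3001", "127.0.0.1:3002"]);
    // The peer list can be swapped later without rebuilding the client.
    client.start(["127.0.0.1:3003"]);
    Database::new("greptime", client)
}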

View File

@@ -23,10 +23,7 @@ use snafu::{ensure, OptionExt, ResultExt};
use crate::error;
use crate::{
error::{
ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
MissingFieldSnafu,
},
error::{ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu},
Client, Result,
};
@@ -46,10 +43,6 @@ impl Database {
}
}
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
self.client.start(url).await
}
pub fn name(&self) -> &str {
&self.name
}
@@ -65,6 +58,24 @@ impl Database {
self.object(expr).await?.try_into()
}
pub async fn batch_insert(&self, insert_exprs: Vec<InsertExpr>) -> Result<Vec<ObjectResult>> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let obj_exprs = insert_exprs
.into_iter()
.map(|expr| ObjectExpr {
header: Some(header.clone()),
expr: Some(object_expr::Expr::Insert(expr)),
})
.collect();
self.objects(obj_exprs)
.await?
.into_iter()
.map(|result| result.try_into())
.collect()
}
pub async fn select(&self, expr: Select) -> Result<ObjectResult> {
let select_expr = match expr {
Select::Sql(sql) => SelectExpr {
@@ -92,6 +103,13 @@ impl Database {
self.do_select(select_expr).await
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
};
self.do_select(select_expr).await
}
async fn do_select(&self, select_expr: SelectExpr) -> Result<ObjectResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
@@ -222,12 +240,8 @@ impl TryFrom<ObjectResult> for Output {
}
fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
let wrapper = ColumnDataTypeWrapper::try_new(
column
.datatype
.context(MissingFieldSnafu { field: "datatype" })?,
)
.context(error::ColumnDataTypeSnafu)?;
let wrapper =
ColumnDataTypeWrapper::try_new(column.datatype).context(error::ColumnDataTypeSnafu)?;
let column_datatype = wrapper.datatype();
let rows = rows as usize;
@@ -330,7 +344,7 @@ mod tests {
#[test]
fn test_column_to_vector() {
let mut column = create_test_column(Arc::new(BooleanVector::from(vec![true])));
column.datatype = Some(-100);
column.datatype = -100;
let result = column_to_vector(&column, 1);
assert!(result.is_err());
assert_eq!(
@@ -408,7 +422,7 @@ mod tests {
semantic_type: 1,
values: Some(values(&[array.clone()]).unwrap()),
null_mask: null_mask(&vec![array], vector.len()),
datatype: Some(wrapper.datatype() as i32),
datatype: wrapper.datatype() as i32,
}
}
}

View File

@@ -85,6 +85,17 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Failed to create gRPC channel, peer address: {}, source: {}",
addr,
source
))]
CreateChannel {
addr: String,
#[snafu(backtrace)]
source: common_grpc::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -107,6 +118,7 @@ impl ErrorExt for Error {
source.status_code()
}
Error::CreateRecordBatches { source } => source.status_code(),
Error::CreateChannel { source, .. } => source.status_code(),
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
}
}

View File

@@ -2,6 +2,9 @@ pub mod admin;
mod client;
mod database;
mod error;
pub mod load_balance;
pub use api;
pub use self::{
client::Client,

View File

@@ -0,0 +1,52 @@
use enum_dispatch::enum_dispatch;
use rand::seq::SliceRandom;
#[enum_dispatch]
pub trait LoadBalance {
fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String>;
}
#[enum_dispatch(LoadBalance)]
#[derive(Debug)]
pub enum Loadbalancer {
Random,
}
impl Default for Loadbalancer {
fn default() -> Self {
Loadbalancer::from(Random)
}
}
#[derive(Debug)]
pub struct Random;
impl LoadBalance for Random {
fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
peers.choose(&mut rand::thread_rng())
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::{LoadBalance, Random};
#[test]
fn test_random_lb() {
let peers = vec![
"127.0.0.1:3001".to_string(),
"127.0.0.1:3002".to_string(),
"127.0.0.1:3003".to_string(),
"127.0.0.1:3004".to_string(),
];
let all: HashSet<String> = peers.clone().into_iter().collect();
let random = Random;
for _ in 0..100 {
let peer = random.get_peer(&peers).unwrap();
assert!(all.contains(peer));
}
}
}
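
A sketch of the extension point, not part of the changeset: another strategy is one more LoadBalance impl plus a variant on the Loadbalancer enum, with enum_dispatch generating the delegation. The RoundRobin type below is hypothetical.

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Debug, Default)]
pub struct RoundRobin {
    next: AtomicUsize,
}

impl LoadBalance for RoundRobin {
    fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
        if peers.is_empty() {
            return None;
        }
        let i = self.next.fetch_add(1, Ordering::Relaxed) % peers.len();
        peers.get(i)
    }
}
// The enum would then gain a variant:
// #[enum_dispatch(LoadBalance)]
// pub enum Loadbalancer { Random, RoundRobin }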

View File

@@ -2,6 +2,7 @@
name = "cmd"
version = "0.1.0"
edition = "2021"
default-run = "greptime"
[[bin]]
name = "greptime"
@@ -10,10 +11,13 @@ path = "src/bin/greptime.rs"
[dependencies]
clap = { version = "3.1", features = ["derive"] }
common-error = { path = "../common/error" }
common-telemetry = { path = "../common/telemetry", features = ["deadlock_detection"] }
common-telemetry = { path = "../common/telemetry", features = [
"deadlock_detection",
] }
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
meta-srv = { path = "../meta-srv" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"

View File

@@ -4,7 +4,9 @@ use clap::Parser;
use cmd::datanode;
use cmd::error::Result;
use cmd::frontend;
use common_telemetry::{self, logging::error, logging::info};
use cmd::metasrv;
use common_telemetry::logging::error;
use common_telemetry::logging::info;
#[derive(Parser)]
#[clap(name = "greptimedb")]
@@ -29,6 +31,8 @@ enum SubCommand {
Datanode(datanode::Command),
#[clap(name = "frontend")]
Frontend(frontend::Command),
#[clap(name = "metasrv")]
Metasrv(metasrv::Command),
}
impl SubCommand {
@@ -36,6 +40,7 @@ impl SubCommand {
match self {
SubCommand::Datanode(cmd) => cmd.run().await,
SubCommand::Frontend(cmd) => cmd.run().await,
SubCommand::Metasrv(cmd) => cmd.run().await,
}
}
}
@@ -45,6 +50,7 @@ impl fmt::Display for SubCommand {
match self {
SubCommand::Datanode(..) => write!(f, "greptime-datanode"),
SubCommand::Frontend(..) => write!(f, "greptime-frontend"),
SubCommand::Metasrv(..) => write!(f, "greptime-metasrv"),
}
}
}

View File

@@ -1,9 +1,9 @@
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{Datanode, DatanodeOptions};
use datanode::datanode::{Datanode, DatanodeOptions, Mode};
use snafu::ResultExt;
use crate::error::{Error, Result, StartDatanodeSnafu};
use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
use crate::toml_loader;
#[derive(Parser)]
@@ -33,6 +33,8 @@ impl SubCommand {
#[derive(Debug, Parser)]
struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
@@ -41,6 +43,8 @@ struct StartCommand {
mysql_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
metasrv_addr: Option<String>,
#[clap(short, long)]
config_file: Option<String>,
}
@@ -84,6 +88,31 @@ impl TryFrom<StartCommand> for DatanodeOptions {
opts.postgres_addr = addr;
}
match (cmd.metasrv_addr, cmd.node_id) {
(Some(meta_addr), Some(node_id)) => {
// The running mode is set to Distributed only when both the
// metasrv address and the node id are given as command-line
// options.
opts.meta_client_opts.metasrv_addr = meta_addr;
opts.node_id = node_id;
opts.mode = Mode::Distributed;
}
(None, None) => {
opts.mode = Mode::Standalone;
}
(None, Some(_)) => {
return MissingConfigSnafu {
msg: "Missing metasrv address option",
}
.fail();
}
(Some(_), None) => {
return MissingConfigSnafu {
msg: "Missing node id option",
}
.fail();
}
}
Ok(opts)
}
}
@@ -97,10 +126,12 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let cmd = StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
@@ -112,6 +143,13 @@ mod tests {
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
assert_eq!("0.0.0.0:3306".to_string(), options.mysql_addr);
assert_eq!(4, options.mysql_runtime_size);
assert_eq!(
"1.1.1.1:3002".to_string(),
options.meta_client_opts.metasrv_addr
);
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
assert_eq!(3000, options.meta_client_opts.timeout_millis);
assert!(options.meta_client_opts.tcp_nodelay);
assert_eq!("0.0.0.0:5432".to_string(), options.postgres_addr);
assert_eq!(4, options.postgres_runtime_size);
@@ -122,4 +160,58 @@ mod tests {
}
};
}
#[test]
fn test_try_from_cmd() {
assert_eq!(
Mode::Standalone,
DatanodeOptions::try_from(StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: None
})
.unwrap()
.mode
);
assert_eq!(
Mode::Distributed,
DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None
})
.unwrap()
.mode
);
assert!(DatanodeOptions::try_from(StartCommand {
node_id: None,
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None,
})
.is_err());
assert!(DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
http_addr: None,
rpc_addr: None,
mysql_addr: None,
postgres_addr: None,
metasrv_addr: None,
config_file: None,
})
.is_err());
}
}

View File

@@ -17,14 +17,27 @@ pub enum Error {
source: frontend::error::Error,
},
#[snafu(display("Failed to start meta server, source: {}", source))]
StartMetaServer {
#[snafu(backtrace)]
source: meta_srv::error::Error,
},
#[snafu(display("Failed to read config file: {}, source: {}", path, source))]
ReadConfig {
source: std::io::Error,
path: String,
source: std::io::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to parse config, source: {}", source))]
ParseConfig { source: toml::de::Error },
ParseConfig {
source: toml::de::Error,
backtrace: Backtrace,
},
#[snafu(display("Missing config, msg: {}", msg))]
MissingConfig { msg: String, backtrace: Backtrace },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -34,7 +47,10 @@ impl ErrorExt for Error {
match self {
Error::StartDatanode { source } => source.status_code(),
Error::StartFrontend { source } => source.status_code(),
Error::ReadConfig { .. } | Error::ParseConfig { .. } => StatusCode::InvalidArguments,
Error::StartMetaServer { source } => source.status_code(),
Error::ReadConfig { .. } | Error::ParseConfig { .. } | Error::MissingConfig { .. } => {
StatusCode::InvalidArguments
}
}
}
@@ -51,18 +67,68 @@ impl ErrorExt for Error {
mod tests {
use super::*;
fn raise_read_config_error() -> std::result::Result<(), std::io::Error> {
Err(std::io::ErrorKind::NotFound.into())
type StdResult<E> = std::result::Result<(), E>;
#[test]
fn test_start_node_error() {
fn throw_datanode_error() -> StdResult<datanode::error::Error> {
datanode::error::MissingFieldSnafu {
field: "test_field",
}
.fail()
}
let e = throw_datanode_error()
.context(StartDatanodeSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_error() {
let e = raise_read_config_error()
fn test_start_frontend_error() {
fn throw_frontend_error() -> StdResult<frontend::error::Error> {
frontend::error::InvalidSqlSnafu { err_msg: "failed" }.fail()
}
let e = throw_frontend_error()
.context(StartFrontendSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_start_metasrv_error() {
fn throw_metasrv_error() -> StdResult<meta_srv::error::Error> {
meta_srv::error::StreamNoneSnafu {}.fail()
}
let e = throw_metasrv_error()
.context(StartMetaServerSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_read_config_error() {
fn throw_read_config_error() -> StdResult<std::io::Error> {
Err(std::io::ErrorKind::NotFound.into())
}
let e = throw_read_config_error()
.context(ReadConfigSnafu { path: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_none());
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
}

View File

@@ -1,6 +1,8 @@
use clap::Parser;
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
@@ -35,7 +37,7 @@ impl SubCommand {
}
#[derive(Debug, Parser)]
struct StartCommand {
pub struct StartCommand {
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
@@ -55,7 +57,7 @@ struct StartCommand {
impl StartCommand {
async fn run(self) -> Result<()> {
let opts = self.try_into()?;
let mut frontend = Frontend::new(opts);
let mut frontend = Frontend::new(opts, Instance::new());
frontend.start().await.context(error::StartFrontendSnafu)
}
}
@@ -74,7 +76,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
opts.http_addr = Some(addr);
}
if let Some(addr) = cmd.grpc_addr {
opts.grpc_addr = Some(addr);
opts.grpc_options = Some(GrpcOptions {
addr,
..Default::default()
});
}
if let Some(addr) = cmd.mysql_addr {
opts.mysql_options = Some(MysqlOptions {
@@ -130,7 +135,10 @@ mod tests {
);
let default_opts = FrontendOptions::default();
assert_eq!(opts.grpc_addr, default_opts.grpc_addr);
assert_eq!(
opts.grpc_options.unwrap().addr,
default_opts.grpc_options.unwrap().addr
);
assert_eq!(
opts.mysql_options.as_ref().unwrap().runtime_size,
default_opts.mysql_options.as_ref().unwrap().runtime_size

View File

@@ -1,4 +1,5 @@
pub mod datanode;
pub mod error;
pub mod frontend;
pub mod metasrv;
mod toml_loader;

122
src/cmd/src/metasrv.rs Normal file
View File

@@ -0,0 +1,122 @@
use clap::Parser;
use common_telemetry::logging;
use meta_srv::bootstrap;
use meta_srv::metasrv::MetaSrvOptions;
use snafu::ResultExt;
use crate::error;
use crate::error::Error;
use crate::error::Result;
use crate::toml_loader;
#[derive(Parser)]
pub struct Command {
#[clap(subcommand)]
subcmd: SubCommand,
}
impl Command {
pub async fn run(self) -> Result<()> {
self.subcmd.run().await
}
}
#[derive(Parser)]
enum SubCommand {
Start(StartCommand),
}
impl SubCommand {
async fn run(self) -> Result<()> {
match self {
SubCommand::Start(cmd) => cmd.run().await,
}
}
}
#[derive(Debug, Parser)]
struct StartCommand {
#[clap(long)]
bind_addr: Option<String>,
#[clap(long)]
server_addr: Option<String>,
#[clap(long)]
store_addr: Option<String>,
#[clap(short, long)]
config_file: Option<String>,
}
impl StartCommand {
async fn run(self) -> Result<()> {
logging::info!("MetaSrv start command: {:#?}", self);
let opts: MetaSrvOptions = self.try_into()?;
logging::info!("MetaSrv options: {:#?}", opts);
bootstrap::bootstrap_meta_srv(opts)
.await
.context(error::StartMetaServerSnafu)
}
}
impl TryFrom<StartCommand> for MetaSrvOptions {
type Error = Error;
fn try_from(cmd: StartCommand) -> Result<Self> {
let mut opts: MetaSrvOptions = if let Some(path) = cmd.config_file {
toml_loader::from_file!(&path)?
} else {
MetaSrvOptions::default()
};
if let Some(addr) = cmd.bind_addr {
opts.bind_addr = addr;
}
if let Some(addr) = cmd.server_addr {
opts.server_addr = addr;
}
if let Some(addr) = cmd.store_addr {
opts.store_addr = addr;
}
Ok(opts)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_from_cmd() {
let cmd = StartCommand {
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("0.0.0.0:3002".to_string()),
store_addr: Some("127.0.0.1:2380".to_string()),
config_file: None,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
}
#[test]
fn test_read_from_config_file() {
let cmd = StartCommand {
bind_addr: None,
server_addr: None,
store_addr: None,
config_file: Some(format!(
"{}/../../config/metasrv.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
assert_eq!(30, options.datanode_lease_secs);
}
}

View File

@@ -0,0 +1,22 @@
[package]
name = "common-catalog"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datatypes = { path = "../../datatypes" }
lazy_static = "1.4"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dev-dependencies]
chrono = "0.4"
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }

View File

@@ -11,3 +11,9 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
/// scripts table id
pub const SCRIPTS_TABLE_ID: u32 = 1;
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";

View File

@@ -0,0 +1,49 @@
use std::any::Any;
use common_error::ext::ErrorExt;
use common_error::prelude::{Snafu, StatusCode};
use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Invalid catalog info: {}", key))]
InvalidCatalog { key: String, backtrace: Backtrace },
#[snafu(display("Failed to deserialize catalog entry value: {}", raw))]
DeserializeCatalogEntryValue {
raw: String,
backtrace: Backtrace,
source: serde_json::error::Error,
},
#[snafu(display("Failed to serialize catalog entry value"))]
SerializeCatalogEntryValue {
backtrace: Backtrace,
source: serde_json::error::Error,
},
#[snafu(display("Failed to parse node id: {}", key))]
ParseNodeId { key: String, backtrace: Backtrace },
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::InvalidCatalog { .. }
| Error::DeserializeCatalogEntryValue { .. }
| Error::SerializeCatalogEntryValue { .. } => StatusCode::Unexpected,
Error::ParseNodeId { .. } => StatusCode::InvalidArguments,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -0,0 +1,340 @@
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableMeta, TableId, TableVersion};
use crate::consts::{
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
};
use crate::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex =
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
SCHEMA_KEY_PREFIX
))
.unwrap();
}
lazy_static! {
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)$",
TABLE_GLOBAL_KEY_PREFIX
))
.unwrap();
}
lazy_static! {
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
TABLE_REGIONAL_KEY_PREFIX
))
.unwrap();
}
pub fn build_catalog_prefix() -> String {
format!("{}-", CATALOG_KEY_PREFIX)
}
pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
}
pub fn build_table_global_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_GLOBAL_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
pub fn build_table_regional_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_REGIONAL_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
/// Table global info has only one key across all datanodes, so it does not have a `node_id` field.
pub struct TableGlobalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
}
impl Display for TableGlobalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)
}
}
impl TableGlobalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_GLOBAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
})
}
}
/// Table global info contains necessary info for a datanode to create table regions, including
/// table id, table meta(schema...), region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableGlobalValue {
/// Table id is the same across all datanodes.
pub id: TableId,
/// Id of the datanode that created this global table info kv. Only for debugging.
pub node_id: u64,
/// Allocation of region ids across all datanodes: node id -> region ids.
pub regions_id_map: HashMap<u64, Vec<u32>>,
/// Raw table metadata, including the schema and engine options.
pub meta: RawTableMeta,
/// Partition rules for table
pub partition_rules: String,
}
/// Table regional info that varies between datanodes, so it contains a `node_id` field.
pub struct TableRegionalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
pub node_id: u64,
}
impl Display for TableRegionalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl TableRegionalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_REGIONAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
let node_id = captures[4]
.to_string()
.parse()
.map_err(|_| InvalidCatalogSnafu { key }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
node_id,
})
}
}
/// Regional table info of a specific datanode, including the table version on that datanode and
/// the region ids allocated by metasrv.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TableRegionalValue {
pub version: TableVersion,
pub regions_ids: Vec<u32>,
}
pub struct CatalogKey {
pub catalog_name: String,
}
impl Display for CatalogKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(CATALOG_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)
}
}
impl CatalogKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = CATALOG_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CatalogValue;
pub struct SchemaKey {
pub catalog_name: String,
pub schema_name: String,
}
impl Display for SchemaKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(SCHEMA_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)
}
}
impl SchemaKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = SCHEMA_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SchemaValue;
macro_rules! define_catalog_value {
( $($val_ty: ty), *) => {
$(
impl $val_ty {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
serde_json::from_str(s.as_ref())
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
)*
}
}
define_catalog_value!(
TableRegionalValue,
TableGlobalValue,
CatalogValue,
SchemaValue
);
#[cfg(test)]
mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
use super::*;
#[test]
fn test_parse_catalog_key() {
let key = "__c-C";
let catalog_key = CatalogKey::parse(key).unwrap();
assert_eq!("C", catalog_key.catalog_name);
assert_eq!(key, catalog_key.to_string());
}
#[test]
fn test_parse_schema_key() {
let key = "__s-C-S";
let schema_key = SchemaKey::parse(key).unwrap();
assert_eq!("C", schema_key.catalog_name);
assert_eq!("S", schema_key.schema_name);
assert_eq!(key, schema_key.to_string());
}
#[test]
fn test_parse_table_key() {
let key = "__tg-C-S-T";
let entry = TableGlobalKey::parse(key).unwrap();
assert_eq!("C", entry.catalog_name);
assert_eq!("S", entry.schema_name);
assert_eq!("T", entry.table_name);
assert_eq!(key, &entry.to_string());
}
#[test]
fn test_build_prefix() {
assert_eq!("__c-", build_catalog_prefix());
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
assert_eq!(
"__tg-CATALOG-SCHEMA-",
build_table_global_prefix("CATALOG", "SCHEMA")
);
}
#[test]
fn test_serialize_schema() {
let schema = Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]);
let meta = RawTableMeta {
schema: RawSchema::from(&schema),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
primary_key_indices: vec![0, 1],
next_column_id: 3,
engine_options: Default::default(),
value_indices: vec![2, 3],
options: Default::default(),
region_numbers: vec![1],
};
let value = TableGlobalValue {
id: 42,
node_id: 0,
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
meta,
partition_rules: "{}".to_string(),
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
assert_eq!(value, deserialized);
}
}
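
A sketch, not part of the changeset, covering the one key type the tests above do not round-trip: TableRegionalKey, whose trailing segment is the node id. It assumes the types defined in this file are in scope.

#[test]
fn regional_key_roundtrip_sketch() {
    let key = TableRegionalKey {
        catalog_name: "greptime".to_string(),
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
        node_id: 42,
    };
    // Display renders "<prefix>-<catalog>-<schema>-<table>-<node_id>".
    let s = key.to_string();
    assert_eq!("__tr-greptime-public-demo-42", s);
    // parse() recovers every segment, including the numeric node id.
    let parsed = TableRegionalKey::parse(&s).unwrap();
    assert_eq!(42, parsed.node_id);
    assert_eq!("demo", parsed.table_name);
}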

View File

@@ -0,0 +1,9 @@
pub mod consts;
pub mod error;
mod helper;
pub use helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
TableGlobalValue, TableRegionalKey, TableRegionalValue,
};

View File

@@ -7,12 +7,12 @@ edition = "2021"
proc-macro = true
[dependencies]
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
quote = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
syn = "1.0"
[dev-dependencies]
arc-swap = "1.0"
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
static_assertions = "1.1.0"

View File

@@ -8,6 +8,7 @@ arc-swap = "1.0"
chrono-tz = "0.6"
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-time = { path = "../time" }
common-query = { path = "../query" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../../datatypes" }

View File

@@ -6,6 +6,7 @@ pub mod math;
pub mod numpy;
#[cfg(test)]
pub(crate) mod test;
mod timestamp;
pub mod udf;
pub use aggregate::MedianAccumulatorCreator;

View File

@@ -9,6 +9,7 @@ use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::function::FunctionRef;
use crate::scalars::math::MathFunction;
use crate::scalars::numpy::NumpyFunction;
use crate::scalars::timestamp::TimestampFunction;
#[derive(Default)]
pub struct FunctionRegistry {
@@ -31,6 +32,10 @@ impl FunctionRegistry {
.insert(func.name(), func);
}
pub fn get_aggr_function(&self, name: &str) -> Option<AggregateFunctionMetaRef> {
self.aggregate_functions.read().unwrap().get(name).cloned()
}
pub fn get_function(&self, name: &str) -> Option<FunctionRef> {
self.functions.read().unwrap().get(name).cloned()
}
@@ -54,6 +59,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
MathFunction::register(&function_registry);
NumpyFunction::register(&function_registry);
TimestampFunction::register(&function_registry);
AggregateFunctions::register(&function_registry);

View File

@@ -0,0 +1,116 @@
//! from_unixtime function.
/// TODO(dennis) It can be removed after we upgrade datafusion.
use std::fmt;
use std::sync::Arc;
use arrow::compute::arithmetics;
use arrow::datatypes::DataType as ArrowDatatype;
use arrow::scalar::PrimitiveScalar;
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::TimestampVector;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;
use crate::error::Result;
use crate::scalars::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub struct FromUnixtimeFunction;
const NAME: &str = "from_unixtime";
impl Function for FromUnixtimeFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::timestamp_millis_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(
1,
vec![ConcreteDataType::int64_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
match columns[0].data_type() {
ConcreteDataType::Int64(_) => {
let array = columns[0].to_arrow_array();
// Our timestamp vector's time unit is milliseconds, so scale the seconds by 1000
let array = arithmetics::mul_scalar(
&*array,
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
);
Ok(Arc::new(
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
data_type: ArrowDatatype::Int64,
})?,
))
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail()
.map_err(|e| e.into()),
}
}
}
impl fmt::Display for FromUnixtimeFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "FROM_UNIXTIME")
}
}
#[cfg(test)]
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::value::Value;
use datatypes::vectors::Int64Vector;
use super::*;
#[test]
fn test_from_unixtime() {
let f = FromUnixtimeFunction::default();
assert_eq!("from_unixtime", f.name());
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::int64_datatype()]
));
let times = vec![Some(1494410783), None, Some(1494410983)];
let args: Vec<VectorRef> = vec![Arc::new(Int64Vector::from(times.clone()))];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(3, vector.len());
for (i, t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Timestamp(ts) => {
assert_eq!(ts.value(), t.unwrap() * 1000);
}
_ => unreachable!(),
}
}
}
}

View File

@@ -0,0 +1,14 @@
use std::sync::Arc;
mod from_unixtime;
use from_unixtime::FromUnixtimeFunction;
use crate::scalars::function_registry::FunctionRegistry;
pub(crate) struct TimestampFunction;
impl TimestampFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(FromUnixtimeFunction::default()));
}
}

View File

@@ -8,10 +8,23 @@ api = { path = "../../api" }
async-trait = "0.1"
common-base = { path = "../base" }
common-error = { path = "../error" }
common-runtime = { path = "../runtime" }
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.0", features = ["full"] }
tonic = "0.8"
tower = "0.4"
[dependencies.arrow]
package = "arrow2"
version = "0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
[dev-dependencies]
criterion = "0.4"
rand = "0.8"
[[bench]]
name = "bench_main"
harness = false

View File

@@ -0,0 +1,7 @@
use criterion::criterion_main;
mod channel_manager;
criterion_main! {
channel_manager::benches
}

View File

@@ -0,0 +1,34 @@
use common_grpc::channel_manager::ChannelManager;
use criterion::{criterion_group, criterion_main, Criterion};
#[tokio::main]
async fn do_bench_channel_manager() {
let m = ChannelManager::new();
let task_count = 8;
let mut joins = Vec::with_capacity(task_count);
for _ in 0..task_count {
let m_clone = m.clone();
let join = tokio::spawn(async move {
for _ in 0..10000 {
let idx = rand::random::<usize>() % 100;
let ret = m_clone.get(format!("{}", idx));
assert!(ret.is_ok());
}
});
joins.push(join);
}
for join in joins {
let _ = join.await;
}
}
fn bench_channel_manager(c: &mut Criterion) {
c.bench_function("bench channel manager", |b| {
b.iter(do_bench_channel_manager);
});
}
criterion_group!(benches, bench_channel_manager);
criterion_main!(benches);

View File

@@ -0,0 +1,534 @@
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use dashmap::mapref::entry::Entry;
use dashmap::DashMap;
use snafu::ResultExt;
use tonic::transport::Channel as InnerChannel;
use tonic::transport::Endpoint;
use tonic::transport::Uri;
use tower::make::MakeConnection;
use crate::error;
use crate::error::Result;
const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
#[derive(Clone, Debug)]
pub struct ChannelManager {
config: ChannelConfig,
pool: Arc<Pool>,
}
impl Default for ChannelManager {
fn default() -> Self {
ChannelManager::with_config(ChannelConfig::default())
}
}
impl ChannelManager {
pub fn new() -> Self {
Default::default()
}
pub fn with_config(config: ChannelConfig) -> Self {
let pool = Arc::new(Pool::default());
let cloned_pool = pool.clone();
common_runtime::spawn_bg(async {
recycle_channel_in_loop(cloned_pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
});
Self { config, pool }
}
pub fn config(&self) -> &ChannelConfig {
&self.config
}
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
let addr = addr.as_ref();
// It will acquire the read lock.
if let Some(inner_ch) = self.pool.get(addr) {
return Ok(inner_ch);
}
// It will acquire the write lock.
let entry = match self.pool.entry(addr.to_string()) {
Entry::Occupied(entry) => {
entry.get().increase_access();
entry.into_ref()
}
Entry::Vacant(entry) => {
let endpoint = self.build_endpoint(addr)?;
let inner_channel = endpoint.connect_lazy();
let channel = Channel {
channel: inner_channel,
access: AtomicUsize::new(1),
use_default_connector: true,
};
entry.insert(channel)
}
};
Ok(entry.channel.clone())
}
pub fn reset_with_connector<C>(
&self,
addr: impl AsRef<str>,
connector: C,
) -> Result<InnerChannel>
where
C: MakeConnection<Uri> + Send + 'static,
C::Connection: Unpin + Send + 'static,
C::Future: Send + 'static,
Box<dyn std::error::Error + Send + Sync>: From<C::Error> + Send + 'static,
{
let addr = addr.as_ref();
let endpoint = self.build_endpoint(addr)?;
let inner_channel = endpoint.connect_with_connector_lazy(connector);
let channel = Channel {
channel: inner_channel.clone(),
access: AtomicUsize::new(1),
use_default_connector: false,
};
self.pool.put(addr, channel);
Ok(inner_channel)
}
pub fn retain_channel<F>(&self, f: F)
where
F: FnMut(&String, &mut Channel) -> bool,
{
self.pool.retain_channel(f);
}
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
let mut endpoint =
Endpoint::new(format!("http://{}", addr)).context(error::CreateChannelSnafu)?;
if let Some(dur) = self.config.timeout {
endpoint = endpoint.timeout(dur);
}
if let Some(dur) = self.config.connect_timeout {
endpoint = endpoint.connect_timeout(dur);
}
if let Some(limit) = self.config.concurrency_limit {
endpoint = endpoint.concurrency_limit(limit);
}
if let Some((limit, dur)) = self.config.rate_limit {
endpoint = endpoint.rate_limit(limit, dur);
}
if let Some(size) = self.config.initial_stream_window_size {
endpoint = endpoint.initial_stream_window_size(size);
}
if let Some(size) = self.config.initial_connection_window_size {
endpoint = endpoint.initial_connection_window_size(size);
}
if let Some(dur) = self.config.http2_keep_alive_interval {
endpoint = endpoint.http2_keep_alive_interval(dur);
}
if let Some(dur) = self.config.http2_keep_alive_timeout {
endpoint = endpoint.keep_alive_timeout(dur);
}
if let Some(enabled) = self.config.http2_keep_alive_while_idle {
endpoint = endpoint.keep_alive_while_idle(enabled);
}
if let Some(enabled) = self.config.http2_adaptive_window {
endpoint = endpoint.http2_adaptive_window(enabled);
}
endpoint = endpoint
.tcp_keepalive(self.config.tcp_keepalive)
.tcp_nodelay(self.config.tcp_nodelay);
Ok(endpoint)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChannelConfig {
pub timeout: Option<Duration>,
pub connect_timeout: Option<Duration>,
pub concurrency_limit: Option<usize>,
pub rate_limit: Option<(u64, Duration)>,
pub initial_stream_window_size: Option<u32>,
pub initial_connection_window_size: Option<u32>,
pub http2_keep_alive_interval: Option<Duration>,
pub http2_keep_alive_timeout: Option<Duration>,
pub http2_keep_alive_while_idle: Option<bool>,
pub http2_adaptive_window: Option<bool>,
pub tcp_keepalive: Option<Duration>,
pub tcp_nodelay: bool,
}
impl Default for ChannelConfig {
fn default() -> Self {
Self {
timeout: None,
connect_timeout: None,
concurrency_limit: None,
rate_limit: None,
initial_stream_window_size: None,
initial_connection_window_size: None,
http2_keep_alive_interval: None,
http2_keep_alive_timeout: None,
http2_keep_alive_while_idle: None,
http2_adaptive_window: None,
tcp_keepalive: None,
tcp_nodelay: true,
}
}
}
impl ChannelConfig {
pub fn new() -> Self {
Default::default()
}
/// A timeout applied to each request.
pub fn timeout(self, timeout: Duration) -> Self {
Self {
timeout: Some(timeout),
..self
}
}
/// A timeout for connecting to the URI.
///
/// Defaults to no timeout.
pub fn connect_timeout(self, timeout: Duration) -> Self {
Self {
connect_timeout: Some(timeout),
..self
}
}
/// A concurrency limit applied to each request.
pub fn concurrency_limit(self, limit: usize) -> Self {
Self {
concurrency_limit: Some(limit),
..self
}
}
/// A rate limit applied to each request.
pub fn rate_limit(self, limit: u64, duration: Duration) -> Self {
Self {
rate_limit: Some((limit, duration)),
..self
}
}
/// Sets the SETTINGS_INITIAL_WINDOW_SIZE option for HTTP2 stream-level flow control.
/// Default is 65,535
pub fn initial_stream_window_size(self, size: u32) -> Self {
Self {
initial_stream_window_size: Some(size),
..self
}
}
/// Sets the max connection-level flow control for HTTP2
///
/// Default is 65,535
pub fn initial_connection_window_size(self, size: u32) -> Self {
Self {
initial_connection_window_size: Some(size),
..self
}
}
/// Set http2 KEEP_ALIVE_INTERVAL. Uses hyper's default otherwise.
pub fn http2_keep_alive_interval(self, duration: Duration) -> Self {
Self {
http2_keep_alive_interval: Some(duration),
..self
}
}
/// Set http2 KEEP_ALIVE_TIMEOUT. Uses hyper's default otherwise.
pub fn http2_keep_alive_timeout(self, duration: Duration) -> Self {
Self {
http2_keep_alive_timeout: Some(duration),
..self
}
}
/// Set http2 KEEP_ALIVE_WHILE_IDLE. Uses hyper's default otherwise.
pub fn http2_keep_alive_while_idle(self, enabled: bool) -> Self {
Self {
http2_keep_alive_while_idle: Some(enabled),
..self
}
}
/// Sets whether to use an adaptive flow control. Uses hyper's default otherwise.
pub fn http2_adaptive_window(self, enabled: bool) -> Self {
Self {
http2_adaptive_window: Some(enabled),
..self
}
}
/// Set whether TCP keepalive messages are enabled on accepted connections.
///
/// If None is specified, keepalive is disabled, otherwise the duration specified
/// will be the time to remain idle before sending TCP keepalive probes.
///
/// Default is no keepalive (None)
pub fn tcp_keepalive(self, duration: Duration) -> Self {
Self {
tcp_keepalive: Some(duration),
..self
}
}
/// Set the value of TCP_NODELAY option for accepted connections.
///
/// Enabled by default.
pub fn tcp_nodelay(self, enabled: bool) -> Self {
Self {
tcp_nodelay: enabled,
..self
}
}
}
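// A minimal usage sketch (illustrative only; the address and durations are
// arbitrary): compose a config with the builder methods above and construct a
// manager the same way the tests below do.
//
//     let pool = Arc::new(Pool::default());
//     let config = ChannelConfig::new()
//         .timeout(Duration::from_secs(3))
//         .tcp_nodelay(true);
//     let mgr = ChannelManager { pool, config };
//     let channel = mgr.get("127.0.0.1:3001")?;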
#[derive(Debug)]
pub struct Channel {
channel: InnerChannel,
access: AtomicUsize,
use_default_connector: bool,
}
impl Channel {
#[inline]
pub fn access(&self) -> usize {
self.access.load(Ordering::Relaxed)
}
#[inline]
pub fn use_default_connector(&self) -> bool {
self.use_default_connector
}
#[inline]
pub fn increase_access(&self) {
self.access.fetch_add(1, Ordering::Relaxed);
}
}
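/// A pool of gRPC channels keyed by peer address; each entry carries an access
/// counter that is used to recycle idle channels.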
#[derive(Debug, Default)]
struct Pool {
channels: DashMap<String, Channel>,
}
impl Pool {
fn get(&self, addr: &str) -> Option<InnerChannel> {
let channel = self.channels.get(addr);
channel.map(|ch| {
ch.increase_access();
ch.channel.clone()
})
}
fn entry(&self, addr: String) -> Entry<String, Channel> {
self.channels.entry(addr)
}
#[cfg(test)]
fn get_access(&self, addr: &str) -> Option<usize> {
let channel = self.channels.get(addr);
channel.map(|ch| ch.access())
}
fn put(&self, addr: &str, channel: Channel) {
self.channels.insert(addr.to_string(), channel);
}
fn retain_channel<F>(&self, f: F)
where
F: FnMut(&String, &mut Channel) -> bool,
{
self.channels.retain(f);
}
}
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
loop {
interval.tick().await;
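// Reset every channel's access counter and drop the channels that were not
// accessed since the previous tick.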
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
}
}
#[cfg(test)]
mod tests {
use tower::service_fn;
use super::*;
#[should_panic]
#[test]
fn test_invalid_addr() {
let pool = Arc::new(Pool::default());
let mgr = ChannelManager {
pool,
..Default::default()
};
let addr = "http://test";
let _ = mgr.get(addr).unwrap();
}
#[tokio::test]
async fn test_access_count() {
let pool = Arc::new(Pool::default());
let config = ChannelConfig::new();
let mgr = Arc::new(ChannelManager { pool, config });
let addr = "test_uri";
let mut joins = Vec::with_capacity(10);
for _ in 0..10 {
let mgr_clone = mgr.clone();
let join = tokio::spawn(async move {
for _ in 0..100 {
let _ = mgr_clone.get(addr);
}
});
joins.push(join);
}
for join in joins {
join.await.unwrap();
}
assert_eq!(1000, mgr.pool.get_access(addr).unwrap());
mgr.pool
.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0);
assert_eq!(0, mgr.pool.get_access(addr).unwrap());
}
#[test]
fn test_config() {
let default_cfg = ChannelConfig::new();
assert_eq!(
ChannelConfig {
timeout: None,
connect_timeout: None,
concurrency_limit: None,
rate_limit: None,
initial_stream_window_size: None,
initial_connection_window_size: None,
http2_keep_alive_interval: None,
http2_keep_alive_timeout: None,
http2_keep_alive_while_idle: None,
http2_adaptive_window: None,
tcp_keepalive: None,
tcp_nodelay: true,
},
default_cfg
);
let cfg = default_cfg
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
.concurrency_limit(6)
.rate_limit(5, Duration::from_secs(1))
.initial_stream_window_size(10)
.initial_connection_window_size(20)
.http2_keep_alive_interval(Duration::from_secs(1))
.http2_keep_alive_timeout(Duration::from_secs(3))
.http2_keep_alive_while_idle(true)
.http2_adaptive_window(true)
.tcp_keepalive(Duration::from_secs(2))
.tcp_nodelay(false);
assert_eq!(
ChannelConfig {
timeout: Some(Duration::from_secs(3)),
connect_timeout: Some(Duration::from_secs(5)),
concurrency_limit: Some(6),
rate_limit: Some((5, Duration::from_secs(1))),
initial_stream_window_size: Some(10),
initial_connection_window_size: Some(20),
http2_keep_alive_interval: Some(Duration::from_secs(1)),
http2_keep_alive_timeout: Some(Duration::from_secs(3)),
http2_keep_alive_while_idle: Some(true),
http2_adaptive_window: Some(true),
tcp_keepalive: Some(Duration::from_secs(2)),
tcp_nodelay: false,
},
cfg
);
}
#[test]
fn test_build_endpoint() {
let pool = Arc::new(Pool::default());
let config = ChannelConfig::new()
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
.concurrency_limit(6)
.rate_limit(5, Duration::from_secs(1))
.initial_stream_window_size(10)
.initial_connection_window_size(20)
.http2_keep_alive_interval(Duration::from_secs(1))
.http2_keep_alive_timeout(Duration::from_secs(3))
.http2_keep_alive_while_idle(true)
.http2_adaptive_window(true)
.tcp_keepalive(Duration::from_secs(2))
.tcp_nodelay(true);
let mgr = ChannelManager { pool, config };
let res = mgr.build_endpoint("test_addr");
assert!(res.is_ok());
}
#[tokio::test]
async fn test_channel_with_connector() {
let pool = Pool {
channels: DashMap::default(),
};
let pool = Arc::new(pool);
let config = ChannelConfig::new();
let mgr = ChannelManager { pool, config };
let addr = "test_addr";
let res = mgr.get(addr);
assert!(res.is_ok());
mgr.retain_channel(|addr, channel| {
assert_eq!("test_addr", addr);
assert!(channel.use_default_connector());
true
});
let (client, _) = tokio::io::duplex(1024);
let mut client = Some(client);
let res = mgr.reset_with_connector(
addr,
service_fn(move |_| {
let client = client.take().unwrap();
async move { Ok::<_, std::io::Error>(client) }
}),
);
assert!(res.is_ok());
mgr.retain_channel(|addr, channel| {
assert_eq!("test_addr", addr);
assert!(!channel.use_default_connector());
true
});
}
}

View File

@@ -49,6 +49,12 @@ pub enum Error {
actual: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to create gRPC channel, source: {}", source))]
CreateChannel {
source: tonic::transport::Error,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {
@@ -61,9 +67,9 @@ impl ErrorExt for Error {
Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
StatusCode::Unsupported
}
Error::NewProjection { .. } | Error::DecodePhysicalPlanNode { .. } => {
StatusCode::Internal
}
Error::NewProjection { .. }
| Error::DecodePhysicalPlanNode { .. }
| Error::CreateChannel { .. } => StatusCode::Internal,
}
}
@@ -75,3 +81,129 @@ impl ErrorExt for Error {
self
}
}
#[cfg(test)]
mod tests {
use snafu::OptionExt;
use snafu::ResultExt;
use super::*;
type StdResult<E> = std::result::Result<(), E>;
fn throw_none_option() -> Option<String> {
None
}
#[test]
fn test_empty_physical_plan_error() {
let e = throw_none_option()
.context(EmptyPhysicalPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_empty_physical_expr_error() {
let e = throw_none_option()
.context(EmptyPhysicalExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_unsupported_df_plan_error() {
let e = throw_none_option()
.context(UnsupportedDfPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_unsupported_df_expr_error() {
let e = throw_none_option()
.context(UnsupportedDfExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_missing_field_error() {
let e = throw_none_option()
.context(MissingFieldSnafu { field: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_new_projection_error() {
fn throw_df_error() -> StdResult<DataFusionError> {
Err(DataFusionError::NotImplemented("".to_string()))
}
let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_decode_physical_plan_node_error() {
fn throw_decode_error() -> StdResult<DecodeError> {
Err(DecodeError::new("test"))
}
let e = throw_decode_error()
.context(DecodePhysicalPlanNodeSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_type_mismatch_error() {
let e = throw_none_option()
.context(TypeMismatchSnafu {
column_name: "",
expected: "",
actual: "",
})
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_create_channel_error() {
fn throw_tonic_error() -> StdResult<tonic::transport::Error> {
tonic::transport::Endpoint::new("http//http").map(|_| ())
}
let e = throw_tonic_error()
.context(CreateChannelSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
}

View File

@@ -1,3 +1,4 @@
pub mod channel_manager;
pub mod error;
pub mod physical;
pub mod writer;

View File

@@ -184,7 +184,7 @@ impl ExecutionPlan for MockExecution {
_runtime: Arc<RuntimeEnv>,
) -> datafusion::error::Result<SendableRecordBatchStream> {
let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
let name_array = Arc::new(Utf8Array::<i64>::from_slice([
let name_array = Arc::new(Utf8Array::<i32>::from_slice([
"zhangsan", "lisi", "wangwu", "Tony", "Mike",
]));
let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));

View File

@@ -35,7 +35,7 @@ impl LinesWriter {
SemanticType::Timestamp,
);
ensure!(
column.datatype == Some(ColumnDataType::Timestamp.into()),
column.datatype == ColumnDataType::Timestamp as i32,
TypeMismatchSnafu {
column_name,
expected: "timestamp",
@@ -52,7 +52,7 @@ impl LinesWriter {
pub fn write_tag(&mut self, column_name: &str, value: &str) -> Result<()> {
let (idx, column) = self.mut_column(column_name, ColumnDataType::String, SemanticType::Tag);
ensure!(
column.datatype == Some(ColumnDataType::String.into()),
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
@@ -70,7 +70,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Uint64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Uint64.into()),
column.datatype == ColumnDataType::Uint64 as i32,
TypeMismatchSnafu {
column_name,
expected: "u64",
@@ -88,7 +88,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Int64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Int64.into()),
column.datatype == ColumnDataType::Int64 as i32,
TypeMismatchSnafu {
column_name,
expected: "i64",
@@ -106,7 +106,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Float64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Float64.into()),
column.datatype == ColumnDataType::Float64 as i32,
TypeMismatchSnafu {
column_name,
expected: "f64",
@@ -124,7 +124,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::String, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::String.into()),
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
@@ -142,7 +142,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Boolean, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Boolean.into()),
column.datatype == ColumnDataType::Boolean as i32,
TypeMismatchSnafu {
column_name,
expected: "boolean",
@@ -197,7 +197,7 @@ impl LinesWriter {
column_name: column_name.to_string(),
semantic_type: semantic_type.into(),
values: Some(Values::with_capacity(datatype, to_insert)),
datatype: Some(datatype.into()),
datatype: datatype as i32,
null_mask: Vec::default(),
});
column_names.insert(column_name.to_string(), new_idx);
@@ -275,7 +275,7 @@ mod tests {
let column = &columns[0];
assert_eq!("host", columns[0].column_name);
assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Tag as i32, column.semantic_type);
assert_eq!(
vec!["host1", "host2", "host3"],
@@ -285,28 +285,28 @@ mod tests {
let column = &columns[1];
assert_eq!("cpu", column.column_name);
assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.5, 0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, false]);
let column = &columns[2];
assert_eq!("memory", column.column_name);
assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[3];
assert_eq!("name", column.column_name);
assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec!["name1"], column.values.as_ref().unwrap().string_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[4];
assert_eq!("ts", column.column_name);
assert_eq!(Some(ColumnDataType::Timestamp as i32), column.datatype);
assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
assert_eq!(
vec![101011000, 102011001, 103011002],
@@ -316,28 +316,28 @@ mod tests {
let column = &columns[5];
assert_eq!("enable_reboot", column.column_name);
assert_eq!(Some(ColumnDataType::Boolean as i32), column.datatype);
assert_eq!(ColumnDataType::Boolean as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![true], column.values.as_ref().unwrap().bool_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[6];
assert_eq!("year_of_service", column.column_name);
assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![2], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[7];
assert_eq!("temperature", column.column_name);
assert_eq!(Some(ColumnDataType::Int64 as i32), column.datatype);
assert_eq!(ColumnDataType::Int64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![4], column.values.as_ref().unwrap().i64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[8];
assert_eq!("cpu_core_num", column.column_name);
assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![16], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, true, false]);

View File

@@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-recordbatch = { path = "../recordbatch" }
common-time = { path = "../time" }

View File

@@ -4,6 +4,7 @@ use arrow::datatypes::DataType as ArrowDatatype;
use common_error::prelude::*;
use datafusion_common::DataFusionError;
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use statrs::StatsError;
common_error::define_opaque_error!(Error);
@@ -17,6 +18,13 @@ pub enum InnerError {
backtrace: Backtrace,
},
#[snafu(display("Unsupported input datatypes {:?} in function {}", datatypes, function))]
UnsupportedInputDataType {
function: String,
datatypes: Vec<ConcreteDataType>,
backtrace: Backtrace,
},
#[snafu(display("Fail to generate function, source: {}", source))]
GenerateFunction {
source: StatsError,
@@ -62,6 +70,36 @@ pub enum InnerError {
#[snafu(display("unexpected: not constant column"))]
InvalidInputCol { backtrace: Backtrace },
#[snafu(display("Not expected to run ExecutionPlan more than once"))]
ExecuteRepeatedly { backtrace: Backtrace },
#[snafu(display("General DataFusion error, source: {}", source))]
GeneralDataFusion {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Failed to execute DataFusion ExecutionPlan, source: {}", source))]
DataFusionExecutionPlan {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display(
"Failed to convert DataFusion's recordbatch stream, source: {}",
source
))]
ConvertDfRecordBatchStream {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to convert arrow schema, source: {}", source))]
ConvertArrowSchema {
#[snafu(backtrace)]
source: DataTypeError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -76,9 +114,19 @@ impl ErrorExt for InnerError {
| InnerError::InvalidInputState { .. }
| InnerError::InvalidInputCol { .. }
| InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
InnerError::InvalidInputs { source, .. } => source.status_code(),
InnerError::IntoVector { source, .. } => source.status_code(),
InnerError::FromScalarValue { source } => source.status_code(),
InnerError::InvalidInputs { source, .. }
| InnerError::IntoVector { source, .. }
| InnerError::FromScalarValue { source }
| InnerError::ConvertArrowSchema { source } => source.status_code(),
InnerError::ExecuteRepeatedly { .. }
| InnerError::GeneralDataFusion { .. }
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
}
}
@@ -105,6 +153,7 @@ impl From<Error> for DataFusionError {
#[cfg(test)]
mod tests {
use arrow::error::ArrowError;
use snafu::GenerateImplicitData;
use super::*;
@@ -127,6 +176,48 @@ mod tests {
.unwrap()
.into();
assert_error(&err, StatusCode::EngineExecuteQuery);
let err: Error = throw_df_error()
.context(GeneralDataFusionSnafu)
.err()
.unwrap()
.into();
assert_error(&err, StatusCode::Unexpected);
let err: Error = throw_df_error()
.context(DataFusionExecutionPlanSnafu)
.err()
.unwrap()
.into();
assert_error(&err, StatusCode::Unexpected);
}
#[test]
fn test_execute_repeatedly_error() {
let error: Error = None::<i32>
.context(ExecuteRepeatedlySnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
assert!(error.backtrace_opt().is_some());
}
#[test]
fn test_convert_df_recordbatch_stream_error() {
let result: std::result::Result<i32, common_recordbatch::error::Error> =
Err(common_recordbatch::error::InnerError::PollStream {
source: ArrowError::Overflow,
backtrace: Backtrace::generate(),
}
.into());
let error: Error = result
.context(ConvertDfRecordBatchStreamSnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Internal);
assert!(error.backtrace_opt().is_some());
}
fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {

View File

@@ -4,6 +4,7 @@ pub mod columnar_value;
pub mod error;
mod function;
pub mod logical_plan;
pub mod physical_plan;
pub mod prelude;
mod signature;
@@ -13,3 +14,5 @@ pub enum Output {
RecordBatches(RecordBatches),
Stream(SendableRecordBatchStream),
}
pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;

View File

@@ -2,7 +2,7 @@ use datafusion::logical_plan::Expr as DfExpr;
/// Central struct of query API.
/// Represent logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
#[derive(Clone, PartialEq, Hash)]
#[derive(Clone, PartialEq, Hash, Debug)]
pub struct Expr {
df_expr: DfExpr,
}

View File

@@ -0,0 +1,325 @@
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use async_trait::async_trait;
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
use common_recordbatch::DfSendableRecordBatchStream;
use common_recordbatch::SendableRecordBatchStream;
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
pub use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::physical_plan::expressions::PhysicalSortExpr;
pub use datafusion::physical_plan::Partitioning;
use datafusion::physical_plan::Statistics;
use datatypes::schema::SchemaRef;
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::DfPhysicalPlan;
pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;
/// `PhysicalPlan` represents nodes in the Physical Plan.
///
/// Each `PhysicalPlan` is Partition-aware and is responsible for
/// creating the actual `async` [`SendableRecordBatchStream`]s
/// of [`RecordBatch`] that incrementally compute the operator's
/// output from its input partition.
#[async_trait]
pub trait PhysicalPlan: Debug + Send + Sync {
/// Returns the physical plan as [`Any`](std::any::Any) so that it can be
/// downcast to a specific implementation.
fn as_any(&self) -> &dyn Any;
/// Get the schema for this physical plan
fn schema(&self) -> SchemaRef;
/// Specifies the output partitioning scheme of this plan
fn output_partitioning(&self) -> Partitioning;
/// Get a list of child physical plans that provide the input for this plan. The returned list
/// will be empty for leaf nodes, will contain a single value for unary nodes, or two
/// values for binary nodes (such as joins).
fn children(&self) -> Vec<PhysicalPlanRef>;
/// Returns a new plan where all children were replaced by new plans.
/// The size of `children` must be equal to the size of `PhysicalPlan::children()`.
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef>;
/// Creates a RecordBatch stream.
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream>;
}
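/// Adapts a DataFusion [`DfPhysicalPlan`] so it can be used as a Greptime [`PhysicalPlan`].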
#[derive(Debug)]
pub struct PhysicalPlanAdapter {
schema: SchemaRef,
df_plan: Arc<dyn DfPhysicalPlan>,
}
impl PhysicalPlanAdapter {
pub fn new(schema: SchemaRef, df_plan: Arc<dyn DfPhysicalPlan>) -> Self {
Self { schema, df_plan }
}
pub fn df_plan(&self) -> Arc<dyn DfPhysicalPlan> {
self.df_plan.clone()
}
}
#[async_trait]
impl PhysicalPlan for PhysicalPlanAdapter {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_partitioning(&self) -> Partitioning {
self.df_plan.output_partitioning()
}
fn children(&self) -> Vec<PhysicalPlanRef> {
self.df_plan
.children()
.into_iter()
.map(|x| Arc::new(PhysicalPlanAdapter::new(self.schema(), x)) as _)
.collect()
}
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
let children = children
.into_iter()
.map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
.collect();
let plan = self
.df_plan
.with_new_children(children)
.context(error::GeneralDataFusionSnafu)?;
Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
}
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream> {
let stream = self
.df_plan
.execute(partition, runtime)
.await
.context(error::DataFusionExecutionPlanSnafu)?;
let stream = RecordBatchStreamAdapter::try_new(stream)
.context(error::ConvertDfRecordBatchStreamSnafu)?;
Ok(Box::pin(stream))
}
}
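/// Adapts a Greptime [`PhysicalPlan`] so it can be executed as a DataFusion [`DfPhysicalPlan`].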
#[derive(Debug)]
pub struct DfPhysicalPlanAdapter(pub PhysicalPlanRef);
#[async_trait]
impl DfPhysicalPlan for DfPhysicalPlanAdapter {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> DfSchemaRef {
self.0.schema().arrow_schema().clone()
}
fn output_partitioning(&self) -> Partitioning {
self.0.output_partitioning()
}
fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
None
}
fn children(&self) -> Vec<Arc<dyn DfPhysicalPlan>> {
self.0
.children()
.into_iter()
.map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
.collect()
}
fn with_new_children(
&self,
children: Vec<Arc<dyn DfPhysicalPlan>>,
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
let df_schema = self.schema();
let schema: SchemaRef = Arc::new(
df_schema
.try_into()
.context(error::ConvertArrowSchemaSnafu)
.map_err(error::Error::from)?,
);
let children = children
.into_iter()
.map(|x| Arc::new(PhysicalPlanAdapter::new(schema.clone(), x)) as _)
.collect();
let plan = self.0.with_new_children(children)?;
Ok(Arc::new(DfPhysicalPlanAdapter(plan)))
}
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> DfResult<DfSendableRecordBatchStream> {
let stream = self.0.execute(partition, runtime).await?;
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
}
fn statistics(&self) -> Statistics {
// TODO(LFC): impl statistics
Statistics::default()
}
}
#[cfg(test)]
mod test {
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::arrow_print;
use datafusion::datasource::TableProvider as DfTableProvider;
use datafusion::logical_plan::LogicalPlanBuilder;
use datafusion::physical_plan::collect;
use datafusion::physical_plan::empty::EmptyExec;
use datafusion::prelude::ExecutionContext;
use datafusion_common::field_util::SchemaExt;
use datafusion_expr::Expr;
use datatypes::schema::Schema;
use datatypes::vectors::Int32Vector;
use super::*;
struct MyDfTableProvider;
#[async_trait]
impl DfTableProvider for MyDfTableProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> DfSchemaRef {
Arc::new(ArrowSchema::new(vec![Field::new(
"a",
DataType::Int32,
false,
)]))
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
let schema = Schema::try_from(self.schema()).unwrap();
let my_plan = Arc::new(MyExecutionPlan {
schema: Arc::new(schema),
});
let df_plan = DfPhysicalPlanAdapter(my_plan);
Ok(Arc::new(df_plan))
}
}
#[derive(Debug)]
struct MyExecutionPlan {
schema: SchemaRef,
}
#[async_trait]
impl PhysicalPlan for MyExecutionPlan {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_partitioning(&self) -> Partitioning {
Partitioning::UnknownPartitioning(1)
}
fn children(&self) -> Vec<PhysicalPlanRef> {
vec![]
}
fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
unimplemented!()
}
async fn execute(
&self,
_partition: usize,
_runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream> {
let schema = self.schema();
let recordbatches = RecordBatches::try_new(
schema.clone(),
vec![
RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice(vec![1])) as _],
)
.unwrap(),
RecordBatch::new(
schema,
vec![Arc::new(Int32Vector::from_slice(vec![2, 3])) as _],
)
.unwrap(),
],
)
.unwrap();
Ok(recordbatches.as_stream())
}
}
// Test that our physical plan can be executed by DataFusion through the adapters.
#[tokio::test]
async fn test_execute_physical_plan() {
let ctx = ExecutionContext::new();
let logical_plan = LogicalPlanBuilder::scan("test", Arc::new(MyDfTableProvider), None)
.unwrap()
.build()
.unwrap();
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
let df_recordbatches = collect(physical_plan, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let pretty_print = arrow_print::write(&df_recordbatches);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
assert_eq!(
pretty_print,
vec!["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",]
);
}
#[test]
fn test_physical_plan_adapter() {
let df_schema = Arc::new(ArrowSchema::new(vec![Field::new(
"name",
DataType::Utf8,
true,
)]));
let plan = PhysicalPlanAdapter::new(
Arc::new(Schema::try_from(df_schema.clone()).unwrap()),
Arc::new(EmptyExec::new(true, df_schema.clone())),
);
assert!(plan.df_plan.as_any().downcast_ref::<EmptyExec>().is_some());
let df_plan = DfPhysicalPlanAdapter(Arc::new(plan));
assert_eq!(df_schema, df_plan.schema());
}
}

View File

@@ -0,0 +1,92 @@
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::error::Result as ArrowResult;
use datatypes::schema::{Schema, SchemaRef};
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::DfSendableRecordBatchStream;
use crate::{RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream};
/// Greptime SendableRecordBatchStream -> DataFusion RecordBatchStream
pub struct DfRecordBatchStreamAdapter {
stream: SendableRecordBatchStream,
}
impl DfRecordBatchStreamAdapter {
pub fn new(stream: SendableRecordBatchStream) -> Self {
Self { stream }
}
}
impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
fn schema(&self) -> DfSchemaRef {
self.stream.schema().arrow_schema().clone()
}
}
impl Stream for DfRecordBatchStreamAdapter {
type Item = ArrowResult<DfRecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(recordbatch)) => match recordbatch {
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.df_recordbatch))),
Err(e) => Poll::Ready(Some(Err(ArrowError::External("".to_owned(), Box::new(e))))),
},
Poll::Ready(None) => Poll::Ready(None),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.stream.size_hint()
}
}
/// DataFusion SendableRecordBatchStream -> Greptime RecordBatchStream
pub struct RecordBatchStreamAdapter {
schema: SchemaRef,
stream: DfSendableRecordBatchStream,
}
impl RecordBatchStreamAdapter {
pub fn try_new(stream: DfSendableRecordBatchStream) -> Result<Self> {
let schema =
Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
Ok(Self { schema, stream })
}
}
impl RecordBatchStream for RecordBatchStreamAdapter {
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
}
impl Stream for RecordBatchStreamAdapter {
type Item = Result<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(df_recordbatch)) => Poll::Ready(Some(Ok(RecordBatch {
schema: self.schema(),
df_recordbatch: df_recordbatch.context(error::PollStreamSnafu)?,
}))),
Poll::Ready(None) => Poll::Ready(None),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.stream.size_hint()
}
}
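// A minimal sketch of bridging in the DataFusion direction (assuming a
// `RecordBatches` value named `batches` was built elsewhere): `as_stream` yields a
// Greptime stream, and wrapping it makes it pollable by DataFusion operators.
//
//     let df_stream: DfSendableRecordBatchStream =
//         Box::pin(DfRecordBatchStreamAdapter::new(batches.as_stream()));
//
// The opposite direction wraps a DataFusion stream with
// `RecordBatchStreamAdapter::try_new(df_stream)?`, which also converts the Arrow
// schema into a Greptime `Schema`.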

View File

@@ -33,16 +33,32 @@ pub enum InnerError {
reason: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to convert Arrow schema, source: {}", source))]
SchemaConversion {
source: datatypes::error::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
backtrace: Backtrace,
},
}
impl ErrorExt for InnerError {
fn status_code(&self) -> StatusCode {
match self {
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
InnerError::DataTypes { .. } | InnerError::CreateRecordBatches { .. } => {
StatusCode::Internal
}
InnerError::DataTypes { .. }
| InnerError::CreateRecordBatches { .. }
| InnerError::PollStream { .. } => StatusCode::Internal,
InnerError::External { source } => source.status_code(),
InnerError::SchemaConversion { source, .. } => source.status_code(),
}
}

View File

@@ -1,10 +1,15 @@
pub mod adapter;
pub mod error;
mod recordbatch;
pub mod util;
use std::pin::Pin;
use std::sync::Arc;
use datatypes::schema::SchemaRef;
use datafusion::arrow_print;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::VectorRef;
use datatypes::schema::{Schema, SchemaRef};
use error::Result;
use futures::task::{Context, Poll};
use futures::Stream;
@@ -52,6 +57,35 @@ pub struct RecordBatches {
}
impl RecordBatches {
pub fn try_from_columns<I: IntoIterator<Item = VectorRef>>(
schema: SchemaRef,
columns: I,
) -> Result<Self> {
let batches = vec![RecordBatch::new(schema.clone(), columns)?];
Ok(Self { schema, batches })
}
#[inline]
pub fn empty() -> Self {
Self {
schema: Arc::new(Schema::new(vec![])),
batches: vec![],
}
}
pub fn iter(&self) -> impl Iterator<Item = &RecordBatch> {
self.batches.iter()
}
pub fn pretty_print(&self) -> String {
arrow_print::write(
&self
.iter()
.map(|x| x.df_recordbatch.clone())
.collect::<Vec<_>>(),
)
}
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
for batch in batches.iter() {
ensure!(
@@ -74,6 +108,41 @@ impl RecordBatches {
pub fn take(self) -> Vec<RecordBatch> {
self.batches
}
pub fn as_stream(&self) -> SendableRecordBatchStream {
Box::pin(SimpleRecordBatchStream {
inner: RecordBatches {
schema: self.schema(),
batches: self.batches.clone(),
},
index: 0,
})
}
}
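/// A [`RecordBatchStream`] that yields the batches of an in-memory [`RecordBatches`] one by one.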
pub struct SimpleRecordBatchStream {
inner: RecordBatches,
index: usize,
}
impl RecordBatchStream for SimpleRecordBatchStream {
fn schema(&self) -> SchemaRef {
self.inner.schema()
}
}
impl Stream for SimpleRecordBatchStream {
type Item = Result<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Ready(if self.index < self.inner.batches.len() {
let batch = self.inner.batches[self.index].clone();
self.index += 1;
Some(Ok(batch))
} else {
None
})
}
}
#[cfg(test)]
@@ -87,7 +156,26 @@ mod tests {
use super::*;
#[test]
fn test_recordbatches() {
fn test_recordbatches_try_from_columns() {
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"a",
ConcreteDataType::int32_datatype(),
false,
)]));
let result = RecordBatches::try_from_columns(
schema.clone(),
vec![Arc::new(StringVector::from(vec!["hello", "world"])) as _],
);
assert!(result.is_err());
let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
assert_eq!(r.take(), expected);
}
#[test]
fn test_recordbatches_try_new() {
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
@@ -113,7 +201,39 @@ mod tests {
);
let batches = RecordBatches::try_new(schema1.clone(), vec![batch1.clone()]).unwrap();
let expected = "\
+---+-------+
| a | b |
+---+-------+
| 1 | hello |
| 2 | world |
+---+-------+";
assert_eq!(batches.pretty_print(), expected);
assert_eq!(schema1, batches.schema());
assert_eq!(vec![batch1], batches.take());
}
#[tokio::test]
async fn test_simple_recordbatch_stream() {
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let schema = Arc::new(Schema::new(vec![column_a, column_b]));
let va1: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let vb1: VectorRef = Arc::new(StringVector::from(vec!["a", "b"]));
let batch1 = RecordBatch::new(schema.clone(), vec![va1, vb1]).unwrap();
let va2: VectorRef = Arc::new(Int32Vector::from_slice(&[3, 4, 5]));
let vb2: VectorRef = Arc::new(StringVector::from(vec!["c", "d", "e"]));
let batch2 = RecordBatch::new(schema.clone(), vec![va2, vb2]).unwrap();
let recordbatches =
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
let stream = recordbatches.as_stream();
let collected = util::collect(stream).await.unwrap();
assert_eq!(collected.len(), 2);
assert_eq!(collected[0], batch1);
assert_eq!(collected[1], batch2);
}
}

View File

@@ -31,6 +31,10 @@ impl RecordBatch {
})
}
pub fn num_rows(&self) -> usize {
self.df_recordbatch.num_rows()
}
/// Create an iterator to traverse the data by row
pub fn rows(&self) -> RecordBatchRowIterator<'_> {
RecordBatchRowIterator::new(self)

View File

@@ -0,0 +1,27 @@
[package]
name = "substrait"
version = "0.1.0"
edition = "2021"
[dependencies]
bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dependencies.substrait_proto]
package = "substrait"
version = "0.2"
[dev-dependencies]
datatypes = { path = "../../datatypes" }
table = { path = "../../table" }
tokio = { version = "1.0", features = ["full"] }

View File

@@ -0,0 +1,433 @@
use std::sync::Arc;
use bytes::{Buf, Bytes, BytesMut};
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use datafusion::datasource::TableProvider;
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
use datafusion::physical_plan::project_schema;
use prost::Message;
use snafu::ensure;
use snafu::{OptionExt, ResultExt};
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::protobuf::expression::MaskExpression;
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
use substrait_proto::protobuf::rel::RelType;
use substrait_proto::protobuf::PlanRel;
use substrait_proto::protobuf::ReadRel;
use substrait_proto::protobuf::Rel;
use table::table::adapter::DfTableProviderAdapter;
use crate::error::Error;
use crate::error::{
DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, InternalSnafu,
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
};
use crate::schema::{from_schema, to_schema};
use crate::SubstraitPlan;
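/// Converts DataFusion [`LogicalPlan`]s to and from their substrait protobuf
/// representation, resolving table references through the catalog manager.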
pub struct DFLogicalSubstraitConvertor {
catalog_manager: CatalogManagerRef,
}
impl SubstraitPlan for DFLogicalSubstraitConvertor {
type Error = Error;
type Plan = LogicalPlan;
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
let plan_rel = PlanRel::decode(message).context(DecodeRelSnafu)?;
let rel = match plan_rel.rel_type.context(EmptyPlanSnafu)? {
PlanRelType::Rel(rel) => rel,
PlanRelType::Root(_) => UnsupportedPlanSnafu {
name: "Root Relation",
}
.fail()?,
};
self.convert_rel(rel)
}
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
let rel = self.convert_plan(plan)?;
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
let mut buf = BytesMut::new();
plan_rel.encode(&mut buf).context(EncodeRelSnafu)?;
Ok(buf.freeze())
}
}
impl DFLogicalSubstraitConvertor {
pub fn new(catalog_manager: CatalogManagerRef) -> Self {
Self { catalog_manager }
}
}
impl DFLogicalSubstraitConvertor {
pub fn convert_rel(&self, rel: Rel) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(read_rel),
RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
name: "Filter Relation",
}
.fail()?,
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
}
.fail()?,
RelType::Aggregate(_aggr_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
}
.fail()?,
RelType::Sort(_sort_rel) => UnsupportedPlanSnafu {
name: "Sort Relation",
}
.fail()?,
RelType::Join(_join_rel) => UnsupportedPlanSnafu {
name: "Join Relation",
}
.fail()?,
RelType::Project(_project_rel) => UnsupportedPlanSnafu {
name: "Project Relation",
}
.fail()?,
RelType::Set(_set_rel) => UnsupportedPlanSnafu {
name: "Set Relation",
}
.fail()?,
RelType::ExtensionSingle(_ext_single_rel) => UnsupportedPlanSnafu {
name: "Extension Single Relation",
}
.fail()?,
RelType::ExtensionMulti(_ext_multi_rel) => UnsupportedPlanSnafu {
name: "Extension Multi Relation",
}
.fail()?,
RelType::ExtensionLeaf(_ext_leaf_rel) => UnsupportedPlanSnafu {
name: "Extension Leaf Relation",
}
.fail()?,
RelType::Cross(_cross_rel) => UnsupportedPlanSnafu {
name: "Cross Relation",
}
.fail()?,
}?;
Ok(logical_plan)
}
fn convert_read_rel(&self, read_rel: Box<ReadRel>) -> Result<LogicalPlan, Error> {
// Extract the catalog, schema and table names from NamedTable, assuming the names appear in that order (catalog, schema, table).
let read_type = read_rel.read_type.context(MissingFieldSnafu {
field: "read_type",
plan: "Read",
})?;
let (table_name, schema_name, catalog_name) = match read_type {
ReadType::NamedTable(mut named_table) => {
ensure!(
named_table.names.len() == 3,
InvalidParametersSnafu {
reason:
"NamedTable should contains three names for catalog, schema and table",
}
);
(
named_table.names.pop().unwrap(),
named_table.names.pop().unwrap(),
named_table.names.pop().unwrap(),
)
}
ReadType::VirtualTable(_) | ReadType::LocalFiles(_) | ReadType::ExtensionTable(_) => {
UnsupportedExprSnafu {
name: "Non-NamedTable Read",
}
.fail()?
}
};
// Get projection indices
let projection = read_rel
.projection
.map(|mask_expr| self.convert_mask_expression(mask_expr));
// Get table handle from catalog manager
let table_ref = self
.catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.map_err(BoxedError::new)
.context(InternalSnafu)?
.context(TableNotFoundSnafu {
name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
})?;
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
// Get the schema directly from the table and compare it with the schema retrieved from the substrait proto.
let stored_schema = adapter.schema();
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
ensure!(
stored_schema.fields == retrieved_arrow_schema.fields,
SchemaNotMatchSnafu {
substrait_schema: retrieved_arrow_schema.clone(),
storage_schema: stored_schema
}
);
// Calculate the projected schema
let projected_schema = project_schema(&stored_schema, projection.as_ref())
.context(DFInternalSnafu)?
.to_dfschema_ref()
.context(DFInternalSnafu)?;
// TODO(ruihang): Support filters and limit
Ok(LogicalPlan::TableScan(TableScan {
table_name,
source: adapter,
projection,
projected_schema,
filters: vec![],
limit: None,
}))
}
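/// Convert substrait's [MaskExpression] into a list of projected column indices.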
fn convert_mask_expression(&self, mask_expression: MaskExpression) -> Vec<usize> {
mask_expression
.select
.unwrap_or_default()
.struct_items
.into_iter()
.map(|select| select.field as _)
.collect()
}
}
impl DFLogicalSubstraitConvertor {
pub fn convert_plan(&self, plan: LogicalPlan) -> Result<Rel, Error> {
match plan {
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Projection",
}
.fail()?,
LogicalPlan::Filter(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Filter",
}
.fail()?,
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Window",
}
.fail()?,
LogicalPlan::Aggregate(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Aggregate",
}
.fail()?,
LogicalPlan::Sort(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Sort",
}
.fail()?,
LogicalPlan::Join(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Join",
}
.fail()?,
LogicalPlan::CrossJoin(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical CrossJoin",
}
.fail()?,
LogicalPlan::Repartition(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Repartition",
}
.fail()?,
LogicalPlan::Union(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Union",
}
.fail()?,
LogicalPlan::TableScan(table_scan) => {
let read_rel = self.convert_table_scan_plan(table_scan)?;
Ok(Rel {
rel_type: Some(RelType::Read(Box::new(read_rel))),
})
}
LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical EmptyRelation",
}
.fail()?,
LogicalPlan::Limit(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Limit",
}
.fail()?,
LogicalPlan::CreateExternalTable(_)
| LogicalPlan::CreateMemoryTable(_)
| LogicalPlan::DropTable(_)
| LogicalPlan::Values(_)
| LogicalPlan::Explain(_)
| LogicalPlan::Analyze(_)
| LogicalPlan::Extension(_) => InvalidParametersSnafu {
reason: format!(
"Trying to convert DDL/DML plan to substrait proto, plan: {:?}",
plan
),
}
.fail()?,
}
}
pub fn convert_table_scan_plan(&self, table_scan: TableScan) -> Result<ReadRel, Error> {
let provider = table_scan
.source
.as_any()
.downcast_ref::<DfTableProviderAdapter>()
.context(UnknownPlanSnafu)?;
let table_info = provider.table().table_info();
// assemble NamedTable and ReadType
let catalog_name = table_info.catalog_name.clone();
let schema_name = table_info.schema_name.clone();
let table_name = table_info.name.clone();
let named_table = NamedTable {
names: vec![catalog_name, schema_name, table_name],
advanced_extension: None,
};
let read_type = ReadType::NamedTable(named_table);
// assemble projection
let projection = table_scan
.projection
.map(|proj| self.convert_schema_projection(&proj));
// assemble base (unprojected) schema using Table's schema.
let base_schema = from_schema(&provider.table().schema())?;
let read_rel = ReadRel {
common: None,
base_schema: Some(base_schema),
filter: None,
projection,
advanced_extension: None,
read_type: Some(read_type),
};
Ok(read_rel)
}
/// Convert an index-based schema projection to substrait's [MaskExpression].
fn convert_schema_projection(&self, projections: &[usize]) -> MaskExpression {
let struct_items = projections
.iter()
.map(|index| StructItem {
field: *index as i32,
child: None,
})
.collect();
MaskExpression {
select: Some(StructSelect { struct_items }),
// TODO(ruihang): this field is unspecified
maintain_singular_struct: true,
}
}
}
#[cfg(test)]
mod test {
use catalog::local::LocalCatalogManager;
use catalog::{
local::{MemoryCatalogProvider, MemorySchemaProvider},
CatalogList, CatalogProvider, RegisterTableRequest,
};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion::logical_plan::DFSchema;
use datatypes::schema::Schema;
use table::{requests::CreateTableRequest, test_util::EmptyTable, test_util::MockTableEngine};
use super::*;
use crate::schema::test::supported_types;
const DEFAULT_TABLE_NAME: &str = "SubstraitTable";
async fn build_mock_catalog_manager() -> CatalogManagerRef {
let mock_table_engine = Arc::new(MockTableEngine::new());
let catalog_manager = Arc::new(
LocalCatalogManager::try_new(mock_table_engine)
.await
.unwrap(),
);
let schema_provider = Arc::new(MemorySchemaProvider::new());
let catalog_provider = Arc::new(MemoryCatalogProvider::new());
catalog_provider
.register_schema(DEFAULT_SCHEMA_NAME.to_string(), schema_provider)
.unwrap();
catalog_manager
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), catalog_provider)
.unwrap();
catalog_manager.init().await.unwrap();
catalog_manager
}
fn build_create_table_request<N: ToString>(table_name: N) -> CreateTableRequest {
CreateTableRequest {
id: 1,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
desc: None,
schema: Arc::new(Schema::new(supported_types())),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: true,
table_options: Default::default(),
}
}
async fn logical_plan_round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) {
let convertor = DFLogicalSubstraitConvertor::new(catalog);
let proto = convertor.encode(plan.clone()).unwrap();
let tripped_plan = convertor.decode(proto).unwrap();
assert_eq!(format!("{:?}", plan), format!("{:?}", tripped_plan));
}
#[tokio::test]
async fn test_table_scan() {
let catalog_manager = build_mock_catalog_manager().await;
let table_ref = Arc::new(EmptyTable::new(build_create_table_request(
DEFAULT_TABLE_NAME,
)));
catalog_manager
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: DEFAULT_TABLE_NAME.to_string(),
table_id: 1,
table: table_ref.clone(),
})
.await
.unwrap();
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
let projection = vec![1, 3, 5];
let df_schema = adapter.schema().to_dfschema().unwrap();
let projected_fields = projection
.iter()
.map(|index| df_schema.field(*index).clone())
.collect();
let projected_schema =
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
let table_scan_plan = LogicalPlan::TableScan(TableScan {
table_name: DEFAULT_TABLE_NAME.to_string(),
source: adapter,
projection: Some(projection),
projected_schema,
filters: vec![],
limit: None,
});
logical_plan_round_trip(table_scan_plan, catalog_manager).await;
}
}

Some files were not shown because too many files have changed in this diff.