Compare commits

...

56 Commits

Author SHA1 Message Date
zyy17
24b880f982 ci: push image to dockerhub
Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-04 13:05:35 +08:00
Lei, Huang
db2b577628 feat: remote catalog (#315)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode lease

* feat: remote catalog (#356)

* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* refactor: merge refactor/catalog-crate

* feat: table key with version

* feat: impl KvBackend for MetaClient

* fix: integrate metaclient

* fix: catalog use local table info as baseline

* fix: sync metasrv

* fix: wip

* fix: update remote catalog on register and deregister

* refactor: CatalogProvider

* refactor: CatalogManager

* fix: catalog key filtering

* fix: pass some test

* refactor: catalog iterating

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop

* chore: merge catalog crate

* fix: adapt to recent meta-client api change

* feat: datanode heartbeat (#355)

* feat: add heartbeat task to instance

* feat: add node_id datanode opts

* fix: use real node id in heartbeat and meta client

* feat: distribute table in frontend

* test: distribute read demo

* test: distribute read demo

* test: distribute read demo

* add write spliter

* fix: node id changed to u64

* feat: datanode uses remote catalog implementation

* dist insert integrate table

* feat: specify region ids on creating table (#359)

* fix: compiling issues

* feat: datanode lease (#354)

* Some glue code about dist_insert

* fix: correctly wrap string value with quotes

* feat: create route

* feat: frontend catalog (#362)

* feat: integrate catalog to frontend

* feat: preserve partition rule on create

* fix: print tables on start

* chore: log in create route

* test: distribute read demo

* feat: support metasrv addr command line options

* feat: optimize DataNodeInstance creation (#368)

* chore: remove unnecessary changes

* chore: revert changes to src/api

* chore: revert changes to src/datanode/src/server.rs

* chore: remove opendal backend

* chore: optimize imports

* chore: revert changes to instance and region ids

* refactor: MetaKvBackend range

* fix: remove some wrap

* refactor: initialization of catalog

* fix: next range request start key

* fix: mock delete range

* refactor: simplify range response handling

Co-authored-by: jiachun <jiachun_fjc@163.com>
Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: fys <1113014250@qq.com>
Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-11-04 11:43:31 +08:00
Yingwen
cba611b9f5 refactor: Serialize RawSchema/RawTableMeta/RawTableInfo (#382)
* refactor: Serialize Schema/TableMeta/TableInfo to raw structs

* test: Add tests for raw struct conversion

* style: Fix clippy

* refactor: SchemaBuilder::timestamp_index takes Option<usize>

So the caller can chain the timestamp_index method call even when there is no
timestamp index (a minimal sketch follows this commit message).

* style(datatypes): Chains SchemaBuilder method calls
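
A minimal, self-contained sketch of the chaining idea behind this change, using hypothetical stand-in types rather than the real SchemaBuilder in the datatypes crate: accepting an Option<usize> lets the caller keep timestamp_index in the chain and simply pass None when there is no timestamp column.

// Hypothetical, simplified stand-ins for the real SchemaBuilder/Schema types.
#[derive(Debug, Default)]
struct Schema {
    column_names: Vec<String>,
    timestamp_index: Option<usize>,
}

#[derive(Default)]
struct SchemaBuilder {
    column_names: Vec<String>,
    timestamp_index: Option<usize>,
}

impl SchemaBuilder {
    fn column(mut self, name: &str) -> Self {
        self.column_names.push(name.to_string());
        self
    }

    // Taking Option<usize> keeps this call chainable; pass None when
    // there is no timestamp column.
    fn timestamp_index(mut self, index: Option<usize>) -> Self {
        self.timestamp_index = index;
        self
    }

    fn build(self) -> Schema {
        Schema {
            column_names: self.column_names,
            timestamp_index: self.timestamp_index,
        }
    }
}

fn main() {
    // The chain has the same shape whether or not a timestamp index exists.
    let with_ts = SchemaBuilder::default()
        .column("host")
        .column("ts")
        .timestamp_index(Some(1))
        .build();
    let without_ts = SchemaBuilder::default()
        .column("host")
        .timestamp_index(None)
        .build();
    println!("{:?} / {:?}", with_ts, without_ts);
}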
2022-11-04 11:25:17 +08:00
zyy17
6aec1b4f90 ci: add workflow of artifacts release (#389)
Signed-off-by: zyy17 <zyylsxm@gmail.com>

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-04 10:55:41 +08:00
Ruihang Xia
6d1dd5e7af fix: also run CI in develop branch (#387)
* fix: also run CI in develop branch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add develop branch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-03 18:35:30 +08:00
Jiachun Feng
e19b63f4f5 chore: meta mock test (#379)
* chore: meta mock

* chore: refactor datanode selector

* chore: create route mock test

* chore: add mock module

* chore: memory store for test

* chore: mock meta for test

* chore: ensure memory store has the same behaviour as etcd

* chore: replace tokio lock to parking_lot
2022-11-03 18:33:29 +08:00
shuiyisong
750310c648 feat: frontend start with instance param (#385)
* chore: fix conflict

* chore: remove unused import
2022-11-03 18:05:01 +08:00
Ruihang Xia
9fd2d4e8db fix: detach grpc tasks to another runtime (#376)
* fix: detach grpc tasks to another runtime

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add runtime size options

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* group an obj-req into one task

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* make nitpicking CRer happy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-03 17:24:15 +08:00
元波
77233c20e1 fix: remove unnecessary protocol (#386) 2022-11-03 17:14:08 +08:00
fys
1fad67cf4d feat: grpc client support multi peers (#380)
* feat: grpc client use channel manager

* cr
2022-11-03 11:55:22 +08:00
LFC
5abff7a536 feat: range columns partitioning rule (#374)
* feat: parse partition syntax in "create table"

* feat: partition rule

* fix: rebase develop

* feat: range partitioning rule

* fix: resolve PR comments

* feat: range columns partitioning rule

* refactor: remove unused codes

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-02 22:36:32 +08:00
Yingwen
6f1f697bfc feat: Implements shutdown for GrpcServer and HttpServer (#372)
* fix: Fix TestGuard being dropped before grpc test starts

* feat: Let start and shutdown take an immutable reference to self

Also implement shutdown for GrpcServer

* feat: Implement shutdown for HttpServer

* style: Fix clippy

* chore: Add name to AlreadyStarted error
2022-11-02 18:10:41 +08:00
Jiachun Feng
2d4a44414d feat: refactor for test (#375)
* chore: add set_header macro & remove some unnecessary code

* chore: channel_manager with connector
2022-11-01 17:34:54 +08:00
LFC
ea2ebc0e87 feat: range partition rule (#304)
* feat: range partitioning rule

Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-01 16:09:23 +08:00
Jiachun Feng
dacfd12b8f feat: router impl (#363)
* feat: heartbeat lease & route api

* feat: batchput&cas

* chore: demo&ut

* chore: by cr

* chore: datanode selector

* chore: rename with_key_range to with_range

* chore: ut
2022-11-01 11:45:05 +08:00
Ning Sun
518b665f1e feat: Improve http sql api and attempt to add openapi docs (#361)
This patch changes the output of our HTTP SQL API and prepares it for our SQL editor development. Changes include:

- includes aide for OAS 3.1 OpenAPI documents, available at /v1/private/api.json
- simplified some HTTP handlers' return types to use strings or JSON directly
- created a new HttpRecordsOutput type to hide the internals of RecordBatch from end users, and tuned the data structure to be easier for applications to consume
- updated the response struct to use a code field for success or a detailed error code (a response-shape sketch follows below)

Residual issue #366 
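
A rough, dependency-free sketch of the response envelope described above; the field names (code, error, output, schema, rows) are illustrative assumptions, not the actual HttpRecordsOutput layout. It only shows the idea of hiding RecordBatch internals behind a records structure and reporting a code instead of a bare success flag.

// Hypothetical, simplified model of the JSON envelope; field names are illustrative.
struct HttpRecordsOutput {
    schema: Vec<String>,    // column names only, no RecordBatch internals
    rows: Vec<Vec<String>>, // row values rendered as strings for simplicity
}

struct JsonResponse {
    code: u32,              // 0 for success, otherwise a detailed error code
    error: Option<String>,  // present only when code != 0
    output: Option<HttpRecordsOutput>,
}

fn render(resp: &JsonResponse) -> String {
    // Hand-rolled JSON keeps the sketch dependency-free; a real handler
    // would rely on serde serialization instead.
    match (&resp.error, &resp.output) {
        (Some(err), _) => format!(r#"{{"code":{},"error":"{}"}}"#, resp.code, err),
        (None, Some(out)) => format!(
            r#"{{"code":{},"output":{{"schema":{:?},"rows":{:?}}}}}"#,
            resp.code, out.schema, out.rows
        ),
        (None, None) => format!(r#"{{"code":{}}}"#, resp.code),
    }
}

fn main() {
    let ok = JsonResponse {
        code: 0,
        error: None,
        output: Some(HttpRecordsOutput {
            schema: vec!["host".into(), "cpu".into()],
            rows: vec![vec!["h1".into(), "0.5".into()]],
        }),
    };
    println!("{}", render(&ok));
}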

* feat: allow http post for our sql http api

* feat: update our http api and attempt to add openapi spec support

* test: correct test against new handler apis

* refactor: rename rows to records

* refactor: removed HttpResponse completely

* feat: add information to our openapi docs

* feat: add docs for sql interface response

* refactor: use struct to represent query so we can doc it via aide

* refactor: use arc wrapped api

* feat: add redoc UI support

* Update src/servers/src/http.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* Update src/servers/src/http.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* fix: address review comments

* test: update integration tests for new api output

* refactor: make prometheus http apis compatible with recent changes

* refactor: get schema from stream

* test: add test for recordbatch to json serialization

* test: add todo for a test to be fixed later

* Revert "test: add todo for a test to be fixed later"

This reverts commit a5a50c7afb.

* fix: Revert "refactor: get schema from stream"

This reverts commit 945b685556.

* chore: add todo for pending issue #366

* chore: remove fixed server url in openapi docs

* feat: include error_code in json response

* refactor: use code over success field in json response

Co-authored-by: LFC <bayinamine@gmail.com>
2022-10-31 16:20:03 +08:00
Ruihang Xia
e2c28fe374 feat: support data type and schema conversion (#351)
* feat: type and schema transformer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test schema codec

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* support projection and schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* project schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typos

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/substrait/src/df_logical.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* more document about type variations

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-31 15:16:13 +08:00
Yingwen
f4e22282a4 feat: Region supports reading data with different schema (#342)
* feat(storage): Implement skeleton of ReadResolver

ReadResolver is used to resolve differences between schemas

* feat(storage): Add user_column_end to ReadResolver

* feat(storage): Implement Batch::batch_from_parts

Used to construct a Batch from parts according to the schema that the user
expects to read.

* feat(storage): Compat memtable schema

* feat(storage): Compat parquet file schema

* fix(storage): ReadResolver supports projection under same schema version

Now ReadResolver takes ProjectedSchemaRef as dest schema, and checks
whether a value column is needed by the schema after projection.

* feat(storage): Check whether columns are same columns

is_source_column_readable() takes ColumnMetadata instead of
ColumnSchema, and compares their column ids to check whether they are
the same column (see the sketch after this list).

* refactor(storage): Use row_key_end/user_column_end in source_schema

Rename ReadResolver::is_needed to ReadResolver::is_source_needed, and
remove row_key_end/user_column_end from ReadResolver, since they should
be the same as source_schema's

* chore(storage): Remove unused codes

* test(storage): Add tests for the resolver

* feat(storage): Returns error on different source and dest column names

* style(storage): Fix clippy

* refactor: Rename ReadResolver to ReadAdapter

* chore(table): Removed unused comment

* refactor: rename to is_source_column_compatible
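
A minimal sketch of the column-id comparison idea, with hypothetical trimmed-down types (the real ReadAdapter works over full storage metadata): a source column is treated as the same column only when a destination column with the same id exists, and a matching id with a different name is rejected as incompatible.

// Hypothetical, trimmed-down column metadata; the real storage types carry more fields.
#[derive(Debug, Clone)]
struct ColumnMetadata {
    id: u32,
    name: String,
}

#[derive(Debug)]
enum CompatError {
    NameMismatch { id: u32, source: String, dest: String },
}

// Ok(true): the source column is still needed (same column id in dest).
// Ok(false): the dest schema no longer needs it.
// Err(..): ids match but names differ, i.e. the schemas are incompatible.
fn is_source_column_compatible(
    source: &ColumnMetadata,
    dest: &[ColumnMetadata],
) -> Result<bool, CompatError> {
    match dest.iter().find(|c| c.id == source.id) {
        None => Ok(false),
        Some(d) if d.name == source.name => Ok(true),
        Some(d) => Err(CompatError::NameMismatch {
            id: source.id,
            source: source.name.clone(),
            dest: d.name.clone(),
        }),
    }
}

fn main() {
    let dest = vec![
        ColumnMetadata { id: 1, name: "ts".into() },
        ColumnMetadata { id: 3, name: "cpu".into() },
    ];
    let dropped = ColumnMetadata { id: 2, name: "memory".into() };
    let kept = ColumnMetadata { id: 3, name: "cpu".into() };
    println!("{:?}", is_source_column_compatible(&dropped, &dest)); // Ok(false)
    println!("{:?}", is_source_column_compatible(&kept, &dest));    // Ok(true)
}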
2022-10-31 11:42:07 +08:00
dennis zhuang
0604eb7509 feat: prometheus remote write and read (#346)
* feat: scaffold for prometheus protocol handler

* feat: impl remote write and read for prometheus

* chore: make label matchers work in remote reading

* chore: case-sensitive regexp matching for labels and tweak restful api

* test: prometheus test

* test: adds test for prometheus handler and http server

* fix: typo in comment

* refactor: move snappy_compress and snappy_decompress

* fix: by code review

* fix: collect_timeseries_ids

* fix: timestamp and value column's value may be null
2022-10-28 18:47:16 +08:00
Lei, Huang
81716d622e feat: timestamp column support i64 (#325)
* feat: align_bucket support i64 and timestamp values

* feat: add Int64 to timestamp

* feat: support query i64 timestamp vector

* test: fix failing tests

* refactor: simplify some code

* fix: CR comments and add insert and query test for i64 timestamp column
2022-10-28 18:39:11 +08:00
Ruihang Xia
3e8d9b421c chore: set CI timeout (#358)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-28 11:01:12 +08:00
fys
6d4c0ad5a3 feat: add writespliter (#345)
* Add writespliter

* Partition_rule uses a reference, not Arc
2022-10-27 10:57:34 +08:00
Jiachun Feng
00966cad69 feat: meta refactor (#339)
* feat: heartbeat handler

* chore: heartbeat handlers lock refactor

* chore: store rpc req/res wrapper

* chore: router rpc/res wrapper

* chore: const method(request_header)

* chore: rm unnecessary const fn & refactor HeartbeatHandler

* chore: refactor CreateRequest

* chore: HeartbeatAccumulator

* chore: improve router req/res convert

* fix: register race condition
2022-10-26 11:26:40 +08:00
Lei, Huang
932b30d299 refactor: catalog crate (#331)
* chore: refactor dir for local catalog manager

* refactor: CatalogProvider returns Result

* refactor: SchemaProvider returns Result

* feat: add kv operations to remote catalog

* chore: refactor some code

* feat: impl catalog initialization

* feat: add register table and register system table function

* refactor: add table_info method for Table trait

* chore: add some tests

* chore: add register schema test

* chore: fix build issue after rebase onto develop

* refactor: mock to separate file

* build: failed to compile

* fix: use a container struct to bridge KvBackend and Accessor trait

* feat: upgrade opendal to 0.17

* test: add more tests

* chore: add catalog name and schema name to table info

* chore: add catalog name and schema name to table info

* chore: rebase onto develop

* refactor: common-catalog crate

* refactor: remove remote catalog related files

* fix: compilation

* feat: add table version to TableKey

* feat: add node id to TableValue

* fix: some CR comments

* chore: change async fn create_expr_to_request to sync

* fix: add backtrace to errors

* fix: code style

* fix: CatalogManager::table also requires both catalog_name and schema_name

* chore: merge develop
2022-10-26 10:50:39 +08:00
Ruihang Xia
7fe39e9187 feat: support querying with logical plan in gRPC layer (#344)
* impl logical exec & example

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test on upper api

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add todo to prost dep

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sign the TODO

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-25 16:05:53 +08:00
LFC
2ca667cbdf refactor: make table scan return physical plan (#326)
* refactor: return PhysicalPlan in Table trait's scan method, to support partitioned execution in Frontend's distributed read (see the sketch after this list)

* refactor: pub use necessary DataFusion types

* refactor: replace old "PhysicalPlan" and its adapters

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-25 11:34:53 +08:00
Yingwen
64dac51e83 feat: Holds ColumnMetadata in StoreSchema (#333)
* chore: Update StoreSchema comment

* feat: Add metadata to ColumnSchema

* feat: Impl conversion between ColumnMetadata and ColumnSchema

We could use this feature to store the ColumnMetadata as arrow's
Schema, since the ColumnSchema could be further converted to an arrow
schema. Then we could use ColumnMetadata in StoreSchema, which contains
more information, especially the column id (a conversion sketch follows
this commit message).

* feat(storage): Merge schema::Error to metadata::Error

To avoid cyclic dependency of two Errors

* feat(storage): Store ColumnMetadata in StoreSchema

* feat(storage): Use StoreSchemaRef to avoid cloning the whole StoreSchema struct

* test(storage): Fix test_store_schema

* feat(datatypes): Return error on duplicate meta key

* chore: Address CR comments
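
A hedged sketch of the round-trip idea, with hypothetical types and an illustrative metadata key (the real crates use their own key names and arrow types): the column id rides along in the ColumnSchema metadata map, so it survives further conversion to and from an arrow schema.

use std::collections::HashMap;

// Hypothetical, trimmed-down versions of the two types involved.
#[derive(Debug, Clone)]
struct ColumnSchema {
    name: String,
    metadata: HashMap<String, String>,
}

#[derive(Debug, Clone, PartialEq)]
struct ColumnMetadata {
    name: String,
    column_id: u32,
}

const COLUMN_ID_KEY: &str = "greptime:column_id"; // illustrative key name

fn to_column_schema(meta: &ColumnMetadata) -> ColumnSchema {
    let mut metadata = HashMap::new();
    // The column id is stored in the metadata map, so converting further to an
    // arrow schema keeps the information.
    metadata.insert(COLUMN_ID_KEY.to_string(), meta.column_id.to_string());
    ColumnSchema { name: meta.name.clone(), metadata }
}

fn from_column_schema(schema: &ColumnSchema) -> Result<ColumnMetadata, String> {
    let id = schema
        .metadata
        .get(COLUMN_ID_KEY)
        .ok_or_else(|| format!("column {} has no column id metadata", schema.name))?
        .parse::<u32>()
        .map_err(|e| e.to_string())?;
    Ok(ColumnMetadata { name: schema.name.clone(), column_id: id })
}

fn main() {
    let meta = ColumnMetadata { name: "cpu".into(), column_id: 3 };
    let round_trip = from_column_schema(&to_column_schema(&meta)).unwrap();
    assert_eq!(meta, round_trip);
    println!("{:?}", round_trip);
}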
2022-10-25 11:06:22 +08:00
xiaomin tang
edad6f89b5 docs: Add code_of_conduct adapted from the Contributor Covenant (#340) 2022-10-24 19:04:55 +08:00
Ruihang Xia
8ab43b65ea feat: serialize/deserialize logical and execution plan via substrait (#317)
* fix: change Utf8Array indice type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: remove unused sub-crate

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: impl for both Logical and Execution plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: move test-util subcrate into table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test: table scan logical plan round trip

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* drop support of physical plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix warnings

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename trait fns to encode/decode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address review comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-24 15:29:33 +08:00
Lei, Huang
6fc45e31e0 fix: put type rewrite optimizer rule at first (#337) 2022-10-24 15:05:59 +08:00
Yingwen
a457c49d99 refactor: Remove column_null_mask in MutationExtra (#314)
* refactor: Remove column_null_mask in MutationExtra

MutationExtra::column_null_mask is no longer needed as we could ensure
there is no missing column in WriteBatch.

* feat(storage): Remove MutationExtra

Just stores MutationType in the WalHeader, no longer needs MutationExtra
2022-10-24 14:53:35 +08:00
Jiachun Feng
b650656ae3 chore: refactor meta protocol (#332)
* chore: refactor channel_config

* chore: refactor grpc protocol

* feat: heartbeat streams
2022-10-21 20:30:57 +08:00
Ruihang Xia
bc9a2df9bf refactor: move test-util subcrate into table (#334)
* refactor: move test-util subcrate into table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: clean comment

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* move MockTableEngine into test-util

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-21 14:39:40 +08:00
LFC
6b0c5281d4 feat: try from DataFusion's ScalarValue for our Value (#329)
* feat: try from DataFusion's ScalarValue for our Value

* Update src/datatypes/src/value.rs

Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>

* fix: resolve CR comments

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
2022-10-20 20:22:40 +08:00
fys
fad8f442ef feat: modify proto for distribute insert (#327) 2022-10-20 12:41:15 +08:00
Lei, Huang
2d52f19662 feat: add table info (#323)
* refactor: add table_info method for Table trait

* feat: add table_info method to Table trait

* test: add more unit test

* fix: impl table_info for SystemTable

* test: fix failing test
2022-10-20 12:23:44 +08:00
LFC
d5800d0b60 feat: parse partition syntax in "create table" (#298)
* feat: parse partition syntax in "create table"

* Update src/sql/src/parsers/create_parser.rs

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Lei, Huang <6406592+v0y4g3r@users.noreply.github.com>
2022-10-20 10:43:15 +08:00
Ruihang Xia
fbea07ea83 chore: remove unused dependencies (#319)
* chore: remove unused dependencies

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: recover some dev-deps

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-19 14:08:54 +08:00
Yingwen
87130adf54 docs: Move contributing parts from README to CONTRIBUTING.md (#321) 2022-10-19 14:00:31 +08:00
Yingwen
c147657275 ci: Use docs instead of doc (#322) 2022-10-19 11:56:49 +08:00
Jiachun Feng
d5b34f8917 feat: metasrv (#300)
* meta: meta api&client

* meta: heartbeat server init

* feat: kv store

* chore: grpc server

* chore: meta server bootstrap

* feat: heartbeat client

* feat: route for create table

* chore: a channel pool manager

* feat: route client

* feat: store client

* chore: meta_client example

* chore: change schema

* chore: unit test & by cr

* chore: refactor meta client

* chore: add unit test
2022-10-19 11:02:58 +08:00
Yingwen
4d08ee6fbb fix: Fix broken wal and memtable benchmarks (#320) 2022-10-19 10:54:01 +08:00
dennis zhuang
94b263c261 refactor: datanode instance (#316)
* refactor: datanode Instance

* fix: resolve todo
2022-10-19 10:51:45 +08:00
Yingwen
c6d91edb83 refactor(storage): Split schema mod into multiple sub-mods (#318) 2022-10-18 18:56:52 +08:00
Yingwen
cdf3280fcf feat: Region supports write requests with old schema (#297)
* feat: Adds ColumnDefaultConstraint::create_default_vector

ColumnDefaultConstraint::create_default_vector is ported from
MitoTable::try_get_column_default_constraint_vector.

* refactor: Replace try_get_column_default_constraint_vector by create_default_vector

* style: Remove unnecessary map_err in MitoTable::insert

* feat: Adds compat_write

For each column in `dest_schema` but not in `write_batch`, this method inserts a
vector filled with the default value into the `write_batch`. If there are columns not in
`dest_schema`, an error is returned (a padding sketch follows this commit message).

* chore: Add info log to RegionInner::alter

* feat(storage): RegionImpl::write support request with old version

* feat: Add nullable check when creating default value

* feat: Validate nullable and default value

* chore: Modify PutOperation comments

* chore: Make ColumnDescriptor::is_nullable readonly and validate name

* feat: Use CompatWrite trait to replace campat::compat_write method

Adds a CompatWrite trait to support padding columns in a WriteBatch:
- The WriteBatch and PutData implement this trait
- Fix the issue that WriteBatch::schema is not updated to the
  schema after compat
- Also validate the created column when adding to PutData

The WriteBatch would also pad default values for missing columns in
PutData, so the memtable inserter doesn't need to manually check whether
the column is nullable and then insert a NullVector. Every WriteBatch is
ensured to have all columns defined by the schema in its PutData.

* feat: Validate constraint by ColumnDefaultConstraint::validate()

The ColumnDefaultConstraint::validate() would also ensure the default
value has the same data type as the column's.

* feat: Use NullVector for null columns

* fix: Fix BinaryType returns wrong logical_type_id

* fix: Fix tests and revert NullVector for null columns

NullVector doesn't support a custom logical type, making it hard to
encode/decode, which also causes the arrow/protobuf codec of the write batch
to fail.

* fix: create_default_vector uses replicate to create a vector with the default value

This would fix the test_codec_with_none_column_protobuf test, as we need
to downcast the vector to construct the protobuf values.

* test: add tests for column default constraints

* test: Add tests for CompatWrite trait impl

* test: Test write region with old schema

* fix(storage): Fix replay() applies metadata too early

The committed sequence of the RegionChange action is the sequence of the
last entry that uses the old metadata (schema). During replay, we should
apply the new metadata after we see an entry that has a sequence greater
than (not equal to) the `RegionChange::committed_sequence`

Also remove duplicate `set_committed_sequence()` call in
persist_manifest_version()

* chore: Removes some unreachable codes

Also add more comments to document codes in these files

* refactor: Refactor MitoTable::insert

Return an error if we could not create a default vector for the given column,
instead of ignoring the error

* chore: Fix incorrect comments

* chore: Fix typo in error message
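
A minimal sketch of the padding rule described above, using hypothetical stand-in types (a plain i64 default instead of a real ColumnDefaultConstraint): columns known to the destination schema but missing from the batch get a default-filled vector, while columns unknown to the destination schema are an error.

use std::collections::HashMap;

// Hypothetical stand-ins: a column is a name plus an optional default value,
// and a "vector" is just a Vec of values.
struct ColumnDef {
    name: String,
    default_value: Option<i64>,
}

type WriteBatch = HashMap<String, Vec<i64>>;

fn compat_write(dest_schema: &[ColumnDef], batch: &mut WriteBatch, rows: usize) -> Result<(), String> {
    // Reject columns the destination schema does not know about.
    for name in batch.keys() {
        if !dest_schema.iter().any(|c| &c.name == name) {
            return Err(format!("column {name} is not in the destination schema"));
        }
    }
    // Pad every missing column with a default-filled vector.
    for col in dest_schema {
        if !batch.contains_key(&col.name) {
            let default = col
                .default_value
                .ok_or_else(|| format!("column {} has no default value", col.name))?;
            batch.insert(col.name.clone(), vec![default; rows]);
        }
    }
    Ok(())
}

fn main() {
    let dest = vec![
        ColumnDef { name: "ts".into(), default_value: None },
        ColumnDef { name: "cpu".into(), default_value: Some(0) },
    ];
    let mut batch: WriteBatch = HashMap::from([("ts".into(), vec![1, 2, 3])]);
    compat_write(&dest, &mut batch, 3).unwrap();
    println!("{:?}", batch["cpu"]); // [0, 0, 0]
}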
2022-10-18 10:47:24 +08:00
Ning Sun
f243649971 refactor: Removed openssl from build requirement (#308)
* refactor: replace another axum-test-helper branch

* refactor: upgrade opendal version

* refactor: use cursor for file buffer

* refactor: remove native-tls in mysql_async

* refactor: use async block and pipeline for newer opendal api

* chore: update Cargo.lock

* chore: update dependencies

* docs: removed openssl from build requirement

* fix: call close on pipe writer to flush reader for parquet streamer

* refactor: remove redundant return

* chore: use pinned revision for our forked mysql_async

* style: avoid wild-card import in test code

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* style: use chained call for builder

Co-authored-by: liangxingjian <965662709@qq.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-10-17 19:29:17 +08:00
evenyag
69ba4581b7 test(servers): Fix OpenTSDB shutdown test occasionally fails (#311)
* test(servers): OpenTSDB shutdown test cover error branch

Create connections continuously to cover some branches of error handling
in OpentsdbServer

* test(servers): Add more tests for opentsdb server

Add a test to ensure we cannot connect to the server after shutdown, and
a test to check existing connection usage after shutdown
2022-10-17 14:00:44 +08:00
evenyag
f942b53ed0 style(table-engine): Remove unnecessary TableError::from (#312)
The usage of TableError::from could be replaced by `?`, which is more
concise
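
A tiny illustration of the cleanup, with hypothetical error types: once a From conversion exists between the error types, the explicit map_err call is redundant because `?` performs the same conversion.

// Hypothetical error types; the point is only the `?` conversion.
#[derive(Debug)]
struct StorageError(String);

#[derive(Debug)]
struct TableError(String);

impl From<StorageError> for TableError {
    fn from(e: StorageError) -> Self {
        TableError(e.0)
    }
}

fn read_from_storage(ok: bool) -> Result<u32, StorageError> {
    if ok { Ok(42) } else { Err(StorageError("disk error".into())) }
}

// Before: the conversion is spelled out explicitly.
fn read_verbose(ok: bool) -> Result<u32, TableError> {
    read_from_storage(ok).map_err(TableError::from)
}

// After: `?` applies the same From conversion implicitly.
fn read_concise(ok: bool) -> Result<u32, TableError> {
    Ok(read_from_storage(ok)?)
}

fn main() {
    println!("{:?} {:?}", read_verbose(true), read_concise(false));
}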
2022-10-17 11:49:21 +08:00
dennis zhuang
25a16875b6 feat: create table and add new columns automatically in gRPC (#310)
* fix: readme

* feat: change Column's datatype in protobuf from optional to required

* feat: supports creating table and adding new columns automatically in gRPC, #279, #283

* fix: test

* refactor: execute_grpc_insert

* refactor: clean code and add test

* fix: test after rebasing develop branch

* test: test grpc server with different ports

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: minor changes

* chore: build_alter_table_request

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-17 10:34:52 +08:00
dennis zhuang
494a93c4f2 feat: manifest improvements (#303)
* feat: adds committed_sequence to RegionChange action, #281

* refactor: saving protocol action when writer version is changed

* feat: recover all region metadata in manifest and replay them when replaying WAL, #282

* refactor: minor change and test recovering metadata after altering table schema

* fix: write wrong min_reader_version into manifest for region

* refactor: move up DataRow

* refactor: by CR comments

* test: assert recovered metadata

* refactor: by CR comments

* fix: comment
2022-10-13 15:43:35 +08:00
Ruihang Xia
b61d5989b7 fix: flaky parquet predicate suites (#307)
* fix: flaky parquet predicate suites

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: change ParquetWriter::write_rows as well

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-13 14:00:42 +08:00
evenyag
a8a6426abf fix: Fix replicate_primitive doesn't consider null values (#306) 2022-10-12 16:52:09 +08:00
Ruihang Xia
e99668092c refactor: relax memory ordering of accessing VersionControl::submmitted_sequence (#305)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-12 11:52:43 +08:00
Ruihang Xia
0c829a9712 chore: ignore vscode config directory in git (#299)
* chore: ignore vscode config directory in git

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: correct gitignore file

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-10-10 15:08:26 +08:00
fys
752be8dc41 feat: batch grpc insert for influxdb write (#295) 2022-10-09 10:49:27 +08:00
evenyag
2e1ab050a7 feat: Implements RegionWriter::alter (#292)
* fix(storage): Failure of writing manifest version won't abort applying edit

* feat(storage): Adds RegionMetadata::validate_alter to validate AlterRequest

* fix(storage): Protect write and apply region edit by version mutex

The region meta action needs the previous manifest version, so we need to
use the version mutex to prevent other threads from updating the manifest
version while writing the action to the manifest.

* feat(storage): Implement RegionWriter::alter

RegionWriter::alter() would (see the sketch after this list):
1. acquire the write lock first
2. then validate the alter request
3. build the new metadata by RegionMetadata::alter()
4. acquire the version lock
5. write the metadata to the manifest, which also bumps the manifest
   version
6. freeze mutable memtables and apply the new metadata to the Version
7. write the manifest version to the WAL

* test(storage): Add tests for Region::alter()

* test(storage): Add tests for RegionMetadata::validate_alter

* chore(storage): Modify InvalidAlterRequest error msg

* chore: Adjust comment
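
A compact, synchronous sketch that mirrors the numbered steps above with hypothetical stub types; the real implementation is async and manipulates manifests, memtables, and the WAL rather than plain counters.

use std::sync::Mutex;

// Hypothetical stubs standing in for the real manifest/memtable/WAL machinery.
#[derive(Default)]
struct RegionWriter {
    write_lock: Mutex<()>,
    version_lock: Mutex<VersionInner>,
}

#[derive(Default)]
struct VersionInner {
    manifest_version: u64,
    schema_version: u32,
}

struct AlterRequest {
    new_schema_version: u32,
}

impl RegionWriter {
    fn alter(&self, req: AlterRequest) -> Result<(), String> {
        // 1. acquire the write lock first
        let _write = self.write_lock.lock().unwrap();
        // 2. validate the alter request (simplified to a sanity check here)
        if req.new_schema_version == 0 {
            return Err("invalid alter request".to_string());
        }
        // 3. build the new metadata; here it is just the bumped schema version
        let new_schema_version = req.new_schema_version;
        // 4. acquire the version lock
        let mut version = self.version_lock.lock().unwrap();
        // 5. write the metadata to the manifest, which also bumps the manifest version
        version.manifest_version += 1;
        // 6. freeze mutable memtables and apply the new metadata to the version
        version.schema_version = new_schema_version;
        // 7. write the manifest version to the WAL (omitted in this sketch)
        Ok(())
    }
}

fn main() {
    let writer = RegionWriter::default();
    writer.alter(AlterRequest { new_schema_version: 1 }).unwrap();
    println!("alter applied");
}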
2022-10-08 20:41:04 +08:00
302 changed files with 22897 additions and 4769 deletions

2
.cargo/config.toml Normal file

@@ -0,0 +1,2 @@
[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"


@@ -4,7 +4,7 @@
"color": "B60205"
},
"CHECKS": {
"regexp": "^(feat|fix|test|refactor|chore|style|doc|perf|build|ci|revert)(\\(.*\\))?:.*",
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?:.*",
"ignoreLabels" : ["ignore-title"]
}
}


@@ -15,6 +15,7 @@ jobs:
grcov:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1


@@ -1,6 +1,18 @@
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
- 'config/**'
- '.github/**'
- '**.md'
- '**.yml'
- '.dockerignore'
- 'docker/**'
name: Continuous integration for developing
@@ -12,6 +24,7 @@ jobs:
name: Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1
@@ -31,6 +44,7 @@ jobs:
name: Test Suite
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1
@@ -56,6 +70,7 @@ jobs:
name: Rustfmt
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1
@@ -76,6 +91,7 @@ jobs:
name: Clippy
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- uses: arduino/setup-protoc@v1


@@ -11,6 +11,7 @@ on:
jobs:
check:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: thehanimo/pr-title-checker@v1.3.4
with:

179
.github/workflows/release.yml vendored Normal file

@@ -0,0 +1,179 @@
on:
push:
tags:
- "v*.*.*"
name: Release
env:
RUST_TOOLCHAIN: nightly-2022-07-14
jobs:
build:
name: Build binary
strategy:
matrix:
# The file format is greptime-<tag>.<os>-<arch>
include:
- arch: x86_64-unknown-linux-gnu
os: ubuntu-latest
file: greptime-${{ github.ref_name }}.linux-amd64
- arch: aarch64-unknown-linux-gnu
os: ubuntu-latest
file: greptime-${{ github.ref_name }}.linux-arm64
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-${{ github.ref_name }}.darwin-arm64
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-${{ github.ref_name }}.darwin-amd64
runs-on: ${{ matrix.os }}
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Cache cargo assets
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Install Protoc for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: | # Make sure the protoc is >= 3.15
wget https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip
unzip protoc-21.9-linux-x86_64.zip -d protoc
sudo cp protoc/bin/protoc /usr/local/bin/
sudo cp -r protoc/include/google /usr/local/include/
- name: Install Protoc for macos
if: contains(matrix.arch, 'darwin')
run: |
brew install protobuf
- name: Install dependencies for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: |
sudo apt-get -y update
sudo apt-get -y install libssl-dev pkg-config g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
- name: Install stable toolchain
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ env.RUST_TOOLCHAIN }}
override: true
target: ${{ matrix.arch }}
- name: Output package versions
run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version
- name: Run cargo build
uses: actions-rs/cargo@v1
with:
command: build
args: ${{ matrix.opts }} --release --locked --target ${{ matrix.arch }}
- name: Calculate checksum and rename binary
shell: bash
run: |
cd target/${{ matrix.arch }}/release
cp greptime ${{ matrix.file }}
echo $(shasum -a 256 greptime | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}
path: target/${{ matrix.arch }}/release/${{ matrix.file }}
- name: Upload checksum of artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/release/${{ matrix.file }}.sha256sum
release:
name: Release artifacts
needs: [build]
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Download artifacts
uses: actions/download-artifact@v3
- name: Publish release
uses: softprops/action-gh-release@v1
with:
name: "Release ${{ github.ref_name }}"
files: |
**/greptime-${{ github.ref_name }}.*
docker:
name: Build docker image
needs: [build]
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v2
- name: Download amd64 binary
uses: actions/download-artifact@v3
with:
name: greptime-${{ github.ref_name }}.linux-amd64
path: amd64
- name: Rename amd64 binary
run: |
mv amd64/greptime-${{ github.ref_name }}.linux-amd64 amd64/greptime
- name: Download arm64 binary
uses: actions/download-artifact@v3
with:
name: greptime-${{ github.ref_name }}.linux-arm64
path: arm64
- name: Rename arm64 binary
run: |
mv arm64/greptime-${{ github.ref_name }}.linux-arm64 arm64/greptime
- name: Set file permissions
shell: bash
run: |
chmod +x amd64/greptime arm64/greptime
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up buildx
uses: docker/setup-buildx-action@v2
- name: Build and push
uses: docker/build-push-action@v3
with:
context: .
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
ghcr.io/greptimeteam/greptimedb:${{ github.ref_name }}
greptime/greptimedb:${{ github.ref_name }}

6
.gitignore vendored

@@ -19,12 +19,12 @@ debug/
# JetBrains IDE config directory
.idea/
# VSCode IDE config directory
.vscode/
# Logs
**/__unittest_logs
logs/
.DS_store
.gitignore
# cpython's generated python byte code
**/__pycache__/

132
CODE_OF_CONDUCT.md Normal file

@@ -0,0 +1,132 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
info@greptime.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


@@ -10,6 +10,34 @@ To learn about the design of GreptimeDB, please refer to the [design docs](https
- Make sure all unit tests are passed.
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
#### `pre-commit` Hooks
You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
1. Install `pre-commit`
```
$ pip install pre-commit
```
or
```
$ brew install pre-commit
```
2. Install the `pre-commit` hooks
```
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-push
```
now `pre-commit` will run automatically on `git commit`.
### Title
The titles of pull requests should be prefixed with category name listed in [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0)
@@ -32,4 +60,4 @@ of what you were trying to do and what went wrong. You can also reach for help i
## Bug report
To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).

1040
Cargo.lock generated

File diff suppressed because it is too large


@@ -5,6 +5,7 @@ members = [
"src/client",
"src/cmd",
"src/common/base",
"src/common/catalog",
"src/common/error",
"src/common/function",
"src/common/function-macro",
@@ -12,13 +13,15 @@ members = [
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/substrait",
"src/common/telemetry",
"src/common/time",
"src/datanode",
"src/datatypes",
"src/frontend",
"src/log-store",
"src/logical-plans",
"src/meta-client",
"src/meta-srv",
"src/object-store",
"src/query",
"src/script",
@@ -28,5 +31,4 @@ members = [
"src/store-api",
"src/table",
"src/table-engine",
"test-util",
]


@@ -11,7 +11,6 @@ GreptimeDB: the next-generation hybrid timeseries/analytics processing database
To compile GreptimeDB from source, you'll need the following:
- Rust
- Protobuf
- OpenSSL
#### Rust
@@ -23,23 +22,6 @@ The easiest way to install Rust is to use [`rustup`](https://rustup.rs/), which
major package manager on macos and linux distributions. You can find an
installation instructions [here](https://grpc.io/docs/protoc-installation/).
#### OpenSSL
For Ubuntu:
```bash
sudo apt install libssl-dev
```
For RedHat-based: Fedora, Oracle Linux, etc:
```bash
sudo dnf install openssl-devel
```
For macOS:
```bash
brew install openssl
```
### Build the Docker Image
```
@@ -125,7 +107,7 @@ cargo run -- --log-dir=logs --log-level=debug frontend start -c ./config/fronten
cpu DOUBLE DEFAULT 0,
memory DOUBLE,
TIME INDEX (ts),
PRIMARY KEY(ts,host)) ENGINE=mito WITH(regions=1);
PRIMARY KEY(host)) ENGINE=mito WITH(regions=1);
```
3. Insert data:
@@ -151,33 +133,6 @@ cargo run -- --log-dir=logs --log-level=debug frontend start -c ./config/fronten
```
You can delete your data by removing `/tmp/greptimedb`.
## Contribute
## Contributing
1. [Install rust](https://www.rust-lang.org/tools/install)
2. [Install `pre-commit`](https://pre-commit.com/#plugins) for run hooks on every commit automatically such as `cargo fmt` etc.
```
$ pip install pre-commit
or
$ brew install pre-commit
$
```
3. Install the git hook scripts:
```
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-push
```
now `pre-commit` will run automatically on `git commit`.
4. Check out branch from `develop` and make your contribution. Follow the [style guide](https://github.com/GreptimeTeam/docs/blob/main/style-guide/zh.md). Create a PR when you are ready, feel free and have fun!
Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.


@@ -1,6 +1,7 @@
http_addr = '0.0.0.0:3000'
rpc_addr = '0.0.0.0:3001'
wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mysql_addr = '0.0.0.0:3306'
mysql_runtime_size = 4


@@ -0,0 +1,4 @@
bind_addr = '127.0.0.1:3002'
server_addr = '0.0.0.0:3002'
store_addr = '127.0.0.1:2380'
datanode_lease_secs = 30


@@ -24,9 +24,8 @@ RUN cargo build --release
# TODO(zyy17): Maybe should use the more secure container image.
FROM ubuntu:22.04 as base
WORKDIR /greptimedb
COPY --from=builder /greptimedb/target/release/greptime /greptimedb/bin/
ENV PATH /greptimedb/bin/:$PATH
WORKDIR /greptime
COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT [ "greptime" ]
CMD [ "datanode", "start"]
ENTRYPOINT ["greptime"]

9
docker/ci/Dockerfile Normal file

@@ -0,0 +1,9 @@
FROM ubuntu:22.04
ARG TARGETARCH
ADD $TARGETARCH/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]


@@ -6,6 +6,11 @@ fn main() {
"greptime/v1/select.proto",
"greptime/v1/physical_plan.proto",
"greptime/v1/greptime.proto",
"greptime/v1/meta/common.proto",
"greptime/v1/meta/heartbeat.proto",
"greptime/v1/meta/route.proto",
"greptime/v1/meta/store.proto",
"prometheus/remote/remote.proto",
],
&["."],
)


@@ -49,7 +49,7 @@ message Column {
bytes null_mask = 4;
// Helpful in creating vector from column.
optional ColumnDataType datatype = 5;
ColumnDataType datatype = 5;
}
message ColumnDef {


@@ -2,6 +2,10 @@ syntax = "proto3";
package greptime.v1;
message RequestHeader {
string tenant = 1;
}
message ExprHeader {
uint32 version = 1;
}


@@ -27,6 +27,7 @@ message ObjectExpr {
message SelectExpr {
oneof expr {
string sql = 1;
bytes logical_plan = 2;
PhysicalPlan physical_plan = 15;
}
}
@@ -55,6 +56,8 @@ message InsertExpr {
// The "sql" field is meant to be removed in the future.
string sql = 3;
}
map<string, bytes> options = 4;
}
// TODO(jiachun)


@@ -3,6 +3,7 @@ syntax = "proto3";
package greptime.v1;
import "greptime/v1/admin.proto";
import "greptime/v1/common.proto";
import "greptime/v1/database.proto";
service Greptime {
@@ -10,8 +11,9 @@ service Greptime {
}
message BatchRequest {
repeated AdminRequest admins = 1;
repeated DatabaseRequest databases = 2;
RequestHeader header = 1;
repeated AdminRequest admins = 2;
repeated DatabaseRequest databases = 3;
}
message BatchResponse {


@@ -0,0 +1,48 @@
syntax = "proto3";
package greptime.v1.meta;
message RequestHeader {
uint64 protocol_version = 1;
// cluster_id is the ID of the cluster which the request is sent to.
uint64 cluster_id = 2;
// member_id is the ID of the sender server.
uint64 member_id = 3;
}
message ResponseHeader {
uint64 protocol_version = 1;
// cluster_id is the ID of the cluster which sent the response.
uint64 cluster_id = 2;
Error error = 3;
}
message Error {
int32 code = 1;
string err_msg = 2;
}
message Peer {
uint64 id = 1;
string addr = 2;
}
message TableName {
string catalog_name = 1;
string schema_name = 2;
string table_name = 3;
}
message TimeInterval {
// The unix timestamp in millis of the start of this period.
uint64 start_timestamp_millis = 1;
// The unix timestamp in millis of the end of this period.
uint64 end_timestamp_millis = 2;
}
message KeyValue {
// key is the key in bytes. An empty key is not allowed.
bytes key = 1;
// value is the value held by the key, in bytes.
bytes value = 2;
}


@@ -0,0 +1,92 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Heartbeat {
// Heartbeat, there may be many contents of the heartbeat, such as:
// 1. Metadata to be registered to meta server and discoverable by other nodes.
// 2. Some performance metrics, such as Load, CPU usage, etc.
// 3. The number of computing tasks being executed.
rpc Heartbeat(stream HeartbeatRequest) returns (stream HeartbeatResponse) {}
// Ask leader's endpoint.
rpc AskLeader(AskLeaderRequest) returns (AskLeaderResponse) {}
}
message HeartbeatRequest {
RequestHeader header = 1;
// Self peer
Peer peer = 2;
// Leader node
bool is_leader = 3;
// Actually reported time interval
TimeInterval report_interval = 4;
// Node stat
NodeStat node_stat = 5;
// Region stats in this node
repeated RegionStat region_stats = 6;
// Follower nodes and stats, empty on follower nodes
repeated ReplicaStat replica_stats = 7;
}
message NodeStat {
// The read capacity units during this period
uint64 rcus = 1;
// The write capacity units during this period
uint64 wcus = 2;
// Table number in this node
uint64 table_num = 3;
// Region number in this node
uint64 region_num = 4;
double cpu_usage = 5;
double load = 6;
// Read disk I/O in the node
double read_io_rate = 7;
// Write disk I/O in the node
double write_io_rate = 8;
// Others
map<string, string> attrs = 100;
}
message RegionStat {
uint64 region_id = 1;
TableName table_name = 2;
// The read capacity units during this period
uint64 rcus = 3;
// The write capacity units during this period
uint64 wcus = 4;
// Approximate region size
uint64 approximate_size = 5;
// Approximate number of rows
uint64 approximate_rows = 6;
// Others
map<string, string> attrs = 100;
}
message ReplicaStat {
Peer peer = 1;
bool in_sync = 2;
bool is_learner = 3;
}
message HeartbeatResponse {
ResponseHeader header = 1;
repeated bytes payload = 2;
}
message AskLeaderRequest {
RequestHeader header = 1;
}
message AskLeaderResponse {
ResponseHeader header = 1;
Peer leader = 2;
}


@@ -0,0 +1,82 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Router {
// Fetch routing information for tables. The smallest unit is the complete
// routing information(all regions) of a table.
//
// ```text
// table_1
// table_name
// table_schema
// regions
// region_1
// leader_peer
// follower_peer_1, follower_peer_2
// region_2
// leader_peer
// follower_peer_1, follower_peer_2, follower_peer_3
// region_xxx
// table_2
// ...
// ```
//
rpc Route(RouteRequest) returns (RouteResponse) {}
rpc Create(CreateRequest) returns (RouteResponse) {}
}
message RouteRequest {
RequestHeader header = 1;
repeated TableName table_names = 2;
}
message RouteResponse {
ResponseHeader header = 1;
repeated Peer peers = 2;
repeated TableRoute table_routes = 3;
}
message CreateRequest {
RequestHeader header = 1;
TableName table_name = 2;
repeated Partition partitions = 3;
}
message TableRoute {
Table table = 1;
repeated RegionRoute region_routes = 2;
}
message RegionRoute {
Region region = 1;
// single leader node for write task
uint64 leader_peer_index = 2;
// multiple follower nodes for read task
repeated uint64 follower_peer_indexes = 3;
}
message Table {
TableName table_name = 1;
bytes table_schema = 2;
}
message Region {
uint64 id = 1;
string name = 2;
Partition partition = 3;
map<string, string> attrs = 100;
}
// PARTITION `region_name` VALUES LESS THAN (value_list)
message Partition {
repeated bytes column_list = 1;
repeated bytes value_list = 2;
}


@@ -0,0 +1,138 @@
syntax = "proto3";
package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Store {
// Range gets the keys in the range from the key-value store.
rpc Range(RangeRequest) returns (RangeResponse);
// Put puts the given key into the key-value store.
rpc Put(PutRequest) returns (PutResponse);
// BatchPut atomically puts the given keys into the key-value store.
rpc BatchPut(BatchPutRequest) returns (BatchPutResponse);
// CompareAndPut atomically puts the value to the given updated
// value if the current value == the expected value.
rpc CompareAndPut(CompareAndPutRequest) returns (CompareAndPutResponse);
// DeleteRange deletes the given range from the key-value store.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
}
message RangeRequest {
RequestHeader header = 1;
// key is the first key for the range, If range_end is not given, the
// request only looks up key.
bytes key = 2;
// range_end is the upper bound on the requested range [key, range_end).
// If range_end is '\0', the range is all keys >= key.
// If range_end is key plus one (e.g., "aa"+1 == "ab", "a\xff"+1 == "b"),
// then the range request gets all keys prefixed with key.
// If both key and range_end are '\0', then the range request returns all
// keys.
bytes range_end = 3;
// limit is a limit on the number of keys returned for the request. When
// limit is set to 0, it is treated as no limit.
int64 limit = 4;
// keys_only when set returns only the keys and not the values.
bool keys_only = 5;
}
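The "key plus one" prefix convention described above can be computed on the client side; a minimal sketch (the helper name is illustrative and not part of this API):

// Compute the range_end that turns [key, range_end) into a prefix scan for `key`.
fn prefix_range_end(key: &[u8]) -> Vec<u8> {
    let mut end = key.to_vec();
    // Increment the last byte that is below 0xff, dropping trailing 0xff bytes
    // (e.g. "aa" -> "ab", "a\xff" -> "b").
    while let Some(last) = end.pop() {
        if last < 0xff {
            end.push(last + 1);
            return end;
        }
    }
    // Every byte was 0xff: fall back to '\0', i.e. all keys >= key.
    vec![0]
}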
message RangeResponse {
ResponseHeader header = 1;
// kvs is the list of key-value pairs matched by the range request.
repeated KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
}
message PutRequest {
RequestHeader header = 1;
// key is the key, in bytes, to put into the key-value store.
bytes key = 2;
// value is the value, in bytes, to associate with the key in the
// key-value store.
bytes value = 3;
// If prev_kv is set, gets the previous key-value pair before changing it.
// The previous key-value pair will be returned in the put response.
bool prev_kv = 4;
}
message PutResponse {
ResponseHeader header = 1;
// If prev_kv is set in the request, the previous key-value pair will be
// returned.
KeyValue prev_kv = 2;
}
message BatchPutRequest {
RequestHeader header = 1;
repeated KeyValue kvs = 2;
// If prev_kv is set, gets the previous key-value pairs before changing them.
// The previous key-value pairs will be returned in the batch put response.
bool prev_kv = 3;
}
message BatchPutResponse {
ResponseHeader header = 1;
// If prev_kv is set in the request, the previous key-value pairs will be
// returned.
repeated KeyValue prev_kvs = 2;
}
message CompareAndPutRequest {
RequestHeader header = 1;
// key is the key, in bytes, to put into the key-value store.
bytes key = 2;
// expect is the previous value, in bytes
bytes expect = 3;
// value is the value, in bytes, to associate with the key in the
// key-value store.
bytes value = 4;
}
message CompareAndPutResponse {
ResponseHeader header = 1;
bool success = 2;
KeyValue prev_kv = 3;
}
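As a usage illustration of the compare-and-put semantics, a hedged sketch of a single CAS attempt; only the message shapes come from this proto, while the `send` parameter is a stand-in for whatever client issues the RPC:

use std::future::Future;

use api::v1::meta::{CompareAndPutRequest, CompareAndPutResponse};

// Returns true when the stored value equaled `expect` and `value` was written.
async fn cas_once<F, Fut>(send: F, key: &[u8], expect: &[u8], value: &[u8]) -> bool
where
    F: FnOnce(CompareAndPutRequest) -> Fut,
    Fut: Future<Output = CompareAndPutResponse>,
{
    let req = CompareAndPutRequest {
        header: None, // a real caller would fill this via set_header
        key: key.to_vec(),
        expect: expect.to_vec(),
        value: value.to_vec(),
    };
    // On failure, resp.prev_kv (when present) carries the value that is actually stored.
    send(req).await.success
}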
message DeleteRangeRequest {
RequestHeader header = 1;
// key is the first key to delete in the range.
bytes key = 2;
// range_end is the key following the last key to delete for the range
// [key, range_end).
// If range_end is not given, the range is defined to contain only the key
// argument.
// If range_end is one bit larger than the given key, then the range is all
// the keys with the prefix (the given key).
// If range_end is '\0', the range is all keys greater than or equal to the
// key argument.
bytes range_end = 3;
// If prev_kv is set, gets the previous key-value pairs before deleting them.
// The previous key-value pairs will be returned in the delete response.
bool prev_kv = 4;
}
message DeleteRangeResponse {
ResponseHeader header = 1;
// deleted is the number of keys deleted by the delete range request.
int64 deleted = 2;
// If prev_kv is set in the request, the previous key-value pairs will be
// returned.
repeated KeyValue prev_kvs = 3;
}

View File

@@ -0,0 +1,85 @@
// Copyright 2016 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
import "prometheus/remote/types.proto";
message WriteRequest {
repeated prometheus.TimeSeries timeseries = 1;
// Cortex uses this field to determine the source of the write request.
// We reserve it to avoid any compatibility issues.
reserved 2;
repeated prometheus.MetricMetadata metadata = 3;
}
// ReadRequest represents a remote read request.
message ReadRequest {
repeated Query queries = 1;
enum ResponseType {
// Server will return a single ReadResponse message with matched series that includes list of raw samples.
// It's recommended to use streamed response types instead.
//
// Response headers:
// Content-Type: "application/x-protobuf"
// Content-Encoding: "snappy"
SAMPLES = 0;
// Server will stream a delimited ChunkedReadResponse message that contains XOR encoded chunks for a single series.
// Each message is following varint size and fixed size bigendian uint32 for CRC32 Castagnoli checksum.
//
// Response headers:
// Content-Type: "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"
// Content-Encoding: ""
STREAMED_XOR_CHUNKS = 1;
}
// accepted_response_types allows negotiating the content type of the response.
//
// Response types are taken from the list in FIFO order. If no response type in `accepted_response_types` is
// implemented by the server, an error is returned.
// For requests that do not contain the `accepted_response_types` field, the SAMPLES response type will be used.
repeated ResponseType accepted_response_types = 2;
}
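For reference, building such a request from the Rust bindings this repository generates for these protos (assumed to be exposed as `api::prometheus::remote`; the enum-to-i32 conversion is the usual prost convention):

use api::prometheus::remote::{read_request::ResponseType, Query, ReadRequest};

// List response types in preference order; servers that implement none of them
// return an error, and requests without the field fall back to SAMPLES.
fn build_read_request(queries: Vec<Query>) -> ReadRequest {
    ReadRequest {
        queries,
        accepted_response_types: vec![
            ResponseType::StreamedXorChunks as i32,
            ResponseType::Samples as i32,
        ],
    }
}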
// ReadResponse is a response when response_type equals SAMPLES.
message ReadResponse {
// In same order as the request's queries.
repeated QueryResult results = 1;
}
message Query {
int64 start_timestamp_ms = 1;
int64 end_timestamp_ms = 2;
repeated prometheus.LabelMatcher matchers = 3;
prometheus.ReadHints hints = 4;
}
message QueryResult {
// Samples within a time series must be ordered by time.
repeated prometheus.TimeSeries timeseries = 1;
}
// ChunkedReadResponse is a response when response_type equals STREAMED_XOR_CHUNKS.
// We strictly stream full series after series, optionally split by time. This means that a single frame can contain
// a partition of a single series, but once a new series starts to be streamed, no more chunks will be sent for the
// previous one. Series are returned sorted in the same way TSDB blocks are internally.
message ChunkedReadResponse {
repeated prometheus.ChunkedSeries chunked_series = 1;
// query_index represents an index of the query from ReadRequest.queries these chunks relates to.
int64 query_index = 2;
}

View File

@@ -0,0 +1,117 @@
// Copyright 2017 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package prometheus;
option go_package = "prompb";
message MetricMetadata {
enum MetricType {
UNKNOWN = 0;
COUNTER = 1;
GAUGE = 2;
HISTOGRAM = 3;
GAUGEHISTOGRAM = 4;
SUMMARY = 5;
INFO = 6;
STATESET = 7;
}
// Represents the metric type, these match the set from Prometheus.
// Refer to model/textparse/interface.go for details.
MetricType type = 1;
string metric_family_name = 2;
string help = 4;
string unit = 5;
}
message Sample {
double value = 1;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 2;
}
message Exemplar {
// Optional, can be empty.
repeated Label labels = 1;
double value = 2;
// timestamp is in ms format, see model/timestamp/timestamp.go for
// conversion from time.Time to Prometheus timestamp.
int64 timestamp = 3;
}
// TimeSeries represents samples and labels for a single time series.
message TimeSeries {
// For a timeseries to be valid, and for the samples and exemplars
// to be ingested by the remote system properly, the labels field is required.
repeated Label labels = 1;
repeated Sample samples = 2;
repeated Exemplar exemplars = 3;
}
message Label {
string name = 1;
string value = 2;
}
message Labels {
repeated Label labels = 1;
}
// Matcher specifies a rule, which can match a set of labels or not.
message LabelMatcher {
enum Type {
EQ = 0;
NEQ = 1;
RE = 2;
NRE = 3;
}
Type type = 1;
string name = 2;
string value = 3;
}
message ReadHints {
int64 step_ms = 1; // Query step size in milliseconds.
string func = 2; // String representation of surrounding function or aggregation.
int64 start_ms = 3; // Start time in milliseconds.
int64 end_ms = 4; // End time in milliseconds.
repeated string grouping = 5; // List of label names used in aggregation.
bool by = 6; // Indicate whether it is without or by.
int64 range_ms = 7; // Range vector selector range in milliseconds.
}
// Chunk represents a TSDB chunk.
// Time range [min, max] is inclusive.
message Chunk {
int64 min_time_ms = 1;
int64 max_time_ms = 2;
// We require this to match chunkenc.Encoding.
enum Encoding {
UNKNOWN = 0;
XOR = 1;
}
Encoding type = 3;
bytes data = 4;
}
// ChunkedSeries represents single, encoded time series.
message ChunkedSeries {
// Labels should be sorted.
repeated Label labels = 1;
// Chunks will be in start time order and may overlap.
repeated Chunk chunks = 2;
}

View File

@@ -1,5 +1,6 @@
pub mod error;
pub mod helper;
pub mod prometheus;
pub mod serde;
pub mod v1;

View File

@@ -0,0 +1,5 @@
#![allow(clippy::derive_partial_eq_without_eq)]
pub mod remote {
tonic::include_proto!("prometheus");
}

View File

@@ -4,3 +4,5 @@ tonic::include_proto!("greptime.v1");
pub mod codec {
tonic::include_proto!("greptime.v1.codec");
}
pub mod meta;

src/api/src/v1/meta.rs (new file, 54 lines)
View File

@@ -0,0 +1,54 @@
tonic::include_proto!("greptime.v1.meta");
pub const PROTOCOL_VERSION: u64 = 1;
impl RequestHeader {
#[inline]
pub fn new((cluster_id, member_id): (u64, u64)) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
member_id,
}
}
}
impl ResponseHeader {
#[inline]
pub fn success(cluster_id: u64) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
..Default::default()
}
}
#[inline]
pub fn failed(cluster_id: u64, error: Error) -> Self {
Self {
protocol_version: PROTOCOL_VERSION,
cluster_id,
error: Some(error),
}
}
}
macro_rules! gen_set_header {
($req: ty) => {
impl $req {
#[inline]
pub fn set_header(&mut self, (cluster_id, member_id): (u64, u64)) {
self.header = Some(RequestHeader::new((cluster_id, member_id)));
}
}
};
}
gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
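A minimal usage sketch of the helpers above (it assumes the prost-generated `HeartbeatRequest` carries an optional `header` field, which is what `gen_set_header!` relies on):

use api::v1::meta::{HeartbeatRequest, PROTOCOL_VERSION};

fn heartbeat_with_header(cluster_id: u64, member_id: u64) -> HeartbeatRequest {
    let mut req = HeartbeatRequest::default();
    // Added by gen_set_header!: fills the header with PROTOCOL_VERSION and the given ids.
    req.set_header((cluster_id, member_id));
    debug_assert_eq!(
        PROTOCOL_VERSION,
        req.header.as_ref().unwrap().protocol_version
    );
    req
}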

View File

@@ -5,21 +5,41 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
api = { path = "../api" }
arc-swap = "1.0"
async-stream = "0.3"
async-trait = "0.1"
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.17"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }
tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
object-store = { path = "../object-store" }
opendal = "0.17"
storage = { path = "../storage" }
table-engine = { path = "../table-engine" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }

View File

@@ -103,6 +103,38 @@ pub enum Error {
#[snafu(display("Illegal catalog manager state: {}", msg))]
IllegalManagerState { backtrace: Backtrace, msg: String },
#[snafu(display("Failed to scan system catalog table, source: {}", source))]
SystemCatalogTableScan {
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec {
#[snafu(backtrace)]
source: common_query::error::Error,
},
#[snafu(display("Cannot parse catalog value, source: {}", source))]
InvalidCatalogValue {
#[snafu(backtrace)]
source: common_catalog::error::Error,
},
#[snafu(display("IO error occurred while fetching catalog info, source: {}", source))]
Io {
backtrace: Backtrace,
source: std::io::Error,
},
#[snafu(display("Local and remote catalog data are inconsistent, msg: {}", msg))]
CatalogStateInconsistent { msg: String, backtrace: Backtrace },
#[snafu(display("Failed to perform metasrv operation, source: {}", source))]
MetaSrv {
#[snafu(backtrace)]
source: meta_client::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -115,14 +147,17 @@ impl ErrorExt for Error {
| Error::TableNotFound { .. }
| Error::IllegalManagerState { .. }
| Error::CatalogNotFound { .. }
| Error::InvalidEntryType { .. } => StatusCode::Unexpected,
| Error::InvalidEntryType { .. }
| Error::CatalogStateInconsistent { .. } => StatusCode::Unexpected,
Error::SystemCatalog { .. } | Error::EmptyValue | Error::ValueDeserialize { .. } => {
StatusCode::StorageUnavailable
}
Error::SystemCatalog { .. }
| Error::EmptyValue
| Error::ValueDeserialize { .. }
| Error::Io { .. } => StatusCode::StorageUnavailable,
Error::ReadSystemCatalog { source, .. } => source.status_code(),
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::RegisterTable { .. } => StatusCode::Internal,
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
@@ -132,6 +167,9 @@ impl ErrorExt for Error {
| Error::InsertTableRecord { source, .. }
| Error::OpenTable { source, .. }
| Error::CreateTable { source, .. } => source.status_code(),
Error::MetaSrv { source, .. } => source.status_code(),
Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
}
}

View File

@@ -3,20 +3,21 @@
use std::any::Any;
use std::sync::Arc;
use common_telemetry::info;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CreateTableRequest;
use table::TableRef;
pub use crate::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
pub use crate::manager::LocalCatalogManager;
use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
pub mod consts;
pub mod error;
mod manager;
pub mod memory;
pub mod local;
pub mod remote;
pub mod schema;
mod system;
pub mod system;
pub mod tables;
/// Represent a list of named catalogs
@@ -31,13 +32,13 @@ pub trait CatalogList: Sync + Send {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef>;
) -> Result<Option<CatalogProviderRef>>;
/// Retrieves the list of available catalog names
fn catalog_names(&self) -> Vec<String>;
fn catalog_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific catalog by name, provided it exists.
fn catalog(&self, name: &str) -> Option<CatalogProviderRef>;
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>>;
}
/// Represents a catalog, comprising a number of named schemas.
@@ -47,14 +48,17 @@ pub trait CatalogProvider: Sync + Send {
fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available schema names in this catalog.
fn schema_names(&self) -> Vec<String>;
fn schema_names(&self) -> Result<Vec<String>>;
/// Registers schema to this catalog.
fn register_schema(&self, name: String, schema: SchemaProviderRef)
-> Option<SchemaProviderRef>;
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>>;
/// Retrieves a specific schema from the catalog by name, provided it exists.
fn schema(&self, name: &str) -> Option<SchemaProviderRef>;
fn schema(&self, name: &str) -> Result<Option<SchemaProviderRef>>;
}
pub type CatalogListRef = Arc<dyn CatalogList>;
@@ -79,8 +83,8 @@ pub trait CatalogManager: CatalogList {
/// Returns the table by catalog, schema and table name.
fn table(
&self,
catalog: Option<&str>,
schema: Option<&str>,
catalog: &str,
schema: &str,
table_name: &str,
) -> error::Result<Option<TableRef>>;
}
@@ -99,9 +103,10 @@ pub struct RegisterSystemTableRequest {
pub open_hook: Option<OpenSystemTableHook>,
}
#[derive(Clone)]
pub struct RegisterTableRequest {
pub catalog: Option<String>,
pub schema: Option<String>,
pub catalog: String,
pub schema: String,
pub table_name: String,
pub table_id: TableId,
pub table: TableRef,
@@ -111,3 +116,53 @@ pub struct RegisterTableRequest {
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
format!("{}.{}.{}", catalog, schema, table)
}
pub trait CatalogProviderFactory {
fn create(&self, catalog_name: String) -> CatalogProviderRef;
}
pub trait SchemaProviderFactory {
fn create(&self, catalog_name: String, schema_name: String) -> SchemaProviderRef;
}
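/// Drains `sys_table_requests`: for each request, looks the table up in `manager`,
/// creates it through `engine` if it does not exist yet, registers it, and finally
/// runs the optional `open_hook`.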
pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
manager: &'a M,
engine: TableEngineRef,
sys_table_requests: &'a mut Vec<RegisterSystemTableRequest>,
) -> Result<()> {
for req in sys_table_requests.drain(..) {
let catalog_name = &req.create_table_request.catalog_name;
let schema_name = &req.create_table_request.schema_name;
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
table
} else {
let table = engine
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
catalog_name, schema_name, table_name, table_id,
),
})?;
manager
.register_table(RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: table_name.clone(),
table_id,
table: table.clone(),
})
.await?;
info!("Created and registered system table: {}", table_name);
table
};
if let Some(hook) = req.open_hook {
(hook)(table)?;
}
}
Ok(())
}

src/catalog/src/local.rs (new file, 7 lines)
View File

@@ -0,0 +1,7 @@
pub mod manager;
pub mod memory;
pub use manager::LocalCatalogManager;
pub use memory::{
new_memory_catalog_list, MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider,
};

View File

@@ -2,6 +2,10 @@ use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::RecordBatch;
use common_telemetry::{debug, info};
use datatypes::prelude::ScalarVector;
@@ -15,25 +19,22 @@ use table::requests::OpenTableRequest;
use table::table::numbers::NumbersTable;
use table::TableRef;
use super::error::Result;
use crate::consts::{
INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::Result;
use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, IllegalManagerStateSnafu, OpenTableSnafu,
ReadSystemCatalogSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
SystemCatalogTypeMismatchSnafu, TableExistsSnafu, TableNotFoundSnafu,
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu, TableExistsSnafu,
TableNotFoundSnafu,
};
use crate::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::local::memory::{MemoryCatalogList, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
VALUE_INDEX,
};
use crate::tables::SystemCatalog;
use crate::{
format_full_table_name, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
CatalogProvider, CatalogProviderRef, RegisterSystemTableRequest, RegisterTableRequest,
SchemaProvider,
};
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -50,7 +51,7 @@ impl LocalCatalogManager {
/// Create a new [CatalogManager] with given user catalogs and table engine
pub async fn try_new(engine: TableEngineRef) -> Result<Self> {
let table = SystemCatalogTable::new(engine.clone()).await?;
let memory_catalog_list = crate::memory::new_memory_catalog_list()?;
let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
let system_catalog = Arc::new(SystemCatalog::new(
table,
memory_catalog_list.clone(),
@@ -90,49 +91,7 @@ impl LocalCatalogManager {
// Processing system table hooks
let mut sys_table_requests = self.system_table_requests.lock().await;
for req in sys_table_requests.drain(..) {
let catalog_name = &req.create_table_request.catalog_name;
let schema_name = &req.create_table_request.schema_name;
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) =
self.table(catalog_name.as_deref(), schema_name.as_deref(), table_name)?
{
table
} else {
let table = self
.engine
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
catalog_name.as_deref().unwrap_or(DEFAULT_CATALOG_NAME),
schema_name.as_deref().unwrap_or(DEFAULT_SCHEMA_NAME),
table_name,
table_id,
),
})?;
self.register_table(RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: table_name.clone(),
table_id,
table: table.clone(),
})
.await?;
info!("Created and registered system table: {}", table_name);
table
};
if let Some(hook) = req.open_hook {
(hook)(table)?;
}
}
handle_system_table_request(self, self.engine.clone(), &mut sys_table_requests).await?;
Ok(())
}
@@ -143,9 +102,9 @@ impl LocalCatalogManager {
self.system.information_schema.system.clone(),
)?;
let system_catalog = Arc::new(MemoryCatalogProvider::new());
system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema);
system_catalog.register_schema(INFORMATION_SCHEMA_NAME.to_string(), system_schema)?;
self.catalogs
.register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog);
.register_catalog(SYSTEM_CATALOG_NAME.to_string(), system_catalog)?;
let default_catalog = Arc::new(MemoryCatalogProvider::new());
let default_schema = Arc::new(MemorySchemaProvider::new());
@@ -155,9 +114,9 @@ impl LocalCatalogManager {
let table = Arc::new(NumbersTable::default());
default_schema.register_table("numbers".to_string(), table)?;
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema);
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
self.catalogs
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog)?;
Ok(())
}
@@ -213,14 +172,14 @@ impl LocalCatalogManager {
Entry::Schema(s) => {
let catalog =
self.catalogs
.catalog(&s.catalog_name)
.catalog(&s.catalog_name)?
.context(CatalogNotFoundSnafu {
catalog_name: &s.catalog_name,
})?;
catalog.register_schema(
s.schema_name.clone(),
Arc::new(MemorySchemaProvider::new()),
);
)?;
info!("Registered schema: {:?}", s);
}
Entry::Table(t) => {
@@ -237,12 +196,12 @@ impl LocalCatalogManager {
async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
let catalog = self
.catalogs
.catalog(&t.catalog_name)
.catalog(&t.catalog_name)?
.context(CatalogNotFoundSnafu {
catalog_name: &t.catalog_name,
})?;
let schema = catalog
.schema(&t.schema_name)
.schema(&t.schema_name)?
.context(SchemaNotFoundSnafu {
schema_info: format!("{}.{}", &t.catalog_name, &t.schema_name),
})?;
@@ -286,19 +245,19 @@ impl CatalogList for LocalCatalogManager {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<Arc<dyn CatalogProvider>> {
) -> Result<Option<CatalogProviderRef>> {
self.catalogs.register_catalog(name, catalog)
}
fn catalog_names(&self) -> Vec<String> {
let mut res = self.catalogs.catalog_names();
fn catalog_names(&self) -> Result<Vec<String>> {
let mut res = self.catalogs.catalog_names()?;
res.push(SYSTEM_CATALOG_NAME.to_string());
res
Ok(res)
}
fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
if name.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
Some(self.system.clone())
Ok(Some(self.system.clone()))
} else {
self.catalogs.catalog(name)
}
@@ -307,7 +266,7 @@ impl CatalogList for LocalCatalogManager {
#[async_trait::async_trait]
impl CatalogManager for LocalCatalogManager {
/// Start [MemoryCatalogManager] to load all information from system catalog table.
/// Start [LocalCatalogManager] to load all information from system catalog table.
/// Make sure table engine is initialized before starting [MemoryCatalogManager].
async fn start(&self) -> Result<()> {
self.init().await
@@ -328,36 +287,30 @@ impl CatalogManager for LocalCatalogManager {
}
);
let catalog_name = request
.catalog
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = request
.schema
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
let catalog_name = &request.catalog;
let schema_name = &request.schema;
let catalog = self
.catalogs
.catalog(&catalog_name)
.context(CatalogNotFoundSnafu {
catalog_name: &catalog_name,
})?;
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(&schema_name)
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
if schema.table_exist(&request.table_name) {
if schema.table_exist(&request.table_name)? {
return TableExistsSnafu {
table: format_full_table_name(&catalog_name, &schema_name, &request.table_name),
table: format_full_table_name(catalog_name, schema_name, &request.table_name),
}
.fail();
}
self.system
.register_table(
catalog_name,
schema_name,
catalog_name.clone(),
schema_name.clone(),
request.table_name.clone(),
request.table_id,
)
@@ -383,22 +336,19 @@ impl CatalogManager for LocalCatalogManager {
fn table(
&self,
catalog: Option<&str>,
schema: Option<&str>,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog_name = catalog.unwrap_or(DEFAULT_CATALOG_NAME);
let schema_name = schema.unwrap_or(DEFAULT_SCHEMA_NAME);
let catalog = self
.catalogs
.catalog(catalog_name)
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
Ok(schema.table(table_name))
schema.table(table_name)
}
}

View File

@@ -23,7 +23,7 @@ impl MemoryCatalogList {
pub fn register_catalog_if_absent(
&self,
name: String,
catalog: Arc<dyn CatalogProvider>,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef> {
let mut catalogs = self.catalogs.write().unwrap();
let entry = catalogs.entry(name);
@@ -46,19 +46,19 @@ impl CatalogList for MemoryCatalogList {
&self,
name: String,
catalog: CatalogProviderRef,
) -> Option<CatalogProviderRef> {
) -> Result<Option<CatalogProviderRef>> {
let mut catalogs = self.catalogs.write().unwrap();
catalogs.insert(name, catalog)
Ok(catalogs.insert(name, catalog))
}
fn catalog_names(&self) -> Vec<String> {
fn catalog_names(&self) -> Result<Vec<String>> {
let catalogs = self.catalogs.read().unwrap();
catalogs.keys().map(|s| s.to_string()).collect()
Ok(catalogs.keys().map(|s| s.to_string()).collect())
}
fn catalog(&self, name: &str) -> Option<CatalogProviderRef> {
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
let catalogs = self.catalogs.read().unwrap();
catalogs.get(name).cloned()
Ok(catalogs.get(name).cloned())
}
}
@@ -87,23 +87,23 @@ impl CatalogProvider for MemoryCatalogProvider {
self
}
fn schema_names(&self) -> Vec<String> {
fn schema_names(&self) -> Result<Vec<String>> {
let schemas = self.schemas.read().unwrap();
schemas.keys().cloned().collect()
Ok(schemas.keys().cloned().collect())
}
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Option<SchemaProviderRef> {
) -> Result<Option<SchemaProviderRef>> {
let mut schemas = self.schemas.write().unwrap();
schemas.insert(name, schema)
Ok(schemas.insert(name, schema))
}
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
let schemas = self.schemas.read().unwrap();
schemas.get(name).cloned()
Ok(schemas.get(name).cloned())
}
}
@@ -132,18 +132,18 @@ impl SchemaProvider for MemorySchemaProvider {
self
}
fn table_names(&self) -> Vec<String> {
fn table_names(&self) -> Result<Vec<String>> {
let tables = self.tables.read().unwrap();
tables.keys().cloned().collect()
Ok(tables.keys().cloned().collect())
}
fn table(&self, name: &str) -> Option<TableRef> {
fn table(&self, name: &str) -> Result<Option<TableRef>> {
let tables = self.tables.read().unwrap();
tables.get(name).cloned()
Ok(tables.get(name).cloned())
}
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
if self.table_exist(name.as_str()) {
if self.table_exist(name.as_str())? {
return TableExistsSnafu { table: name }.fail()?;
}
let mut tables = self.tables.write().unwrap();
@@ -155,9 +155,9 @@ impl SchemaProvider for MemorySchemaProvider {
Ok(tables.remove(name))
}
fn table_exist(&self, name: &str) -> bool {
fn table_exist(&self, name: &str) -> Result<bool> {
let tables = self.tables.read().unwrap();
tables.contains_key(name)
Ok(tables.contains_key(name))
}
}
@@ -168,40 +168,50 @@ pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogList>> {
#[cfg(test)]
mod tests {
use common_catalog::consts::*;
use common_error::ext::ErrorExt;
use common_error::prelude::StatusCode;
use table::table::numbers::NumbersTable;
use super::*;
use crate::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
#[test]
fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_list().unwrap();
assert!(catalog_list.catalog(DEFAULT_CATALOG_NAME).is_none());
assert!(catalog_list
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.is_none());
let default_catalog = Arc::new(MemoryCatalogProvider::default());
catalog_list.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone());
catalog_list
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), default_catalog.clone())
.unwrap();
assert!(default_catalog.schema(DEFAULT_SCHEMA_NAME).is_none());
assert!(default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.is_none());
let default_schema = Arc::new(MemorySchemaProvider::default());
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone());
default_catalog
.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema.clone())
.unwrap();
default_schema
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
let table = default_schema.table("numbers");
let table = default_schema.table("numbers").unwrap();
assert!(table.is_some());
assert!(default_schema.table("not_exists").is_none());
assert!(default_schema.table("not_exists").unwrap().is_none());
}
#[tokio::test]
async fn test_mem_provider() {
let provider = MemorySchemaProvider::new();
let table_name = "numbers";
assert!(!provider.table_exist(table_name));
assert!(!provider.table_exist(table_name).unwrap());
assert!(provider.deregister_table(table_name).unwrap().is_none());
let test_table = NumbersTable::default();
// register table successfully
@@ -209,7 +219,7 @@ mod tests {
.register_table(table_name.to_string(), Arc::new(test_table))
.unwrap()
.is_none());
assert!(provider.table_exist(table_name));
assert!(provider.table_exist(table_name).unwrap());
let other_table = NumbersTable::default();
let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
let err = result.err().unwrap();

src/catalog/src/remote.rs (new file, 94 lines)
View File

@@ -0,0 +1,94 @@
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc;
pub use client::MetaKvBackend;
use futures::Stream;
use futures_util::StreamExt;
pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};
use crate::error::Error;
mod client;
mod manager;
#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, crate::error::Error>
where
'a: 'b;
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), crate::error::Error>;
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), crate::error::Error>;
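/// Deletes a single key by delegating to `delete_range` with an empty range end.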
async fn delete(&self, key: &[u8]) -> Result<(), Error> {
self.delete_range(key, &[]).await
}
/// The default `get` is implemented on top of the `range` method.
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
let mut iter = self.range(key);
while let Some(r) = iter.next().await {
let kv = r?;
if kv.0 == key {
return Ok(Some(kv));
}
}
return Ok(None);
}
}
pub type KvBackendRef = Arc<dyn KvBackend>;
#[cfg(test)]
mod tests {
use async_stream::stream;
use super::*;
struct MockKvBackend {}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
Box::pin(stream!({
for i in 0..3 {
yield Ok(Kv(
i.to_string().as_bytes().to_vec(),
i.to_string().as_bytes().to_vec(),
))
}
}))
}
async fn set(&self, _key: &[u8], _val: &[u8]) -> Result<(), Error> {
unimplemented!()
}
async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
unimplemented!()
}
}
#[tokio::test]
async fn test_get() {
let backend = MockKvBackend {};
let result = backend.get(0.to_string().as_bytes()).await;
assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(1.to_string().as_bytes()).await;
assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(2.to_string().as_bytes()).await;
assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(3.to_string().as_bytes()).await;
assert!(result.unwrap().is_none());
}
}

View File

@@ -0,0 +1,71 @@
use std::fmt::Debug;
use async_stream::stream;
use common_telemetry::info;
use meta_client::client::MetaClient;
use meta_client::rpc::{DeleteRangeRequest, PutRequest, RangeRequest};
use snafu::ResultExt;
use crate::error::{Error, MetaSrvSnafu};
use crate::remote::{Kv, KvBackend, ValueIter};
#[derive(Debug)]
pub struct MetaKvBackend {
pub client: MetaClient,
}
/// Implement `KvBackend` trait for `MetaKvBackend` instead of opendal's `Accessor` since
/// `MetaClient`'s range method can return both keys and values, which can reduce IO overhead
/// compared to `Accessor`'s list and get methods.
#[async_trait::async_trait]
impl KvBackend for MetaKvBackend {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
let key = key.to_vec();
Box::pin(stream!({
let mut resp = self
.client
.range(RangeRequest::new().with_prefix(key))
.await
.context(MetaSrvSnafu)?;
let kvs = resp.take_kvs();
for mut kv in kvs.into_iter() {
yield Ok(Kv(kv.take_key(), kv.take_value()))
}
}))
}
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
let mut response = self
.client
.range(RangeRequest::new().with_key(key))
.await
.context(MetaSrvSnafu)?;
Ok(response
.take_kvs()
.get_mut(0)
.map(|kv| Kv(kv.take_key(), kv.take_value())))
}
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
let req = PutRequest::new()
.with_key(key.to_vec())
.with_value(val.to_vec());
let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
Ok(())
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
info!(
"Delete range, key: {}, end: {}, deleted: {}",
String::from_utf8_lossy(key),
String::from_utf8_lossy(end),
resp.deleted()
);
Ok(())
}
}

View File

@@ -0,0 +1,669 @@
use std::any::Any;
use std::collections::HashMap;
use std::pin::Pin;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableKey, TableValue,
};
use common_telemetry::{debug, info};
use datatypes::schema::Schema;
use futures::Stream;
use futures_util::StreamExt;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableVersion};
use table::requests::{CreateTableRequest, OpenTableRequest};
use table::TableRef;
use tokio::sync::Mutex;
use crate::error::Result;
use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, OpenTableSnafu,
SchemaNotFoundSnafu, TableExistsSnafu,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};
/// Catalog manager based on metasrv.
pub struct RemoteCatalogManager {
node_id: u64,
backend: KvBackendRef,
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
next_table_id: Arc<AtomicU32>,
engine: TableEngineRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogManager {
pub fn new(engine: TableEngineRef, node_id: u64, backend: KvBackendRef) -> Self {
Self {
engine,
node_id,
backend,
catalogs: Default::default(),
next_table_id: Default::default(),
system_table_requests: Default::default(),
mutex: Default::default(),
}
}
fn build_catalog_key(&self, catalog_name: impl AsRef<str>) -> CatalogKey {
CatalogKey {
catalog_name: catalog_name.as_ref().to_string(),
node_id: self.node_id,
}
}
fn new_catalog_provider(&self, catalog_name: &str) -> CatalogProviderRef {
Arc::new(RemoteCatalogProvider {
catalog_name: catalog_name.to_string(),
node_id: self.node_id,
backend: self.backend.clone(),
schemas: Default::default(),
mutex: Default::default(),
}) as _
}
fn new_schema_provider(&self, catalog_name: &str, schema_name: &str) -> SchemaProviderRef {
Arc::new(RemoteSchemaProvider {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
tables: Default::default(),
node_id: self.node_id,
backend: self.backend.clone(),
mutex: Default::default(),
}) as _
}
async fn iter_remote_catalogs(
&self,
) -> Pin<Box<dyn Stream<Item = Result<CatalogKey>> + Send + '_>> {
let catalog_range_prefix = build_catalog_prefix();
info!("catalog_range_prefix: {}", catalog_range_prefix);
let mut catalogs = self.backend.range(catalog_range_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = catalogs.next().await {
let Kv(k, _) = r?;
if !k.starts_with(catalog_range_prefix.as_bytes()) {
debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
continue;
}
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
if key.node_id == self.node_id {
yield Ok(key)
}
}
}))
}
async fn iter_remote_schemas(
&self,
catalog_name: &str,
) -> Pin<Box<dyn Stream<Item = Result<SchemaKey>> + Send + '_>> {
let schema_prefix = build_schema_prefix(catalog_name);
let mut schemas = self.backend.range(schema_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = schemas.next().await {
let Kv(k, _) = r?;
if !k.starts_with(schema_prefix.as_bytes()) {
debug!("Ignoring non-schema key: {}", String::from_utf8_lossy(&k));
continue;
}
let schema_key = SchemaKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
if schema_key.node_id == self.node_id {
yield Ok(schema_key)
}
}
}))
}
/// Iterate over all table entries on metasrv.
/// TODO(hl): table entries with different versions are not currently considered.
/// Ideally, deprecated table entries must be deleted when deregistering from the catalog.
async fn iter_remote_tables(
&self,
catalog_name: &str,
schema_name: &str,
) -> Pin<Box<dyn Stream<Item = Result<(TableKey, TableValue)>> + Send + '_>> {
let table_prefix = build_table_prefix(catalog_name, schema_name);
let mut tables = self.backend.range(table_prefix.as_bytes());
Box::pin(stream!({
while let Some(r) = tables.next().await {
let Kv(k, v) = r?;
if !k.starts_with(table_prefix.as_bytes()) {
debug!("Ignoring non-table prefix: {}", String::from_utf8_lossy(&k));
continue;
}
let table_key = TableKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
let table_value = TableValue::parse(&String::from_utf8_lossy(&v))
.context(InvalidCatalogValueSnafu)?;
if table_value.node_id == self.node_id {
yield Ok((table_key, table_value))
}
}
}))
}
/// Fetch catalogs/schemas/tables from remote catalog manager along with max table id allocated.
async fn initiate_catalogs(&self) -> Result<(HashMap<String, CatalogProviderRef>, TableId)> {
let mut res = HashMap::new();
let max_table_id = MIN_USER_TABLE_ID;
// initiate default catalog and schema
let default_catalog = self.initiate_default_catalog().await?;
res.insert(DEFAULT_CATALOG_NAME.to_string(), default_catalog);
info!("Default catalog and schema registered");
let mut catalogs = self.iter_remote_catalogs().await;
while let Some(r) = catalogs.next().await {
let CatalogKey { catalog_name, .. } = r?;
info!("Fetch catalog from metasrv: {}", catalog_name);
let catalog = res
.entry(catalog_name.clone())
.or_insert_with(|| self.new_catalog_provider(&catalog_name))
.clone();
self.initiate_schemas(catalog_name, catalog, max_table_id)
.await?;
}
Ok((res, max_table_id))
}
async fn initiate_schemas(
&self,
catalog_name: String,
catalog: CatalogProviderRef,
max_table_id: TableId,
) -> Result<()> {
let mut schemas = self.iter_remote_schemas(&catalog_name).await;
while let Some(r) = schemas.next().await {
let SchemaKey {
catalog_name,
schema_name,
..
} = r?;
info!("Found schema: {}.{}", catalog_name, schema_name);
let schema = match catalog.schema(&schema_name)? {
None => {
let schema = self.new_schema_provider(&catalog_name, &schema_name);
catalog.register_schema(schema_name.clone(), schema.clone())?;
info!("Registered schema: {}", &schema_name);
schema
}
Some(schema) => schema,
};
info!(
"Fetch schema from metasrv: {}.{}",
&catalog_name, &schema_name
);
self.initiate_tables(&catalog_name, &schema_name, schema, max_table_id)
.await?;
}
Ok(())
}
/// Initiates all tables inside a catalog by fetching data from metasrv.
async fn initiate_tables<'a>(
&'a self,
catalog_name: &'a str,
schema_name: &'a str,
schema: SchemaProviderRef,
mut max_table_id: TableId,
) -> Result<()> {
let mut tables = self.iter_remote_tables(catalog_name, schema_name).await;
while let Some(r) = tables.next().await {
let (table_key, table_value) = r?;
let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
schema.register_table(table_key.table_name.to_string(), table_ref)?;
info!("Registered table {}", &table_key.table_name);
if table_value.id > max_table_id {
info!("Max table id: {} -> {}", max_table_id, table_value.id);
max_table_id = table_value.id;
}
}
Ok(())
}
async fn initiate_default_catalog(&self) -> Result<CatalogProviderRef> {
let default_catalog = self.new_catalog_provider(DEFAULT_CATALOG_NAME);
let default_schema = self.new_schema_provider(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME);
default_catalog.register_schema(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
let schema_key = SchemaKey {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id: self.node_id,
}
.to_string();
self.backend
.set(
schema_key.as_bytes(),
&SchemaValue {}
.to_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
info!("Registered default schema");
let catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id: self.node_id,
}
.to_string();
self.backend
.set(
catalog_key.as_bytes(),
&CatalogValue {}
.to_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
info!("Registered default catalog");
Ok(default_catalog)
}
async fn open_or_create_table(
&self,
table_key: &TableKey,
table_value: &TableValue,
) -> Result<TableRef> {
let context = EngineContext {};
let TableKey {
catalog_name,
schema_name,
table_name,
..
} = table_key;
let TableValue { id, meta, .. } = table_value;
let request = OpenTableRequest {
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
table_id: *id,
};
match self
.engine
.open_table(&context, request)
.await
.with_context(|_| OpenTableSnafu {
table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
})? {
Some(table) => Ok(table),
None => {
let req = CreateTableRequest {
id: *id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: Arc::new(Schema::new(meta.schema.column_schemas.clone())),
primary_key_indices: meta.primary_key_indices.clone(),
create_if_not_exists: true,
table_options: meta.options.clone(),
};
self.engine
.create_table(&context, req)
.await
.context(CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id:{}",
&catalog_name, &schema_name, &table_name, id
),
})
}
}
}
}
#[async_trait::async_trait]
impl CatalogManager for RemoteCatalogManager {
async fn start(&self) -> Result<()> {
let (catalogs, max_table_id) = self.initiate_catalogs().await?;
info!(
"Initialized catalogs: {:?}",
catalogs.keys().cloned().collect::<Vec<_>>()
);
self.catalogs.store(Arc::new(catalogs));
self.next_table_id
.store(max_table_id + 1, Ordering::Relaxed);
info!("Max table id allocated: {}", max_table_id);
let mut system_table_requests = self.system_table_requests.lock().await;
handle_system_table_request(self, self.engine.clone(), &mut system_table_requests).await?;
info!("All system table opened");
Ok(())
}
fn next_table_id(&self) -> TableId {
self.next_table_id.fetch_add(1, Ordering::Relaxed)
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
let catalog_name = request.catalog;
let schema_name = request.schema;
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
catalog_name: &catalog_name,
})?;
let schema_provider =
catalog_provider
.schema(&schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", &catalog_name, &schema_name),
})?;
if schema_provider.table_exist(&request.table_name)? {
return TableExistsSnafu {
table: format!("{}.{}.{}", &catalog_name, &schema_name, &request.table_name),
}
.fail();
}
schema_provider.register_table(request.table_name, request.table)?;
Ok(1)
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
let mut requests = self.system_table_requests.lock().await;
requests.push(request);
Ok(())
}
fn table(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog = self
.catalog(catalog_name)?
.with_context(|| CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
schema.table(table_name)
}
}
impl CatalogList for RemoteCatalogManager {
fn as_any(&self) -> &dyn Any {
self
}
fn register_catalog(
&self,
name: String,
catalog: CatalogProviderRef,
) -> Result<Option<CatalogProviderRef>> {
let key = self.build_catalog_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let catalogs = self.catalogs.clone();
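// The trait method is synchronous: run the async KV write on a dedicated thread
// and block on it there, then mirror the change into the local catalog map.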
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
&CatalogValue {}
.to_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_catalogs = catalogs.load();
let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
new_catalogs.clone_from(&prev_catalogs);
let prev = new_catalogs.insert(name, catalog);
catalogs.store(Arc::new(new_catalogs));
Ok(prev)
})
})
.join()
.unwrap()
}
/// List all catalog names currently cached in this manager (loaded from metasrv at startup).
fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
}
/// Look up the cached catalog provider of the given name.
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
Ok(self.catalogs.load().get(name).cloned())
}
}
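/// Catalog provider backed by metasrv: schema registrations are persisted to the
/// remote KV backend and mirrored in a local `ArcSwap` map for lock-free reads.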
pub struct RemoteCatalogProvider {
catalog_name: String,
node_id: u64,
backend: KvBackendRef,
schemas: Arc<ArcSwap<HashMap<String, SchemaProviderRef>>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogProvider {
pub fn new(catalog_name: String, node_id: u64, backend: KvBackendRef) -> Self {
Self {
catalog_name,
node_id,
backend,
schemas: Default::default(),
mutex: Default::default(),
}
}
fn build_schema_key(&self, schema_name: impl AsRef<str>) -> SchemaKey {
SchemaKey {
catalog_name: self.catalog_name.clone(),
schema_name: schema_name.as_ref().to_string(),
node_id: self.node_id,
}
}
}
impl CatalogProvider for RemoteCatalogProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn schema_names(&self) -> Result<Vec<String>> {
Ok(self.schemas.load().keys().cloned().collect::<Vec<_>>())
}
fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>> {
let key = self.build_schema_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let schemas = self.schemas.clone();
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
&SchemaValue {}
.to_bytes()
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_schemas = schemas.load();
let mut new_schemas = HashMap::with_capacity(prev_schemas.len() + 1);
new_schemas.clone_from(&prev_schemas);
let prev_schema = new_schemas.insert(name, schema);
schemas.store(Arc::new(new_schemas));
Ok(prev_schema)
})
})
.join()
.unwrap()
}
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
Ok(self.schemas.load().get(name).cloned())
}
}
pub struct RemoteSchemaProvider {
catalog_name: String,
schema_name: String,
node_id: u64,
backend: KvBackendRef,
tables: Arc<ArcSwap<HashMap<String, TableRef>>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteSchemaProvider {
pub fn new(
catalog_name: String,
schema_name: String,
node_id: u64,
backend: KvBackendRef,
) -> Self {
Self {
catalog_name,
schema_name,
node_id,
backend,
tables: Default::default(),
mutex: Default::default(),
}
}
fn build_table_key(
&self,
table_name: impl AsRef<str>,
table_version: TableVersion,
) -> TableKey {
TableKey {
catalog_name: self.catalog_name.clone(),
schema_name: self.schema_name.clone(),
table_name: table_name.as_ref().to_string(),
version: table_version,
node_id: self.node_id,
}
}
}
impl SchemaProvider for RemoteSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn table_names(&self) -> Result<Vec<String>> {
Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
Ok(self.tables.load().get(name).cloned())
}
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
let table_info = table.table_info();
let table_version = table_info.ident.version;
let table_value = TableValue {
meta: table_info.meta.clone().into(),
id: table_info.ident.table_id,
node_id: self.node_id,
regions_ids: vec![],
};
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let table_key = self
.build_table_key(name.clone(), table_version)
.to_string();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
let _guard = mutex.lock().await;
backend
.set(
table_key.as_bytes(),
&table_value.as_bytes().context(InvalidCatalogValueSnafu)?,
)
.await?;
debug!(
"Successfully set catalog table entry, key: {}, table value: {:?}",
table_key, table_value
);
let prev_tables = tables.load();
let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
new_tables.clone_from(&prev_tables);
let prev = new_tables.insert(name, table);
tables.store(Arc::new(new_tables));
Ok(prev)
})
})
.join()
.unwrap();
prev
}
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
let table_version = match self.tables.load().get(name) {
None => return Ok(None),
Some(t) => t.table_info().ident.version,
};
let table_name = name.to_string();
let table_key = self.build_table_key(&table_name, table_version).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let tables = self.tables.clone();
let prev = std::thread::spawn(move || {
common_runtime::block_on_read(async move {
let _guard = mutex.lock().await;
backend.delete(table_key.as_bytes()).await?;
debug!(
"Successfully deleted catalog table entry, key: {}",
table_key
);
let prev_tables = tables.load();
let mut new_tables = HashMap::with_capacity(prev_tables.len() + 1);
new_tables.clone_from(&prev_tables);
let prev = new_tables.remove(&table_name);
tables.store(Arc::new(new_tables));
Ok(prev)
})
})
.join()
.unwrap();
prev
}
/// Checks if table exists in schema provider based on locally opened table map.
fn table_exist(&self, name: &str) -> Result<bool> {
Ok(self.tables.load().contains_key(name))
}
}

View File

@@ -12,10 +12,10 @@ pub trait SchemaProvider: Sync + Send {
fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available table names in this schema.
fn table_names(&self) -> Vec<String>;
fn table_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific table from the schema by name, provided it exists.
fn table(&self, name: &str) -> Option<TableRef>;
fn table(&self, name: &str) -> Result<Option<TableRef>>;
/// If supported by the implementation, adds a new table to this schema.
/// If a table of the same name existed before, it returns "Table already exists" error.
@@ -28,7 +28,7 @@ pub trait SchemaProvider: Sync + Send {
/// If supported by the implementation, checks the table exist in the schema provider or not.
/// If no matched table in the schema provider, return false.
/// Otherwise, return true.
fn table_exist(&self, name: &str) -> bool;
fn table_exist(&self, name: &str) -> Result<bool>;
}
pub type SchemaProviderRef = Arc<dyn SchemaProvider>;

View File

@@ -2,7 +2,13 @@ use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::{
INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
SYSTEM_CATALOG_TABLE_NAME,
};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::physical_plan::RuntimeEnv;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::timestamp::Timestamp;
@@ -13,16 +19,12 @@ use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::metadata::{TableId, TableInfoRef};
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest};
use table::{Table, TableRef};
use crate::consts::{
INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::{
CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
};
@@ -32,7 +34,7 @@ pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable {
schema: SchemaRef,
table_info: TableInfoRef,
pub table: TableRef,
}
@@ -43,7 +45,7 @@ impl Table for SystemCatalogTable {
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
self.table_info.meta.schema.clone()
}
async fn scan(
@@ -51,7 +53,7 @@ impl Table for SystemCatalogTable {
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::Result<SendableRecordBatchStream> {
) -> table::Result<PhysicalPlanRef> {
panic!("System catalog table does not support scan!")
}
@@ -59,6 +61,10 @@ impl Table for SystemCatalogTable {
async fn insert(&self, request: InsertRequest) -> table::error::Result<usize> {
self.table.insert(request).await
}
fn table_info(&self) -> TableInfoRef {
self.table_info.clone()
}
}
impl SystemCatalogTable {
@@ -77,13 +83,16 @@ impl SystemCatalogTable {
.await
.context(OpenSystemCatalogSnafu)?
{
Ok(Self { table, schema })
Ok(Self {
table_info: table.table_info(),
table,
})
} else {
// system catalog table is not yet created, try to create
let request = CreateTableRequest {
id: SYSTEM_CATALOG_TABLE_ID,
catalog_name: Some(SYSTEM_CATALOG_NAME.to_string()),
schema_name: Some(INFORMATION_SCHEMA_NAME.to_string()),
catalog_name: SYSTEM_CATALOG_NAME.to_string(),
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
desc: Some("System catalog table".to_string()),
schema: schema.clone(),
@@ -96,14 +105,23 @@ impl SystemCatalogTable {
.create_table(&ctx, request)
.await
.context(CreateSystemCatalogSnafu)?;
Ok(Self { table, schema })
let table_info = table.table_info();
Ok(Self { table, table_info })
}
}
/// Creates a stream of all entries in the system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None;
let stream = self.table.scan(&full_projection, &[], None).await.unwrap();
let scan = self
.table
.scan(&full_projection, &[], None)
.await
.context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream)
}
}
@@ -153,7 +171,7 @@ fn build_system_catalog_schema() -> Schema {
// The schema of this table must be valid.
SchemaBuilder::try_from(cols)
.unwrap()
.timestamp_index(2)
.timestamp_index(Some(2))
.build()
.unwrap()
}
@@ -320,6 +338,16 @@ pub struct TableEntryValue {
#[cfg(test)]
mod tests {
use log_store::fs::noop::NoopLogStore;
use object_store::ObjectStore;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use table::metadata::TableType;
use table::metadata::TableType::Base;
use table_engine::config::EngineConfig;
use table_engine::engine::MitoEngine;
use tempdir::TempDir;
use super::*;
#[test]
@@ -391,4 +419,43 @@ mod tests {
assert_eq!(EntryType::Table, EntryType::try_from(3).unwrap());
assert!(EntryType::try_from(4).is_err());
}
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = TempDir::new("system-table-test").unwrap();
let store_dir = dir.path().to_string_lossy();
let accessor = opendal::services::fs::Builder::default()
.root(&store_dir)
.build()
.unwrap();
let object_store = ObjectStore::new(accessor);
let table_engine = Arc::new(MitoEngine::new(
EngineConfig::default(),
EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(NoopLogStore::default()),
object_store.clone(),
),
object_store,
));
(dir, table_engine)
}
#[tokio::test]
async fn test_system_table_type() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
assert_eq!(Base, system_table.table_type());
}
#[tokio::test]
async fn test_system_table_info() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
let info = system_table.table_info();
assert_eq!(TableType::Base, info.table_type);
assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
assert_eq!(SYSTEM_CATALOG_NAME, info.catalog_name);
assert_eq!(INFORMATION_SCHEMA_NAME, info.schema_name);
}
}
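Because scan() now returns a PhysicalPlanRef, records() becomes the convenient path for reading the system catalog. A hedged sketch of draining that stream; the helper name and logging below are illustrative, not part of this change:

use common_telemetry::debug;
use futures_util::StreamExt;

async fn dump_system_catalog(table: &SystemCatalogTable) -> Result<()> {
    let mut stream = table.records().await?;
    while let Some(batch) = stream.next().await {
        // each item wraps a DataFusion record batch
        let batch = batch.unwrap();
        debug!("system catalog batch with {} rows", batch.df_recordbatch.num_rows());
    }
    Ok(())
}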

View File

@@ -6,9 +6,12 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use async_stream::stream;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use common_error::ext::BoxedError;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{RecordBatch, RecordBatchStream};
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
@@ -16,11 +19,12 @@ use datatypes::vectors::VectorRef;
use futures::Stream;
use snafu::ResultExt;
use table::engine::TableEngineRef;
use table::metadata::TableId;
use table::error::TablesRecordBatchSnafu;
use table::metadata::{TableId, TableInfoRef};
use table::table::scan::SimpleTableScan;
use table::{Table, TableRef};
use crate::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use crate::error::InsertTableRecordSnafu;
use crate::error::{Error, InsertTableRecordSnafu};
use crate::system::{build_table_insert_request, SystemCatalogTable};
use crate::{
format_full_table_name, CatalogListRef, CatalogProvider, SchemaProvider, SchemaProviderRef,
@@ -53,23 +57,53 @@ impl Table for Tables {
self.schema.clone()
}
fn table_info(&self) -> TableInfoRef {
unreachable!("Tables does not support table_info method")
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::error::Result<SendableRecordBatchStream> {
) -> table::error::Result<PhysicalPlanRef> {
let catalogs = self.catalogs.clone();
let schema_ref = self.schema.clone();
let engine_name = self.engine_name.clone();
let stream = stream!({
for catalog_name in catalogs.catalog_names() {
let catalog = catalogs.catalog(&catalog_name).unwrap();
for schema_name in catalog.schema_names() {
let mut tables_in_schema = Vec::with_capacity(catalog.schema_names().len());
let schema = catalog.schema(&schema_name).unwrap();
for table_name in schema.table_names() {
for catalog_name in catalogs
.catalog_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
let catalog = catalogs
.catalog(&catalog_name)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.unwrap();
for schema_name in catalog
.schema_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
let mut tables_in_schema = Vec::with_capacity(
catalog
.schema_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.len(),
);
let schema = catalog
.schema(&schema_name)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
.unwrap();
for table_name in schema
.table_names()
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)?
{
tables_in_schema.push(table_name);
}
@@ -85,10 +119,11 @@ impl Table for Tables {
}
});
Ok(Box::pin(TablesRecordBatchStream {
let stream = Box::pin(TablesRecordBatchStream {
schema: self.schema.clone(),
stream: Box::pin(stream),
}))
});
Ok(Arc::new(SimpleTableScan::new(stream)))
}
}
@@ -152,17 +187,20 @@ impl SchemaProvider for InformationSchema {
self
}
fn table_names(&self) -> Vec<String> {
vec!["tables".to_string(), SYSTEM_CATALOG_TABLE_NAME.to_string()]
fn table_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![
"tables".to_string(),
SYSTEM_CATALOG_TABLE_NAME.to_string(),
])
}
fn table(&self, name: &str) -> Option<TableRef> {
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
if name.eq_ignore_ascii_case("tables") {
Some(self.tables.clone())
Ok(Some(self.tables.clone()))
} else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
Some(self.system.clone())
Ok(Some(self.system.clone()))
} else {
None
Ok(None)
}
}
@@ -178,8 +216,9 @@ impl SchemaProvider for InformationSchema {
panic!("System catalog & schema does not support deregister table")
}
fn table_exist(&self, name: &str) -> bool {
name.eq_ignore_ascii_case("tables") || name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME)
fn table_exist(&self, name: &str) -> Result<bool, Error> {
Ok(name.eq_ignore_ascii_case("tables")
|| name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
}
}
@@ -224,23 +263,23 @@ impl CatalogProvider for SystemCatalog {
self
}
fn schema_names(&self) -> Vec<String> {
vec![INFORMATION_SCHEMA_NAME.to_string()]
fn schema_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![INFORMATION_SCHEMA_NAME.to_string()])
}
fn register_schema(
&self,
_name: String,
_schema: SchemaProviderRef,
) -> Option<SchemaProviderRef> {
) -> Result<Option<SchemaProviderRef>, Error> {
panic!("System catalog does not support registering schema!")
}
fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>, Error> {
if name.eq_ignore_ascii_case(INFORMATION_SCHEMA_NAME) {
Some(self.information_schema.clone())
Ok(Some(self.information_schema.clone()))
} else {
None
Ok(None)
}
}
}
@@ -273,13 +312,16 @@ fn build_schema_for_tables() -> Schema {
#[cfg(test)]
mod tests {
use common_query::physical_plan::RuntimeEnv;
use datatypes::arrow::array::Utf8Array;
use datatypes::arrow::datatypes::DataType;
use futures_util::StreamExt;
use table::table::numbers::NumbersTable;
use super::*;
use crate::memory::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};
use crate::local::memory::{
new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider,
};
use crate::CatalogList;
#[tokio::test]
@@ -290,11 +332,19 @@ mod tests {
schema
.register_table("test_table".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
catalog_provider.register_schema("test_schema".to_string(), schema);
catalog_list.register_catalog("test_catalog".to_string(), catalog_provider);
catalog_provider
.register_schema("test_schema".to_string(), schema)
.unwrap();
catalog_list
.register_catalog("test_catalog".to_string(), catalog_provider)
.unwrap();
let tables = Tables::new(catalog_list, "test_engine".to_string());
let mut tables_stream = tables.scan(&None, &[], None).await.unwrap();
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
let mut tables_stream = tables_stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
if let Some(t) = tables_stream.next().await {
let batch = t.unwrap().df_recordbatch;
assert_eq!(1, batch.num_rows());

src/catalog/tests/mock.rs Normal file
View File

@@ -0,0 +1,163 @@
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;
use async_stream::stream;
use catalog::error::Error;
use catalog::remote::{Kv, KvBackend, ValueIter};
use common_recordbatch::RecordBatch;
use common_telemetry::logging::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use serde::Serializer;
use table::engine::{EngineContext, TableEngine};
use table::metadata::TableId;
use table::requests::{AlterTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest};
use table::test_util::MemTable;
use table::TableRef;
use tokio::sync::RwLock;
#[derive(Default)]
pub struct MockKvBackend {
map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
}
impl Display for MockKvBackend {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
futures::executor::block_on(async {
let map = self.map.read().await;
for (k, v) in map.iter() {
f.serialize_str(&String::from_utf8_lossy(k))?;
f.serialize_str(" -> ")?;
f.serialize_str(&String::from_utf8_lossy(v))?;
f.serialize_str("\n")?;
}
Ok(())
})
}
}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
let prefix = key.to_vec();
let prefix_string = String::from_utf8_lossy(&prefix).to_string();
Box::pin(stream!({
let maps = self.map.read().await.clone();
for (k, v) in maps.range(prefix.clone()..) {
let key_string = String::from_utf8_lossy(k).to_string();
let matches = key_string.starts_with(&prefix_string);
if matches {
yield Ok(Kv(k.clone(), v.clone()))
} else {
info!("Stream finished");
return;
}
}
}))
}
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
let mut map = self.map.write().await;
map.insert(key.to_vec(), val.to_vec());
Ok(())
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let start = key.to_vec();
let end = end.to_vec();
let range = start..end;
let mut map = self.map.write().await;
map.retain(|k, _| !range.contains(k));
Ok(())
}
}
#[derive(Default)]
pub struct MockTableEngine {
tables: RwLock<HashMap<String, TableRef>>,
}
#[async_trait::async_trait]
impl TableEngine for MockTableEngine {
fn name(&self) -> &str {
"MockTableEngine"
}
/// Create a table with only one column
async fn create_table(
&self,
_ctx: &EngineContext,
request: CreateTableRequest,
) -> table::Result<TableRef> {
let table_name = request.table_name.clone();
let catalog_name = request.catalog_name.clone();
let schema_name = request.schema_name.clone();
let default_table_id = "0".to_owned();
let table_id = TableId::from_str(
request
.table_options
.get("table_id")
.unwrap_or(&default_table_id),
)
.unwrap();
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]));
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap();
let table: TableRef = Arc::new(MemTable::new_with_catalog(
&table_name,
record_batch,
table_id,
catalog_name,
schema_name,
)) as Arc<_>;
let mut tables = self.tables.write().await;
tables.insert(table_name, table.clone() as TableRef);
Ok(table)
}
async fn open_table(
&self,
_ctx: &EngineContext,
request: OpenTableRequest,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().await.get(&request.table_name).cloned())
}
async fn alter_table(
&self,
_ctx: &EngineContext,
_request: AlterTableRequest,
) -> table::Result<TableRef> {
unimplemented!()
}
fn get_table(&self, _ctx: &EngineContext, name: &str) -> table::Result<Option<TableRef>> {
futures::executor::block_on(async { Ok(self.tables.read().await.get(name).cloned()) })
}
fn table_exists(&self, _ctx: &EngineContext, name: &str) -> bool {
futures::executor::block_on(async { self.tables.read().await.contains_key(name) })
}
async fn drop_table(
&self,
_ctx: &EngineContext,
_request: DropTableRequest,
) -> table::Result<()> {
unimplemented!()
}
}

View File

@@ -0,0 +1,274 @@
#![feature(assert_matches)]
mod mock;
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::HashSet;
use std::sync::Arc;
use catalog::remote::{
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
};
use catalog::{CatalogManager, CatalogManagerRef, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use datatypes::schema::Schema;
use futures_util::StreamExt;
use table::engine::{EngineContext, TableEngineRef};
use table::requests::CreateTableRequest;
use crate::mock::{MockKvBackend, MockTableEngine};
#[tokio::test]
async fn test_backend() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let backend = MockKvBackend::default();
let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
node_id,
}
.to_string();
backend
.set(
default_catalog_key.as_bytes(),
&CatalogValue {}.to_bytes().unwrap(),
)
.await
.unwrap();
let schema_key = SchemaKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
node_id,
}
.to_string();
backend
.set(schema_key.as_bytes(), &SchemaValue {}.to_bytes().unwrap())
.await
.unwrap();
let mut iter = backend.range("__c-".as_bytes());
let mut res = HashSet::new();
while let Some(r) = iter.next().await {
let kv = r.unwrap();
res.insert(String::from_utf8_lossy(&kv.0).to_string());
}
assert_eq!(
vec!["__c-greptime-42".to_string()],
res.into_iter().collect::<Vec<_>>()
);
}
async fn prepare_components(node_id: u64) -> (KvBackendRef, TableEngineRef, CatalogManagerRef) {
let backend = Arc::new(MockKvBackend::default()) as KvBackendRef;
let table_engine = Arc::new(MockTableEngine::default());
let catalog_manager =
RemoteCatalogManager::new(table_engine.clone(), node_id, backend.clone());
catalog_manager.start().await.unwrap();
(backend, table_engine, Arc::new(catalog_manager))
}
#[tokio::test]
async fn test_remote_catalog_default() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let (_, _, catalog_manager) = prepare_components(node_id).await;
assert_eq!(
vec![DEFAULT_CATALOG_NAME.to_string()],
catalog_manager.catalog_names().unwrap()
);
let default_catalog = catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap();
assert_eq!(
vec![DEFAULT_SCHEMA_NAME.to_string()],
default_catalog.schema_names().unwrap()
);
}
#[tokio::test]
async fn test_remote_catalog_register_nonexistent() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
// register a new table with a nonexistent catalog
let catalog_name = "nonexistent_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
let table_name = "fail_table".to_string();
// this schema has no effect
let table_schema = Arc::new(Schema::new(vec![]));
let table = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: 1,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name,
table_id: 1,
table,
};
let res = catalog_manager.register_table(reg_req).await;
// registration must fail because nonexistent_catalog does not exist yet.
assert_matches!(
res.err().unwrap(),
catalog::error::Error::CatalogNotFound { .. }
);
}
#[tokio::test]
async fn test_register_table() {
let node_id = 42;
let (_, table_engine, catalog_manager) = prepare_components(node_id).await;
let default_catalog = catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.unwrap();
assert_eq!(
vec![DEFAULT_SCHEMA_NAME.to_string()],
default_catalog.schema_names().unwrap()
);
let default_schema = default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap();
assert_eq!(Vec::<String>::new(), default_schema.table_names().unwrap());
// register a new table under the default catalog and schema
let catalog_name = DEFAULT_CATALOG_NAME.to_string();
let schema_name = DEFAULT_SCHEMA_NAME.to_string();
let table_name = "test_table".to_string();
let table_id = 1;
// this schema has no effect
let table_schema = Arc::new(Schema::new(vec![]));
let table = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: table_id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema.clone(),
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name: table_name.clone(),
table_id,
table,
};
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(vec![table_name], default_schema.table_names().unwrap());
}
#[tokio::test]
async fn test_register_catalog_schema_table() {
let node_id = 42;
let (backend, table_engine, catalog_manager) = prepare_components(node_id).await;
let catalog_name = "test_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
let catalog = Arc::new(RemoteCatalogProvider::new(
catalog_name.clone(),
node_id,
backend.clone(),
));
// register catalog to catalog manager
catalog_manager
.register_catalog(catalog_name.clone(), catalog)
.unwrap();
assert_eq!(
HashSet::<String>::from_iter(
vec![DEFAULT_CATALOG_NAME.to_string(), catalog_name.clone()].into_iter()
),
HashSet::from_iter(catalog_manager.catalog_names().unwrap().into_iter())
);
let table_to_register = table_engine
.create_table(
&EngineContext {},
CreateTableRequest {
id: 2,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: "".to_string(),
desc: None,
schema: Arc::new(Schema::new(vec![])),
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: " fail_table".to_string(),
table_id: 2,
table: table_to_register,
};
// this register will fail since schema does not exist yet
assert_matches!(
catalog_manager
.register_table(reg_req.clone())
.await
.unwrap_err(),
catalog::error::Error::SchemaNotFound { .. }
);
let new_catalog = catalog_manager
.catalog(&catalog_name)
.unwrap()
.expect("catalog should exist since it's already registered");
let schema = Arc::new(RemoteSchemaProvider::new(
catalog_name.clone(),
schema_name.clone(),
node_id,
backend.clone(),
));
let prev = new_catalog
.register_schema(schema_name.clone(), schema.clone())
.expect("Register schema should not fail");
assert!(prev.is_none());
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(
HashSet::from([schema_name.clone()]),
new_catalog.schema_names().unwrap().into_iter().collect()
)
}
}

View File

@@ -7,20 +7,34 @@ edition = "2021"
[dependencies]
api = { path = "../api" }
async-stream = "0.3"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../datatypes" }
enum_dispatch = "0.3"
parking_lot = "0.12"
rand = "0.8"
snafu = { version = "0.7", features = ["backtraces"] }
tonic = "0.8"
[dev-dependencies]
datanode = { path = "../datanode" }
substrait = { path = "../common/substrait" }
tokio = { version = "1.0", features = ["full"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# TODO(ruihang): upgrade to 0.11 once substrait-rs supports it.
[dev-dependencies.prost_09]
package = "prost"
version = "0.9"
[dev-dependencies.substrait_proto]
package = "substrait"
version = "0.2"

View File

@@ -1,3 +1,5 @@
use std::collections::HashMap;
use api::v1::{codec::InsertBatch, *};
use client::{Client, Database};
@@ -10,7 +12,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let expr = InsertExpr {
@@ -18,6 +20,7 @@ async fn run() {
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: insert_batches(),
})),
options: HashMap::default(),
};
db.insert(expr).await.unwrap();
}

View File

@@ -0,0 +1,96 @@
use api::v1::{ColumnDataType, ColumnDef, CreateExpr};
use client::{admin::Admin, Client, Database};
use prost_09::Message;
use substrait_proto::protobuf::{
plan_rel::RelType as PlanRelType,
read_rel::{NamedTable, ReadType},
rel::RelType,
PlanRel, ReadRel, Rel,
};
use tracing::{event, Level};
fn main() {
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
.unwrap();
run();
}
#[tokio::main]
async fn run() {
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let create_table_expr = CreateExpr {
catalog_name: Some("greptime".to_string()),
schema_name: Some("public".to_string()),
table_name: "test_logical_dist_exec".to_string(),
desc: None,
column_defs: vec![
ColumnDef {
name: "timestamp".to_string(),
datatype: ColumnDataType::Timestamp as i32,
is_nullable: false,
default_constraint: None,
},
ColumnDef {
name: "key".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
},
ColumnDef {
name: "value".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
},
],
time_index: "timestamp".to_string(),
primary_keys: vec!["key".to_string()],
create_if_not_exists: false,
table_options: Default::default(),
};
let admin = Admin::new("create table", client.clone());
let result = admin.create(create_table_expr).await.unwrap();
event!(Level::INFO, "create table result: {:#?}", result);
let logical = mock_logical_plan();
event!(Level::INFO, "plan size: {:#?}", logical.len());
let db = Database::new("greptime", client);
let result = db.logical_plan(logical).await.unwrap();
event!(Level::INFO, "result: {:#?}", result);
}
fn mock_logical_plan() -> Vec<u8> {
let catalog_name = "greptime".to_string();
let schema_name = "public".to_string();
let table_name = "test_logical_dist_exec".to_string();
let named_table = NamedTable {
names: vec![catalog_name, schema_name, table_name],
advanced_extension: None,
};
let read_type = ReadType::NamedTable(named_table);
let read_rel = ReadRel {
common: None,
base_schema: None,
filter: None,
projection: None,
advanced_extension: None,
read_type: Some(read_type),
};
let mut buf = vec![];
let rel = Rel {
rel_type: Some(RelType::Read(Box::new(read_rel))),
};
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
plan_rel.encode(&mut buf).unwrap();
buf
}

View File

@@ -16,7 +16,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let physical = mock_physical_plan();

View File

@@ -10,7 +10,7 @@ fn main() {
#[tokio::main]
async fn run() {
let client = Client::connect("http://127.0.0.1:3001").await.unwrap();
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let sql = Select::Sql("select * from demo".to_string());

View File

@@ -22,10 +22,6 @@ impl Admin {
}
}
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
self.client.start(url).await
}
pub async fn create(&self, expr: CreateExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,

View File

@@ -1,47 +1,96 @@
use api::v1::{greptime_client::GreptimeClient, *};
use snafu::{OptionExt, ResultExt};
use std::sync::Arc;
use api::v1::greptime_client::GreptimeClient;
use api::v1::*;
use common_grpc::channel_manager::ChannelManager;
use parking_lot::RwLock;
use snafu::OptionExt;
use snafu::ResultExt;
use tonic::transport::Channel;
use crate::error;
use crate::load_balance::LoadBalance;
use crate::load_balance::Loadbalancer;
use crate::Result;
#[derive(Clone, Debug, Default)]
pub struct Client {
client: Option<GreptimeClient<Channel>>,
inner: Arc<Inner>,
}
#[derive(Debug, Default)]
struct Inner {
channel_manager: ChannelManager,
peers: Arc<RwLock<Vec<String>>>,
load_balance: Loadbalancer,
}
impl Inner {
fn with_manager(channel_manager: ChannelManager) -> Self {
Self {
channel_manager,
..Default::default()
}
}
fn set_peers(&self, peers: Vec<String>) {
let mut guard = self.peers.write();
*guard = peers;
}
fn get_peer(&self) -> Option<String> {
let guard = self.peers.read();
self.load_balance.get_peer(&guard).cloned()
}
}
impl Client {
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
match self.client.as_ref() {
None => {
let url = url.into();
let client = GreptimeClient::connect(url.clone())
.await
.context(error::ConnectFailedSnafu { url })?;
self.client = Some(client);
Ok(())
}
Some(_) => error::IllegalGrpcClientStateSnafu {
err_msg: "already started",
}
.fail(),
}
pub fn new() -> Self {
Default::default()
}
pub fn with_client(client: GreptimeClient<Channel>) -> Self {
pub fn with_manager(channel_manager: ChannelManager) -> Self {
let inner = Arc::new(Inner::with_manager(channel_manager));
Self { inner }
}
pub fn with_urls<U, A>(urls: A) -> Self
where
U: AsRef<str>,
A: AsRef<[U]>,
{
Self::with_manager_and_urls(ChannelManager::new(), urls)
}
pub fn with_manager_and_urls<U, A>(channel_manager: ChannelManager, urls: A) -> Self
where
U: AsRef<str>,
A: AsRef<[U]>,
{
let inner = Inner::with_manager(channel_manager);
let urls: Vec<String> = urls
.as_ref()
.iter()
.map(|peer| peer.as_ref().to_string())
.collect();
inner.set_peers(urls);
Self {
client: Some(client),
inner: Arc::new(inner),
}
}
pub async fn connect(url: impl Into<String>) -> Result<Self> {
let url = url.into();
let client = GreptimeClient::connect(url.clone())
.await
.context(error::ConnectFailedSnafu { url })?;
Ok(Self {
client: Some(client),
})
pub fn start<U, A>(&self, urls: A)
where
U: AsRef<str>,
A: AsRef<[U]>,
{
let urls: Vec<String> = urls
.as_ref()
.iter()
.map(|peer| peer.as_ref().to_string())
.collect();
self.inner.set_peers(urls);
}
pub async fn admin(&self, req: AdminRequest) -> Result<AdminResponse> {
@@ -73,18 +122,59 @@ impl Client {
}
pub async fn batch(&self, req: BatchRequest) -> Result<BatchResponse> {
if let Some(client) = self.client.as_ref() {
let res = client
.clone()
.batch(req)
.await
.context(error::TonicStatusSnafu)?;
Ok(res.into_inner())
} else {
error::IllegalGrpcClientStateSnafu {
err_msg: "not started",
}
.fail()
let peer = self
.inner
.get_peer()
.context(error::IllegalGrpcClientStateSnafu {
err_msg: "No available peer found",
})?;
let mut client = self.make_client(peer)?;
let result = client.batch(req).await.context(error::TonicStatusSnafu)?;
Ok(result.into_inner())
}
fn make_client(&self, addr: impl AsRef<str>) -> Result<GreptimeClient<Channel>> {
let addr = addr.as_ref();
let channel = self
.inner
.channel_manager
.get(addr)
.context(error::CreateChannelSnafu { addr })?;
Ok(GreptimeClient::new(channel))
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::Inner;
use crate::load_balance::Loadbalancer;
fn mock_peers() -> Vec<String> {
vec![
"127.0.0.1:3001".to_string(),
"127.0.0.1:3002".to_string(),
"127.0.0.1:3003".to_string(),
]
}
#[test]
fn test_inner() {
let inner = Inner::default();
assert!(matches!(
inner.load_balance,
Loadbalancer::Random(crate::load_balance::Random)
));
assert!(inner.get_peer().is_none());
let peers = mock_peers();
inner.set_peers(peers.clone());
let all: HashSet<String> = peers.into_iter().collect();
for _ in 0..20 {
assert!(all.contains(&inner.get_peer().unwrap()));
}
}
}
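A hedged usage sketch of the rewritten client: peers are supplied up front and each request picks one through the load balancer. The ChannelConfig import path is assumed to sit next to ChannelManager in common-grpc.

use std::time::Duration;

use client::{Client, Database};
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};

fn build_database() -> Database {
    let config = ChannelConfig::new()
        .timeout(Duration::from_secs(3))
        .connect_timeout(Duration::from_secs(1));
    let manager = ChannelManager::with_config(config);
    // channels are created lazily; a peer is chosen per request by the load balancer
    let client = Client::with_manager_and_urls(manager, ["127.0.0.1:3001", "127.0.0.1:3002"]);
    Database::new("greptime", client)
}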

View File

@@ -23,10 +23,7 @@ use snafu::{ensure, OptionExt, ResultExt};
use crate::error;
use crate::{
error::{
ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
MissingFieldSnafu,
},
error::{ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu},
Client, Result,
};
@@ -46,10 +43,6 @@ impl Database {
}
}
pub async fn start(&mut self, url: impl Into<String>) -> Result<()> {
self.client.start(url).await
}
pub fn name(&self) -> &str {
&self.name
}
@@ -65,6 +58,24 @@ impl Database {
self.object(expr).await?.try_into()
}
pub async fn batch_insert(&self, insert_exprs: Vec<InsertExpr>) -> Result<Vec<ObjectResult>> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let obj_exprs = insert_exprs
.into_iter()
.map(|expr| ObjectExpr {
header: Some(header.clone()),
expr: Some(object_expr::Expr::Insert(expr)),
})
.collect();
self.objects(obj_exprs)
.await?
.into_iter()
.map(|result| result.try_into())
.collect()
}
pub async fn select(&self, expr: Select) -> Result<ObjectResult> {
let select_expr = match expr {
Select::Sql(sql) => SelectExpr {
@@ -92,6 +103,13 @@ impl Database {
self.do_select(select_expr).await
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
};
self.do_select(select_expr).await
}
async fn do_select(&self, select_expr: SelectExpr) -> Result<ObjectResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
@@ -222,12 +240,8 @@ impl TryFrom<ObjectResult> for Output {
}
fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
let wrapper = ColumnDataTypeWrapper::try_new(
column
.datatype
.context(MissingFieldSnafu { field: "datatype" })?,
)
.context(error::ColumnDataTypeSnafu)?;
let wrapper =
ColumnDataTypeWrapper::try_new(column.datatype).context(error::ColumnDataTypeSnafu)?;
let column_datatype = wrapper.datatype();
let rows = rows as usize;
@@ -330,7 +344,7 @@ mod tests {
#[test]
fn test_column_to_vector() {
let mut column = create_test_column(Arc::new(BooleanVector::from(vec![true])));
column.datatype = Some(-100);
column.datatype = -100;
let result = column_to_vector(&column, 1);
assert!(result.is_err());
assert_eq!(
@@ -408,7 +422,7 @@ mod tests {
semantic_type: 1,
values: Some(values(&[array.clone()]).unwrap()),
null_mask: null_mask(&vec![array], vector.len()),
datatype: Some(wrapper.datatype() as i32),
datatype: wrapper.datatype() as i32,
}
}
}
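The new batch_insert helper wraps each InsertExpr in an ObjectExpr and submits them together. A minimal sketch (hypothetical wrapper; the exprs are assumed to be built the same way as in the insert example earlier in this change set):

use api::v1::InsertExpr;
use client::Database;

async fn insert_all(db: &Database, exprs: Vec<InsertExpr>) {
    // one ObjectResult comes back per submitted InsertExpr
    let results = db.batch_insert(exprs).await.unwrap();
    println!("{} inserts acknowledged", results.len());
}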

View File

@@ -85,6 +85,17 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Failed to create gRPC channel, peer address: {}, source: {}",
addr,
source
))]
CreateChannel {
addr: String,
#[snafu(backtrace)]
source: common_grpc::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -107,6 +118,7 @@ impl ErrorExt for Error {
source.status_code()
}
Error::CreateRecordBatches { source } => source.status_code(),
Error::CreateChannel { source, .. } => source.status_code(),
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
}
}

View File

@@ -2,6 +2,7 @@ pub mod admin;
mod client;
mod database;
mod error;
pub mod load_balance;
pub use self::{
client::Client,

View File

@@ -0,0 +1,52 @@
use enum_dispatch::enum_dispatch;
use rand::seq::SliceRandom;
#[enum_dispatch]
pub trait LoadBalance {
fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String>;
}
#[enum_dispatch(LoadBalance)]
#[derive(Debug)]
pub enum Loadbalancer {
Random,
}
impl Default for Loadbalancer {
fn default() -> Self {
Loadbalancer::from(Random)
}
}
#[derive(Debug)]
pub struct Random;
impl LoadBalance for Random {
fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
peers.choose(&mut rand::thread_rng())
}
}
#[cfg(test)]
mod tests {
use std::collections::HashSet;
use super::{LoadBalance, Random};
#[test]
fn test_random_lb() {
let peers = vec![
"127.0.0.1:3001".to_string(),
"127.0.0.1:3002".to_string(),
"127.0.0.1:3003".to_string(),
"127.0.0.1:3004".to_string(),
];
let all: HashSet<String> = peers.clone().into_iter().collect();
let random = Random;
for _ in 0..100 {
let peer = random.get_peer(&peers).unwrap();
assert!(all.contains(peer));
}
}
}
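A short sketch of how the dispatch works end to end: the Loadbalancer enum implements LoadBalance via enum_dispatch, so a caller (like the client's Inner) can hold the enum and still call get_peer. The free function below is illustrative only.

use client::load_balance::{LoadBalance, Loadbalancer};

fn pick_peer(peers: &[String]) -> Option<String> {
    let lb = Loadbalancer::default(); // currently Loadbalancer::Random
    lb.get_peer(peers).cloned()
}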

View File

@@ -14,6 +14,7 @@ common-telemetry = { path = "../common/telemetry", features = ["deadlock_detecti
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
meta-srv = { path = "../meta-srv" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"

View File

@@ -4,7 +4,9 @@ use clap::Parser;
use cmd::datanode;
use cmd::error::Result;
use cmd::frontend;
use common_telemetry::{self, logging::error, logging::info};
use cmd::metasrv;
use common_telemetry::logging::error;
use common_telemetry::logging::info;
#[derive(Parser)]
#[clap(name = "greptimedb")]
@@ -29,6 +31,8 @@ enum SubCommand {
Datanode(datanode::Command),
#[clap(name = "frontend")]
Frontend(frontend::Command),
#[clap(name = "metasrv")]
Metasrv(metasrv::Command),
}
impl SubCommand {
@@ -36,6 +40,7 @@ impl SubCommand {
match self {
SubCommand::Datanode(cmd) => cmd.run().await,
SubCommand::Frontend(cmd) => cmd.run().await,
SubCommand::Metasrv(cmd) => cmd.run().await,
}
}
}
@@ -45,6 +50,7 @@ impl fmt::Display for SubCommand {
match self {
SubCommand::Datanode(..) => write!(f, "greptime-datanode"),
SubCommand::Frontend(..) => write!(f, "greptime-frontend"),
SubCommand::Metasrv(..) => write!(f, "greptime-metasrv"),
}
}
}

View File

@@ -17,14 +17,24 @@ pub enum Error {
source: frontend::error::Error,
},
#[snafu(display("Failed to start meta server, source: {}", source))]
StartMetaServer {
#[snafu(backtrace)]
source: meta_srv::error::Error,
},
#[snafu(display("Failed to read config file: {}, source: {}", path, source))]
ReadConfig {
source: std::io::Error,
path: String,
source: std::io::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to parse config, source: {}", source))]
ParseConfig { source: toml::de::Error },
ParseConfig {
source: toml::de::Error,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -34,6 +44,7 @@ impl ErrorExt for Error {
match self {
Error::StartDatanode { source } => source.status_code(),
Error::StartFrontend { source } => source.status_code(),
Error::StartMetaServer { source } => source.status_code(),
Error::ReadConfig { .. } | Error::ParseConfig { .. } => StatusCode::InvalidArguments,
}
}
@@ -51,18 +62,68 @@ impl ErrorExt for Error {
mod tests {
use super::*;
fn raise_read_config_error() -> std::result::Result<(), std::io::Error> {
Err(std::io::ErrorKind::NotFound.into())
type StdResult<E> = std::result::Result<(), E>;
#[test]
fn test_start_node_error() {
fn throw_datanode_error() -> StdResult<datanode::error::Error> {
datanode::error::MissingFieldSnafu {
field: "test_field",
}
.fail()
}
let e = throw_datanode_error()
.context(StartDatanodeSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_error() {
let e = raise_read_config_error()
fn test_start_frontend_error() {
fn throw_frontend_error() -> StdResult<frontend::error::Error> {
frontend::error::InvalidSqlSnafu { err_msg: "failed" }.fail()
}
let e = throw_frontend_error()
.context(StartFrontendSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_start_metasrv_error() {
fn throw_metasrv_error() -> StdResult<meta_srv::error::Error> {
meta_srv::error::StreamNoneSnafu {}.fail()
}
let e = throw_metasrv_error()
.context(StartMetaServerSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_read_config_error() {
fn throw_read_config_error() -> StdResult<std::io::Error> {
Err(std::io::ErrorKind::NotFound.into())
}
let e = throw_read_config_error()
.context(ReadConfigSnafu { path: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_none());
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
}

View File

@@ -1,6 +1,8 @@
use clap::Parser;
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
@@ -55,7 +57,7 @@ struct StartCommand {
impl StartCommand {
async fn run(self) -> Result<()> {
let opts = self.try_into()?;
let mut frontend = Frontend::new(opts);
let mut frontend = Frontend::new(opts, Instance::new());
frontend.start().await.context(error::StartFrontendSnafu)
}
}
@@ -74,7 +76,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
opts.http_addr = Some(addr);
}
if let Some(addr) = cmd.grpc_addr {
opts.grpc_addr = Some(addr);
opts.grpc_options = Some(GrpcOptions {
addr,
..Default::default()
});
}
if let Some(addr) = cmd.mysql_addr {
opts.mysql_options = Some(MysqlOptions {
@@ -130,7 +135,10 @@ mod tests {
);
let default_opts = FrontendOptions::default();
assert_eq!(opts.grpc_addr, default_opts.grpc_addr);
assert_eq!(
opts.grpc_options.unwrap().addr,
default_opts.grpc_options.unwrap().addr
);
assert_eq!(
opts.mysql_options.as_ref().unwrap().runtime_size,
default_opts.mysql_options.as_ref().unwrap().runtime_size

View File

@@ -1,4 +1,5 @@
pub mod datanode;
pub mod error;
pub mod frontend;
pub mod metasrv;
mod toml_loader;

src/cmd/src/metasrv.rs Normal file
View File

@@ -0,0 +1,122 @@
use clap::Parser;
use common_telemetry::logging;
use meta_srv::bootstrap;
use meta_srv::metasrv::MetaSrvOptions;
use snafu::ResultExt;
use crate::error;
use crate::error::Error;
use crate::error::Result;
use crate::toml_loader;
#[derive(Parser)]
pub struct Command {
#[clap(subcommand)]
subcmd: SubCommand,
}
impl Command {
pub async fn run(self) -> Result<()> {
self.subcmd.run().await
}
}
#[derive(Parser)]
enum SubCommand {
Start(StartCommand),
}
impl SubCommand {
async fn run(self) -> Result<()> {
match self {
SubCommand::Start(cmd) => cmd.run().await,
}
}
}
#[derive(Debug, Parser)]
struct StartCommand {
#[clap(long)]
bind_addr: Option<String>,
#[clap(long)]
server_addr: Option<String>,
#[clap(long)]
store_addr: Option<String>,
#[clap(short, long)]
config_file: Option<String>,
}
impl StartCommand {
async fn run(self) -> Result<()> {
logging::info!("MetaSrv start command: {:#?}", self);
let opts: MetaSrvOptions = self.try_into()?;
logging::info!("MetaSrv options: {:#?}", opts);
bootstrap::bootstrap_meta_srv(opts)
.await
.context(error::StartMetaServerSnafu)
}
}
impl TryFrom<StartCommand> for MetaSrvOptions {
type Error = Error;
fn try_from(cmd: StartCommand) -> Result<Self> {
let mut opts: MetaSrvOptions = if let Some(path) = cmd.config_file {
toml_loader::from_file!(&path)?
} else {
MetaSrvOptions::default()
};
if let Some(addr) = cmd.bind_addr {
opts.bind_addr = addr;
}
if let Some(addr) = cmd.server_addr {
opts.server_addr = addr;
}
if let Some(addr) = cmd.store_addr {
opts.store_addr = addr;
}
Ok(opts)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_read_from_cmd() {
let cmd = StartCommand {
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("0.0.0.0:3002".to_string()),
store_addr: Some("127.0.0.1:2380".to_string()),
config_file: None,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
}
#[test]
fn test_read_from_config_file() {
let cmd = StartCommand {
bind_addr: None,
server_addr: None,
store_addr: None,
config_file: Some(format!(
"{}/../../config/metasrv.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
};
let options: MetaSrvOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3002".to_string(), options.bind_addr);
assert_eq!("0.0.0.0:3002".to_string(), options.server_addr);
assert_eq!("127.0.0.1:2380".to_string(), options.store_addr);
assert_eq!(30, options.datanode_lease_secs);
}
}

View File

@@ -0,0 +1,22 @@
[package]
name = "common-catalog"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datatypes = { path = "../../datatypes" }
lazy_static = "1.4"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dev-dependencies]
chrono = "0.4"
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }

View File

@@ -11,3 +11,7 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
/// scripts table id
pub const SCRIPTS_TABLE_ID: u32 = 1;
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_KEY_PREFIX: &str = "__t";

View File

@@ -0,0 +1,49 @@
use std::any::Any;
use common_error::ext::ErrorExt;
use common_error::prelude::{Snafu, StatusCode};
use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Invalid catalog info: {}", key))]
InvalidCatalog { key: String, backtrace: Backtrace },
#[snafu(display("Failed to deserialize catalog entry value: {}", raw))]
DeserializeCatalogEntryValue {
raw: String,
backtrace: Backtrace,
source: serde_json::error::Error,
},
#[snafu(display("Failed to serialize catalog entry value"))]
SerializeCatalogEntryValue {
backtrace: Backtrace,
source: serde_json::error::Error,
},
#[snafu(display("Failed to parse node id: {}", key))]
ParseNodeId { key: String, backtrace: Backtrace },
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::InvalidCatalog { .. }
| Error::DeserializeCatalogEntryValue { .. }
| Error::SerializeCatalogEntryValue { .. } => StatusCode::Unexpected,
Error::ParseNodeId { .. } => StatusCode::InvalidArguments,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -0,0 +1,293 @@
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableMeta, TableId, TableVersion};
use crate::consts::{CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_KEY_PREFIX};
use crate::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, ParseNodeIdSnafu,
SerializeCatalogEntryValueSnafu,
};
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex =
Regex::new(&format!("^{}-([a-zA-Z_]+)-([0-9]+)$", CATALOG_KEY_PREFIX)).unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)$",
SCHEMA_KEY_PREFIX
))
.unwrap();
}
lazy_static! {
static ref TABLE_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z_]+)-([0-9]+)-([0-9]+)$",
TABLE_KEY_PREFIX
))
.unwrap();
}
pub fn build_catalog_prefix() -> String {
format!("{}-", CATALOG_KEY_PREFIX)
}
pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
}
pub fn build_table_prefix(catalog_name: impl AsRef<str>, schema_name: impl AsRef<str>) -> String {
format!(
"{}-{}-{}-",
TABLE_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
pub struct TableKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
pub version: TableVersion,
pub node_id: u64,
}
impl Display for TableKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)?;
f.write_str("-")?;
f.serialize_u64(self.version)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl TableKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 6, InvalidCatalogSnafu { key });
let version =
u64::from_str(&captures[4]).map_err(|_| InvalidCatalogSnafu { key }.build())?;
let node_id_str = captures[5].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
version,
node_id,
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableValue {
pub id: TableId,
pub node_id: u64,
pub regions_ids: Vec<u64>,
pub meta: RawTableMeta,
}
impl TableValue {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
serde_json::from_str(s.as_ref())
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
pub struct CatalogKey {
pub catalog_name: String,
pub node_id: u64,
}
impl Display for CatalogKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(CATALOG_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl CatalogKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = CATALOG_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
let node_id_str = captures[2].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
node_id,
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CatalogValue;
impl CatalogValue {
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
pub struct SchemaKey {
pub catalog_name: String,
pub schema_name: String,
pub node_id: u64,
}
impl Display for SchemaKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(SCHEMA_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl SchemaKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = SCHEMA_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
let node_id_str = captures[3].to_string();
let node_id = u64::from_str(&node_id_str)
.map_err(|_| ParseNodeIdSnafu { key: node_id_str }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
node_id,
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SchemaValue;
impl SchemaValue {
pub fn to_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
#[cfg(test)]
mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
use super::*;
#[test]
fn test_parse_catalog_key() {
let key = "__c-C-2";
let catalog_key = CatalogKey::parse(key).unwrap();
assert_eq!("C", catalog_key.catalog_name);
assert_eq!(2, catalog_key.node_id);
assert_eq!(key, catalog_key.to_string());
}
#[test]
fn test_parse_schema_key() {
let key = "__s-C-S-3";
let schema_key = SchemaKey::parse(key).unwrap();
assert_eq!("C", schema_key.catalog_name);
assert_eq!("S", schema_key.schema_name);
assert_eq!(3, schema_key.node_id);
assert_eq!(key, schema_key.to_string());
}
#[test]
fn test_parse_table_key() {
let key = "__t-C-S-T-42-1";
let entry = TableKey::parse(key).unwrap();
assert_eq!("C", entry.catalog_name);
assert_eq!("S", entry.schema_name);
assert_eq!("T", entry.table_name);
assert_eq!(1, entry.node_id);
assert_eq!(42, entry.version);
assert_eq!(key, &entry.to_string());
}
#[test]
fn test_build_prefix() {
assert_eq!("__c-", build_catalog_prefix());
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
assert_eq!(
"__t-CATALOG-SCHEMA-",
build_table_prefix("CATALOG", "SCHEMA")
);
}
#[test]
fn test_serialize_schema() {
let schema = Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]);
let meta = RawTableMeta {
schema: RawSchema::from(&schema),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
primary_key_indices: vec![0, 1],
next_column_id: 3,
engine_options: Default::default(),
value_indices: vec![2, 3],
options: Default::default(),
};
let value = TableValue {
id: 42,
node_id: 32,
regions_ids: vec![1, 2, 3],
meta,
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableValue::parse(&serialized).unwrap();
assert_eq!(value, deserialized);
}
}
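One property the remote catalog relies on, shown as a hedged, assertion-style sketch (not part of the test suite in this change): a rendered TableKey always starts with the string produced by build_table_prefix, which is what makes prefix range scans over the KV backend work.

fn table_key_matches_prefix() {
    let key = TableKey {
        catalog_name: "greptime".to_string(),
        schema_name: "public".to_string(),
        table_name: "metrics".to_string(),
        version: 0,
        node_id: 42,
    };
    // "__t-greptime-public-metrics-0-42" begins with "__t-greptime-public-"
    assert!(key
        .to_string()
        .starts_with(build_table_prefix("greptime", "public").as_str()));
}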

View File

@@ -0,0 +1,8 @@
pub mod consts;
pub mod error;
mod helper;
pub use helper::{
build_catalog_prefix, build_schema_prefix, build_table_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableKey, TableValue,
};

View File

@@ -7,12 +7,12 @@ edition = "2021"
proc-macro = true
[dependencies]
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
quote = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
syn = "1.0"
[dev-dependencies]
arc-swap = "1.0"
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
static_assertions = "1.1.0"

View File

@@ -8,8 +8,12 @@ api = { path = "../../api" }
async-trait = "0.1"
common-base = { path = "../base" }
common-error = { path = "../error" }
common-runtime = { path = "../runtime" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.0", features = ["full"] }
tonic = "0.8"
tower = "0.4"
[dependencies.arrow]
package = "arrow2"

View File

@@ -0,0 +1,530 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::Mutex;
use std::time::Duration;
use snafu::ResultExt;
use tonic::transport::Channel as InnerChannel;
use tonic::transport::Endpoint;
use tonic::transport::Uri;
use tower::make::MakeConnection;
use crate::error;
use crate::error::Result;
const RECYCLE_CHANNEL_INTERVAL_SECS: u64 = 60;
#[derive(Clone, Debug)]
pub struct ChannelManager {
config: ChannelConfig,
pool: Arc<Mutex<Pool>>,
}
impl Default for ChannelManager {
fn default() -> Self {
ChannelManager::with_config(ChannelConfig::default())
}
}
impl ChannelManager {
pub fn new() -> Self {
Default::default()
}
pub fn with_config(config: ChannelConfig) -> Self {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let cloned_pool = pool.clone();
common_runtime::spawn_bg(async move {
recycle_channel_in_loop(cloned_pool, RECYCLE_CHANNEL_INTERVAL_SECS).await;
});
Self { pool, config }
}
pub fn config(&self) -> &ChannelConfig {
&self.config
}
pub fn get(&self, addr: impl AsRef<str>) -> Result<InnerChannel> {
let addr = addr.as_ref();
let mut pool = self.pool.lock().unwrap();
if let Some(ch) = pool.get_mut(addr) {
ch.access += 1;
return Ok(ch.channel.clone());
}
let endpoint = self.build_endpoint(addr)?;
let inner_channel = endpoint.connect_lazy();
let channel = Channel {
channel: inner_channel.clone(),
access: 1,
use_default_connector: true,
};
pool.put(addr, channel);
Ok(inner_channel)
}
pub fn reset_with_connector<C>(
&self,
addr: impl AsRef<str>,
connector: C,
) -> Result<InnerChannel>
where
C: MakeConnection<Uri> + Send + 'static,
C::Connection: Unpin + Send + 'static,
C::Future: Send + 'static,
Box<dyn std::error::Error + Send + Sync>: From<C::Error> + Send + 'static,
{
let addr = addr.as_ref();
let endpoint = self.build_endpoint(addr)?;
let inner_channel = endpoint.connect_with_connector_lazy(connector);
let channel = Channel {
channel: inner_channel.clone(),
access: 1,
use_default_connector: false,
};
let mut pool = self.pool.lock().unwrap();
pool.put(addr, channel);
Ok(inner_channel)
}
pub fn retain_channel<F>(&self, f: F)
where
F: FnMut(&String, &mut Channel) -> bool,
{
let mut pool = self.pool.lock().unwrap();
pool.retain_channel(f);
}
fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
let mut endpoint =
Endpoint::new(format!("http://{}", addr)).context(error::CreateChannelSnafu)?;
if let Some(dur) = self.config.timeout {
endpoint = endpoint.timeout(dur);
}
if let Some(dur) = self.config.connect_timeout {
endpoint = endpoint.connect_timeout(dur);
}
if let Some(limit) = self.config.concurrency_limit {
endpoint = endpoint.concurrency_limit(limit);
}
if let Some((limit, dur)) = self.config.rate_limit {
endpoint = endpoint.rate_limit(limit, dur);
}
if let Some(size) = self.config.initial_stream_window_size {
endpoint = endpoint.initial_stream_window_size(size);
}
if let Some(size) = self.config.initial_connection_window_size {
endpoint = endpoint.initial_connection_window_size(size);
}
if let Some(dur) = self.config.http2_keep_alive_interval {
endpoint = endpoint.http2_keep_alive_interval(dur);
}
if let Some(dur) = self.config.http2_keep_alive_timeout {
endpoint = endpoint.keep_alive_timeout(dur);
}
if let Some(enabled) = self.config.http2_keep_alive_while_idle {
endpoint = endpoint.keep_alive_while_idle(enabled);
}
if let Some(enabled) = self.config.http2_adaptive_window {
endpoint = endpoint.http2_adaptive_window(enabled);
}
endpoint = endpoint
.tcp_keepalive(self.config.tcp_keepalive)
.tcp_nodelay(self.config.tcp_nodelay);
Ok(endpoint)
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ChannelConfig {
pub timeout: Option<Duration>,
pub connect_timeout: Option<Duration>,
pub concurrency_limit: Option<usize>,
pub rate_limit: Option<(u64, Duration)>,
pub initial_stream_window_size: Option<u32>,
pub initial_connection_window_size: Option<u32>,
pub http2_keep_alive_interval: Option<Duration>,
pub http2_keep_alive_timeout: Option<Duration>,
pub http2_keep_alive_while_idle: Option<bool>,
pub http2_adaptive_window: Option<bool>,
pub tcp_keepalive: Option<Duration>,
pub tcp_nodelay: bool,
}
impl Default for ChannelConfig {
fn default() -> Self {
Self {
timeout: None,
connect_timeout: None,
concurrency_limit: None,
rate_limit: None,
initial_stream_window_size: None,
initial_connection_window_size: None,
http2_keep_alive_interval: None,
http2_keep_alive_timeout: None,
http2_keep_alive_while_idle: None,
http2_adaptive_window: None,
tcp_keepalive: None,
tcp_nodelay: true,
}
}
}
impl ChannelConfig {
pub fn new() -> Self {
Default::default()
}
/// A timeout applied to each request.
pub fn timeout(self, timeout: Duration) -> Self {
Self {
timeout: Some(timeout),
..self
}
}
/// A timeout for connecting to the URI.
///
/// Defaults to no timeout.
pub fn connect_timeout(self, timeout: Duration) -> Self {
Self {
connect_timeout: Some(timeout),
..self
}
}
/// A concurrency limit applied to each request.
pub fn concurrency_limit(self, limit: usize) -> Self {
Self {
concurrency_limit: Some(limit),
..self
}
}
/// A rate limit applied to each request.
pub fn rate_limit(self, limit: u64, duration: Duration) -> Self {
Self {
rate_limit: Some((limit, duration)),
..self
}
}
/// Sets the SETTINGS_INITIAL_WINDOW_SIZE option for HTTP2 stream-level flow control.
/// Default is 65,535
pub fn initial_stream_window_size(self, size: u32) -> Self {
Self {
initial_stream_window_size: Some(size),
..self
}
}
/// Sets the max connection-level flow control for HTTP2
///
/// Default is 65,535
pub fn initial_connection_window_size(self, size: u32) -> Self {
Self {
initial_connection_window_size: Some(size),
..self
}
}
/// Set http2 KEEP_ALIVE_INTERVAL. Uses hyper's default otherwise.
pub fn http2_keep_alive_interval(self, duration: Duration) -> Self {
Self {
http2_keep_alive_interval: Some(duration),
..self
}
}
/// Set http2 KEEP_ALIVE_TIMEOUT. Uses hyper's default otherwise.
pub fn http2_keep_alive_timeout(self, duration: Duration) -> Self {
Self {
http2_keep_alive_timeout: Some(duration),
..self
}
}
/// Set http2 KEEP_ALIVE_WHILE_IDLE. Uses hyper's default otherwise.
pub fn http2_keep_alive_while_idle(self, enabled: bool) -> Self {
Self {
http2_keep_alive_while_idle: Some(enabled),
..self
}
}
/// Sets whether to use an adaptive flow control. Uses hyper's default otherwise.
pub fn http2_adaptive_window(self, enabled: bool) -> Self {
Self {
http2_adaptive_window: Some(enabled),
..self
}
}
/// Set whether TCP keepalive messages are enabled on accepted connections.
///
/// If None is specified, keepalive is disabled, otherwise the duration specified
/// will be the time to remain idle before sending TCP keepalive probes.
///
/// Default is no keepalive (None)
pub fn tcp_keepalive(self, duration: Duration) -> Self {
Self {
tcp_keepalive: Some(duration),
..self
}
}
/// Set the value of TCP_NODELAY option for accepted connections.
///
/// Enabled by default.
pub fn tcp_nodelay(self, enabled: bool) -> Self {
Self {
tcp_nodelay: enabled,
..self
}
}
}
#[derive(Debug)]
pub struct Channel {
channel: InnerChannel,
access: usize,
use_default_connector: bool,
}
impl Channel {
#[inline]
pub fn access(&self) -> usize {
self.access
}
#[inline]
pub fn use_default_connector(&self) -> bool {
self.use_default_connector
}
}
#[derive(Debug)]
struct Pool {
channels: HashMap<String, Channel>,
}
impl Pool {
#[inline]
fn get_mut(&mut self, addr: &str) -> Option<&mut Channel> {
self.channels.get_mut(addr)
}
#[inline]
fn put(&mut self, addr: &str, channel: Channel) {
self.channels.insert(addr.to_string(), channel);
}
#[inline]
fn retain_channel<F>(&mut self, f: F)
where
F: FnMut(&String, &mut Channel) -> bool,
{
self.channels.retain(f);
}
}
async fn recycle_channel_in_loop(pool: Arc<Mutex<Pool>>, interval_secs: u64) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
loop {
interval.tick().await;
let mut pool = pool.lock().unwrap();
pool.retain_channel(|_, c| {
if c.access == 0 {
false
} else {
c.access = 0;
true
}
})
}
}
#[cfg(test)]
mod tests {
use tower::service_fn;
use super::*;
#[should_panic]
#[test]
fn test_invalid_addr() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let mgr = ChannelManager {
pool,
..Default::default()
};
let addr = "http://test";
let _ = mgr.get(addr).unwrap();
}
#[tokio::test]
async fn test_access_count() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let config = ChannelConfig::new();
let mgr = ChannelManager { pool, config };
let addr = "test_uri";
for i in 0..10 {
{
let _ = mgr.get(addr).unwrap();
let mut pool = mgr.pool.lock().unwrap();
assert_eq!(i + 1, pool.get_mut(addr).unwrap().access);
}
}
let mut pool = mgr.pool.lock().unwrap();
assert_eq!(10, pool.get_mut(addr).unwrap().access);
pool.retain_channel(|_, c| {
if c.access == 0 {
false
} else {
c.access = 0;
true
}
});
assert_eq!(0, pool.get_mut(addr).unwrap().access);
}
#[test]
fn test_config() {
let default_cfg = ChannelConfig::new();
assert_eq!(
ChannelConfig {
timeout: None,
connect_timeout: None,
concurrency_limit: None,
rate_limit: None,
initial_stream_window_size: None,
initial_connection_window_size: None,
http2_keep_alive_interval: None,
http2_keep_alive_timeout: None,
http2_keep_alive_while_idle: None,
http2_adaptive_window: None,
tcp_keepalive: None,
tcp_nodelay: true,
},
default_cfg
);
let cfg = default_cfg
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
.concurrency_limit(6)
.rate_limit(5, Duration::from_secs(1))
.initial_stream_window_size(10)
.initial_connection_window_size(20)
.http2_keep_alive_interval(Duration::from_secs(1))
.http2_keep_alive_timeout(Duration::from_secs(3))
.http2_keep_alive_while_idle(true)
.http2_adaptive_window(true)
.tcp_keepalive(Duration::from_secs(2))
.tcp_nodelay(false);
assert_eq!(
ChannelConfig {
timeout: Some(Duration::from_secs(3)),
connect_timeout: Some(Duration::from_secs(5)),
concurrency_limit: Some(6),
rate_limit: Some((5, Duration::from_secs(1))),
initial_stream_window_size: Some(10),
initial_connection_window_size: Some(20),
http2_keep_alive_interval: Some(Duration::from_secs(1)),
http2_keep_alive_timeout: Some(Duration::from_secs(3)),
http2_keep_alive_while_idle: Some(true),
http2_adaptive_window: Some(true),
tcp_keepalive: Some(Duration::from_secs(2)),
tcp_nodelay: false,
},
cfg
);
}
#[test]
fn test_build_endpoint() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let config = ChannelConfig::new()
.timeout(Duration::from_secs(3))
.connect_timeout(Duration::from_secs(5))
.concurrency_limit(6)
.rate_limit(5, Duration::from_secs(1))
.initial_stream_window_size(10)
.initial_connection_window_size(20)
.http2_keep_alive_interval(Duration::from_secs(1))
.http2_keep_alive_timeout(Duration::from_secs(3))
.http2_keep_alive_while_idle(true)
.http2_adaptive_window(true)
.tcp_keepalive(Duration::from_secs(2))
.tcp_nodelay(true);
let mgr = ChannelManager { pool, config };
let res = mgr.build_endpoint("test_addr");
assert!(res.is_ok());
}
#[tokio::test]
async fn test_channel_with_connector() {
let pool = Pool {
channels: HashMap::default(),
};
let pool = Arc::new(Mutex::new(pool));
let config = ChannelConfig::new();
let mgr = ChannelManager { pool, config };
let addr = "test_addr";
let res = mgr.get(addr);
assert!(res.is_ok());
mgr.retain_channel(|addr, channel| {
assert_eq!("test_addr", addr);
assert!(channel.use_default_connector());
true
});
let (client, _) = tokio::io::duplex(1024);
let mut client = Some(client);
let res = mgr.reset_with_connector(
addr,
service_fn(move |_| {
let client = client.take().unwrap();
async move { Ok::<_, std::io::Error>(client) }
}),
);
assert!(res.is_ok());
mgr.retain_channel(|addr, channel| {
assert_eq!("test_addr", addr);
assert!(!channel.use_default_connector());
true
});
}
}
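To make the builder API in this file concrete, here is a minimal usage sketch. It relies only on what the hunk above shows (`ChannelConfig`'s builder methods and `ChannelManager::get`); the `with_config` constructor is an assumption for illustration, since the manager's constructors are not shown here.
// Sketch only: `ChannelManager::with_config` is hypothetical; this hunk does not show the constructor.
use std::time::Duration;
fn connect_sketch() -> Result<()> {
    let config = ChannelConfig::new()
        .timeout(Duration::from_secs(3))
        .connect_timeout(Duration::from_secs(1))
        .tcp_nodelay(true);
    let mgr = ChannelManager::with_config(config); // assumed constructor
    // `get` lazily creates (or reuses) a pooled channel keyed by the address;
    // every call bumps `access`, which the background recycle loop resets and
    // uses to drop channels that were not touched during the last interval.
    let _channel = mgr.get("127.0.0.1:3001")?;
    Ok(())
}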

View File

@@ -49,6 +49,12 @@ pub enum Error {
actual: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to create gRPC channel, source: {}", source))]
CreateChannel {
source: tonic::transport::Error,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {
@@ -61,9 +67,9 @@ impl ErrorExt for Error {
Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
StatusCode::Unsupported
}
Error::NewProjection { .. } | Error::DecodePhysicalPlanNode { .. } => {
StatusCode::Internal
}
Error::NewProjection { .. }
| Error::DecodePhysicalPlanNode { .. }
| Error::CreateChannel { .. } => StatusCode::Internal,
}
}
@@ -75,3 +81,129 @@ impl ErrorExt for Error {
self
}
}
#[cfg(test)]
mod tests {
use snafu::OptionExt;
use snafu::ResultExt;
use super::*;
type StdResult<E> = std::result::Result<(), E>;
fn throw_none_option() -> Option<String> {
None
}
#[test]
fn test_empty_physical_plan_error() {
let e = throw_none_option()
.context(EmptyPhysicalPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_empty_physical_expr_error() {
let e = throw_none_option()
.context(EmptyPhysicalExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_unsupported_df_plan_error() {
let e = throw_none_option()
.context(UnsupportedDfPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_unsupported_df_expr_error() {
let e = throw_none_option()
.context(UnsupportedDfExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_missing_field_error() {
let e = throw_none_option()
.context(MissingFieldSnafu { field: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_new_projection_error() {
fn throw_df_error() -> StdResult<DataFusionError> {
Err(DataFusionError::NotImplemented("".to_string()))
}
let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_decode_physical_plan_node_error() {
fn throw_decode_error() -> StdResult<DecodeError> {
Err(DecodeError::new("test"))
}
let e = throw_decode_error()
.context(DecodePhysicalPlanNodeSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_type_mismatch_error() {
let e = throw_none_option()
.context(TypeMismatchSnafu {
column_name: "",
expected: "",
actual: "",
})
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_create_channel_error() {
fn throw_tonic_error() -> StdResult<tonic::transport::Error> {
tonic::transport::Endpoint::new("http//http").map(|_| ())
}
let e = throw_tonic_error()
.context(CreateChannelSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
}

View File

@@ -1,3 +1,4 @@
pub mod channel_manager;
pub mod error;
pub mod physical;
pub mod writer;

View File

@@ -184,7 +184,7 @@ impl ExecutionPlan for MockExecution {
_runtime: Arc<RuntimeEnv>,
) -> datafusion::error::Result<SendableRecordBatchStream> {
let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
let name_array = Arc::new(Utf8Array::<i64>::from_slice([
let name_array = Arc::new(Utf8Array::<i32>::from_slice([
"zhangsan", "lisi", "wangwu", "Tony", "Mike",
]));
let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));

View File

@@ -35,7 +35,7 @@ impl LinesWriter {
SemanticType::Timestamp,
);
ensure!(
column.datatype == Some(ColumnDataType::Timestamp.into()),
column.datatype == ColumnDataType::Timestamp as i32,
TypeMismatchSnafu {
column_name,
expected: "timestamp",
@@ -52,7 +52,7 @@ impl LinesWriter {
pub fn write_tag(&mut self, column_name: &str, value: &str) -> Result<()> {
let (idx, column) = self.mut_column(column_name, ColumnDataType::String, SemanticType::Tag);
ensure!(
column.datatype == Some(ColumnDataType::String.into()),
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
@@ -70,7 +70,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Uint64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Uint64.into()),
column.datatype == ColumnDataType::Uint64 as i32,
TypeMismatchSnafu {
column_name,
expected: "u64",
@@ -88,7 +88,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Int64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Int64.into()),
column.datatype == ColumnDataType::Int64 as i32,
TypeMismatchSnafu {
column_name,
expected: "i64",
@@ -106,7 +106,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Float64, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Float64.into()),
column.datatype == ColumnDataType::Float64 as i32,
TypeMismatchSnafu {
column_name,
expected: "f64",
@@ -124,7 +124,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::String, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::String.into()),
column.datatype == ColumnDataType::String as i32,
TypeMismatchSnafu {
column_name,
expected: "string",
@@ -142,7 +142,7 @@ impl LinesWriter {
let (idx, column) =
self.mut_column(column_name, ColumnDataType::Boolean, SemanticType::Field);
ensure!(
column.datatype == Some(ColumnDataType::Boolean.into()),
column.datatype == ColumnDataType::Boolean as i32,
TypeMismatchSnafu {
column_name,
expected: "boolean",
@@ -197,7 +197,7 @@ impl LinesWriter {
column_name: column_name.to_string(),
semantic_type: semantic_type.into(),
values: Some(Values::with_capacity(datatype, to_insert)),
datatype: Some(datatype.into()),
datatype: datatype as i32,
null_mask: Vec::default(),
});
column_names.insert(column_name.to_string(), new_idx);
@@ -275,7 +275,7 @@ mod tests {
let column = &columns[0];
assert_eq!("host", columns[0].column_name);
assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Tag as i32, column.semantic_type);
assert_eq!(
vec!["host1", "host2", "host3"],
@@ -285,28 +285,28 @@ mod tests {
let column = &columns[1];
assert_eq!("cpu", column.column_name);
assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.5, 0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, false]);
let column = &columns[2];
assert_eq!("memory", column.column_name);
assert_eq!(Some(ColumnDataType::Float64 as i32), column.datatype);
assert_eq!(ColumnDataType::Float64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![0.4], column.values.as_ref().unwrap().f64_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[3];
assert_eq!("name", column.column_name);
assert_eq!(Some(ColumnDataType::String as i32), column.datatype);
assert_eq!(ColumnDataType::String as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec!["name1"], column.values.as_ref().unwrap().string_values);
verify_null_mask(&column.null_mask, vec![false, true, true]);
let column = &columns[4];
assert_eq!("ts", column.column_name);
assert_eq!(Some(ColumnDataType::Timestamp as i32), column.datatype);
assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
assert_eq!(
vec![101011000, 102011001, 103011002],
@@ -316,28 +316,28 @@ mod tests {
let column = &columns[5];
assert_eq!("enable_reboot", column.column_name);
assert_eq!(Some(ColumnDataType::Boolean as i32), column.datatype);
assert_eq!(ColumnDataType::Boolean as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![true], column.values.as_ref().unwrap().bool_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[6];
assert_eq!("year_of_service", column.column_name);
assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![2], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[7];
assert_eq!("temperature", column.column_name);
assert_eq!(Some(ColumnDataType::Int64 as i32), column.datatype);
assert_eq!(ColumnDataType::Int64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![4], column.values.as_ref().unwrap().i64_values);
verify_null_mask(&column.null_mask, vec![true, false, true]);
let column = &columns[8];
assert_eq!("cpu_core_num", column.column_name);
assert_eq!(Some(ColumnDataType::Uint64 as i32), column.datatype);
assert_eq!(ColumnDataType::Uint64 as i32, column.datatype);
assert_eq!(SemanticType::Field as i32, column.semantic_type);
assert_eq!(vec![16], column.values.as_ref().unwrap().u64_values);
verify_null_mask(&column.null_mask, vec![true, true, false]);

View File

@@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
async-trait = "0.1"
common-error = { path = "../error" }
common-recordbatch = { path = "../recordbatch" }
common-time = { path = "../time" }

View File

@@ -62,6 +62,36 @@ pub enum InnerError {
#[snafu(display("unexpected: not constant column"))]
InvalidInputCol { backtrace: Backtrace },
#[snafu(display("Not expected to run ExecutionPlan more than once"))]
ExecuteRepeatedly { backtrace: Backtrace },
#[snafu(display("General DataFusion error, source: {}", source))]
GeneralDataFusion {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Failed to execute DataFusion ExecutionPlan, source: {}", source))]
DataFusionExecutionPlan {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display(
"Failed to convert DataFusion's recordbatch stream, source: {}",
source
))]
ConvertDfRecordBatchStream {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to convert arrow schema, source: {}", source))]
ConvertArrowSchema {
#[snafu(backtrace)]
source: DataTypeError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -76,9 +106,17 @@ impl ErrorExt for InnerError {
| InnerError::InvalidInputState { .. }
| InnerError::InvalidInputCol { .. }
| InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
InnerError::InvalidInputs { source, .. } => source.status_code(),
InnerError::IntoVector { source, .. } => source.status_code(),
InnerError::FromScalarValue { source } => source.status_code(),
InnerError::InvalidInputs { source, .. }
| InnerError::IntoVector { source, .. }
| InnerError::FromScalarValue { source }
| InnerError::ConvertArrowSchema { source } => source.status_code(),
InnerError::ExecuteRepeatedly { .. }
| InnerError::GeneralDataFusion { .. }
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
}
}
@@ -105,6 +143,7 @@ impl From<Error> for DataFusionError {
#[cfg(test)]
mod tests {
use arrow::error::ArrowError;
use snafu::GenerateImplicitData;
use super::*;
@@ -127,6 +166,48 @@ mod tests {
.unwrap()
.into();
assert_error(&err, StatusCode::EngineExecuteQuery);
let err: Error = throw_df_error()
.context(GeneralDataFusionSnafu)
.err()
.unwrap()
.into();
assert_error(&err, StatusCode::Unexpected);
let err: Error = throw_df_error()
.context(DataFusionExecutionPlanSnafu)
.err()
.unwrap()
.into();
assert_error(&err, StatusCode::Unexpected);
}
#[test]
fn test_execute_repeatedly_error() {
let error: Error = None::<i32>
.context(ExecuteRepeatedlySnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
assert!(error.backtrace_opt().is_some());
}
#[test]
fn test_convert_df_recordbatch_stream_error() {
let result: std::result::Result<i32, common_recordbatch::error::Error> =
Err(common_recordbatch::error::InnerError::PollStream {
source: ArrowError::Overflow,
backtrace: Backtrace::generate(),
}
.into());
let error: Error = result
.context(ConvertDfRecordBatchStreamSnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Internal);
assert!(error.backtrace_opt().is_some());
}
fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {

View File

@@ -4,6 +4,7 @@ pub mod columnar_value;
pub mod error;
mod function;
pub mod logical_plan;
pub mod physical_plan;
pub mod prelude;
mod signature;
@@ -13,3 +14,5 @@ pub enum Output {
RecordBatches(RecordBatches),
Stream(SendableRecordBatchStream),
}
pub use datafusion::physical_plan::ExecutionPlan as DfPhysicalPlan;

View File

@@ -2,7 +2,7 @@ use datafusion::logical_plan::Expr as DfExpr;
/// Central struct of query API.
/// Represent logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
#[derive(Clone, PartialEq, Hash)]
#[derive(Clone, PartialEq, Hash, Debug)]
pub struct Expr {
df_expr: DfExpr,
}

View File

@@ -0,0 +1,325 @@
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use async_trait::async_trait;
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
use common_recordbatch::DfSendableRecordBatchStream;
use common_recordbatch::SendableRecordBatchStream;
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
pub use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::physical_plan::expressions::PhysicalSortExpr;
pub use datafusion::physical_plan::Partitioning;
use datafusion::physical_plan::Statistics;
use datatypes::schema::SchemaRef;
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::DfPhysicalPlan;
pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;
/// `PhysicalPlan` represents nodes in the physical plan.
///
/// Each `PhysicalPlan` is partition-aware and is responsible for
/// creating the actual `async` [`SendableRecordBatchStream`]s
/// of [`RecordBatch`] that incrementally compute the operator's
/// output from its input partition.
#[async_trait]
pub trait PhysicalPlan: Debug + Send + Sync {
/// Returns the physical plan as [`Any`](std::any::Any) so that it can be
/// downcast to a specific implementation.
fn as_any(&self) -> &dyn Any;
/// Get the schema for this physical plan
fn schema(&self) -> SchemaRef;
/// Specifies the output partitioning scheme of this plan
fn output_partitioning(&self) -> Partitioning;
/// Get a list of child physical plans that provide the input for this plan. The returned list
/// will be empty for leaf nodes, will contain a single value for unary nodes, or two
/// values for binary nodes (such as joins).
fn children(&self) -> Vec<PhysicalPlanRef>;
/// Returns a new plan where all children were replaced by new plans.
/// The size of `children` must be equal to the size of `PhysicalPlan::children()`.
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef>;
/// Creates a `RecordBatch` stream.
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream>;
}
#[derive(Debug)]
pub struct PhysicalPlanAdapter {
schema: SchemaRef,
df_plan: Arc<dyn DfPhysicalPlan>,
}
impl PhysicalPlanAdapter {
pub fn new(schema: SchemaRef, df_plan: Arc<dyn DfPhysicalPlan>) -> Self {
Self { schema, df_plan }
}
pub fn df_plan(&self) -> Arc<dyn DfPhysicalPlan> {
self.df_plan.clone()
}
}
#[async_trait]
impl PhysicalPlan for PhysicalPlanAdapter {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_partitioning(&self) -> Partitioning {
self.df_plan.output_partitioning()
}
fn children(&self) -> Vec<PhysicalPlanRef> {
self.df_plan
.children()
.into_iter()
.map(|x| Arc::new(PhysicalPlanAdapter::new(self.schema(), x)) as _)
.collect()
}
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
let children = children
.into_iter()
.map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
.collect();
let plan = self
.df_plan
.with_new_children(children)
.context(error::GeneralDataFusionSnafu)?;
Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
}
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream> {
let stream = self
.df_plan
.execute(partition, runtime)
.await
.context(error::DataFusionExecutionPlanSnafu)?;
let stream = RecordBatchStreamAdapter::try_new(stream)
.context(error::ConvertDfRecordBatchStreamSnafu)?;
Ok(Box::pin(stream))
}
}
#[derive(Debug)]
pub struct DfPhysicalPlanAdapter(pub PhysicalPlanRef);
#[async_trait]
impl DfPhysicalPlan for DfPhysicalPlanAdapter {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> DfSchemaRef {
self.0.schema().arrow_schema().clone()
}
fn output_partitioning(&self) -> Partitioning {
self.0.output_partitioning()
}
fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
None
}
fn children(&self) -> Vec<Arc<dyn DfPhysicalPlan>> {
self.0
.children()
.into_iter()
.map(|x| Arc::new(DfPhysicalPlanAdapter(x)) as _)
.collect()
}
fn with_new_children(
&self,
children: Vec<Arc<dyn DfPhysicalPlan>>,
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
let df_schema = self.schema();
let schema: SchemaRef = Arc::new(
df_schema
.try_into()
.context(error::ConvertArrowSchemaSnafu)
.map_err(error::Error::from)?,
);
let children = children
.into_iter()
.map(|x| Arc::new(PhysicalPlanAdapter::new(schema.clone(), x)) as _)
.collect();
let plan = self.0.with_new_children(children)?;
Ok(Arc::new(DfPhysicalPlanAdapter(plan)))
}
async fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> DfResult<DfSendableRecordBatchStream> {
let stream = self.0.execute(partition, runtime).await?;
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
}
fn statistics(&self) -> Statistics {
// TODO(LFC): impl statistics
Statistics::default()
}
}
#[cfg(test)]
mod test {
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::arrow_print;
use datafusion::datasource::TableProvider as DfTableProvider;
use datafusion::logical_plan::LogicalPlanBuilder;
use datafusion::physical_plan::collect;
use datafusion::physical_plan::empty::EmptyExec;
use datafusion::prelude::ExecutionContext;
use datafusion_common::field_util::SchemaExt;
use datafusion_expr::Expr;
use datatypes::schema::Schema;
use datatypes::vectors::Int32Vector;
use super::*;
struct MyDfTableProvider;
#[async_trait]
impl DfTableProvider for MyDfTableProvider {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> DfSchemaRef {
Arc::new(ArrowSchema::new(vec![Field::new(
"a",
DataType::Int32,
false,
)]))
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
let schema = Schema::try_from(self.schema()).unwrap();
let my_plan = Arc::new(MyExecutionPlan {
schema: Arc::new(schema),
});
let df_plan = DfPhysicalPlanAdapter(my_plan);
Ok(Arc::new(df_plan))
}
}
#[derive(Debug)]
struct MyExecutionPlan {
schema: SchemaRef,
}
#[async_trait]
impl PhysicalPlan for MyExecutionPlan {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn output_partitioning(&self) -> Partitioning {
Partitioning::UnknownPartitioning(1)
}
fn children(&self) -> Vec<PhysicalPlanRef> {
vec![]
}
fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef> {
unimplemented!()
}
async fn execute(
&self,
_partition: usize,
_runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream> {
let schema = self.schema();
let recordbatches = RecordBatches::try_new(
schema.clone(),
vec![
RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice(vec![1])) as _],
)
.unwrap(),
RecordBatch::new(
schema,
vec![Arc::new(Int32Vector::from_slice(vec![2, 3])) as _],
)
.unwrap(),
],
)
.unwrap();
Ok(recordbatches.as_stream())
}
}
// Test that our physical plan can be executed by DataFusion through the adapters.
#[tokio::test]
async fn test_execute_physical_plan() {
let ctx = ExecutionContext::new();
let logical_plan = LogicalPlanBuilder::scan("test", Arc::new(MyDfTableProvider), None)
.unwrap()
.build()
.unwrap();
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
let df_recordbatches = collect(physical_plan, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let pretty_print = arrow_print::write(&df_recordbatches);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
assert_eq!(
pretty_print,
vec!["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",]
);
}
#[test]
fn test_physical_plan_adapter() {
let df_schema = Arc::new(ArrowSchema::new(vec![Field::new(
"name",
DataType::Utf8,
true,
)]));
let plan = PhysicalPlanAdapter::new(
Arc::new(Schema::try_from(df_schema.clone()).unwrap()),
Arc::new(EmptyExec::new(true, df_schema.clone())),
);
assert!(plan.df_plan.as_any().downcast_ref::<EmptyExec>().is_some());
let df_plan = DfPhysicalPlanAdapter(Arc::new(plan));
assert_eq!(df_schema, df_plan.schema());
}
}
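As a quick recap of the adapter pair defined above, the sketch below (closely mirroring `test_physical_plan_adapter`) wraps a DataFusion `EmptyExec` into a GreptimeDB `PhysicalPlan` and back; it assumes the same imports as the test module.
use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datafusion::physical_plan::empty::EmptyExec;
use datafusion_common::field_util::SchemaExt;
use datatypes::schema::Schema;
fn wrap_both_ways() -> DfPhysicalPlanAdapter {
    // Any DataFusion ExecutionPlan works here; EmptyExec keeps the sketch small.
    let df_schema = Arc::new(ArrowSchema::new(vec![Field::new("a", DataType::Int32, false)]));
    // DataFusion plan -> GreptimeDB PhysicalPlan.
    let plan = PhysicalPlanAdapter::new(
        Arc::new(Schema::try_from(df_schema.clone()).unwrap()),
        Arc::new(EmptyExec::new(true, df_schema)),
    );
    // GreptimeDB PhysicalPlan -> DataFusion ExecutionPlan, ready for e.g. `collect`.
    DfPhysicalPlanAdapter(Arc::new(plan))
}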

View File

@@ -0,0 +1,92 @@
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::error::ArrowError;
use datatypes::arrow::error::Result as ArrowResult;
use datatypes::schema::{Schema, SchemaRef};
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::DfSendableRecordBatchStream;
use crate::{RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream};
/// Greptime SendableRecordBatchStream -> DataFusion RecordBatchStream
pub struct DfRecordBatchStreamAdapter {
stream: SendableRecordBatchStream,
}
impl DfRecordBatchStreamAdapter {
pub fn new(stream: SendableRecordBatchStream) -> Self {
Self { stream }
}
}
impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
fn schema(&self) -> DfSchemaRef {
self.stream.schema().arrow_schema().clone()
}
}
impl Stream for DfRecordBatchStreamAdapter {
type Item = ArrowResult<DfRecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(recordbatch)) => match recordbatch {
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.df_recordbatch))),
Err(e) => Poll::Ready(Some(Err(ArrowError::External("".to_owned(), Box::new(e))))),
},
Poll::Ready(None) => Poll::Ready(None),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.stream.size_hint()
}
}
/// DataFusion SendableRecordBatchStream -> Greptime RecordBatchStream
pub struct RecordBatchStreamAdapter {
schema: SchemaRef,
stream: DfSendableRecordBatchStream,
}
impl RecordBatchStreamAdapter {
pub fn try_new(stream: DfSendableRecordBatchStream) -> Result<Self> {
let schema =
Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
Ok(Self { schema, stream })
}
}
impl RecordBatchStream for RecordBatchStreamAdapter {
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
}
impl Stream for RecordBatchStreamAdapter {
type Item = Result<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(df_recordbatch)) => Poll::Ready(Some(Ok(RecordBatch {
schema: self.schema(),
df_recordbatch: df_recordbatch.context(error::PollStreamSnafu)?,
}))),
Poll::Ready(None) => Poll::Ready(None),
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.stream.size_hint()
}
}
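The two adapters above are thin wrappers; a minimal sketch of both directions, assuming the crate-level type aliases (`SendableRecordBatchStream`, `DfSendableRecordBatchStream`) and `Result` that this module already imports:
// DataFusion stream -> GreptimeDB stream; may fail while converting the Arrow schema.
fn into_greptime_stream(stream: DfSendableRecordBatchStream) -> Result<SendableRecordBatchStream> {
    Ok(Box::pin(RecordBatchStreamAdapter::try_new(stream)?))
}
// GreptimeDB stream -> DataFusion stream; an infallible wrapper.
fn into_df_stream(stream: SendableRecordBatchStream) -> DfSendableRecordBatchStream {
    Box::pin(DfRecordBatchStreamAdapter::new(stream))
}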

View File

@@ -33,16 +33,32 @@ pub enum InnerError {
reason: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to convert Arrow schema, source: {}", source))]
SchemaConversion {
source: datatypes::error::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
backtrace: Backtrace,
},
}
impl ErrorExt for InnerError {
fn status_code(&self) -> StatusCode {
match self {
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
InnerError::DataTypes { .. } | InnerError::CreateRecordBatches { .. } => {
StatusCode::Internal
}
InnerError::DataTypes { .. }
| InnerError::CreateRecordBatches { .. }
| InnerError::PollStream { .. } => StatusCode::Internal,
InnerError::External { source } => source.status_code(),
InnerError::SchemaConversion { source, .. } => source.status_code(),
}
}

View File

@@ -1,9 +1,11 @@
pub mod adapter;
pub mod error;
mod recordbatch;
pub mod util;
use std::pin::Pin;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::schema::SchemaRef;
use error::Result;
use futures::task::{Context, Poll};
@@ -74,6 +76,41 @@ impl RecordBatches {
pub fn take(self) -> Vec<RecordBatch> {
self.batches
}
pub fn as_stream(&self) -> SendableRecordBatchStream {
Box::pin(SimpleRecordBatchStream {
inner: RecordBatches {
schema: self.schema(),
batches: self.batches.clone(),
},
index: 0,
})
}
}
pub struct SimpleRecordBatchStream {
inner: RecordBatches,
index: usize,
}
impl RecordBatchStream for SimpleRecordBatchStream {
fn schema(&self) -> SchemaRef {
self.inner.schema()
}
}
impl Stream for SimpleRecordBatchStream {
type Item = Result<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
Poll::Ready(if self.index < self.inner.batches.len() {
let batch = self.inner.batches[self.index].clone();
self.index += 1;
Some(Ok(batch))
} else {
None
})
}
}
#[cfg(test)]
@@ -116,4 +153,27 @@ mod tests {
assert_eq!(schema1, batches.schema());
assert_eq!(vec![batch1], batches.take());
}
#[tokio::test]
async fn test_simple_recordbatch_stream() {
let column_a = ColumnSchema::new("a", ConcreteDataType::int32_datatype(), false);
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let schema = Arc::new(Schema::new(vec![column_a, column_b]));
let va1: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let vb1: VectorRef = Arc::new(StringVector::from(vec!["a", "b"]));
let batch1 = RecordBatch::new(schema.clone(), vec![va1, vb1]).unwrap();
let va2: VectorRef = Arc::new(Int32Vector::from_slice(&[3, 4, 5]));
let vb2: VectorRef = Arc::new(StringVector::from(vec!["c", "d", "e"]));
let batch2 = RecordBatch::new(schema.clone(), vec![va2, vb2]).unwrap();
let recordbatches =
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
let stream = recordbatches.as_stream();
let collected = util::collect(stream).await.unwrap();
assert_eq!(collected.len(), 2);
assert_eq!(collected[0], batch1);
assert_eq!(collected[1], batch2);
}
}

View File

@@ -31,6 +31,10 @@ impl RecordBatch {
})
}
pub fn num_rows(&self) -> usize {
self.df_recordbatch.num_rows()
}
/// Create an iterator to traverse the data by row
pub fn rows(&self) -> RecordBatchRowIterator<'_> {
RecordBatchRowIterator::new(self)

View File

@@ -0,0 +1,27 @@
[package]
name = "substrait"
version = "0.1.0"
edition = "2021"
[dependencies]
bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dependencies.substrait_proto]
package = "substrait"
version = "0.2"
[dev-dependencies]
datatypes = { path = "../../datatypes" }
table = { path = "../../table" }
tokio = { version = "1.0", features = ["full"] }

View File

@@ -0,0 +1,432 @@
use std::sync::Arc;
use bytes::{Buf, Bytes, BytesMut};
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use datafusion::datasource::TableProvider;
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
use datafusion::physical_plan::project_schema;
use prost::Message;
use snafu::ensure;
use snafu::{OptionExt, ResultExt};
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::protobuf::expression::MaskExpression;
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
use substrait_proto::protobuf::rel::RelType;
use substrait_proto::protobuf::PlanRel;
use substrait_proto::protobuf::ReadRel;
use substrait_proto::protobuf::Rel;
use table::table::adapter::DfTableProviderAdapter;
use crate::error::Error;
use crate::error::{
DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, InternalSnafu,
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
};
use crate::schema::{from_schema, to_schema};
use crate::SubstraitPlan;
pub struct DFLogicalSubstraitConvertor {
catalog_manager: CatalogManagerRef,
}
impl SubstraitPlan for DFLogicalSubstraitConvertor {
type Error = Error;
type Plan = LogicalPlan;
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
let plan_rel = PlanRel::decode(message).context(DecodeRelSnafu)?;
let rel = match plan_rel.rel_type.context(EmptyPlanSnafu)? {
PlanRelType::Rel(rel) => rel,
PlanRelType::Root(_) => UnsupportedPlanSnafu {
name: "Root Relation",
}
.fail()?,
};
self.convert_rel(rel)
}
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
let rel = self.convert_plan(plan)?;
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
let mut buf = BytesMut::new();
plan_rel.encode(&mut buf).context(EncodeRelSnafu)?;
Ok(buf.freeze())
}
}
impl DFLogicalSubstraitConvertor {
pub fn new(catalog_manager: CatalogManagerRef) -> Self {
Self { catalog_manager }
}
}
impl DFLogicalSubstraitConvertor {
pub fn convert_rel(&self, rel: Rel) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(read_rel),
RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
name: "Filter Relation",
}
.fail()?,
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
}
.fail()?,
RelType::Aggregate(_aggr_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
}
.fail()?,
RelType::Sort(_sort_rel) => UnsupportedPlanSnafu {
name: "Sort Relation",
}
.fail()?,
RelType::Join(_join_rel) => UnsupportedPlanSnafu {
name: "Join Relation",
}
.fail()?,
RelType::Project(_project_rel) => UnsupportedPlanSnafu {
name: "Project Relation",
}
.fail()?,
RelType::Set(_set_rel) => UnsupportedPlanSnafu {
name: "Set Relation",
}
.fail()?,
RelType::ExtensionSingle(_ext_single_rel) => UnsupportedPlanSnafu {
name: "Extension Single Relation",
}
.fail()?,
RelType::ExtensionMulti(_ext_multi_rel) => UnsupportedPlanSnafu {
name: "Extension Multi Relation",
}
.fail()?,
RelType::ExtensionLeaf(_ext_leaf_rel) => UnsupportedPlanSnafu {
name: "Extension Leaf Relation",
}
.fail()?,
RelType::Cross(_cross_rel) => UnsupportedPlanSnafu {
name: "Cross Relation",
}
.fail()?,
}?;
Ok(logical_plan)
}
fn convert_read_rel(&self, read_rel: Box<ReadRel>) -> Result<LogicalPlan, Error> {
// Extract the catalog, schema and table names from NamedTable, assuming its first three names are exactly those.
let read_type = read_rel.read_type.context(MissingFieldSnafu {
field: "read_type",
plan: "Read",
})?;
let (table_name, schema_name, catalog_name) = match read_type {
ReadType::NamedTable(mut named_table) => {
ensure!(
named_table.names.len() == 3,
InvalidParametersSnafu {
reason:
"NamedTable should contains three names for catalog, schema and table",
}
);
(
named_table.names.pop().unwrap(),
named_table.names.pop().unwrap(),
named_table.names.pop().unwrap(),
)
}
ReadType::VirtualTable(_) | ReadType::LocalFiles(_) | ReadType::ExtensionTable(_) => {
UnsupportedExprSnafu {
name: "Non-NamedTable Read",
}
.fail()?
}
};
// Get projection indices
let projection = read_rel
.projection
.map(|mask_expr| self.convert_mask_expression(mask_expr));
// Get table handle from catalog manager
let table_ref = self
.catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.map_err(BoxedError::new)
.context(InternalSnafu)?
.context(TableNotFoundSnafu {
name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
})?;
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
// Get the schema directly from the table and compare it with the schema retrieved from the Substrait proto.
let stored_schema = adapter.schema();
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
ensure!(
stored_schema.fields == retrieved_arrow_schema.fields,
SchemaNotMatchSnafu {
substrait_schema: retrieved_arrow_schema.clone(),
storage_schema: stored_schema
}
);
// Calculate the projected schema
let projected_schema = project_schema(&stored_schema, projection.as_ref())
.context(DFInternalSnafu)?
.to_dfschema_ref()
.context(DFInternalSnafu)?;
// TODO(ruihang): Support filters and limit
Ok(LogicalPlan::TableScan(TableScan {
table_name,
source: adapter,
projection,
projected_schema,
filters: vec![],
limit: None,
}))
}
fn convert_mask_expression(&self, mask_expression: MaskExpression) -> Vec<usize> {
mask_expression
.select
.unwrap_or_default()
.struct_items
.into_iter()
.map(|select| select.field as _)
.collect()
}
}
impl DFLogicalSubstraitConvertor {
pub fn convert_plan(&self, plan: LogicalPlan) -> Result<Rel, Error> {
match plan {
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Projection",
}
.fail()?,
LogicalPlan::Filter(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Filter",
}
.fail()?,
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Window",
}
.fail()?,
LogicalPlan::Aggregate(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Aggregate",
}
.fail()?,
LogicalPlan::Sort(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Sort",
}
.fail()?,
LogicalPlan::Join(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Join",
}
.fail()?,
LogicalPlan::CrossJoin(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical CrossJoin",
}
.fail()?,
LogicalPlan::Repartition(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Repartition",
}
.fail()?,
LogicalPlan::Union(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Union",
}
.fail()?,
LogicalPlan::TableScan(table_scan) => {
let read_rel = self.convert_table_scan_plan(table_scan)?;
Ok(Rel {
rel_type: Some(RelType::Read(Box::new(read_rel))),
})
}
LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical EmptyRelation",
}
.fail()?,
LogicalPlan::Limit(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Limit",
}
.fail()?,
LogicalPlan::CreateExternalTable(_)
| LogicalPlan::CreateMemoryTable(_)
| LogicalPlan::DropTable(_)
| LogicalPlan::Values(_)
| LogicalPlan::Explain(_)
| LogicalPlan::Analyze(_)
| LogicalPlan::Extension(_) => InvalidParametersSnafu {
reason: format!(
"Trying to convert DDL/DML plan to substrait proto, plan: {:?}",
plan
),
}
.fail()?,
}
}
pub fn convert_table_scan_plan(&self, table_scan: TableScan) -> Result<ReadRel, Error> {
let provider = table_scan
.source
.as_any()
.downcast_ref::<DfTableProviderAdapter>()
.context(UnknownPlanSnafu)?;
let table_info = provider.table().table_info();
// assemble NamedTable and ReadType
let catalog_name = table_info.catalog_name.clone();
let schema_name = table_info.schema_name.clone();
let table_name = table_info.name.clone();
let named_table = NamedTable {
names: vec![catalog_name, schema_name, table_name],
advanced_extension: None,
};
let read_type = ReadType::NamedTable(named_table);
// assemble projection
let projection = table_scan
.projection
.map(|proj| self.convert_schema_projection(&proj));
// assemble base (unprojected) schema using Table's schema.
let base_schema = from_schema(&provider.table().schema())?;
let read_rel = ReadRel {
common: None,
base_schema: Some(base_schema),
filter: None,
projection,
advanced_extension: None,
read_type: Some(read_type),
};
Ok(read_rel)
}
/// Convert an index-based schema projection to Substrait's [MaskExpression].
fn convert_schema_projection(&self, projections: &[usize]) -> MaskExpression {
let struct_items = projections
.iter()
.map(|index| StructItem {
field: *index as i32,
child: None,
})
.collect();
MaskExpression {
select: Some(StructSelect { struct_items }),
// TODO(ruihang): this field is unspecified
maintain_singular_struct: true,
}
}
}
#[cfg(test)]
mod test {
use catalog::local::LocalCatalogManager;
use catalog::{
local::{MemoryCatalogProvider, MemorySchemaProvider},
CatalogList, CatalogProvider, RegisterTableRequest,
};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion::logical_plan::DFSchema;
use datatypes::schema::Schema;
use table::{requests::CreateTableRequest, test_util::EmptyTable, test_util::MockTableEngine};
use super::*;
use crate::schema::test::supported_types;
const DEFAULT_TABLE_NAME: &str = "SubstraitTable";
async fn build_mock_catalog_manager() -> CatalogManagerRef {
let mock_table_engine = Arc::new(MockTableEngine::new());
let catalog_manager = Arc::new(
LocalCatalogManager::try_new(mock_table_engine)
.await
.unwrap(),
);
let schema_provider = Arc::new(MemorySchemaProvider::new());
let catalog_provider = Arc::new(MemoryCatalogProvider::new());
catalog_provider
.register_schema(DEFAULT_SCHEMA_NAME.to_string(), schema_provider)
.unwrap();
catalog_manager
.register_catalog(DEFAULT_CATALOG_NAME.to_string(), catalog_provider)
.unwrap();
catalog_manager.init().await.unwrap();
catalog_manager
}
fn build_create_table_request<N: ToString>(table_name: N) -> CreateTableRequest {
CreateTableRequest {
id: 1,
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
desc: None,
schema: Arc::new(Schema::new(supported_types())),
primary_key_indices: vec![],
create_if_not_exists: true,
table_options: Default::default(),
}
}
async fn logical_plan_round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) {
let convertor = DFLogicalSubstraitConvertor::new(catalog);
let proto = convertor.encode(plan.clone()).unwrap();
let tripped_plan = convertor.decode(proto).unwrap();
assert_eq!(format!("{:?}", plan), format!("{:?}", tripped_plan));
}
#[tokio::test]
async fn test_table_scan() {
let catalog_manager = build_mock_catalog_manager().await;
let table_ref = Arc::new(EmptyTable::new(build_create_table_request(
DEFAULT_TABLE_NAME,
)));
catalog_manager
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: DEFAULT_TABLE_NAME.to_string(),
table_id: 1,
table: table_ref.clone(),
})
.await
.unwrap();
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
let projection = vec![1, 3, 5];
let df_schema = adapter.schema().to_dfschema().unwrap();
let projected_fields = projection
.iter()
.map(|index| df_schema.field(*index).clone())
.collect();
let projected_schema =
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
let table_scan_plan = LogicalPlan::TableScan(TableScan {
table_name: DEFAULT_TABLE_NAME.to_string(),
source: adapter,
projection: Some(projection),
projected_schema,
filters: vec![],
limit: None,
});
logical_plan_round_trip(table_scan_plan, catalog_manager).await;
}
}
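For reference, the encode/decode pair of `DFLogicalSubstraitConvertor` composes into the same round trip the test above exercises; a minimal sketch under the module's existing imports (only `TableScan` plans are convertible at this point):
// Serialize a DataFusion plan to Substrait bytes and read it back through the catalog.
fn round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) -> Result<LogicalPlan, Error> {
    let convertor = DFLogicalSubstraitConvertor::new(catalog);
    let bytes: Bytes = convertor.encode(plan)?;
    convertor.decode(bytes)
}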

View File

@@ -0,0 +1,119 @@
use std::any::Any;
use common_error::prelude::{BoxedError, ErrorExt, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::prelude::ConcreteDataType;
use prost::{DecodeError, EncodeError};
use snafu::{Backtrace, ErrorCompat, Snafu};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Unsupported physical expr: {}", name))]
UnsupportedPlan { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported physical plan: {}", name))]
UnsupportedExpr { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported concrete type: {:?}", ty))]
UnsupportedConcreteType {
ty: ConcreteDataType,
backtrace: Backtrace,
},
#[snafu(display("Unsupported substrait type: {}", ty))]
UnsupportedSubstraitType { ty: String, backtrace: Backtrace },
#[snafu(display("Failed to decode substrait relation, source: {}", source))]
DecodeRel {
source: DecodeError,
backtrace: Backtrace,
},
#[snafu(display("Failed to encode substrait relation, source: {}", source))]
EncodeRel {
source: EncodeError,
backtrace: Backtrace,
},
#[snafu(display("Input plan is empty"))]
EmptyPlan { backtrace: Backtrace },
#[snafu(display("Input expression is empty"))]
EmptyExpr { backtrace: Backtrace },
#[snafu(display("Missing required field in protobuf, field: {}, plan: {}", field, plan))]
MissingField {
field: String,
plan: String,
backtrace: Backtrace,
},
#[snafu(display("Invalid parameters: {}", reason))]
InvalidParameters {
reason: String,
backtrace: Backtrace,
},
#[snafu(display("Internal error from DataFusion: {}", source))]
DFInternal {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Internal error: {}", source))]
Internal {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Table quering not found: {}", name))]
TableNotFound { name: String, backtrace: Backtrace },
#[snafu(display("Cannot convert plan doesn't belong to GreptimeDB"))]
UnknownPlan { backtrace: Backtrace },
#[snafu(display(
"Schema from Substrait proto doesn't match with the schema in storage.
Substrait schema: {:?}
Storage schema: {:?}",
substrait_schema,
storage_schema
))]
SchemaNotMatch {
substrait_schema: datafusion::arrow::datatypes::SchemaRef,
storage_schema: datafusion::arrow::datatypes::SchemaRef,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::UnsupportedConcreteType { .. }
| Error::UnsupportedPlan { .. }
| Error::UnsupportedExpr { .. }
| Error::UnsupportedSubstraitType { .. } => StatusCode::Unsupported,
Error::UnknownPlan { .. }
| Error::EncodeRel { .. }
| Error::DecodeRel { .. }
| Error::EmptyPlan { .. }
| Error::EmptyExpr { .. }
| Error::MissingField { .. }
| Error::InvalidParameters { .. }
| Error::TableNotFound { .. }
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,18 @@
mod df_logical;
pub mod error;
mod schema;
mod types;
use bytes::{Buf, Bytes};
pub use crate::df_logical::DFLogicalSubstraitConvertor;
pub trait SubstraitPlan {
type Error: std::error::Error;
type Plan;
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error>;
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error>;
}

View File

@@ -0,0 +1,97 @@
use datatypes::schema::{ColumnSchema, Schema};
use substrait_proto::protobuf::r#type::{Nullability, Struct as SubstraitStruct};
use substrait_proto::protobuf::NamedStruct;
use crate::error::Result;
use crate::types::{from_concrete_type, to_concrete_type};
pub fn to_schema(named_struct: NamedStruct) -> Result<Schema> {
if named_struct.r#struct.is_none() {
return Ok(Schema::new(vec![]));
}
let column_schemas = named_struct
.r#struct
.unwrap()
.types
.into_iter()
.zip(named_struct.names.into_iter())
.map(|(ty, name)| {
let (concrete_type, is_nullable) = to_concrete_type(&ty)?;
let column_schema = ColumnSchema::new(name, concrete_type, is_nullable);
Ok(column_schema)
})
.collect::<Result<_>>()?;
Ok(Schema::new(column_schemas))
}
pub fn from_schema(schema: &Schema) -> Result<NamedStruct> {
let mut names = Vec::with_capacity(schema.num_columns());
let mut types = Vec::with_capacity(schema.num_columns());
for column_schema in schema.column_schemas() {
names.push(column_schema.name.clone());
let substrait_type = from_concrete_type(
column_schema.data_type.clone(),
Some(column_schema.is_nullable()),
)?;
types.push(substrait_type);
}
// TODO(ruihang): `type_variation_reference` and `nullability` are unspecified.
let substrait_struct = SubstraitStruct {
types,
type_variation_reference: 0,
nullability: Nullability::Unspecified as _,
};
Ok(NamedStruct {
names,
r#struct: Some(substrait_struct),
})
}
#[cfg(test)]
pub(crate) mod test {
use datatypes::prelude::{ConcreteDataType, DataType};
use super::*;
pub(crate) fn supported_types() -> Vec<ColumnSchema> {
[
ConcreteDataType::null_datatype(),
ConcreteDataType::boolean_datatype(),
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
ConcreteDataType::binary_datatype(),
ConcreteDataType::string_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::timestamp_datatype(Default::default()),
// TODO(ruihang): DateTime and List type are not supported now
]
.into_iter()
.enumerate()
.map(|(ordinal, ty)| ColumnSchema::new(ty.name().to_string(), ty, ordinal % 2 == 0))
.collect()
}
#[test]
fn supported_types_round_trip() {
let column_schemas = supported_types();
let schema = Schema::new(column_schemas);
let named_struct = from_schema(&schema).unwrap();
let converted_schema = to_schema(named_struct).unwrap();
assert_eq!(schema, converted_schema);
}
}

View File

@@ -0,0 +1,123 @@
//! Methods that perform conversion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
//!
//! Substrait uses [type variations](https://substrait.io/types/type_variations/) to express different "logical types".
//! Currently we only have variations on integer types. Variation 0 (system preferred) is the same as the base type,
//! i.e. signed integers (e.g. I8 -> [i8]), while variation 1 stands for unsigned integers (e.g. I8 -> [u8]).
use datatypes::prelude::ConcreteDataType;
use substrait_proto::protobuf::r#type::{self as s_type, Kind, Nullability};
use substrait_proto::protobuf::Type as SType;
use crate::error::Result;
use crate::error::{UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
macro_rules! substrait_kind {
($desc:ident, $concrete_ty:ident) => {{
let nullable = $desc.nullability() == Nullability::Nullable;
let ty = ConcreteDataType::$concrete_ty();
Ok((ty, nullable))
}};
($desc:ident, $concrete_ty:expr) => {{
let nullable = $desc.nullability() == Nullability::Nullable;
Ok(($concrete_ty, nullable))
}};
($desc:ident, $concrete_ty_0:ident, $concrete_ty_1:ident) => {{
let nullable = $desc.nullability() == Nullability::Nullable;
let ty = match $desc.type_variation_reference {
0 => ConcreteDataType::$concrete_ty_0(),
1 => ConcreteDataType::$concrete_ty_1(),
_ => UnsupportedSubstraitTypeSnafu {
ty: format!("{:?}", $desc),
}
.fail()?,
};
Ok((ty, nullable))
}};
}
/// Convert Substrait [Type](SType) to GreptimeDB's [ConcreteDataType]. The bool in the returned
/// tuple indicates nullability.
pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> {
if ty.kind.is_none() {
return Ok((ConcreteDataType::null_datatype(), true));
}
let kind = ty.kind.as_ref().unwrap();
match kind {
Kind::Bool(desc) => substrait_kind!(desc, boolean_datatype),
Kind::I8(desc) => substrait_kind!(desc, int8_datatype, uint8_datatype),
Kind::I16(desc) => substrait_kind!(desc, int16_datatype, uint16_datatype),
Kind::I32(desc) => substrait_kind!(desc, int32_datatype, uint32_datatype),
Kind::I64(desc) => substrait_kind!(desc, int64_datatype, uint64_datatype),
Kind::Fp32(desc) => substrait_kind!(desc, float32_datatype),
Kind::Fp64(desc) => substrait_kind!(desc, float64_datatype),
Kind::String(desc) => substrait_kind!(desc, string_datatype),
Kind::Binary(desc) => substrait_kind!(desc, binary_datatype),
Kind::Timestamp(desc) => substrait_kind!(
desc,
ConcreteDataType::timestamp_datatype(Default::default())
),
Kind::Date(desc) => substrait_kind!(desc, date_datatype),
Kind::Time(_)
| Kind::IntervalYear(_)
| Kind::IntervalDay(_)
| Kind::TimestampTz(_)
| Kind::Uuid(_)
| Kind::FixedChar(_)
| Kind::Varchar(_)
| Kind::FixedBinary(_)
| Kind::Decimal(_)
| Kind::Struct(_)
| Kind::List(_)
| Kind::Map(_)
| Kind::UserDefinedTypeReference(_) => UnsupportedSubstraitTypeSnafu {
ty: format!("{:?}", kind),
}
.fail(),
}
}
macro_rules! build_substrait_kind {
($kind:ident,$s_type:ident,$nullable:ident,$variation:literal) => {{
let nullability = match $nullable {
Some(true) => Nullability::Nullable,
Some(false) => Nullability::Required,
None => Nullability::Unspecified,
} as _;
Some(Kind::$kind(s_type::$s_type {
type_variation_reference: $variation,
nullability,
}))
}};
}
/// Convert GreptimeDB's [ConcreteDataType] to Substrait [Type](SType).
///
/// Refer to the [module-level documentation](super::types) for more information about type variations.
pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Result<SType> {
let kind = match ty {
ConcreteDataType::Null(_) => None,
ConcreteDataType::Boolean(_) => build_substrait_kind!(Bool, Boolean, nullability, 0),
ConcreteDataType::Int8(_) => build_substrait_kind!(I8, I8, nullability, 0),
ConcreteDataType::Int16(_) => build_substrait_kind!(I16, I16, nullability, 0),
ConcreteDataType::Int32(_) => build_substrait_kind!(I32, I32, nullability, 0),
ConcreteDataType::Int64(_) => build_substrait_kind!(I64, I64, nullability, 0),
ConcreteDataType::UInt8(_) => build_substrait_kind!(I8, I8, nullability, 1),
ConcreteDataType::UInt16(_) => build_substrait_kind!(I16, I16, nullability, 1),
ConcreteDataType::UInt32(_) => build_substrait_kind!(I32, I32, nullability, 1),
ConcreteDataType::UInt64(_) => build_substrait_kind!(I64, I64, nullability, 1),
ConcreteDataType::Float32(_) => build_substrait_kind!(Fp32, Fp32, nullability, 0),
ConcreteDataType::Float64(_) => build_substrait_kind!(Fp64, Fp64, nullability, 0),
ConcreteDataType::Binary(_) => build_substrait_kind!(Binary, Binary, nullability, 0),
ConcreteDataType::String(_) => build_substrait_kind!(String, String, nullability, 0),
ConcreteDataType::Date(_) => build_substrait_kind!(Date, Date, nullability, 0),
ConcreteDataType::DateTime(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?,
ConcreteDataType::Timestamp(_) => {
build_substrait_kind!(Timestamp, Timestamp, nullability, 0)
}
ConcreteDataType::List(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?,
};
Ok(SType { kind })
}
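As a quick sanity check on the two converters above, here is a hedged round-trip sketch (the function name is hypothetical; the types and `Result` alias follow the imports at the top of this file):
fn round_trip_uint32_example() -> Result<()> {
    // Per from_concrete_type, UInt32 maps to Kind::I32 with type_variation_reference = 1.
    let substrait_ty = from_concrete_type(ConcreteDataType::uint32_datatype(), Some(true))?;
    // to_concrete_type reads variation 1 back as the unsigned 32-bit type and reports nullability.
    let (concrete_ty, nullable) = to_concrete_type(&substrait_ty)?;
    assert_eq!(concrete_ty, ConcreteDataType::uint32_datatype());
    assert!(nullable);
    Ok(())
}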

View File

@@ -6,7 +6,6 @@ edition = "2021"
[dependencies]
chrono = "0.4"
common-error = { path = "../error" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }

View File

@@ -21,24 +21,6 @@ impl TimestampMillis {
TimestampMillis(ms)
}
/// Returns the timestamp aligned by `bucket_duration` in milliseconds or
/// `None` if overflow occurred.
///
/// # Panics
/// Panics if `bucket_duration <= 0`.
pub fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis> {
assert!(bucket_duration > 0);
let ts = if self.0 >= 0 {
self.0
} else {
// `bucket_duration > 0` implies `bucket_duration - 1` won't overflow.
self.0.checked_sub(bucket_duration - 1)?
};
Some(TimestampMillis(ts / bucket_duration * bucket_duration))
}
/// Returns the timestamp value as i64.
pub fn as_i64(&self) -> i64 {
self.0
@@ -51,6 +33,12 @@ impl From<i64> for TimestampMillis {
}
}
impl From<TimestampMillis> for i64 {
fn from(ts: TimestampMillis) -> Self {
ts.0
}
}
impl PartialEq<i64> for TimestampMillis {
fn eq(&self, other: &i64) -> bool {
self.0 == *other
@@ -75,6 +63,25 @@ impl PartialOrd<TimestampMillis> for i64 {
}
}
pub trait BucketAligned {
/// Returns the timestamp aligned by `bucket_duration` in milliseconds or
/// `None` if overflow occurred.
///
/// # Panics
/// Panics if `bucket_duration <= 0`.
fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis>;
}
impl<T: Into<i64>> BucketAligned for T {
fn align_by_bucket(self, bucket_duration: i64) -> Option<TimestampMillis> {
assert!(bucket_duration > 0);
self.into()
.checked_div_euclid(bucket_duration)
.and_then(|val| val.checked_mul(bucket_duration))
.map(TimestampMillis)
}
}
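A few worked values for the euclidean-division alignment above, written as a hedged sketch that one might drop into the tests module below (the function name is illustrative and not part of the original change):
fn align_by_bucket_examples() {
    // (-5).checked_div_euclid(10) == Some(-1) and -1 * 10 == -10, so negative timestamps
    // align to the lower bucket boundary instead of truncating toward zero.
    assert!((-5i64).align_by_bucket(10).unwrap() == -10);
    // Positive values align downward: 23 ms falls into the 20 ms bucket.
    assert!(23i64.align_by_bucket(10).unwrap() == 20);
    // When the re-multiplication would overflow i64, the checked ops return None instead of panicking.
    assert!(i64::MIN.align_by_bucket(3).is_none());
}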
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -5,9 +5,7 @@ edition = "2021"
[features]
default = ["python"]
python = [
"dep:script"
]
python = ["dep:script"]
[dependencies]
api = { path = "../api" }
@@ -16,6 +14,7 @@ axum = "0.6.0-rc.2"
axum-macros = "0.3.0-rc.1"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-query = { path = "../common/query" }
@@ -23,7 +22,9 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../datatypes" }
futures = "0.3"
hyper = { version = "0.14", features = ["full"] }
@@ -39,29 +40,49 @@ snafu = { version = "0.7", features = ["backtraces"] }
sql = { path = "../sql" }
storage = { path = "../storage" }
store-api = { path = "../store-api" }
substrait = { path = "../common/substrait" }
table = { path = "../table" }
table-engine = { path = "../table-engine", features = ["test"] }
tokio = { version = "1.18", features = ["full"] }
tokio-stream = { version = "0.1.8", features = ["net"] }
tokio-stream = { version = "0.1", features = ["net"] }
tonic = "0.8"
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies.arrow]
package = "arrow2"
version = "0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]
[dev-dependencies]
axum-test-helper = "0.1"
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }
common-query = { path = "../common/query" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
tempdir = "0.3"
[dev-dependencies.arrow]
package = "arrow2"
version = "0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]

View File

@@ -24,10 +24,11 @@ impl Default for ObjectStoreConfig {
pub struct DatanodeOptions {
pub http_addr: String,
pub rpc_addr: String,
pub rpc_runtime_size: usize,
pub mysql_addr: String,
pub mysql_runtime_size: u32,
pub mysql_runtime_size: usize,
pub postgres_addr: String,
pub postgres_runtime_size: u32,
pub postgres_runtime_size: usize,
pub wal_dir: String,
pub storage: ObjectStoreConfig,
}
@@ -37,6 +38,7 @@ impl Default for DatanodeOptions {
Self {
http_addr: "0.0.0.0:3000".to_string(),
rpc_addr: "0.0.0.0:3001".to_string(),
rpc_runtime_size: 8,
mysql_addr: "0.0.0.0:3306".to_string(),
mysql_runtime_size: 2,
postgres_addr: "0.0.0.0:5432".to_string(),

View File

@@ -16,6 +16,12 @@ pub enum Error {
source: query::error::Error,
},
#[snafu(display("Failed to decode logical plan, source: {}", source))]
DecodeLogicalPlan {
#[snafu(backtrace)]
source: substrait::error::Error,
},
#[snafu(display("Failed to execute physical plan, source: {}", source))]
ExecutePhysicalPlan {
#[snafu(backtrace)]
@@ -67,6 +73,9 @@ pub enum Error {
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, backtrace: Backtrace },
#[snafu(display("Missing timestamp column in request"))]
MissingTimestampColumn { backtrace: Backtrace },
#[snafu(display(
"Columns and values number mismatch, columns: {}, values: {}",
columns,
@@ -189,7 +198,7 @@ pub enum Error {
source: api::error::Error,
},
#[snafu(display("Column default constraint error, source: {}", source))]
#[snafu(display("Invalid column default constraint, source: {}", source))]
ColumnDefaultConstraint {
#[snafu(backtrace)]
source: datatypes::error::Error,
@@ -247,6 +256,29 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Duplicated timestamp column in gRPC requests, exists {}, duplicated: {}",
exists,
duplicated
))]
DuplicatedTimestampColumn {
exists: String,
duplicated: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to access catalog, source: {}", source))]
Catalog {
#[snafu(backtrace)]
source: catalog::error::Error,
},
#[snafu(display("Failed to find table {} from catalog, source: {}", table_name, source))]
FindTable {
table_name: String,
source: catalog::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -255,9 +287,10 @@ impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ExecuteSql { source } => source.status_code(),
Error::DecodeLogicalPlan { source } => source.status_code(),
Error::ExecutePhysicalPlan { source } => source.status_code(),
Error::NewCatalog { source } => source.status_code(),
Error::FindTable { source, .. } => source.status_code(),
Error::CreateTable { source, .. }
| Error::GetTable { source, .. }
| Error::AlterTable { source, .. } => source.status_code(),
@@ -283,10 +316,13 @@ impl ErrorExt for Error {
| Error::KeyColumnNotFound { .. }
| Error::InvalidPrimaryKey { .. }
| Error::MissingField { .. }
| Error::MissingTimestampColumn { .. }
| Error::CatalogNotFound { .. }
| Error::SchemaNotFound { .. }
| Error::ConstraintNotSupported { .. }
| Error::ParseTimestamp { .. } => StatusCode::InvalidArguments,
| Error::ParseTimestamp { .. }
| Error::DuplicatedTimestampColumn { .. } => StatusCode::InvalidArguments,
// TODO(yingwen): Further categorize http error.
Error::StartServer { .. }
| Error::ParseAddr { .. }
@@ -297,7 +333,8 @@ impl ErrorExt for Error {
| Error::Conversion { .. }
| Error::IntoPhysicalPlan { .. }
| Error::UnsupportedExpr { .. }
| Error::ColumnDataType { .. } => StatusCode::Internal,
| Error::ColumnDataType { .. }
| Error::Catalog { .. } => StatusCode::Internal,
Error::InitBackend { .. } => StatusCode::StorageUnavailable,
Error::OpenLogStore { source } => source.status_code(),
@@ -342,7 +379,7 @@ mod tests {
)))
}
fn throw_catalog_error() -> std::result::Result<(), catalog::error::Error> {
fn throw_catalog_error() -> catalog::error::Result<()> {
Err(catalog::error::Error::RegisterTable {
source: BoxedError::new(MockError::with_backtrace(StatusCode::Internal)),
})

View File

@@ -1,47 +1,30 @@
use std::{fs, path, sync::Arc};
use api::v1::{
admin_expr, insert_expr, object_expr, select_expr, AdminExpr, AdminResult, ObjectExpr,
ObjectResult, SelectExpr,
};
use async_trait::async_trait;
use catalog::{CatalogManagerRef, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::BoxedError;
use common_error::status_code::StatusCode;
use common_query::Output;
use common_telemetry::logging::{error, info};
use common_telemetry::timer;
use catalog::CatalogManagerRef;
use common_telemetry::logging::info;
use log_store::fs::{config::LogConfig, log::LocalFileLogStore};
use object_store::{backend::fs::Backend, util, ObjectStore};
use object_store::{services::fs::Builder, util, ObjectStore};
use query::query_engine::{QueryEngineFactory, QueryEngineRef};
use servers::query_handler::{GrpcAdminHandler, GrpcQueryHandler, SqlQueryHandler};
use snafu::prelude::*;
use sql::statements::statement::Statement;
use storage::{config::EngineConfig as StorageEngineConfig, EngineImpl};
use table_engine::config::EngineConfig as TableEngineConfig;
use table_engine::engine::MitoEngine;
use crate::datanode::{DatanodeOptions, ObjectStoreConfig};
use crate::error::{
self, ExecuteSqlSnafu, InsertSnafu, NewCatalogSnafu, Result, TableNotFoundSnafu,
UnsupportedExprSnafu,
};
use crate::metric;
use crate::error::{self, NewCatalogSnafu, Result};
use crate::script::ScriptExecutor;
use crate::server::grpc::handler::{build_err_result, ObjectResultBuilder};
use crate::server::grpc::insert::insertion_expr_to_request;
use crate::server::grpc::plan::PhysicalPlanner;
use crate::server::grpc::select::to_object_result;
use crate::sql::{SqlHandler, SqlRequest};
use crate::sql::SqlHandler;
mod grpc;
mod sql;
type DefaultEngine = MitoEngine<EngineImpl<LocalFileLogStore>>;
// An abstraction to read/write services.
pub struct Instance {
// Query service
query_engine: QueryEngineRef,
sql_handler: SqlHandler,
// Catalog list
catalog_manager: CatalogManagerRef,
physical_planner: PhysicalPlanner,
script_executor: ScriptExecutor,
@@ -64,7 +47,7 @@ impl Instance {
object_store,
));
let catalog_manager = Arc::new(
catalog::LocalCatalogManager::try_new(table_engine.clone())
catalog::local::LocalCatalogManager::try_new(table_engine.clone())
.await
.context(NewCatalogSnafu)?,
);
@@ -82,94 +65,6 @@ impl Instance {
})
}
pub async fn execute_grpc_insert(
&self,
table_name: &str,
values: insert_expr::Values,
) -> Result<Output> {
let schema_provider = self
.catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.schema(DEFAULT_SCHEMA_NAME)
.unwrap();
let table = schema_provider
.table(table_name)
.context(TableNotFoundSnafu { table_name })?;
let insert = insertion_expr_to_request(table_name, values, table.clone())?;
let affected_rows = table
.insert(insert)
.await
.context(InsertSnafu { table_name })?;
Ok(Output::AffectedRows(affected_rows))
}
pub async fn execute_sql(&self, sql: &str) -> Result<Output> {
let stmt = self
.query_engine
.sql_to_statement(sql)
.context(ExecuteSqlSnafu)?;
match stmt {
Statement::Query(_) => {
let logical_plan = self
.query_engine
.statement_to_plan(stmt)
.context(ExecuteSqlSnafu)?;
self.query_engine
.execute(&logical_plan)
.await
.context(ExecuteSqlSnafu)
}
Statement::Insert(i) => {
let schema_provider = self
.catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.unwrap()
.schema(DEFAULT_SCHEMA_NAME)
.unwrap();
let request = self.sql_handler.insert_to_request(schema_provider, *i)?;
self.sql_handler.execute(request).await
}
Statement::Create(c) => {
let table_id = self.catalog_manager.next_table_id();
let _engine_name = c.engine.clone();
// TODO(hl): Select table engine by engine_name
let request = self.sql_handler.create_to_request(table_id, c)?;
let catalog_name = request.catalog_name.clone();
let schema_name = request.schema_name.clone();
let table_name = request.table_name.clone();
let table_id = request.id;
info!(
"Creating table, catalog: {:?}, schema: {:?}, table name: {:?}, table id: {}",
catalog_name, schema_name, table_name, table_id
);
self.sql_handler.execute(SqlRequest::Create(request)).await
}
Statement::Alter(alter_table) => {
let req = self.sql_handler.alter_to_request(alter_table)?;
self.sql_handler.execute(SqlRequest::Alter(req)).await
}
Statement::ShowDatabases(stmt) => {
self.sql_handler
.execute(SqlRequest::ShowDatabases(stmt))
.await
}
Statement::ShowTables(stmt) => {
self.sql_handler.execute(SqlRequest::ShowTables(stmt)).await
}
}
}
pub async fn start(&self) -> Result<()> {
self.catalog_manager
.start()
@@ -178,41 +73,6 @@ impl Instance {
Ok(())
}
async fn handle_insert(&self, table_name: &str, values: insert_expr::Values) -> ObjectResult {
match self.execute_grpc_insert(table_name, values).await {
Ok(Output::AffectedRows(rows)) => ObjectResultBuilder::new()
.status_code(StatusCode::Success as u32)
.mutate_result(rows as u32, 0)
.build(),
Err(err) => {
// TODO(fys): failure count
build_err_result(&err)
}
_ => unreachable!(),
}
}
async fn handle_select(&self, select_expr: SelectExpr) -> ObjectResult {
let result = self.do_handle_select(select_expr).await;
to_object_result(result).await
}
async fn do_handle_select(&self, select_expr: SelectExpr) -> Result<Output> {
let expr = select_expr.expr;
match expr {
Some(select_expr::Expr::Sql(sql)) => self.execute_sql(&sql).await,
Some(select_expr::Expr::PhysicalPlan(api::v1::PhysicalPlan { original_ql, plan })) => {
self.physical_planner
.execute(PhysicalPlanner::parse(plan)?, original_ql)
.await
}
_ => UnsupportedExprSnafu {
name: format!("{:?}", expr),
}
.fail(),
}
}
pub fn sql_handler(&self) -> &SqlHandler {
&self.sql_handler
}
@@ -236,7 +96,7 @@ impl Instance {
));
let catalog_manager = Arc::new(
catalog::LocalCatalogManager::try_new(mock_engine.clone())
catalog::local::manager::LocalCatalogManager::try_new(mock_engine.clone())
.await
.unwrap(),
);
@@ -271,10 +131,9 @@ async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStor
info!("The storage directory is: {}", &data_dir);
let accessor = Backend::build()
let accessor = Builder::default()
.root(&data_dir)
.finish()
.await
.build()
.context(error::InitBackendSnafu { dir: &data_dir })?;
Ok(ObjectStore::new(accessor))
@@ -298,76 +157,3 @@ async fn create_local_file_log_store(opts: &DatanodeOptions) -> Result<LocalFile
Ok(log_store)
}
// TODO(LFC): Refactor datanode and frontend instances, separate impl for each query handler.
#[async_trait]
impl SqlQueryHandler for Instance {
async fn do_query(&self, query: &str) -> servers::error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_SQL_ELAPSED);
self.execute_sql(query)
.await
.map_err(|e| {
error!(e; "Instance failed to execute sql");
BoxedError::new(e)
})
.context(servers::error::ExecuteQuerySnafu { query })
}
async fn insert_script(&self, name: &str, script: &str) -> servers::error::Result<()> {
self.script_executor.insert_script(name, script).await
}
async fn execute_script(&self, name: &str) -> servers::error::Result<Output> {
self.script_executor.execute_script(name).await
}
}
#[async_trait]
impl GrpcQueryHandler for Instance {
async fn do_query(&self, query: ObjectExpr) -> servers::error::Result<ObjectResult> {
let object_resp = match query.expr {
Some(object_expr::Expr::Insert(insert_expr)) => {
let table_name = &insert_expr.table_name;
let expr = insert_expr
.expr
.context(servers::error::InvalidQuerySnafu {
reason: "missing `expr` in `InsertExpr`",
})?;
match expr {
insert_expr::Expr::Values(values) => {
self.handle_insert(table_name, values).await
}
insert_expr::Expr::Sql(sql) => {
let output = self.execute_sql(&sql).await;
to_object_result(output).await
}
}
}
Some(object_expr::Expr::Select(select_expr)) => self.handle_select(select_expr).await,
other => {
return servers::error::NotSupportedSnafu {
feat: format!("{:?}", other),
}
.fail();
}
};
Ok(object_resp)
}
}
#[async_trait]
impl GrpcAdminHandler for Instance {
async fn exec_admin_request(&self, expr: AdminExpr) -> servers::error::Result<AdminResult> {
let admin_resp = match expr.expr {
Some(admin_expr::Expr::Create(create_expr)) => self.handle_create(create_expr).await,
Some(admin_expr::Expr::Alter(alter_expr)) => self.handle_alter(alter_expr).await,
other => {
return servers::error::NotSupportedSnafu {
feat: format!("{:?}", other),
}
.fail();
}
};
Ok(admin_resp)
}
}

View File

@@ -0,0 +1,240 @@
use api::v1::{
admin_expr, codec::InsertBatch, insert_expr, object_expr, select_expr, AdminExpr, AdminResult,
ObjectExpr, ObjectResult, SelectExpr,
};
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::status_code::StatusCode;
use common_query::Output;
use common_telemetry::logging::{debug, info};
use query::plan::LogicalPlan;
use servers::query_handler::{GrpcAdminHandler, GrpcQueryHandler};
use snafu::prelude::*;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use table::requests::AddColumnRequest;
use crate::error::{
self, CatalogSnafu, DecodeLogicalPlanSnafu, ExecuteSqlSnafu, InsertSnafu, Result,
TableNotFoundSnafu, UnsupportedExprSnafu,
};
use crate::instance::Instance;
use crate::server::grpc::handler::{build_err_result, ObjectResultBuilder};
use crate::server::grpc::insert::{self, insertion_expr_to_request};
use crate::server::grpc::plan::PhysicalPlanner;
use crate::server::grpc::select::to_object_result;
use crate::sql::SqlRequest;
impl Instance {
async fn add_new_columns_to_table(
&self,
table_name: &str,
add_columns: Vec<AddColumnRequest>,
) -> Result<()> {
let column_names = add_columns
.iter()
.map(|req| req.column_schema.name.clone())
.collect::<Vec<_>>();
let alter_request = insert::build_alter_table_request(table_name, add_columns);
debug!(
"Adding new columns: {:?} to table: {}",
column_names, table_name
);
let _result = self
.sql_handler()
.execute(SqlRequest::Alter(alter_request))
.await?;
info!(
"Added new columns: {:?} to table: {}",
column_names, table_name
);
Ok(())
}
async fn create_table_by_insert_batches(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
insert_batches: &[InsertBatch],
) -> Result<()> {
// Create table automatically, build schema from data.
let table_id = self.catalog_manager.next_table_id();
let create_table_request = insert::build_create_table_request(
catalog_name,
schema_name,
table_id,
table_name,
insert_batches,
)?;
info!(
"Try to create table: {} automatically with request: {:?}",
table_name, create_table_request,
);
let _result = self
.sql_handler()
.execute(SqlRequest::Create(create_table_request))
.await?;
info!("Success to create table: {} automatically", table_name);
Ok(())
}
pub async fn execute_grpc_insert(
&self,
table_name: &str,
values: insert_expr::Values,
) -> Result<Output> {
// maybe infer from insert batch?
let catalog_name = DEFAULT_CATALOG_NAME;
let schema_name = DEFAULT_SCHEMA_NAME;
let schema_provider = self
.catalog_manager
.catalog(catalog_name)
.unwrap()
.expect("default catalog must exist")
.schema(schema_name)
.expect("default schema must exist")
.unwrap();
let insert_batches = insert::insert_batches(values.values)?;
ensure!(!insert_batches.is_empty(), error::IllegalInsertDataSnafu);
let table = if let Some(table) = schema_provider.table(table_name).context(CatalogSnafu)? {
let schema = table.schema();
if let Some(add_columns) = insert::find_new_columns(&schema, &insert_batches)? {
self.add_new_columns_to_table(table_name, add_columns)
.await?;
}
table
} else {
self.create_table_by_insert_batches(
catalog_name,
schema_name,
table_name,
&insert_batches,
)
.await?;
schema_provider
.table(table_name)
.context(CatalogSnafu)?
.context(TableNotFoundSnafu { table_name })?
};
let insert = insertion_expr_to_request(table_name, insert_batches, table.clone())?;
let affected_rows = table
.insert(insert)
.await
.context(InsertSnafu { table_name })?;
Ok(Output::AffectedRows(affected_rows))
}
async fn handle_insert(&self, table_name: &str, values: insert_expr::Values) -> ObjectResult {
match self.execute_grpc_insert(table_name, values).await {
Ok(Output::AffectedRows(rows)) => ObjectResultBuilder::new()
.status_code(StatusCode::Success as u32)
.mutate_result(rows as u32, 0)
.build(),
Err(err) => {
// TODO(fys): failure count
build_err_result(&err)
}
_ => unreachable!(),
}
}
async fn handle_select(&self, select_expr: SelectExpr) -> ObjectResult {
let result = self.do_handle_select(select_expr).await;
to_object_result(result).await
}
async fn do_handle_select(&self, select_expr: SelectExpr) -> Result<Output> {
let expr = select_expr.expr;
match expr {
Some(select_expr::Expr::Sql(sql)) => self.execute_sql(&sql).await,
Some(select_expr::Expr::LogicalPlan(plan)) => self.execute_logical(plan).await,
Some(select_expr::Expr::PhysicalPlan(api::v1::PhysicalPlan { original_ql, plan })) => {
self.physical_planner
.execute(PhysicalPlanner::parse(plan)?, original_ql)
.await
}
_ => UnsupportedExprSnafu {
name: format!("{:?}", expr),
}
.fail(),
}
}
async fn execute_logical(&self, plan_bytes: Vec<u8>) -> Result<Output> {
let logical_plan_converter = DFLogicalSubstraitConvertor::new(self.catalog_manager.clone());
let logical_plan = logical_plan_converter
.decode(plan_bytes.as_slice())
.context(DecodeLogicalPlanSnafu)?;
self.query_engine
.execute(&LogicalPlan::DfPlan(logical_plan))
.await
.context(ExecuteSqlSnafu)
}
}
#[async_trait]
impl GrpcQueryHandler for Instance {
async fn do_query(&self, query: ObjectExpr) -> servers::error::Result<ObjectResult> {
let object_resp = match query.expr {
Some(object_expr::Expr::Insert(insert_expr)) => {
let table_name = &insert_expr.table_name;
let expr = insert_expr
.expr
.context(servers::error::InvalidQuerySnafu {
reason: "missing `expr` in `InsertExpr`",
})?;
match expr {
insert_expr::Expr::Values(values) => {
self.handle_insert(table_name, values).await
}
insert_expr::Expr::Sql(sql) => {
let output = self.execute_sql(&sql).await;
to_object_result(output).await
}
}
}
Some(object_expr::Expr::Select(select_expr)) => self.handle_select(select_expr).await,
other => {
return servers::error::NotSupportedSnafu {
feat: format!("{:?}", other),
}
.fail();
}
};
Ok(object_resp)
}
}
#[async_trait]
impl GrpcAdminHandler for Instance {
async fn exec_admin_request(&self, expr: AdminExpr) -> servers::error::Result<AdminResult> {
let admin_resp = match expr.expr {
Some(admin_expr::Expr::Create(create_expr)) => self.handle_create(create_expr).await,
Some(admin_expr::Expr::Alter(alter_expr)) => self.handle_alter(alter_expr).await,
other => {
return servers::error::NotSupportedSnafu {
feat: format!("{:?}", other),
}
.fail();
}
};
Ok(admin_resp)
}
}

View File

@@ -0,0 +1,104 @@
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::BoxedError;
use common_query::Output;
use common_telemetry::{
logging::{error, info},
timer,
};
use servers::query_handler::SqlQueryHandler;
use snafu::prelude::*;
use sql::statements::statement::Statement;
use crate::error::{ExecuteSqlSnafu, Result};
use crate::instance::Instance;
use crate::metric;
use crate::sql::SqlRequest;
impl Instance {
pub async fn execute_sql(&self, sql: &str) -> Result<Output> {
let stmt = self
.query_engine
.sql_to_statement(sql)
.context(ExecuteSqlSnafu)?;
match stmt {
Statement::Query(_) => {
let logical_plan = self
.query_engine
.statement_to_plan(stmt)
.context(ExecuteSqlSnafu)?;
self.query_engine
.execute(&logical_plan)
.await
.context(ExecuteSqlSnafu)
}
Statement::Insert(i) => {
let schema_provider = self
.catalog_manager
.catalog(DEFAULT_CATALOG_NAME)
.expect("datafusion does not accept fallible catalog access")
.unwrap()
.schema(DEFAULT_SCHEMA_NAME)
.expect("datafusion does not accept fallible catalog access")
.unwrap();
let request = self.sql_handler.insert_to_request(schema_provider, *i)?;
self.sql_handler.execute(request).await
}
Statement::Create(c) => {
let table_id = self.catalog_manager.next_table_id();
let _engine_name = c.engine.clone();
// TODO(hl): Select table engine by engine_name
let request = self.sql_handler.create_to_request(table_id, c)?;
let catalog_name = &request.catalog_name;
let schema_name = &request.schema_name;
let table_name = &request.table_name;
let table_id = request.id;
info!(
"Creating table, catalog: {:?}, schema: {:?}, table name: {:?}, table id: {}",
catalog_name, schema_name, table_name, table_id
);
self.sql_handler.execute(SqlRequest::Create(request)).await
}
Statement::Alter(alter_table) => {
let req = self.sql_handler.alter_to_request(alter_table)?;
self.sql_handler.execute(SqlRequest::Alter(req)).await
}
Statement::ShowDatabases(stmt) => {
self.sql_handler
.execute(SqlRequest::ShowDatabases(stmt))
.await
}
Statement::ShowTables(stmt) => {
self.sql_handler.execute(SqlRequest::ShowTables(stmt)).await
}
}
}
}
#[async_trait]
impl SqlQueryHandler for Instance {
async fn do_query(&self, query: &str) -> servers::error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_SQL_ELAPSED);
self.execute_sql(query)
.await
.map_err(|e| {
error!(e; "Instance failed to execute sql");
BoxedError::new(e)
})
.context(servers::error::ExecuteQuerySnafu { query })
}
async fn insert_script(&self, name: &str, script: &str) -> servers::error::Result<()> {
self.script_executor.insert_script(name, script).await
}
async fn execute_script(&self, name: &str) -> servers::error::Result<Output> {
self.script_executor.execute_script(name).await
}
}

View File

@@ -40,9 +40,16 @@ impl Services {
.build()
.context(error::RuntimeResourceSnafu)?,
);
let grpc_runtime = Arc::new(
RuntimeBuilder::default()
.worker_threads(opts.rpc_runtime_size as usize)
.thread_name("grpc-io-handlers")
.build()
.context(error::RuntimeResourceSnafu)?,
);
Ok(Self {
http_server: HttpServer::new(instance.clone()),
grpc_server: GrpcServer::new(instance.clone(), instance.clone()),
grpc_server: GrpcServer::new(instance.clone(), instance.clone(), grpc_runtime),
mysql_server: MysqlServer::create_server(instance.clone(), mysql_io_runtime),
postgres_server: Box::new(PostgresServer::new(instance, postgres_io_runtime)),
})

View File

@@ -2,13 +2,14 @@ use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::{alter_expr::Kind, AdminResult, AlterExpr, ColumnDef, CreateExpr};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::{ErrorExt, StatusCode};
use common_query::Output;
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use futures::TryFutureExt;
use snafu::prelude::*;
use table::requests::{AlterKind, AlterTableRequest, CreateTableRequest};
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
use crate::error::{self, ColumnDefaultConstraintSnafu, MissingFieldSnafu, Result};
use crate::instance::Instance;
@@ -77,10 +78,16 @@ impl Instance {
let table_id = self.catalog_manager().next_table_id();
let catalog_name = expr
.catalog_name
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = expr
.schema_name
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
Ok(CreateTableRequest {
id: table_id,
catalog_name: expr.catalog_name,
schema_name: expr.schema_name,
catalog_name,
schema_name,
table_name: expr.table_name,
desc: expr.desc,
schema,
@@ -96,8 +103,12 @@ impl Instance {
let column_def = add_column.column_def.context(MissingFieldSnafu {
field: "column_def",
})?;
let alter_kind = AlterKind::AddColumn {
new_column: create_column_schema(&column_def)?,
let alter_kind = AlterKind::AddColumns {
columns: vec![AddColumnRequest {
column_schema: create_column_schema(&column_def)?,
// FIXME(dennis): support adding key columns
is_key: false,
}],
};
let request = AlterTableRequest {
catalog_name: expr.catalog_name,
@@ -134,7 +145,7 @@ fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
Ok(Arc::new(
SchemaBuilder::try_from(column_schemas)
.context(error::CreateSchemaSnafu)?
.timestamp_index(ts_index)
.timestamp_index(Some(ts_index))
.build()
.context(error::CreateSchemaSnafu)?,
))
@@ -143,24 +154,25 @@ fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
fn create_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
let data_type =
ColumnDataTypeWrapper::try_new(column_def.datatype).context(error::ColumnDataTypeSnafu)?;
Ok(ColumnSchema {
name: column_def.name.clone(),
data_type: data_type.into(),
is_nullable: column_def.is_nullable,
default_constraint: match &column_def.default_constraint {
None => None,
Some(v) => Some(
ColumnDefaultConstraint::try_from(&v[..]).context(ColumnDefaultConstraintSnafu)?,
),
},
})
let default_constraint = match &column_def.default_constraint {
None => None,
Some(v) => {
Some(ColumnDefaultConstraint::try_from(&v[..]).context(ColumnDefaultConstraintSnafu)?)
}
};
ColumnSchema::new(
column_def.name.clone(),
data_type.into(),
column_def.is_nullable,
)
.with_default_constraint(default_constraint)
.context(ColumnDefaultConstraintSnafu)
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use catalog::MIN_USER_TABLE_ID;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
@@ -169,15 +181,15 @@ mod tests {
#[tokio::test]
async fn test_create_expr_to_request() {
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts();
let (opts, _guard) = test_util::create_tmp_dir_and_datanode_opts("create_expr_to_request");
let instance = Instance::new(&opts).await.unwrap();
instance.start().await.unwrap();
let expr = testing_create_expr();
let request = instance.create_expr_to_request(expr).unwrap();
assert_eq!(request.id, MIN_USER_TABLE_ID);
assert_eq!(request.catalog_name, None);
assert_eq!(request.schema_name, None);
assert_eq!(request.id, common_catalog::consts::MIN_USER_TABLE_ID);
assert_eq!(request.catalog_name, "greptime".to_string());
assert_eq!(request.schema_name, "public".to_string());
assert_eq!(request.table_name, "my-metrics");
assert_eq!(request.desc, Some("blabla".to_string()));
assert_eq!(request.schema, expected_table_schema());
@@ -233,7 +245,7 @@ mod tests {
let column_schema = create_column_schema(&column_def).unwrap();
assert_eq!(column_schema.name, "a");
assert_eq!(column_schema.data_type, ConcreteDataType::string_datatype());
assert!(column_schema.is_nullable);
assert!(column_schema.is_nullable());
let default_constraint = ColumnDefaultConstraint::Value(Value::from("defaut value"));
let column_def = ColumnDef {
@@ -245,10 +257,10 @@ mod tests {
let column_schema = create_column_schema(&column_def).unwrap();
assert_eq!(column_schema.name, "a");
assert_eq!(column_schema.data_type, ConcreteDataType::string_datatype());
assert!(column_schema.is_nullable);
assert!(column_schema.is_nullable());
assert_eq!(
default_constraint,
column_schema.default_constraint.unwrap()
*column_schema.default_constraint().unwrap()
);
}
@@ -294,35 +306,15 @@ mod tests {
fn expected_table_schema() -> SchemaRef {
let column_schemas = vec![
ColumnSchema {
name: "host".to_string(),
data_type: ConcreteDataType::string_datatype(),
is_nullable: false,
default_constraint: None,
},
ColumnSchema {
name: "ts".to_string(),
data_type: ConcreteDataType::timestamp_millis_datatype(),
is_nullable: false,
default_constraint: None,
},
ColumnSchema {
name: "cpu".to_string(),
data_type: ConcreteDataType::float32_datatype(),
is_nullable: true,
default_constraint: None,
},
ColumnSchema {
name: "memory".to_string(),
data_type: ConcreteDataType::float64_datatype(),
is_nullable: true,
default_constraint: None,
},
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false),
ColumnSchema::new("cpu", ConcreteDataType::float32_datatype(), true),
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
];
Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.timestamp_index(1)
.timestamp_index(Some(1))
.build()
.unwrap(),
)

View File

@@ -1,26 +1,186 @@
use std::collections::HashSet;
use std::{
collections::{hash_map::Entry, HashMap},
ops::Deref,
sync::Arc,
};
use api::v1::{codec::InsertBatch, column::Values, insert_expr, Column};
use api::{
helper::ColumnDataTypeWrapper,
v1::{
codec::InsertBatch,
column::{SemanticType, Values},
Column,
},
};
use common_base::BitVec;
use common_time::timestamp::Timestamp;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use datatypes::{data_type::ConcreteDataType, value::Value, vectors::VectorBuilder};
use snafu::{ensure, OptionExt, ResultExt};
use table::{requests::InsertRequest, Table};
use table::metadata::TableId;
use table::{
requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest, InsertRequest},
Table,
};
use crate::error::{ColumnNotFoundSnafu, DecodeInsertSnafu, IllegalInsertDataSnafu, Result};
use crate::error::{self, ColumnNotFoundSnafu, DecodeInsertSnafu, IllegalInsertDataSnafu, Result};
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
#[inline]
fn build_column_schema(column_name: &str, datatype: i32, nullable: bool) -> Result<ColumnSchema> {
let datatype_wrapper =
ColumnDataTypeWrapper::try_new(datatype).context(error::ColumnDataTypeSnafu)?;
Ok(ColumnSchema::new(
column_name,
datatype_wrapper.into(),
nullable,
))
}
pub fn find_new_columns(
schema: &SchemaRef,
insert_batches: &[InsertBatch],
) -> Result<Option<Vec<AddColumnRequest>>> {
let mut requests = Vec::default();
let mut new_columns: HashSet<String> = HashSet::default();
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if schema.column_schema_by_name(column_name).is_none()
&& !new_columns.contains(column_name)
{
let column_schema = build_column_schema(column_name, *datatype, true)?;
requests.push(AddColumnRequest {
column_schema,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
});
new_columns.insert(column_name.to_string());
}
}
}
if requests.is_empty() {
Ok(None)
} else {
Ok(Some(requests))
}
}
/// Build an alter table request that adds new columns.
#[inline]
pub fn build_alter_table_request(
table_name: &str,
columns: Vec<AddColumnRequest>,
) -> AlterTableRequest {
AlterTableRequest {
catalog_name: None,
schema_name: None,
table_name: table_name.to_string(),
alter_kind: AlterKind::AddColumns { columns },
}
}
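A hedged usage sketch tying `find_new_columns` and `build_alter_table_request` together, mirroring how the datanode's gRPC insert path calls them (the surrounding `Result` context, the `table` handle, and the "demo" table name are assumptions for illustration):
// Illustrative only: detect columns present in the insert batches but missing from the
// table schema, then wrap them in an ALTER TABLE ... ADD COLUMNS request.
if let Some(add_columns) = find_new_columns(&table.schema(), &insert_batches)? {
    let alter_request = build_alter_table_request("demo", add_columns);
    assert!(matches!(alter_request.alter_kind, AlterKind::AddColumns { .. }));
}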
/// Try to build a create table request from insert data.
pub fn build_create_table_request(
catalog_name: &str,
schema_name: &str,
table_id: TableId,
table_name: &str,
insert_batches: &[InsertBatch],
) -> Result<CreateTableRequest> {
let mut new_columns: HashSet<String> = HashSet::default();
let mut column_schemas = Vec::default();
let mut primary_key_indices = Vec::default();
let mut timestamp_index = usize::MAX;
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if !new_columns.contains(column_name) {
let mut is_nullable = true;
match *semantic_type {
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_schemas.len()),
TIMESTAMP_SEMANTIC_TYPE => {
ensure!(
timestamp_index == usize::MAX,
error::DuplicatedTimestampColumnSnafu {
exists: &columns[timestamp_index].column_name,
duplicated: column_name,
}
);
timestamp_index = column_schemas.len();
// Timestamp column must not be null.
is_nullable = false;
}
_ => {}
}
let column_schema = build_column_schema(column_name, *datatype, is_nullable)?;
column_schemas.push(column_schema);
new_columns.insert(column_name.to_string());
}
}
ensure!(
timestamp_index != usize::MAX,
error::MissingTimestampColumnSnafu
);
let schema = Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.timestamp_index(Some(timestamp_index))
.build()
.context(error::CreateSchemaSnafu)?,
);
return Ok(CreateTableRequest {
id: table_id,
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: table_name.to_string(),
desc: None,
schema,
create_if_not_exists: true,
primary_key_indices,
table_options: HashMap::new(),
});
}
error::IllegalInsertDataSnafu.fail()
}
pub fn insertion_expr_to_request(
table_name: &str,
values: insert_expr::Values,
insert_batches: Vec<InsertBatch>,
table: Arc<dyn Table>,
) -> Result<InsertRequest> {
let schema = table.schema();
let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());
let insert_batches = insert_batches(values.values)?;
for InsertBatch { columns, row_count } in insert_batches {
for Column {
@@ -66,7 +226,8 @@ pub fn insertion_expr_to_request(
})
}
fn insert_batches(bytes_vec: Vec<Vec<u8>>) -> Result<Vec<InsertBatch>> {
#[inline]
pub fn insert_batches(bytes_vec: Vec<Vec<u8>>) -> Result<Vec<InsertBatch>> {
bytes_vec
.iter()
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
@@ -199,12 +360,12 @@ mod tests {
use api::v1::{
codec::InsertBatch,
column::{self, Values},
insert_expr, Column,
column::{self, SemanticType, Values},
insert_expr, Column, ColumnDataType,
};
use common_base::BitVec;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_recordbatch::SendableRecordBatchStream;
use common_time::timestamp::Timestamp;
use datatypes::{
data_type::ConcreteDataType,
@@ -212,9 +373,87 @@ mod tests {
value::Value,
};
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;
use crate::server::grpc::insert::{convert_values, insertion_expr_to_request, is_null};
use super::{
build_column_schema, build_create_table_request, convert_values, find_new_columns,
insert_batches, insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE,
TIMESTAMP_SEMANTIC_TYPE,
};
#[test]
fn test_build_create_table_request() {
let table_id = 10;
let table_name = "test_metric";
assert!(build_create_table_request("", "", table_id, table_name, &[]).is_err());
let insert_batches = insert_batches(mock_insert_batches()).unwrap();
let req =
build_create_table_request("", "", table_id, table_name, &insert_batches).unwrap();
assert_eq!(table_id, req.id);
assert_eq!(table_name, req.table_name);
assert!(req.desc.is_none());
assert_eq!(vec![0], req.primary_key_indices);
let schema = req.schema;
assert_eq!(Some(3), schema.timestamp_index());
assert_eq!(4, schema.num_columns());
assert_eq!(
ConcreteDataType::string_datatype(),
schema.column_schema_by_name("host").unwrap().data_type
);
assert_eq!(
ConcreteDataType::float64_datatype(),
schema.column_schema_by_name("cpu").unwrap().data_type
);
assert_eq!(
ConcreteDataType::float64_datatype(),
schema.column_schema_by_name("memory").unwrap().data_type
);
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
schema.column_schema_by_name("ts").unwrap().data_type
);
}
#[test]
fn test_find_new_columns() {
let mut columns = Vec::with_capacity(1);
let cpu_column = build_column_schema("cpu", 10, true).unwrap();
let ts_column = build_column_schema("ts", 15, false).unwrap();
columns.push(cpu_column);
columns.push(ts_column);
let schema = Arc::new(
SchemaBuilder::try_from(columns)
.unwrap()
.timestamp_index(Some(1))
.build()
.unwrap(),
);
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
let insert_batches = insert_batches(mock_insert_batches()).unwrap();
let new_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
assert_eq!(2, new_columns.len());
let host_column = &new_columns[0];
assert!(host_column.is_key);
assert_eq!(
ConcreteDataType::string_datatype(),
host_column.column_schema.data_type
);
let memory_column = &new_columns[1];
assert!(!memory_column.is_key);
assert_eq!(
ConcreteDataType::float64_datatype(),
memory_column.column_schema.data_type
)
}
#[test]
fn test_insertion_expr_to_request() {
@@ -223,7 +462,8 @@ mod tests {
let values = insert_expr::Values {
values: mock_insert_batches(),
};
let insert_req = insertion_expr_to_request("demo", values, table).unwrap();
let insert_batches = insert_batches(values.values).unwrap();
let insert_req = insertion_expr_to_request("demo", insert_batches, table).unwrap();
assert_eq!("demo", insert_req.table_name);
@@ -297,26 +537,27 @@ mod tests {
Arc::new(
SchemaBuilder::try_from(column_schemas)
.unwrap()
.timestamp_index(3)
.timestamp_index(Some(3))
.build()
.unwrap(),
)
}
fn table_info(&self) -> TableInfoRef {
unimplemented!()
}
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> TableResult<SendableRecordBatchStream> {
) -> TableResult<PhysicalPlanRef> {
unimplemented!();
}
}
fn mock_insert_batches() -> Vec<Vec<u8>> {
const SEMANTIC_TAG: i32 = 0;
const SEMANTIC_FIELD: i32 = 1;
const SEMANTIC_TS: i32 = 2;
let row_count = 2;
let host_vals = column::Values {
@@ -325,10 +566,10 @@ mod tests {
};
let host_column = Column {
column_name: "host".to_string(),
semantic_type: SEMANTIC_TAG,
semantic_type: TAG_SEMANTIC_TYPE,
values: Some(host_vals),
null_mask: vec![0],
..Default::default()
datatype: ColumnDataType::String as i32,
};
let cpu_vals = column::Values {
@@ -337,10 +578,10 @@ mod tests {
};
let cpu_column = Column {
column_name: "cpu".to_string(),
semantic_type: SEMANTIC_FIELD,
semantic_type: SemanticType::Field as i32,
values: Some(cpu_vals),
null_mask: vec![2],
..Default::default()
datatype: ColumnDataType::Float64 as i32,
};
let mem_vals = column::Values {
@@ -349,10 +590,10 @@ mod tests {
};
let mem_column = Column {
column_name: "memory".to_string(),
semantic_type: SEMANTIC_FIELD,
semantic_type: SemanticType::Field as i32,
values: Some(mem_vals),
null_mask: vec![1],
..Default::default()
datatype: ColumnDataType::Float64 as i32,
};
let ts_vals = column::Values {
@@ -361,10 +602,10 @@ mod tests {
};
let ts_column = Column {
column_name: "ts".to_string(),
semantic_type: SEMANTIC_TS,
semantic_type: TIMESTAMP_SEMANTIC_TYPE,
values: Some(ts_vals),
null_mask: vec![0],
datatype: Some(15),
datatype: ColumnDataType::Timestamp as i32,
};
let insert_batch = InsertBatch {

Some files were not shown because too many files have changed in this diff.