Compare commits


78 Commits

Author SHA1 Message Date
Ning Sun
249ebc6937 feat: update pgwire and refactor pg auth handler (#732) 2022-12-09 17:01:55 +08:00
elijah
c1b8981f61 refactor(mito): change the table path to schema/table_id (#728)
refactor: change the table path to `schema/table_id`
2022-12-09 12:59:16 +08:00
Jiachun Feng
949cd3e3af feat: move_value & delete_route (#707)
* feat: move_value & delete_route

* chore: minor refactor

* chore: refactor unit test of metaclient

* chore: map to kv

* Update src/meta-srv/src/service/router.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/meta-srv/src/service/router.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: by code review

Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-12-09 11:07:48 +08:00
SSebo
b26982c5d7 feat: support timestamp new syntax (#697)
* feat: support timestamp new syntax

* fix: not null at end of new time stamp index syntax

* chore: simplify code
2022-12-09 10:52:14 +08:00
fys
4fdf26810c feat: support auth in frontend (#688)
* feat: add UserProvider trait

* chore: minor fix

* support pg mysql

* refactor and add some logs

* chore: add license

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
2022-12-08 11:51:52 +08:00
dennis zhuang
7f59758e69 feat: bump opendal version to 0.22 (#721)
* feat: bump opendal version to 0.22

* fix: LoggingLayer
2022-12-08 11:19:21 +08:00
Zheming Li
a521ab5041 fix: set default value when fail to get git info instead of panic (#696)
fix: set default value when fail to get git info instead of panic
2022-12-07 13:16:27 +08:00
LFC
833216d317 refactor: directly invoke Datanode methods in standalone mode (part 1) (#694)
* refactor: directly invoke Datanode methods in standalone mode

* test: add more unit tests

* fix: get rid of `println` in testing codes

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-07 11:37:59 +08:00
Ruihang Xia
90c832b33d refactor: drop support of physical plan query interface (#714)
* refactor: drop support of physical plan query interface

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: collapse server/grpc sub-module

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: remove unused errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-06 19:23:32 +08:00
LFC
8959dbcef8 feat: Substrait logical plan (#704)
* feat: use Substrait logical plan to query data from Datanode in Frontend in distributed mode

* fix: resolve PR comments

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-06 19:21:57 +08:00
discord9
2034b40f33 chore: update RustPython dependence(With a tweaked fork) (#655)
* refactor: update RsPy

* depend: add `rustpython-pylib`

* feat: add_frozen stdlib for every vm init

* feat: limit stdlib to a selected few

* chore: use `rev` instead of `branch` in depend

* refactor: rename to allow_list

* feat: use opt level one

* doc: add username for TODO&change optimize to 0

* style: fmt .toml
2022-12-06 14:15:00 +08:00
SSebo
55e6be7af1 fix: test_server_require_secure_client_secure (#701) 2022-12-06 10:38:54 +08:00
discord9
f9bfb121db feat: add rate() udf (#508)
* feat: rewrite `rate` UDF

* feat: rename to `prom_rate`

* refactor: solve conflict&add license

* refactor: import arrow
2022-12-06 10:30:13 +08:00
Ruihang Xia
6fb413ae50 ci: add toml format linter (#706)
* chore: run taplo format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* ci: add workflow to check toml

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rerun formatter with indent set to 4 spaces

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update check command

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-05 20:03:10 +08:00
Ruihang Xia
beb07fc895 feat: new datatypes subcrate based on the official arrow (#705)
* feat: Init datatypes2 crate

* chore: Remove some unimplemented types

* feat: Implements PrimitiveType and PrimitiveVector for datatypes2 (#633)

* feat: Implement primitive types and vectors

* feat: Implement a wrapper type

* feat: Remove VectorType from ScalarRef

* feat: Move some trait bound from NativeType to WrapperType

* feat: pub use  primitive vectors and builders

* feat: Returns error in try_from when type mismatch

* feat: Impl PartialEq for some vectors

* test: Pass vector tests

* chore: Add license header

* test: Pass more vector tests

* feat: Implement some methods of vector Helper

* test: Pass more tests

* style: Fix clippy

* chore: Add license header

* feat: Remove IntoValueRef trait

* feat: Add NativeType trait bound to WrapperType::Native

* docs: Explain what is wrapper type

* chore: Fix typos

* refactor: LogicalPrimitiveType::type_name returns str

* feat: Implements DateType and DateVector (#651)

* feat: Implement DateType and DateVector

* test: Pass more value and data type tests

* chore: Address CR comments

* test: Skip list value test

* feat: datatypes2 datetime (#661)

* feat: impl DateTime type and vector

* fix: add license header

* fix: CR comments and add more tests

* fix: customized serialization for wrapper type

* feat: Implements NullType and NullVector (#658)

* feat: Implements NullType and NullVector

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* feat: Implements StringType and StringVector (#659)

* feat: implement string vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more test and from

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* cover NUL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: impl datatypes2/timestamp (#686)

* feat: add timestamp datatype and vectors

* fix: cr comments and reformat code

* chore: add some tests

* feat: Implements ListType and ListVector (#681)

* feat: Implement ListType and ListVector

* test: Pass more tests

* style: Fix clippy

* chore: Fix comment

* chore: Address CR comments

* feat: impl constant vector (#680)

* feat: impl constant vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* rename fn names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove println

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* feat: Implements Validity (#684)

* feat: Implements Validity

* chore: remove pub from sub mod in vectors

* feat: Implements schema for datatypes2 (#695)

* feat: Add is_timestamp_compatible to DataType

* feat: Implement ColumnSchema and Schema

* feat: Impl RawSchema

* chore: Remove useless codes and run more tests

* chore: Fix clippy

* feat: Impl from_arrow_time_unit and pass schema tests

* chore: add more tests for timestamp (#702)

* chore: add more tests for timestamp

* chore: add replicate test for timestamps

* feat: Implements helper methods for vectors/values (#703)

* feat: Implement helper methods for vectors/values

* chore: Address CR comments

* chore: add more test for timestamp

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
2022-12-05 19:59:23 +08:00
Ning Sun
4275e47bdb refactor: use updated mysql_async client (#698) 2022-12-05 11:18:32 +08:00
dennis zhuang
6720bc5f7c fix: validate create table request in mito engine (#690)
* fix: validate create table request in mito engine

* fix: comment

* chore: remove TIMESTAMP_INDEX in system.rs
2022-12-05 11:01:43 +08:00
Lei, HUANG
4052563248 fix: pr template task default state (#687) 2022-12-02 20:39:53 +08:00
dennis zhuang
952e1bd626 test: update dummy result (#693) 2022-12-02 19:22:37 +08:00
shuiyisong
8232015998 fix: cargo sort in pre-commit (#689) 2022-12-02 16:19:31 +08:00
Ruihang Xia
d82a3a7d58 feat: implement most of scalar function and selection conversion in substrait (#678)
* impl to_df_scalar_function

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* part of scalar functions

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* conjunction over filters

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change the ser/de target to substrait::Plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* basic test coverage

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typos and license header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* logs unsupported extension

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/substrait/src/df_expr.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* address review comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* replace context with with_context

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-12-02 14:46:05 +08:00
Ning Sun
0599465685 feat: inject current database/schema into query context for postgres protocol (#685)
* feat: inject current database/schema into query context

* test: avoid duplicate server setup
2022-12-02 11:49:39 +08:00
Mofeng
13d51250ba feat: add http /health api (#676)
* feat: add http `/health` api

* feat: add `/health` api test suite in http integration test
2022-12-01 19:11:58 +08:00
LFC
6127706b5b feat: support "use" stmt part 1 (#672)
* feat: a bare sketch of session; support "use" in MySQL server; modify insertion and selection related codes in Datanode
2022-12-01 17:05:32 +08:00
dennis zhuang
2e17e9c4b5 feat: supports s3 storage (#656)
* feat: adds s3 object storage configuration

* feat: adds s3 integration test

* chore: use map

* fix: forgot license header

* fix: checking if bucket is empty in test_on

* chore: address CR issues

* refactor: run s3 test with dotenv

* chore: randomize grpc port for test

* fix: README in tests-integration

* chore: remove redundant comments
2022-12-01 10:59:14 +08:00
xiaomin tang
b0cbfa7ffb docs: add a roadmap link in README (#673)
* docs: add roadmap to README

* docs: missing period
2022-11-30 21:25:27 +08:00
Ruihang Xia
20172338e8 ci: Revert "ci: change CI unit test trigger" (#674)
Revert "ci: change CI unit test trigger (#671)"

This reverts commit 9c53f9b24c.
2022-11-30 21:23:40 +08:00
Ruihang Xia
9c53f9b24c ci: change CI unit test trigger (#671)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-30 20:19:35 +08:00
Dongxu Wang
6d24f7ebb6 refactor: bump axum 0.6, use recommended way to nest routes (#668) 2022-11-30 20:04:33 +08:00
SSebo
68c2de8e45 feat: mysql and pg server support tls (#641)
* feat: mysql and pg server support tls

* chore: replace opensrv-mysql to original

* chore: TlsOption is required but supply default value

* feat: mysql server support force tls

* chore: move TlsOption to servers

* test: mysql server disable / prefer / required tls mode

* test: pg server disable / prefer / required tls mode

* chore: add doc and remove no used code

* chore: add TODO and restore cargo linker config
2022-11-30 12:46:15 +08:00
Yingwen
a17dcbc511 chore: fix SequenceNotMonotonic error message (#664)
* chore: fix SequenceNotMonotonic error message

previous sequence should be greater than or equal to the given sequence

* Apply suggestions from code review

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-30 11:58:43 +08:00
Ning Sun
53ab19ea5a ci: remove assignees which is causing error (#663) 2022-11-30 11:36:35 +08:00
Ning Sun
84c44cf540 ci: fix doc label task on forked repo (#654) 2022-11-30 11:23:15 +08:00
LFC
020b9936cd fix: correctly detach spawned mysql listener task (#657)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-29 18:39:18 +08:00
Ning Sun
75dcf2467b refactor: add tests-integration module (#590)
* refactor: add integration-tests module

* Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* test: move grpc module to tests-integration

* test: adapt new standalone mode

* test: improve http assertion

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2022-11-29 16:28:58 +08:00
Ruihang Xia
eea5393f96 feat: UI improvement for integration test runner (#645)
* improve dir resolving and start up ordering

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix orphan process

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update tests/runner/src/util.rs, fix typo

Co-authored-by: Dongxu Wang <dongxu@apache.org>

* simplify logic via tokio timeout

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Dongxu Wang <dongxu@apache.org>
2022-11-29 15:32:39 +08:00
Ning Sun
3d312d389d ci: add doc label support for pr too (#650) 2022-11-29 15:21:12 +08:00
dennis zhuang
fdc73fb52f perf: cache python interpreter in TLS (#649)
* perf: cache python interpreter when executing coprocessors

* test: speedup test_execute_script by reusing interpreter

* fix: remove comment

* chore: use get_or_insert_with instead
2022-11-29 14:41:37 +08:00
Ning Sun
2a36e26d19 ci: add action to create doc issue when change labelled (#648)
ci: add action to create doc issue when change labeled
2022-11-29 14:25:57 +08:00
Zheming Li
baef640fe3 feat: add --version command line option (#632)
* add version command line option

* use concat!

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-28 17:07:17 +08:00
dennis zhuang
5fddb799f7 feat: enable atomic write for file object storage (#643)
* fix: remove opendal from catalog dependencies

* feat: enable atomic writing for fs service
2022-11-28 16:01:32 +08:00
Dongxu Wang
f372229b18 fix: append table id to table data dir (#640) 2022-11-28 10:53:13 +08:00
Xuanwo
4085fc7899 chore: Bump OpenDAL to v0.21.1 (#639)
* deps: Bump OpenDAL to v0.21.1

Signed-off-by: Xuanwo <github@xuanwo.io>

* Avoid using raw types when not needed

Signed-off-by: Xuanwo <github@xuanwo.io>

Signed-off-by: Xuanwo <github@xuanwo.io>
2022-11-27 10:18:39 +08:00
Ruihang Xia
30940e692a feat: impl DROP TABLE on memory catalog based standalone mode (#630)
* feat: implement drop table for standalone mode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update integration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* enhancement test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-25 11:53:46 +08:00
Mike Yang
b371ce0f48 test: added tests for statements methods (#622)
* test: added tests for parse_column_default_constraint

* test: added test for sql_column_def_to_grpc_column_def

* refactor: remove hardcode in test
2022-11-25 11:35:06 +08:00
Lei, HUANG
ac7f52d303 fix: start datanode instance before frontend services (#634) 2022-11-25 11:25:57 +08:00
Dongxu Wang
051768b735 ci: add spell check with typos (#627) 2022-11-24 14:46:50 +08:00
fys
c5b0d2431f feat: remove InsertBatch in gRPC message (#570) 2022-11-24 14:04:48 +08:00
Lei, HUANG
4038dd4067 fix: add concurrency control for catalog manager (#619) 2022-11-24 11:10:33 +08:00
Dongxu Wang
8be0f05570 chore: able to config axum timeout in toml (#624) 2022-11-24 11:09:21 +08:00
zyy17
69f06eec8b ci: change scheduled release from nightly to weekly (#623)
Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-24 11:05:35 +08:00
Ruihang Xia
7b37e99a45 feat: deregister table for MemoryCatalogManager (#620)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-24 09:36:27 +08:00
dennis zhuang
c09775d17f feat: adds metrics, tracing and retry layer to object-store (#621) 2022-11-23 11:40:03 +08:00
Francis Du
4a9cf49637 feat: support explain syntax (#546) 2022-11-22 21:22:32 +08:00
Ruihang Xia
9f865b50ab test: add dummy select case (#618)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-22 16:47:45 +08:00
Ruihang Xia
b407ebf6bb feat: integration test suite (#487)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-22 15:34:13 +08:00
Lei, HUANG
c144a1b20e feat: impl alter table in distributed mode (#572) 2022-11-22 15:17:25 +08:00
Yingwen
0791c65149 refactor: replace some usage of MutableBitmap by BitVec (#610) 2022-11-21 17:36:53 +08:00
LFC
62fcb54258 fix: correctly open table when distributed datanode restart (#576)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-11-21 15:15:14 +08:00
Lei, HUANG
2b6b979d5a fix: remove datanode mysql options in standalone mode (#595) 2022-11-21 14:15:47 +08:00
Dongxu Wang
b6fa316c65 chore: correct typos (#589) (#592) 2022-11-21 14:07:45 +08:00
Lei, HUANG
ca5734edb3 feat: disable mysql server on datanode when running standalone mode (#593) 2022-11-21 12:12:26 +08:00
Mike Yang
5428ad364e fix: make nullable as default when alter table (#591) 2022-11-21 12:11:19 +08:00
zyy17
663c725838 fix: fix nightly build error and fix typo (#588)
Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-21 11:49:36 +08:00
zyy17
c94b544e4a ci: modify image registry in release.yml (#582)
Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-11-19 09:19:54 +08:00
Ruihang Xia
f465040acc feat: lazy evaluated record batch stream (#573)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-18 21:42:10 +08:00
Yingwen
22ae983280 refactor: Use re-exported arrow mod from datatypes crate (#571) 2022-11-18 18:38:07 +08:00
Igor Morozov
e1f326295f feat: implement DESCRIBE TABLE (#558)
Also need to support describe table in other catalog/schema
2022-11-18 16:34:00 +08:00
aievl
6d762aa9dc feat: update mysql default listen port to 4406 (#568)
Co-authored-by: zhaozhenhang <zhaozhenhang@kuaishou.com>
2022-11-18 14:55:11 +08:00
Ruihang Xia
d4b09f69ab docs: specify protoc version requirement (#564)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-11-18 14:36:25 +08:00
Xuanwo
1f0b39cc8d chore: Bump OpenDAL to v0.20 (#569)
Signed-off-by: Xuanwo <github@xuanwo.io>
2022-11-18 14:17:38 +08:00
zyy17
dee5ccec9e ci: add nightly build job (#565) 2022-11-18 11:48:29 +08:00
dennis zhuang
f8788273d5 feat: drop column for alter table (#562)
* feat: drop column for alter table

* refactor: rename RemoveColumns to DropColumns

* test: alter table

* chore: error msg

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* fix: test_parse_alter_drop_column

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-11-17 23:00:16 +08:00
jay
df465308cc current blog url responds with 404, should be https://greptime.com/blogs/index (#561) 2022-11-17 21:24:04 +08:00
LFC
e7b4d2b9cd feat: Implement table_info() for DistTable (#536) (#557)
* feat: Implement `table_info()` for `DistTable` (#536)

* Update src/catalog/src/error.rs

Co-authored-by: Yingwen <1405012107@qq.com>

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <1405012107@qq.com>
2022-11-17 18:40:58 +08:00
discord9
bf408e3b96 Update README.md (#552)
Add RustPython's Acknowledgement
2022-11-17 14:15:43 +08:00
dennis zhuang
73e6e2e01b fix: split code and output in README (#549) 2022-11-17 12:54:02 +08:00
Lei, Huang
8faa6b0f09 refactor: start options (#545)
* refactor: config options for frontend/datanode/standalone

* chore: rename MetaClientOpts::metasrv_addr to MetaClientOpts::metasrv_addrs

* fix: clippy

* fix: change default meta-srv addr to 127.0.0.1:3002
2022-11-17 11:47:39 +08:00
332 changed files with 21890 additions and 4749 deletions

.env.example

@@ -0,0 +1,4 @@
# Settings for s3 test
GT_S3_BUCKET=S3 bucket
GT_S3_ACCESS_KEY_ID=S3 access key id
GT_S3_ACCESS_KEY=S3 secret access key
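
The S3 integration test commit above (#656) switched to loading these settings with dotenv. Below is a minimal sketch of how a test might pick them up, assuming the `dotenv` crate and mirroring the "skip when the bucket is empty" check mentioned in that commit; it is illustrative, not the actual tests-integration code:

```rust
// Hypothetical helper: load the S3 test settings from `.env`, returning
// None (i.e. skip the test) when they are absent or the bucket is empty.
fn s3_test_config() -> Option<(String, String, String)> {
    dotenv::dotenv().ok(); // load `.env` if present; ignore when missing
    let bucket = std::env::var("GT_S3_BUCKET").ok()?;
    let key_id = std::env::var("GT_S3_ACCESS_KEY_ID").ok()?;
    let secret = std::env::var("GT_S3_ACCESS_KEY").ok()?;
    if bucket.is_empty() {
        return None; // no bucket configured: skip the S3 test
    }
    Some((bucket, key_id, secret))
}

fn main() {
    match s3_test_config() {
        Some((bucket, _, _)) => println!("running S3 test against bucket {bucket}"),
        None => println!("S3 settings absent; skipping"),
    }
}
```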


@@ -13,7 +13,7 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
## Checklist
- [] I have written the necessary rustdoc comments.
- [] I have added the necessary unit tests and integration tests.
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.
## Refer to a related PR or issue link (optional)


@@ -26,6 +26,13 @@ env:
RUST_TOOLCHAIN: nightly-2022-07-14
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: crate-ci/typos@v1.0.4
check:
name: Check
if: github.event.pull_request.draft == false
@@ -42,6 +49,23 @@ jobs:
- name: Run cargo check
run: cargo check --workspace --all-targets
toml:
name: Toml Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install taplo
run: cargo install taplo-cli --version ^0.8 --locked
- name: Run taplo
run: taplo format --check --option "indent_string= "
# Use coverage to run test.
# test:
# name: Test Suite

.github/workflows/doc-issue.yml

@@ -0,0 +1,25 @@
name: Create Issue in docs repo on doc related changes
on:
issues:
types:
- labeled
pull_request_target:
types:
- labeled
jobs:
doc_issue:
if: github.event.label.name == 'doc update required'
runs-on: ubuntu-latest
steps:
- name: create an issue in doc repo
uses: dacbd/create-issue-action@main
with:
owner: GreptimeTeam
repo: docs
token: ${{ secrets.DOCS_REPO_TOKEN }}
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
body: |
A document change request is generated from
${{ github.event.issue.html_url || github.event.pull_request.html_url }}


@@ -2,6 +2,9 @@ on:
push:
tags:
- "v*.*.*"
schedule:
# At 00:00 on Monday.
- cron: '0 0 * * 1'
workflow_dispatch:
name: Release
@@ -9,6 +12,12 @@ name: Release
env:
RUST_TOOLCHAIN: nightly-2022-07-14
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
# In the future, we can change SCHEDULED_PERIOD to nightly.
SCHEDULED_PERIOD: weekly
jobs:
build:
name: Build binary
@@ -106,8 +115,32 @@ jobs:
- name: Download artifacts
uses: actions/download-artifact@v3
- name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}, like v0.1.0-alpha-20221119-weekly.
shell: bash
if: github.event_name == 'schedule'
run: |
buildTime=`date "+%Y%m%d"`
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV
- name: Create scheduled build git tag
if: github.event_name == 'schedule'
run: |
git tag ${{ env.SCHEDULED_BUILD_VERSION }}
- name: Publish scheduled release # configure the different release title and tags.
uses: softprops/action-gh-release@v1
if: github.event_name == 'schedule'
with:
name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
generate_release_notes: true
files: |
**/greptime-*
- name: Publish release
uses: softprops/action-gh-release@v1
if: github.event_name != 'schedule'
with:
name: "Release ${{ github.ref_name }}"
files: |
@@ -145,12 +178,12 @@ jobs:
tar xvf greptime-linux-arm64.tgz
rm greptime-linux-arm64.tgz
- name: Login to GitHub Container Registry
- name: Login to UCloud Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
registry: uhub.service.ucloud.cn
username: ${{ secrets.UCLOUD_USERNAME }}
password: ${{ secrets.UCLOUD_PASSWORD }}
- name: Login to Dockerhub
uses: docker/login-action@v2
@@ -158,11 +191,20 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
shell: bash
if: github.event_name == 'schedule'
run: |
buildTime=`date "+%Y%m%d"`
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
- name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
shell: bash
if: github.event_name != 'schedule'
run: |
VERSION=${{ github.ref_name }}
echo "VERSION=${VERSION:1}" >> $GITHUB_ENV
echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
@@ -179,5 +221,6 @@ jobs:
platforms: linux/amd64,linux/arm64
tags: |
greptime/greptimedb:latest
greptime/greptimedb:${{ env.VERSION }}
ghcr.io/greptimeteam/greptimedb:${{ env.VERSION }}
greptime/greptimedb:${{ env.IMAGE_TAG }}
uhub.service.ucloud.cn/greptime/greptimedb:latest
uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}

.gitignore

@@ -18,6 +18,7 @@ debug/
# JetBrains IDE config directory
.idea/
*.iml
# VSCode IDE config directory
.vscode/
@@ -31,3 +32,6 @@ logs/
# Benchmark dataset
benchmarks/data
# dotenv
.env


@@ -9,7 +9,7 @@ repos:
rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
hooks:
- id: cargo-sort
args: ["--workspace", "--print"]
args: ["--workspace"]
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0

Cargo.lock

File diff suppressed because it is too large.


@@ -11,28 +11,32 @@ members = [
"src/common/function",
"src/common/function-macro",
"src/common/grpc",
"src/common/grpc-expr",
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/substrait",
"src/common/insert",
"src/common/telemetry",
"src/common/time",
"src/datanode",
"src/datatypes",
"src/datatypes2",
"src/frontend",
"src/log-store",
"src/meta-client",
"src/meta-srv",
"src/mito",
"src/object-store",
"src/query",
"src/script",
"src/servers",
"src/session",
"src/sql",
"src/storage",
"src/store-api",
"src/table",
"src/mito",
"tests-integration",
"tests/runner",
]
[profile.release]


@@ -53,8 +53,9 @@ To compile GreptimeDB from source, you'll need:
install correct Rust version for you.
- Protobuf: `protoc` is required for compiling `.proto` files. `protobuf` is
available from major package manager on macos and linux distributions. You can
find an installation instructions
[here](https://grpc.io/docs/protoc-installation/).
find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
keyword. You can check it with `protoc --version`.
#### Build with Docker
@@ -114,7 +115,10 @@ about Kubernetes deployment, check our [docs](https://docs.greptime.com/).
4. Query the data:
```SQL
mysql> SELECT * FROM monitor;
SELECT * FROM monitor;
```
```TEXT
+-------+---------------------+------+--------+
| host | ts | cpu | memory |
+-------+---------------------+------+--------+
@@ -156,6 +160,8 @@ break things. Benchmark on development branch may not represent its potential
performance. We release pre-built binaries constantly for functional
evaluation. Do not use it in production at the moment.
For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).
## Community
Our core team is thrilled too see you participate in any ways you like. When you are stuck, try to
@@ -169,7 +175,7 @@ community, please check out:
In addition, you may:
- View our official [Blog](https://greptime.com/blog)
- View our official [Blog](https://greptime.com/blogs/index)
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
- Follow us on [Twitter](https://twitter.com/greptime)
@@ -189,3 +195,4 @@ Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
- [OpenDAL](https://github.com/datafuselabs/opendal) from [Datafuse Labs](https://github.com/datafuselabs) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.


@@ -28,9 +28,8 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
use arrow::record_batch::RecordBatch;
use clap::Parser;
use client::admin::Admin;
use client::api::v1::codec::InsertBatch;
use client::api::v1::column::Values;
use client::api::v1::{insert_expr, Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::{Client, Database, Select};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
@@ -100,16 +99,13 @@ async fn write_data(
for record_batch in record_batch_reader {
let record_batch = record_batch.unwrap();
let row_count = record_batch.num_rows();
let insert_batch = convert_record_batch(record_batch).into();
let (columns, row_count) = convert_record_batch(record_batch);
let insert_expr = InsertExpr {
schema_name: "public".to_string(),
table_name: TABLE_NAME.to_string(),
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: vec![insert_batch],
})),
options: HashMap::default(),
region_number: 0,
columns,
row_count,
};
let now = Instant::now();
db.insert(insert_expr).await.unwrap();
@@ -125,7 +121,7 @@ async fn write_data(
total_rpc_elapsed_ms
}
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
let schema = record_batch.schema();
let fields = schema.fields();
let row_count = record_batch.num_rows();
@@ -143,10 +139,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
columns.push(column);
}
InsertBatch {
columns,
row_count: row_count as _,
}
(columns, row_count as _)
}
fn build_values(column: &ArrayRef) -> Values {


@@ -7,3 +7,4 @@ coverage:
patch: off
ignore:
- "**/error*.rs" # ignore all error.rs files
- "tests/runner/*.rs" # ignore integration test runner


@@ -3,15 +3,16 @@ mode = 'distributed'
rpc_addr = '127.0.0.1:3001'
wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mysql_addr = '127.0.0.1:3306'
mysql_addr = '127.0.0.1:4406'
mysql_runtime_size = 4
enable_memory_catalog = false
[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'
[meta_client_opts]
metasrv_addr = '1.1.1.1:3002'
metasrv_addrs = ['127.0.0.1:3002']
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false


@@ -1,9 +1,12 @@
mode = 'distributed'
datanode_rpc_addr = '127.0.0.1:3001'
http_addr = '127.0.0.1:4000'
[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"
[meta_client_opts]
metasrv_addr = '1.1.1.1:3002'
metasrv_addrs = ['127.0.0.1:3002']
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false


@@ -1,9 +1,11 @@
node_id = 0
mode = 'standalone'
http_addr = '127.0.0.1:4000'
datanode_mysql_addr = '127.0.0.1:3306'
datanode_mysql_runtime_size = 4
wal_dir = '/tmp/greptimedb/wal/'
enable_memory_catalog = false
[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"
[storage]
type = 'File'


@@ -55,7 +55,7 @@ The DataFusion basically execute aggregate like this:
2. Call `update_batch` on each accumulator with partitioned data, to let you update your aggregate calculation.
3. Call `state` to get each accumulator's internal state, the medial calculation result.
4. Call `merge_batch` to merge all accumulator's internal state to one.
5. Execute `evalute` on the chosen one to get the final calculation result.
5. Execute `evaluate` on the chosen one to get the final calculation result.
Once you know the meaning of each method, you can easily write your accumulator. You can refer to `Median` accumulator or `SUM` accumulator defined in file `my_sum_udaf_example.rs` for more details.
@@ -63,7 +63,7 @@ Once you know the meaning of each method, you can easily write your accumulator.
You can call `register_aggregate_function` method in query engine to register your aggregate function. To do that, you have to new an instance of struct `AggregateFunctionMeta`. The struct has three fields, first is the name of your aggregate function's name. The function name is case-sensitive due to DataFusion's restriction. We strongly recommend using lowercase for your name. If you have to use uppercase name, wrap your aggregate function with quotation marks. For example, if you define an aggregate function named "my_aggr", you can use "`SELECT MY_AGGR(x)`"; if you define "my_AGGR", you have to use "`SELECT "my_AGGR"(x)`".
The second field is arg_counts ,the count of the arguments. Like accumulator `percentile`, caculating the p_number of the column. We need to input the value of column and the value of p to cacalate, and so the count of the arguments is two.
The second field is arg_counts ,the count of the arguments. Like accumulator `percentile`, calculating the p_number of the column. We need to input the value of column and the value of p to cacalate, and so the count of the arguments is two.
The third field is a function about how to create your accumulator creator that you defined in step 1 above. Create creator, that's a bit intertwined, but it is how we make DataFusion use a newly created aggregate function each time it executes a SQL, preventing the stored input types from affecting each other. The key detail can be starting looking at our `DfContextProviderAdapter` struct's `get_aggregate_meta` method.
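
The five-step flow described in this document is easiest to see with a toy accumulator. The sketch below is illustrative only: it defines its own minimal trait with a single `i64` state instead of DataFusion's real `Accumulator` (whose methods operate on Arrow arrays and `ScalarValue` states), but the call sequence is the same:

```rust
// Toy model of the aggregate execution flow: one accumulator per
// partition, then a merge, then a final evaluate.
trait Accumulator {
    fn update_batch(&mut self, values: &[i64]); // step 2
    fn state(&self) -> i64;                     // step 3
    fn merge_batch(&mut self, states: &[i64]);  // step 4
    fn evaluate(&self) -> i64;                  // step 5
}

struct Sum(i64);

impl Accumulator for Sum {
    fn update_batch(&mut self, values: &[i64]) {
        self.0 += values.iter().sum::<i64>();
    }
    fn state(&self) -> i64 {
        self.0
    }
    fn merge_batch(&mut self, states: &[i64]) {
        self.0 += states.iter().sum::<i64>();
    }
    fn evaluate(&self) -> i64 {
        self.0
    }
}

fn main() {
    // Step 1: create one accumulator per data partition.
    let partitions = [vec![1, 2], vec![3, 4, 5]];
    let states: Vec<i64> = partitions
        .iter()
        .map(|p| {
            let mut acc = Sum(0);
            acc.update_batch(p); // step 2: feed partitioned data
            acc.state()          // step 3: extract the partial state
        })
        .collect();
    // Step 4: merge every partial state into one accumulator.
    let mut merged = Sum(0);
    merged.merge_batch(&states);
    assert_eq!(merged.evaluate(), 15); // step 5: final result
}
```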


@@ -7,8 +7,8 @@ license = "Apache-2.0"
[dependencies]
common-base = { path = "../common/base" }
common-time = { path = "../common/time" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
prost = "0.11"
snafu = { version = "0.7", features = ["backtraces"] }


@@ -20,9 +20,7 @@ fn main() {
.file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
.compile(
&[
"greptime/v1/insert.proto",
"greptime/v1/select.proto",
"greptime/v1/physical_plan.proto",
"greptime/v1/greptime.proto",
"greptime/v1/meta/common.proto",
"greptime/v1/meta/heartbeat.proto",


@@ -20,6 +20,7 @@ message AdminExpr {
CreateExpr create = 2;
AlterExpr alter = 3;
CreateDatabaseExpr create_database = 4;
DropTableExpr drop_table = 5;
}
}
@@ -51,18 +52,33 @@ message AlterExpr {
string table_name = 3;
oneof kind {
AddColumns add_columns = 4;
DropColumns drop_columns = 5;
}
}
message DropTableExpr {
string catalog_name = 1;
string schema_name = 2;
string table_name = 3;
}
message AddColumns {
repeated AddColumn add_columns = 1;
}
message DropColumns {
repeated DropColumn drop_columns = 1;
}
message AddColumn {
ColumnDef column_def = 1;
bool is_key = 2;
}
message DropColumn {
string name = 1;
}
message CreateDatabaseExpr {
//TODO(hl): maybe rename to schema_name?
string database_name = 1;


@@ -2,6 +2,7 @@ syntax = "proto3";
package greptime.v1;
import "greptime/v1/column.proto";
import "greptime/v1/common.proto";
message DatabaseRequest {
@@ -28,39 +29,23 @@ message SelectExpr {
oneof expr {
string sql = 1;
bytes logical_plan = 2;
PhysicalPlan physical_plan = 15;
}
}
message PhysicalPlan {
bytes original_ql = 1;
bytes plan = 2;
}
message InsertExpr {
string schema_name = 1;
string table_name = 2;
message Values {
repeated bytes values = 1;
}
// Data is represented here.
repeated Column columns = 3;
oneof expr {
Values values = 3;
// The row_count of all columns, which include null and non-null values.
//
// Note: the row_count of all columns in a InsertExpr must be same.
uint32 row_count = 4;
// TODO(LFC): Remove field "sql" in InsertExpr.
// When Frontend instance received an insertion SQL (`insert into ...`), it's anticipated to parse the SQL and
// assemble the values to insert to feed Datanode. In other words, inserting data through Datanode instance's GRPC
// interface shouldn't use SQL directly.
// Then why the "sql" field exists here? It's because the Frontend needs table schema to create the values to insert,
// which is currently not able to find anywhere. (Maybe the table schema is suppose to be fetched from Meta?)
// The "sql" field is meant to be removed in the future.
string sql = 4;
}
/// The region number of current insert request.
// The region number of current insert request.
uint32 region_number = 5;
map<string, bytes> options = 6;
}
// TODO(jiachun)


@@ -1,14 +0,0 @@
syntax = "proto3";
package greptime.v1.codec;
import "greptime/v1/column.proto";
message InsertBatch {
repeated Column columns = 1;
uint32 row_count = 2;
}
message RegionNumber {
uint32 id = 1;
}


@@ -39,7 +39,7 @@ message NodeStat {
uint64 wcus = 2;
// Table number in this node
uint64 table_num = 3;
// Regon number in this node
// Region number in this node
uint64 region_num = 4;
double cpu_usage = 5;


@@ -5,6 +5,8 @@ package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Router {
rpc Create(CreateRequest) returns (RouteResponse) {}
// Fetch routing information for tables. The smallest unit is the complete
// routing information(all regions) of a table.
//
@@ -26,7 +28,14 @@ service Router {
//
rpc Route(RouteRequest) returns (RouteResponse) {}
rpc Create(CreateRequest) returns (RouteResponse) {}
rpc Delete(DeleteRequest) returns (RouteResponse) {}
}
message CreateRequest {
RequestHeader header = 1;
TableName table_name = 2;
repeated Partition partitions = 3;
}
message RouteRequest {
@@ -35,6 +44,12 @@ message RouteRequest {
repeated TableName table_names = 2;
}
message DeleteRequest {
RequestHeader header = 1;
TableName table_name = 2;
}
message RouteResponse {
ResponseHeader header = 1;
@@ -42,13 +57,6 @@ message RouteResponse {
repeated TableRoute table_routes = 3;
}
message CreateRequest {
RequestHeader header = 1;
TableName table_name = 2;
repeated Partition partitions = 3;
}
message TableRoute {
Table table = 1;
repeated RegionRoute region_routes = 2;


@@ -20,6 +20,9 @@ service Store {
// DeleteRange deletes the given range from the key-value store.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
// MoveValue atomically renames the key to the given updated key.
rpc MoveValue(MoveValueRequest) returns (MoveValueResponse);
}
message RangeRequest {
@@ -136,3 +139,21 @@ message DeleteRangeResponse {
// returned.
repeated KeyValue prev_kvs = 3;
}
message MoveValueRequest {
RequestHeader header = 1;
// If from_key dose not exist, return the value of to_key (if it exists).
// If from_key exists, move the value of from_key to to_key (i.e. rename),
// and return the value.
bytes from_key = 2;
bytes to_key = 3;
}
message MoveValueResponse {
ResponseHeader header = 1;
// If from_key dose not exist, return the value of to_key (if it exists).
// If from_key exists, return the value of from_key.
KeyValue kv = 2;
}
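
The comments in `MoveValueRequest`/`MoveValueResponse` describe a rename-with-fallback contract. Here is a minimal in-memory model of that behavior, with a plain `HashMap` standing in for the meta-srv key-value store and ignoring the atomicity the real RPC provides:

```rust
use std::collections::HashMap;

// If `from_key` is absent, return `to_key`'s value (if any); otherwise
// move (rename) `from_key`'s value to `to_key` and return it.
fn move_value(
    store: &mut HashMap<Vec<u8>, Vec<u8>>,
    from_key: &[u8],
    to_key: &[u8],
) -> Option<Vec<u8>> {
    match store.remove(from_key) {
        Some(value) => {
            store.insert(to_key.to_vec(), value.clone());
            Some(value)
        }
        None => store.get(to_key).cloned(),
    }
}

fn main() {
    let mut store = HashMap::new();
    store.insert(b"a".to_vec(), b"1".to_vec());
    // `a` exists: its value is renamed to `b` and returned.
    assert_eq!(move_value(&mut store, b"a", b"b"), Some(b"1".to_vec()));
    // `a` is now gone: the call falls back to `b`'s current value.
    assert_eq!(move_value(&mut store, b"a", b"b"), Some(b"1".to_vec()));
    assert!(!store.contains_key(b"a".as_slice()));
}
```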


@@ -1,33 +0,0 @@
syntax = "proto3";
package greptime.v1.codec;
message PhysicalPlanNode {
oneof PhysicalPlanType {
ProjectionExecNode projection = 1;
MockInputExecNode mock = 99;
// TODO(fys): impl other physical plan node
}
}
message ProjectionExecNode {
PhysicalPlanNode input = 1;
repeated PhysicalExprNode expr = 2;
repeated string expr_name = 3;
}
message PhysicalExprNode {
oneof ExprType {
PhysicalColumn column = 1;
// TODO(fys): impl other physical expr node
}
}
message PhysicalColumn {
string name = 1;
uint64 index = 2;
}
message MockInputExecNode {
string name = 1;
}


@@ -33,6 +33,28 @@ pub enum Error {
from: ConcreteDataType,
backtrace: Backtrace,
},
#[snafu(display(
"Failed to convert column default constraint, column: {}, source: {}",
column,
source
))]
ConvertColumnDefaultConstraint {
column: String,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Invalid column default constraint, column: {}, source: {}",
column,
source
))]
InvalidColumnDefaultConstraint {
column: String,
#[snafu(backtrace)]
source: datatypes::error::Error,
},
}
impl ErrorExt for Error {
@@ -40,6 +62,8 @@ impl ErrorExt for Error {
match self {
Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments,
Error::IntoColumnDataType { .. } => StatusCode::Unexpected,
Error::ConvertColumnDefaultConstraint { source, .. }
| Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {


@@ -15,7 +15,7 @@
pub use prost::DecodeError;
use prost::Message;
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionNumber, SelectResult};
use crate::v1::codec::SelectResult;
use crate::v1::meta::TableRouteValue;
macro_rules! impl_convert_with_bytes {
@@ -36,10 +36,7 @@ macro_rules! impl_convert_with_bytes {
};
}
impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionNumber);
impl_convert_with_bytes!(TableRouteValue);
#[cfg(test)]
@@ -51,52 +48,6 @@ mod tests {
const SEMANTIC_TAG: i32 = 0;
#[test]
fn test_convert_insert_batch() {
let insert_batch = mock_insert_batch();
let bytes: Vec<u8> = insert_batch.into();
let insert: InsertBatch = bytes.deref().try_into().unwrap();
assert_eq!(8, insert.row_count);
assert_eq!(1, insert.columns.len());
let column = &insert.columns[0];
assert_eq!("foo", column.column_name);
assert_eq!(SEMANTIC_TAG, column.semantic_type);
assert_eq!(vec![1], column.null_mask);
assert_eq!(
vec![2, 3, 4, 5, 6, 7, 8],
column.values.as_ref().unwrap().i32_values
);
}
#[should_panic]
#[test]
fn test_convert_insert_batch_wrong() {
let insert_batch = mock_insert_batch();
let mut bytes: Vec<u8> = insert_batch.into();
// modify some bytes
bytes[0] = 0b1;
bytes[1] = 0b1;
let insert: InsertBatch = bytes.deref().try_into().unwrap();
assert_eq!(8, insert.row_count);
assert_eq!(1, insert.columns.len());
let column = &insert.columns[0];
assert_eq!("foo", column.column_name);
assert_eq!(SEMANTIC_TAG, column.semantic_type);
assert_eq!(vec![1], column.null_mask);
assert_eq!(
vec![2, 3, 4, 5, 6, 7, 8],
column.values.as_ref().unwrap().i32_values
);
}
#[test]
fn test_convert_select_result() {
let select_result = mock_select_result();
@@ -143,35 +94,6 @@ mod tests {
);
}
#[test]
fn test_convert_region_id() {
let region_id = RegionNumber { id: 12 };
let bytes: Vec<u8> = region_id.into();
let region_id: RegionNumber = bytes.deref().try_into().unwrap();
assert_eq!(12, region_id.id);
}
fn mock_insert_batch() -> InsertBatch {
let values = column::Values {
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
..Default::default()
};
let null_mask = vec![1];
let column = Column {
column_name: "foo".to_string(),
semantic_type: SEMANTIC_TAG,
values: Some(values),
null_mask,
..Default::default()
};
InsertBatch {
columns: vec![column],
row_count: 8,
}
}
fn mock_select_result() -> SelectResult {
let values = column::Values {
i32_values: vec![2, 3, 4, 5, 6, 7, 8],


@@ -21,4 +21,5 @@ pub mod codec {
tonic::include_proto!("greptime.v1.codec");
}
mod column_def;
pub mod meta;


@@ -0,0 +1,38 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::helper::ColumnDataTypeWrapper;
use crate::v1::ColumnDef;
impl ColumnDef {
pub fn try_as_column_schema(&self) -> Result<ColumnSchema> {
let data_type = ColumnDataTypeWrapper::try_new(self.datatype)?;
let constraint = match &self.default_constraint {
None => None,
Some(v) => Some(
ColumnDefaultConstraint::try_from(&v[..])
.context(error::ConvertColumnDefaultConstraintSnafu { column: &self.name })?,
),
};
ColumnSchema::new(&self.name, data_type.into(), self.is_nullable)
.with_default_constraint(constraint)
.context(error::InvalidColumnDefaultConstraintSnafu { column: &self.name })
}
}


@@ -145,10 +145,12 @@ gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(DeleteRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
gen_set_header!(MoveValueRequest);
#[cfg(test)]
mod tests {


@@ -27,7 +27,6 @@ futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.17"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
@@ -39,9 +38,8 @@ tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
opendal = "0.17"
storage = { path = "../storage" }
mito = { path = "../mito" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }


@@ -94,7 +94,7 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Table {} already exists", table))]
#[snafu(display("Table `{}` already exists", table))]
TableExists { table: String, backtrace: Backtrace },
#[snafu(display("Schema {} already exists", schema))]
@@ -109,6 +109,12 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Operation {} not implemented yet", operation))]
Unimplemented {
operation: String,
backtrace: Backtrace,
},
#[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
OpenTable {
table_info: String,
@@ -185,8 +191,8 @@ pub enum Error {
source: meta_client::error::Error,
},
#[snafu(display("Invalid table schema in catalog, source: {:?}", source))]
InvalidSchemaInCatalog {
#[snafu(display("Invalid table info in catalog, source: {}", source))]
InvalidTableInfoInCatalog {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
@@ -216,11 +222,12 @@ impl ErrorExt for Error {
| Error::ValueDeserialize { .. }
| Error::Io { .. } => StatusCode::StorageUnavailable,
Error::RegisterTable { .. } => StatusCode::Internal,
Error::ReadSystemCatalog { source, .. } => source.status_code(),
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::RegisterTable { .. } => StatusCode::Internal,
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
Error::SchemaExists { .. } => StatusCode::InvalidArguments,
@@ -233,8 +240,10 @@ impl ErrorExt for Error {
Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
Error::InvalidTableSchema { source, .. } => source.status_code(),
Error::InvalidSchemaInCatalog { .. } => StatusCode::Unexpected,
Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
Error::Internal { source, .. } => source.status_code(),
Error::Unimplemented { .. } => StatusCode::Unsupported,
}
}


@@ -15,44 +15,56 @@
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use common_catalog::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableMeta, TableId, TableVersion};
use table::metadata::{RawTableInfo, TableId, TableVersion};
use crate::consts::{
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
};
use crate::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
const CATALOG_KEY_PREFIX: &str = "__c";
const SCHEMA_KEY_PREFIX: &str = "__s";
const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex =
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-({})$",
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
))
.unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
SCHEMA_KEY_PREFIX
"^{}-({})-({})$",
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
))
.unwrap();
}
lazy_static! {
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)$",
TABLE_GLOBAL_KEY_PREFIX
"^{}-({})-({})-({})$",
TABLE_GLOBAL_KEY_PREFIX,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN
))
.unwrap();
}
lazy_static! {
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)-([0-9]+)$",
TABLE_REGIONAL_KEY_PREFIX
"^{}-({})-({})-({})-([0-9]+)$",
TABLE_REGIONAL_KEY_PREFIX,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN
))
.unwrap();
}
@@ -128,15 +140,18 @@ impl TableGlobalKey {
/// table id, table meta(schema...), region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct TableGlobalValue {
/// Table id is the same across all datanodes.
pub id: TableId,
/// Id of datanode that created the global table info kv. only for debugging.
pub node_id: u64,
// TODO(LFC): Maybe remove it?
/// Allocation of region ids across all datanodes.
pub regions_id_map: HashMap<u64, Vec<u32>>,
// TODO(LFC): Too much for assembling the table schema that DistTable needs, find another way.
pub meta: RawTableMeta,
pub table_info: RawTableInfo,
}
impl TableGlobalValue {
pub fn table_id(&self) -> TableId {
self.table_info.ident.table_id
}
}
/// Table regional info that varies between datanode, so it contains a `node_id` field.
@@ -258,6 +273,10 @@ macro_rules! define_catalog_value {
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
@@ -279,6 +298,7 @@ define_catalog_value!(
mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
use table::metadata::{RawTableMeta, TableIdent, TableType};
use super::*;
@@ -339,11 +359,23 @@ mod tests {
region_numbers: vec![1],
};
let table_info = RawTableInfo {
ident: TableIdent {
table_id: 42,
version: 1,
},
name: "table_1".to_string(),
desc: Some("blah".to_string()),
catalog_name: "catalog_1".to_string(),
schema_name: "schema_1".to_string(),
meta,
table_type: TableType::Base,
};
let value = TableGlobalValue {
id: 42,
node_id: 0,
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
meta,
table_info,
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
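
Earlier in this file the key regexes were rebuilt around a shared `ALPHANUMERICS_NAME_PATTERN`, which, unlike the old `[a-zA-Z_]+` groups, lets catalog and schema names carry digits after the leading character. A small standalone check of that change, assuming the `regex` crate and using the `__s` schema-key prefix shown above:

```rust
use regex::Regex;

fn main() {
    // ALPHANUMERICS_NAME_PATTERN: a leading letter or underscore,
    // then any mix of letters, digits, and underscores.
    let name = "[a-zA-Z_][a-zA-Z0-9_]*";
    // Schema keys have the shape `__s-<catalog>-<schema>`.
    let schema_key = Regex::new(&format!("^__s-({name})-({name})$")).unwrap();
    assert!(schema_key.is_match("__s-catalog_1-schema_1")); // digits now allowed
    assert!(!schema_key.is_match("__s-1catalog-schema_1")); // but not leading
}
```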


@@ -15,6 +15,7 @@
#![feature(assert_matches)]
use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use common_telemetry::info;
@@ -28,6 +29,7 @@ use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
pub mod error;
pub mod helper;
pub mod local;
pub mod remote;
pub mod schema;
@@ -83,12 +85,17 @@ pub trait CatalogManager: CatalogList {
/// Starts a catalog manager.
async fn start(&self) -> Result<()>;
/// Registers a table given given catalog/schema to catalog manager,
/// returns table registered.
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
/// Registers a table within given catalog/schema to catalog manager,
/// returns whether the table registered.
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
/// Register a schema with catalog name and schema name.
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize>;
/// Deregisters a table within given catalog/schema to catalog manager,
/// returns whether the table deregistered.
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;
/// Register a schema with catalog name and schema name. Retuens whether the
/// schema registered.
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
/// Register a system table, should be called before starting the manager.
async fn register_system_table(&self, request: RegisterSystemTableRequest)
@@ -123,6 +130,25 @@ pub struct RegisterTableRequest {
pub table: TableRef,
}
impl Debug for RegisterTableRequest {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("RegisterTableRequest")
.field("catalog", &self.catalog)
.field("schema", &self.schema)
.field("table_name", &self.table_name)
.field("table_id", &self.table_id)
.field("table", &self.table.table_info())
.finish()
}
}
#[derive(Clone)]
pub struct DeregisterTableRequest {
pub catalog: String,
pub schema: String,
pub table_name: String,
}
#[derive(Debug, Clone)]
pub struct RegisterSchemaRequest {
pub catalog: String,


@@ -21,7 +21,7 @@ use common_catalog::consts::{
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::info;
use common_telemetry::{error, info};
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -36,7 +36,7 @@ use table::TableRef;
use crate::error::{
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
TableExistsSnafu, TableNotFoundSnafu,
TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
};
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
@@ -46,8 +46,8 @@ use crate::system::{
use crate::tables::SystemCatalog;
use crate::{
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
CatalogProvider, CatalogProviderRef, RegisterSchemaRequest, RegisterSystemTableRequest,
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -57,6 +57,7 @@ pub struct LocalCatalogManager {
engine: TableEngineRef,
next_table_id: AtomicU32,
init_lock: Mutex<bool>,
register_lock: Mutex<()>,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
}
@@ -76,6 +77,7 @@ impl LocalCatalogManager {
engine,
next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
init_lock: Mutex::new(false),
register_lock: Mutex::new(()),
system_table_requests: Mutex::new(Vec::default()),
})
}
@@ -241,6 +243,7 @@ impl LocalCatalogManager {
schema_name: t.schema_name.clone(),
table_name: t.table_name.clone(),
table_id: t.table_id,
region_numbers: vec![0],
};
let option = self
@@ -308,7 +311,7 @@ impl CatalogManager for LocalCatalogManager {
self.init().await
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
let started = self.init_lock.lock().await;
ensure!(
@@ -331,27 +334,50 @@ impl CatalogManager for LocalCatalogManager {
schema_info: format!("{}.{}", catalog_name, schema_name),
})?;
if schema.table_exist(&request.table_name)? {
return TableExistsSnafu {
table: format_full_table_name(catalog_name, schema_name, &request.table_name),
{
let _lock = self.register_lock.lock().await;
if let Some(existing) = schema.table(&request.table_name)? {
if existing.table_info().ident.table_id != request.table_id {
error!(
"Unexpected table register request: {:?}, existing: {:?}",
request,
existing.table_info()
);
return TableExistsSnafu {
table: format_full_table_name(
catalog_name,
schema_name,
&request.table_name,
),
}
.fail();
}
// Re-registering the table with the same table id is a no-op; just ignore it.
Ok(false)
} else {
// table does not exist
self.system
.register_table(
catalog_name.clone(),
schema_name.clone(),
request.table_name.clone(),
request.table_id,
)
.await?;
schema.register_table(request.table_name, request.table)?;
Ok(true)
}
.fail();
}
self.system
.register_table(
catalog_name.clone(),
schema_name.clone(),
request.table_name.clone(),
request.table_id,
)
.await?;
schema.register_table(request.table_name, request.table)?;
Ok(1)
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
UnimplementedSnafu {
operation: "deregister table",
}
.fail()
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
let started = self.init_lock.lock().await;
ensure!(
*started,
@@ -366,17 +392,21 @@ impl CatalogManager for LocalCatalogManager {
.catalogs
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
if catalog.schema(schema_name)?.is_some() {
return SchemaExistsSnafu {
schema: schema_name,
}
.fail();
{
let _lock = self.register_lock.lock().await;
ensure!(
catalog.schema(schema_name)?.is_none(),
SchemaExistsSnafu {
schema: schema_name,
}
);
self.system
.register_schema(request.catalog, schema_name.clone())
.await?;
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
Ok(true)
}
self.system
.register_schema(request.catalog, schema_name.clone())
.await?;
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
Ok(1)
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
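With `register_table` now returning `Result<bool>` and guarded by `register_lock`, replaying a registration for the same table id is reported as `Ok(false)` instead of an error, while a conflicting table id still fails. A minimal caller-side sketch of the new contract, assuming a started `LocalCatalogManager` named `manager` and reusing the `NumbersTable` fixture from the tests:

let request = RegisterTableRequest {
    catalog: DEFAULT_CATALOG_NAME.to_string(),
    schema: DEFAULT_SCHEMA_NAME.to_string(),
    table_name: "metrics".to_string(),
    table_id: 1024,
    table: Arc::new(NumbersTable::new(1024)),
};
// First registration inserts the table and reports true.
assert!(manager.register_table(request.clone()).await?);
// Replaying the same request (same table id) is a no-op reporting false.
assert!(!manager.register_table(request).await?);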


@@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::OptionExt;
use table::metadata::TableId;
use table::table::TableIdProvider;
@@ -27,8 +28,8 @@ use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
use crate::schema::SchemaProvider;
use crate::{
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, RegisterSchemaRequest,
RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
};
/// Simple in-memory list of catalogs
@@ -69,7 +70,7 @@ impl CatalogManager for MemoryCatalogManager {
Ok(())
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
let catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get(&request.catalog)
@@ -84,10 +85,28 @@ impl CatalogManager for MemoryCatalogManager {
})?;
schema
.register_table(request.table_name, request.table)
.map(|v| if v.is_some() { 0 } else { 1 })
.map(|v| v.is_none())
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
let catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get(&request.catalog)
.context(CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.clone();
let schema = catalog
.schema(&request.schema)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", &request.catalog, &request.schema),
})?;
schema
.deregister_table(&request.table_name)
.map(|v| v.is_some())
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
let catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get(&request.catalog)
@@ -95,11 +114,12 @@ impl CatalogManager for MemoryCatalogManager {
catalog_name: &request.catalog,
})?;
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
Ok(1)
Ok(true)
}
async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
unimplemented!()
// TODO(ruihang): support register system table request
Ok(())
}
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
@@ -251,11 +271,21 @@ impl SchemaProvider for MemorySchemaProvider {
}
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
if self.table_exist(name.as_str())? {
return TableExistsSnafu { table: name }.fail()?;
}
let mut tables = self.tables.write().unwrap();
Ok(tables.insert(name, table))
if let Some(existing) = tables.get(name.as_str()) {
// a table with the same name but a different table id indicates a fatal bug
if existing.table_info().ident.table_id != table.table_info().ident.table_id {
error!(
"Unexpected table register: {:?}, existing: {:?}",
table.table_info(),
existing.table_info()
);
return TableExistsSnafu { table: name }.fail()?;
}
Ok(Some(existing.clone()))
} else {
Ok(tables.insert(name, table))
}
}
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
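A short sketch of the relaxed `MemorySchemaProvider` semantics introduced above, using the `NumbersTable` fixture from the tests below:

let provider = MemorySchemaProvider::new();
let table = Arc::new(NumbersTable::new(1));
// Fresh registration: no previous entry exists.
assert!(provider
    .register_table("numbers".to_string(), table.clone())
    .unwrap()
    .is_none());
// Same name and same table id: the existing handle is returned, not an error.
assert!(provider
    .register_table("numbers".to_string(), table)
    .unwrap()
    .is_some());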
@@ -315,7 +345,7 @@ mod tests {
.unwrap()
.is_none());
assert!(provider.table_exist(table_name).unwrap());
let other_table = NumbersTable::default();
let other_table = NumbersTable::new(12);
let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
let err = result.err().unwrap();
assert!(err.backtrace_opt().is_some());
@@ -340,4 +370,34 @@ mod tests {
.downcast_ref::<MemoryCatalogManager>()
.unwrap();
}
#[tokio::test]
pub async fn test_catalog_deregister_table() {
let catalog = MemoryCatalogManager::default();
let schema = catalog
.schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
.unwrap()
.unwrap();
let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "numbers".to_string(),
table_id: 2333,
table: Arc::new(NumbersTable::default()),
};
catalog.register_table(register_table_req).await.unwrap();
assert!(schema.table_exist("numbers").unwrap());
let deregister_table_req = DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "numbers".to_string(),
};
catalog
.deregister_table(deregister_table_req)
.await
.unwrap();
assert!(!schema.table_exist("numbers").unwrap());
}
}


@@ -20,10 +20,6 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, info};
use futures::Stream;
use futures_util::StreamExt;
@@ -37,13 +33,17 @@ use tokio::sync::Mutex;
use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu,
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
};
use crate::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider,
SchemaProviderRef,
DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};
/// Catalog manager based on metasrv.
@@ -154,8 +154,8 @@ impl RemoteCatalogManager {
}
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
.context(InvalidCatalogValueSnafu)?;
let table_value =
TableGlobalValue::from_bytes(&v).context(InvalidCatalogValueSnafu)?;
info!(
"Found catalog table entry, key: {}, value: {:?}",
@@ -250,10 +250,7 @@ impl RemoteCatalogManager {
let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
schema.register_table(table_key.table_name.to_string(), table_ref)?;
info!("Registered table {}", &table_key.table_name);
if table_value.id > max_table_id {
info!("Max table id: {} -> {}", max_table_id, table_value.id);
max_table_id = table_value.id;
}
max_table_id = max_table_id.max(table_value.table_id());
table_num += 1;
}
info!(
@@ -311,25 +308,33 @@ impl RemoteCatalogManager {
..
} = table_key;
let table_id = table_value.table_id();
let TableGlobalValue {
id,
meta,
table_info,
regions_id_map,
..
} = table_value;
// unwrap safety: checked when yielding this table in `iter_remote_tables`
let region_numbers = regions_id_map.get(&self.node_id).unwrap();
let request = OpenTableRequest {
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
table_id: *id,
table_id,
region_numbers: region_numbers.clone(),
};
match self
.engine
.open_table(&context, request)
.await
.with_context(|_| OpenTableSnafu {
table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
table_info: format!(
"{}.{}.{}, id:{}",
catalog_name, schema_name, table_name, table_id
),
})? {
Some(table) => {
info!(
@@ -344,6 +349,7 @@ impl RemoteCatalogManager {
catalog_name, schema_name, table_name
);
let meta = &table_info.meta;
let schema = meta
.schema
.clone()
@@ -353,13 +359,13 @@ impl RemoteCatalogManager {
schema: meta.schema.clone(),
})?;
let req = CreateTableRequest {
id: *id,
id: table_id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: Arc::new(schema),
region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
region_numbers: region_numbers.clone(),
primary_key_indices: meta.primary_key_indices.clone(),
create_if_not_exists: true,
table_options: meta.options.clone(),
@@ -371,7 +377,7 @@ impl RemoteCatalogManager {
.context(CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id:{}",
&catalog_name, &schema_name, &table_name, id
&catalog_name, &schema_name, &table_name, table_id
),
})
}
@@ -405,7 +411,7 @@ impl CatalogManager for RemoteCatalogManager {
Ok(())
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
let catalog_name = request.catalog;
let schema_name = request.schema;
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -424,10 +430,17 @@ impl CatalogManager for RemoteCatalogManager {
.fail();
}
schema_provider.register_table(request.table_name, request.table)?;
Ok(1)
Ok(true)
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
UnimplementedSnafu {
operation: "deregister table",
}
.fail()
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
let catalog_name = request.catalog;
let schema_name = request.schema;
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -435,7 +448,7 @@ impl CatalogManager for RemoteCatalogManager {
})?;
let schema_provider = self.new_schema_provider(&catalog_name, &schema_name);
catalog_provider.register_schema(schema_name, schema_provider)?;
Ok(1)
Ok(true)
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {


@@ -43,7 +43,6 @@ use crate::error::{
pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable {
@@ -87,6 +86,7 @@ impl SystemCatalogTable {
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
region_numbers: vec![0],
};
let schema = Arc::new(build_system_catalog_schema());
let ctx = EngineContext::default();
@@ -110,7 +110,7 @@ impl SystemCatalogTable {
desc: Some("System catalog table".to_string()),
schema: schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
create_if_not_exists: true,
table_options: HashMap::new(),
};
@@ -134,7 +134,6 @@ impl SystemCatalogTable {
.context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream)
}
@@ -384,7 +383,7 @@ mod tests {
use super::*;
#[test]
pub fn test_decode_catalog_enrty() {
pub fn test_decode_catalog_entry() {
let entry = decode_system_catalog(
Some(EntryType::Catalog as u8),
Some("some_catalog".as_bytes()),
@@ -456,7 +455,7 @@ mod tests {
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = TempDir::new("system-table-test").unwrap();
let store_dir = dir.path().to_string_lossy();
let accessor = opendal::services::fs::Builder::default()
let accessor = object_store::backend::fs::Builder::default()
.root(&store_dir)
.build()
.unwrap();


@@ -368,7 +368,6 @@ mod tests {
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
let mut tables_stream = tables_stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
if let Some(t) = tables_stream.next().await {


@@ -0,0 +1,132 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use catalog::local::LocalCatalogManager;
use catalog::{CatalogManager, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_telemetry::{error, info};
use mito::config::EngineConfig;
use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex;
async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
let (_dir, object_store) =
mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
EngineConfig::default(),
mito::table::test_util::MockEngine::default(),
object_store,
));
let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
catalog_manager.start().await?;
Ok(catalog_manager)
}
#[tokio::test]
async fn test_duplicate_register() {
let catalog_manager = create_local_catalog_manager().await.unwrap();
let request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id: 42,
table: Arc::new(NumbersTable::new(42)),
};
assert!(catalog_manager
.register_table(request.clone())
.await
.unwrap());
// registering a table with the same table id succeeds, returning false.
assert!(!catalog_manager.register_table(request).await.unwrap());
let err = catalog_manager
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id: 43,
table: Arc::new(NumbersTable::new(43)),
})
.await
.unwrap_err();
assert!(
err.to_string()
.contains("Table `greptime.public.test_table` already exists"),
"Actual error message: {}",
err
);
}
#[test]
fn test_concurrent_register() {
common_telemetry::init_default_ut_logging();
let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
let catalog_manager =
Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));
let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));
let mut handles = Vec::with_capacity(8);
for i in 0..8 {
let catalog = catalog_manager.clone();
let succeed = succeed.clone();
let handle = rt.spawn(async move {
let table_id = 42 + i;
let table = Arc::new(NumbersTable::new(table_id));
let req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id,
table: table.clone(),
};
match catalog.register_table(req).await {
Ok(res) => {
if res {
let mut succeed = succeed.lock().await;
info!("Successfully registered table: {}", table_id);
*succeed = Some(table);
}
}
Err(_) => {
error!("Failed to register table {}", table_id);
}
}
});
handles.push(handle);
}
rt.block_on(async move {
for handle in handles {
handle.await.unwrap();
}
let guard = succeed.lock().await;
let table = guard.as_ref().unwrap();
let table_registered = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
.unwrap()
.unwrap();
assert_eq!(
table_registered.table_info().ident.table_id,
table.table_info().ident.table_id
);
});
}
}


@@ -217,7 +217,7 @@ impl TableEngine for MockTableEngine {
&self,
_ctx: &EngineContext,
_request: DropTableRequest,
) -> table::Result<()> {
) -> table::Result<bool> {
unimplemented!()
}
}


@@ -22,12 +22,12 @@ mod tests {
use std::collections::HashSet;
use std::sync::Arc;
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use catalog::remote::{
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
};
use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use datatypes::schema::Schema;
use futures_util::StreamExt;
use table::engine::{EngineContext, TableEngineRef};
@@ -202,7 +202,7 @@ mod tests {
table_id,
table,
};
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert!(catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(
HashSet::from([table_name, "numbers".to_string()]),
default_schema
@@ -287,7 +287,7 @@ mod tests {
.register_schema(schema_name.clone(), schema.clone())
.expect("Register schema should not fail");
assert!(prev.is_none());
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
assert!(catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!(
HashSet::from([schema_name.clone()]),


@@ -11,9 +11,9 @@ async-stream = "0.3"
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-insert = { path = "../common/insert" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",


@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use api::v1::codec::InsertBatch;
use api::v1::*;
use client::{Client, Database};
fn main() {
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
.unwrap();
@@ -29,21 +27,21 @@ async fn run() {
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let (columns, row_count) = insert_data();
let expr = InsertExpr {
schema_name: "public".to_string(),
table_name: "demo".to_string(),
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
values: insert_batches(),
})),
options: HashMap::default(),
region_number: 0,
columns,
row_count,
};
db.insert(expr).await.unwrap();
}
fn insert_batches() -> Vec<Vec<u8>> {
fn insert_data() -> (Vec<Column>, u32) {
const SEMANTIC_TAG: i32 = 0;
const SEMANTIC_FEILD: i32 = 1;
const SEMANTIC_FIELD: i32 = 1;
const SEMANTIC_TS: i32 = 2;
let row_count = 4;
@@ -71,7 +69,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
};
let cpu_column = Column {
column_name: "cpu".to_string(),
semantic_type: SEMANTIC_FEILD,
semantic_type: SEMANTIC_FIELD,
values: Some(cpu_vals),
null_mask: vec![2],
..Default::default()
@@ -83,7 +81,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
};
let mem_column = Column {
column_name: "memory".to_string(),
semantic_type: SEMANTIC_FEILD,
semantic_type: SEMANTIC_FIELD,
values: Some(mem_vals),
null_mask: vec![4],
..Default::default()
@@ -101,9 +99,8 @@ fn insert_batches() -> Vec<Vec<u8>> {
..Default::default()
};
let insert_batch = InsertBatch {
columns: vec![host_column, cpu_column, mem_column, ts_column],
(
vec![host_column, cpu_column, mem_column, ts_column],
row_count,
};
vec![insert_batch.into()]
)
}
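For reference, `null_mask` is a row-aligned bitmap over the column's rows, with bit i set meaning row i is NULL; the little-endian bit order is an assumption based on the `Lsb0` layout noted in common-base's bit_vec module later in this diff. A small sketch of reading it:

// bit i of null_mask set => row i is NULL (Lsb0 bit order assumed).
fn is_null(null_mask: &[u8], row: usize) -> bool {
    null_mask
        .get(row / 8)
        .map_or(false, |byte| byte & (1 << (row % 8)) != 0)
}

assert!(is_null(&[2], 1)); // 0b0000_0010: the cpu column's row 1 is NULL
assert!(is_null(&[4], 2)); // 0b0000_0100: the memory column's row 2 is NULL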


@@ -1,51 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use client::{Client, Database};
use common_grpc::MockExecution;
use datafusion::physical_plan::expressions::Column;
use datafusion::physical_plan::projection::ProjectionExec;
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
use tracing::{event, Level};
fn main() {
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
.unwrap();
run();
}
#[tokio::main]
async fn run() {
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let db = Database::new("greptime", client);
let physical = mock_physical_plan();
let result = db.physical_plan(physical, None).await;
event!(Level::INFO, "result: {:#?}", result);
}
fn mock_physical_plan() -> Arc<dyn ExecutionPlan> {
let id_expr = Arc::new(Column::new("id", 0)) as Arc<dyn PhysicalExpr>;
let age_expr = Arc::new(Column::new("age", 2)) as Arc<dyn PhysicalExpr>;
let expr = vec![(id_expr, "id".to_string()), (age_expr, "age".to_string())];
let input =
Arc::new(MockExecution::new("mock_input_exec".to_string())) as Arc<dyn ExecutionPlan>;
let projection = ProjectionExec::try_new(expr, input).unwrap();
Arc::new(projection)
}


@@ -58,7 +58,19 @@ impl Admin {
header: Some(header),
expr: Some(admin_expr::Expr::Alter(expr)),
};
Ok(self.do_requests(vec![expr]).await?.remove(0))
self.do_request(expr).await
}
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let expr = AdminExpr {
header: Some(header),
expr: Some(admin_expr::Expr::DropTable(expr)),
};
self.do_request(expr).await
}
/// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.
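Both `alter` and the new `drop_table` now go through a shared `do_request` helper. Judging from the single-expression call it replaces above, its body is presumably a thin wrapper over `do_requests` (a sketch, not verified against the full file):

async fn do_request(&self, expr: AdminExpr) -> Result<AdminResult> {
    // do_requests guarantees one result per input expression,
    // so removing index 0 of a single-element batch is safe.
    Ok(self.do_requests(vec![expr]).await?.remove(0))
}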


@@ -18,22 +18,17 @@ use api::v1::codec::SelectResult as GrpcSelectResult;
use api::v1::column::SemanticType;
use api::v1::{
object_expr, object_result, select_expr, DatabaseRequest, ExprHeader, InsertExpr,
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, PhysicalPlan,
SelectExpr,
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, SelectExpr,
};
use common_error::status_code::StatusCode;
use common_grpc::{AsExcutionPlan, DefaultAsPlanImpl};
use common_insert::column_to_vector;
use common_grpc_expr::column_to_vector;
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::physical_plan::ExecutionPlan;
use datatypes::prelude::*;
use datatypes::schema::{ColumnSchema, Schema};
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{
ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
};
use crate::error::{ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu};
use crate::{error, Client, Result};
pub const PROTOCOL_VERSION: u32 = 1;
@@ -94,24 +89,6 @@ impl Database {
self.do_select(select_expr).await
}
pub async fn physical_plan(
&self,
physical: Arc<dyn ExecutionPlan>,
original_ql: Option<String>,
) -> Result<ObjectResult> {
let plan = DefaultAsPlanImpl::try_from_physical_plan(physical.clone())
.context(EncodePhysicalSnafu { physical })?
.bytes;
let original_ql = original_ql.unwrap_or_default();
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::PhysicalPlan(PhysicalPlan {
original_ql: original_ql.into_bytes(),
plan,
})),
};
self.do_select(select_expr).await
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),


@@ -103,7 +103,7 @@ pub enum Error {
#[snafu(display("Failed to convert column to vector, source: {}", source))]
ColumnToVector {
#[snafu(backtrace)]
source: common_insert::error::Error,
source: common_grpc_expr::error::Error,
},
}


@@ -18,8 +18,10 @@ common-telemetry = { path = "../common/telemetry", features = [
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
serde = "1.0"
servers = { path = "../servers" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"
@@ -27,3 +29,6 @@ toml = "0.5"
[dev-dependencies]
serde = "1.0"
tempdir = "0.3"
[build-dependencies]
build-data = "0.1.3"

src/cmd/build.rs (new file, 29 lines)

@@ -0,0 +1,29 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
const DEFAULT_VALUE: &str = "unknown";
fn main() {
println!(
"cargo:rustc-env=GIT_COMMIT={}",
build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
);
println!(
"cargo:rustc-env=GIT_BRANCH={}",
build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
);
println!(
"cargo:rustc-env=GIT_DIRTY={}",
build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
);
}
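The three `cargo:rustc-env` lines export git metadata as compile-time environment variables, and the `unwrap_or_else`/`map_or` fallbacks ensure a missing git context yields "unknown" rather than a build failure; `print_version` below reads the values back with `env!`.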


@@ -20,7 +20,7 @@ use cmd::{datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info};
#[derive(Parser)]
#[clap(name = "greptimedb")]
#[clap(name = "greptimedb", version = print_version())]
struct Command {
#[clap(long, default_value = "/tmp/greptimedb/logs")]
log_dir: String,
@@ -70,6 +70,19 @@ impl fmt::Display for SubCommand {
}
}
fn print_version() -> &'static str {
concat!(
"\nbranch: ",
env!("GIT_BRANCH"),
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
}
#[tokio::main]
async fn main() -> Result<()> {
let cmd = Command::parse();


@@ -14,8 +14,9 @@
use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{Datanode, DatanodeOptions};
use frontend::frontend::Mode;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
use meta_client::MetaClientOpts;
use servers::Mode;
use snafu::ResultExt;
use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
@@ -46,7 +47,7 @@ impl SubCommand {
}
}
#[derive(Debug, Parser)]
#[derive(Debug, Parser, Default)]
struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
@@ -58,6 +59,10 @@ struct StartCommand {
metasrv_addr: Option<String>,
#[clap(short, long)]
config_file: Option<String>,
#[clap(long)]
data_dir: Option<String>,
#[clap(long)]
wal_dir: Option<String>,
}
impl StartCommand {
@@ -98,7 +103,13 @@ impl TryFrom<StartCommand> for DatanodeOptions {
}
if let Some(meta_addr) = cmd.metasrv_addr {
opts.meta_client_opts.metasrv_addr = meta_addr;
opts.meta_client_opts
.get_or_insert_with(MetaClientOpts::default)
.metasrv_addrs = meta_addr
.split(',')
.map(&str::trim)
.map(&str::to_string)
.collect::<_>();
opts.mode = Mode::Distributed;
}
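The comma-separated `--metasrv-addr` value is split and trimmed into `metasrv_addrs`. A quick illustration of the combinator chain used above:

let meta_addr = "127.0.0.1:3002, 127.0.0.1:3003";
let addrs: Vec<String> = meta_addr
    .split(',')
    .map(str::trim)
    .map(str::to_string)
    .collect();
assert_eq!(
    vec!["127.0.0.1:3002".to_string(), "127.0.0.1:3003".to_string()],
    addrs
);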
@@ -108,6 +119,14 @@ impl TryFrom<StartCommand> for DatanodeOptions {
}
.fail();
}
if let Some(data_dir) = cmd.data_dir {
opts.storage = ObjectStoreConfig::File { data_dir };
}
if let Some(wal_dir) = cmd.wal_dir {
opts.wal_dir = wal_dir;
}
Ok(opts)
}
}
@@ -117,39 +136,41 @@ mod tests {
use std::assert_matches::assert_matches;
use datanode::datanode::ObjectStoreConfig;
use frontend::frontend::Mode;
use servers::Mode;
use super::*;
#[test]
fn test_read_from_config_file() {
let cmd = StartCommand {
node_id: None,
rpc_addr: None,
mysql_addr: None,
metasrv_addr: None,
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
..Default::default()
};
let options: DatanodeOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
assert_eq!("127.0.0.1:3306".to_string(), options.mysql_addr);
assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
assert_eq!(4, options.mysql_runtime_size);
assert_eq!(
"1.1.1.1:3002".to_string(),
options.meta_client_opts.metasrv_addr
);
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
assert_eq!(3000, options.meta_client_opts.timeout_millis);
assert!(!options.meta_client_opts.tcp_nodelay);
let MetaClientOpts {
metasrv_addrs: metasrv_addr,
timeout_millis,
connect_timeout_millis,
tcp_nodelay,
} = options.meta_client_opts.unwrap();
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
assert_eq!(5000, connect_timeout_millis);
assert_eq!(3000, timeout_millis);
assert!(!tcp_nodelay);
match options.storage {
ObjectStoreConfig::File { data_dir } => {
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
}
ObjectStoreConfig::S3 { .. } => unreachable!(),
};
}
@@ -157,44 +178,30 @@ mod tests {
fn test_try_from_cmd() {
assert_eq!(
Mode::Standalone,
DatanodeOptions::try_from(StartCommand {
node_id: None,
rpc_addr: None,
mysql_addr: None,
metasrv_addr: None,
config_file: None
})
.unwrap()
.mode
DatanodeOptions::try_from(StartCommand::default())
.unwrap()
.mode
);
let mode = DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
rpc_addr: None,
mysql_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None,
..Default::default()
})
.unwrap()
.mode;
assert_matches!(mode, Mode::Distributed);
assert!(DatanodeOptions::try_from(StartCommand {
node_id: None,
rpc_addr: None,
mysql_addr: None,
metasrv_addr: Some("127.0.0.1:3002".to_string()),
config_file: None,
..Default::default()
})
.is_err());
// Providing node_id while leaving metasrv_addr absent is ok, since metasrv_addr has a default value
DatanodeOptions::try_from(StartCommand {
node_id: Some(42),
rpc_addr: None,
mysql_addr: None,
metasrv_addr: None,
config_file: None,
..Default::default()
})
.unwrap();
}
@@ -202,17 +209,23 @@ mod tests {
#[test]
fn test_merge_config() {
let dn_opts = DatanodeOptions::try_from(StartCommand {
node_id: None,
rpc_addr: None,
mysql_addr: None,
metasrv_addr: None,
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
..Default::default()
})
.unwrap();
assert_eq!(Some(42), dn_opts.node_id);
assert_eq!("1.1.1.1:3002", dn_opts.meta_client_opts.metasrv_addr);
let MetaClientOpts {
metasrv_addrs: metasrv_addr,
timeout_millis,
connect_timeout_millis,
tcp_nodelay,
} = dn_opts.meta_client_opts.unwrap();
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
assert_eq!(3000, timeout_millis);
assert_eq!(5000, connect_timeout_millis);
assert!(!tcp_nodelay);
}
}


@@ -25,12 +25,6 @@ pub enum Error {
source: datanode::error::Error,
},
#[snafu(display("Failed to build frontend, source: {}", source))]
BuildFrontend {
#[snafu(backtrace)]
source: frontend::error::Error,
},
#[snafu(display("Failed to start frontend, source: {}", source))]
StartFrontend {
#[snafu(backtrace)]
@@ -75,7 +69,6 @@ impl ErrorExt for Error {
StatusCode::InvalidArguments
}
Error::IllegalConfig { .. } => StatusCode::InvalidArguments,
Error::BuildFrontend { source, .. } => source.status_code(),
}
}
@@ -97,10 +90,7 @@ mod tests {
#[test]
fn test_start_node_error() {
fn throw_datanode_error() -> StdResult<datanode::error::Error> {
datanode::error::MissingFieldSnafu {
field: "test_field",
}
.fail()
datanode::error::MissingNodeIdSnafu {}.fail()
}
let e = throw_datanode_error()


@@ -13,13 +13,16 @@
// limitations under the License.
use clap::Parser;
use frontend::frontend::{Frontend, FrontendOptions, Mode};
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use meta_client::MetaClientOpts;
use servers::http::HttpOptions;
use servers::Mode;
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -75,7 +78,7 @@ impl StartCommand {
let opts: FrontendOptions = self.try_into()?;
let mut frontend = Frontend::new(
opts.clone(),
Instance::try_new(&opts)
Instance::try_new_distributed(&opts)
.await
.context(error::StartFrontendSnafu)?,
);
@@ -94,7 +97,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
};
if let Some(addr) = cmd.http_addr {
opts.http_addr = Some(addr);
opts.http_options = Some(HttpOptions {
addr,
..Default::default()
});
}
if let Some(addr) = cmd.grpc_addr {
opts.grpc_options = Some(GrpcOptions {
@@ -124,13 +130,13 @@ impl TryFrom<StartCommand> for FrontendOptions {
opts.influxdb_options = Some(InfluxdbOptions { enable });
}
if let Some(metasrv_addr) = cmd.metasrv_addr {
opts.metasrv_addr = Some(
metasrv_addr
.split(',')
.into_iter()
.map(|x| x.trim().to_string())
.collect::<Vec<String>>(),
);
opts.meta_client_opts
.get_or_insert_with(MetaClientOpts::default)
.metasrv_addrs = metasrv_addr
.split(',')
.map(&str::trim)
.map(&str::to_string)
.collect::<Vec<_>>();
opts.mode = Mode::Distributed;
}
Ok(opts)
@@ -139,6 +145,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
#[cfg(test)]
mod tests {
use std::time::Duration;
use super::*;
#[test]
@@ -155,7 +163,7 @@ mod tests {
};
let opts: FrontendOptions = command.try_into().unwrap();
assert_eq!(opts.http_addr, Some("127.0.0.1:1234".to_string()));
assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
assert_eq!(
opts.postgres_options.as_ref().unwrap().addr,
@@ -186,4 +194,32 @@ mod tests {
assert!(!opts.influxdb_options.unwrap().enable);
}
#[test]
fn test_read_from_config_file() {
let command = StartCommand {
http_addr: None,
grpc_addr: None,
mysql_addr: None,
postgres_addr: None,
opentsdb_addr: None,
influxdb_enable: None,
config_file: Some(format!(
"{}/../../config/frontend.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
metasrv_addr: None,
};
let fe_opts = FrontendOptions::try_from(command).unwrap();
assert_eq!(Mode::Distributed, fe_opts.mode);
assert_eq!(
"127.0.0.1:4000".to_string(),
fe_opts.http_options.as_ref().unwrap().addr
);
assert_eq!(
Duration::from_secs(30),
fe_opts.http_options.as_ref().unwrap().timeout
);
}
}


@@ -16,7 +16,7 @@ use clap::Parser;
use common_telemetry::info;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
use datanode::instance::InstanceRef;
use frontend::frontend::{Frontend, FrontendOptions, Mode};
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance as FeInstance;
@@ -25,12 +25,11 @@ use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prometheus::PrometheusOptions;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::Mode;
use snafu::ResultExt;
use tokio::try_join;
use crate::error::{
BuildFrontendSnafu, Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu,
};
use crate::error::{Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu};
use crate::toml_loader;
#[derive(Parser)]
@@ -60,7 +59,7 @@ impl SubCommand {
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StandaloneOptions {
pub http_addr: Option<String>,
pub http_options: Option<HttpOptions>,
pub grpc_options: Option<GrpcOptions>,
pub mysql_options: Option<MysqlOptions>,
pub postgres_options: Option<PostgresOptions>,
@@ -70,14 +69,13 @@ pub struct StandaloneOptions {
pub mode: Mode,
pub wal_dir: String,
pub storage: ObjectStoreConfig,
pub datanode_mysql_addr: String,
pub datanode_mysql_runtime_size: usize,
pub enable_memory_catalog: bool,
}
impl Default for StandaloneOptions {
fn default() -> Self {
Self {
http_addr: Some("127.0.0.1:4000".to_string()),
http_options: Some(HttpOptions::default()),
grpc_options: Some(GrpcOptions::default()),
mysql_options: Some(MysqlOptions::default()),
postgres_options: Some(PostgresOptions::default()),
@@ -87,8 +85,7 @@ impl Default for StandaloneOptions {
mode: Mode::Standalone,
wal_dir: "/tmp/greptimedb/wal".to_string(),
storage: ObjectStoreConfig::default(),
datanode_mysql_addr: "127.0.0.1:3306".to_string(),
datanode_mysql_runtime_size: 4,
enable_memory_catalog: false,
}
}
}
@@ -96,7 +93,7 @@ impl Default for StandaloneOptions {
impl StandaloneOptions {
fn frontend_options(self) -> FrontendOptions {
FrontendOptions {
http_addr: self.http_addr,
http_options: self.http_options,
grpc_options: self.grpc_options,
mysql_options: self.mysql_options,
postgres_options: self.postgres_options,
@@ -104,8 +101,7 @@ impl StandaloneOptions {
influxdb_options: self.influxdb_options,
prometheus_options: self.prometheus_options,
mode: self.mode,
datanode_rpc_addr: "127.0.0.1:3001".to_string(),
metasrv_addr: None,
meta_client_opts: None,
}
}
@@ -113,8 +109,7 @@ impl StandaloneOptions {
DatanodeOptions {
wal_dir: self.wal_dir,
storage: self.storage,
mysql_addr: self.datanode_mysql_addr,
mysql_runtime_size: self.datanode_mysql_runtime_size,
enable_memory_catalog: self.enable_memory_catalog,
..Default::default()
}
}
@@ -136,18 +131,22 @@ struct StartCommand {
influxdb_enable: bool,
#[clap(short, long)]
config_file: Option<String>,
#[clap(short = 'm', long = "memory-catalog")]
enable_memory_catalog: bool,
}
impl StartCommand {
async fn run(self) -> Result<()> {
let enable_memory_catalog = self.enable_memory_catalog;
let config_file = self.config_file.clone();
let fe_opts = FrontendOptions::try_from(self)?;
let dn_opts: DatanodeOptions = {
let opts: StandaloneOptions = if let Some(path) = config_file {
let mut opts: StandaloneOptions = if let Some(path) = config_file {
toml_loader::from_file!(&path)?
} else {
StandaloneOptions::default()
};
opts.enable_memory_catalog = enable_memory_catalog;
opts.datanode_options()
};
@@ -159,13 +158,16 @@ impl StartCommand {
let mut datanode = Datanode::new(dn_opts.clone())
.await
.context(StartDatanodeSnafu)?;
let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;
let mut frontend = build_frontend(fe_opts, datanode.get_instance()).await?;
try_join!(
async { datanode.start().await.context(StartDatanodeSnafu) },
async { frontend.start().await.context(StartFrontendSnafu) }
)?;
// Start the datanode instance before starting services, so that no requests can come in before internal components are ready.
datanode
.start_instance()
.await
.context(StartDatanodeSnafu)?;
info!("Datanode instance started");
frontend.start().await.context(StartFrontendSnafu)?;
Ok(())
}
}
@@ -173,17 +175,9 @@ impl StartCommand {
/// Build frontend instance in standalone mode
async fn build_frontend(
fe_opts: FrontendOptions,
dn_opts: &DatanodeOptions,
datanode_instance: InstanceRef,
) -> Result<Frontend<FeInstance>> {
let grpc_server_addr = &dn_opts.rpc_addr;
info!(
"Build frontend with datanode gRPC addr: {}",
grpc_server_addr
);
let mut frontend_instance = FeInstance::try_new(&fe_opts)
.await
.context(BuildFrontendSnafu)?;
let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
frontend_instance.set_catalog_manager(datanode_instance.catalog_manager().clone());
frontend_instance.set_script_handler(datanode_instance);
Ok(Frontend::new(fe_opts, frontend_instance))
@@ -204,7 +198,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
opts.mode = Mode::Standalone;
if let Some(addr) = cmd.http_addr {
opts.http_addr = Some(addr);
opts.http_options = Some(HttpOptions {
addr,
..Default::default()
});
}
if let Some(addr) = cmd.rpc_addr {
// frontend grpc addr conflict with datanode default grpc addr
@@ -254,6 +251,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
#[cfg(test)]
mod tests {
use std::time::Duration;
use super::*;
#[test]
@@ -269,12 +268,19 @@ mod tests {
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
influxdb_enable: false,
enable_memory_catalog: false,
};
let fe_opts = FrontendOptions::try_from(cmd).unwrap();
assert_eq!(Mode::Standalone, fe_opts.mode);
assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
assert_eq!(Some("127.0.0.1:4000".to_string()), fe_opts.http_addr);
assert_eq!(
"127.0.0.1:4000".to_string(),
fe_opts.http_options.as_ref().unwrap().addr
);
assert_eq!(
Duration::from_secs(30),
fe_opts.http_options.as_ref().unwrap().timeout
);
assert_eq!(
"127.0.0.1:4001".to_string(),
fe_opts.grpc_options.unwrap().addr


@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use bitvec::prelude as bv;
pub use bitvec::prelude;
// `Lsb0` provides the best codegen for bit manipulation,
// see https://github.com/bitvecto-rs/bitvec/blob/main/doc/order/Lsb0.md
pub type BitVec = bv::BitVec<u8, bv::Lsb0>;
pub type BitVec = prelude::BitVec<u8>;
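Since `Lsb0` is bitvec's default bit order, the new alias should be equivalent to the old `bv::BitVec<u8, bv::Lsb0>`. A minimal sketch of the byte layout this preserves:

use common_base::BitVec;

let mut mask = BitVec::new();
mask.extend([false, true, false, false]); // mark row 1 as set
assert_eq!(mask.into_vec(), vec![0b0000_0010u8]);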


@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod bitset;
pub mod bit_vec;
pub mod buffer;
pub mod bytes;
pub use bitset::BitVec;
pub use bit_vec::BitVec;


@@ -14,7 +14,6 @@ regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dev-dependencies]
chrono = "0.4"


@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
/// scripts table id
pub const SCRIPTS_TABLE_ID: u32 = 1;
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";


@@ -14,10 +14,3 @@
pub mod consts;
pub mod error;
mod helper;
pub use helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
TableGlobalValue, TableRegionalKey, TableRegionalValue,
};


@@ -62,6 +62,15 @@ pub enum StatusCode {
/// Runtime resources exhausted, like creating threads failed.
RuntimeResourcesExhausted = 6000,
// ====== End of server related status code =======
// ====== Begin of auth related status code =====
/// User not exist
UserNotFound = 7000,
/// Unsupported password type
UnsupportedPwdType = 7001,
/// Username and password does not match
UserPwdMismatch = 7002,
// ====== End of auth related status code =====
}
impl StatusCode {
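A hypothetical mapping from a frontend auth failure to these new codes; the `AuthError` variants here are illustrative only, not part of this diff:

fn auth_status_code(err: &AuthError) -> StatusCode {
    match err {
        AuthError::UserNotFound { .. } => StatusCode::UserNotFound,
        AuthError::UnsupportedPasswordType { .. } => StatusCode::UnsupportedPwdType,
        AuthError::PasswordMismatch { .. } => StatusCode::UserPwdMismatch,
    }
}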


@@ -9,8 +9,8 @@ arc-swap = "1.0"
chrono-tz = "0.6"
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-time = { path = "../time" }
common-query = { path = "../query" }
common-time = { path = "../time" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../../datatypes" }
libc = "0.2"
@@ -21,20 +21,6 @@ paste = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
statrs = "0.15"
[dependencies.arrow]
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]
package = "arrow2"
version = "0.10"
[dev-dependencies]
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }


@@ -13,10 +13,12 @@
// limitations under the License.
mod pow;
mod rate;
use std::sync::Arc;
pub use pow::PowFunction;
pub use rate::RateFunction;
use crate::scalars::function_registry::FunctionRegistry;
@@ -25,5 +27,6 @@ pub(crate) struct MathFunction;
impl MathFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(PowFunction::default()));
registry.register(Arc::new(RateFunction::default()))
}
}


@@ -0,0 +1,116 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use arrow::array::Array;
use common_query::error::{FromArrowArraySnafu, Result, TypeCastSnafu};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow;
use datatypes::prelude::*;
use datatypes::vectors::{Helper, VectorRef};
use snafu::ResultExt;
use crate::scalars::function::{Function, FunctionContext};
/// Generates rates from a sequence of adjacent data points.
#[derive(Clone, Debug, Default)]
pub struct RateFunction;
impl fmt::Display for RateFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "RATE")
}
}
impl Function for RateFunction {
fn name(&self) -> &str {
"prom_rate"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::float64_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
let val = &columns[0].to_arrow_array();
let val_0 = val.slice(0, val.len() - 1);
let val_1 = val.slice(1, val.len() - 1);
let dv = arrow::compute::arithmetics::sub(&*val_1, &*val_0);
let ts = &columns[1].to_arrow_array();
let ts_0 = ts.slice(0, ts.len() - 1);
let ts_1 = ts.slice(1, ts.len() - 1);
let dt = arrow::compute::arithmetics::sub(&*ts_1, &*ts_0);
fn all_to_f64(array: &dyn Array) -> Result<Box<dyn Array>> {
Ok(arrow::compute::cast::cast(
array,
&arrow::datatypes::DataType::Float64,
arrow::compute::cast::CastOptions {
wrapped: true,
partial: true,
},
)
.context(TypeCastSnafu {
typ: arrow::datatypes::DataType::Float64,
})?)
}
let dv = all_to_f64(&*dv)?;
let dt = all_to_f64(&*dt)?;
let rate = arrow::compute::arithmetics::div(&*dv, &*dt);
let v = Helper::try_into_vector(&rate).context(FromArrowArraySnafu)?;
Ok(v)
}
}
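Concretely, `eval` offsets the input against itself by one element and divides the deltas: for values [1.0, 3.0, 6.0] at timestamps [0, 1, 2] (the vectors used in the test below), dv = [3.0 - 1.0, 6.0 - 3.0] = [2.0, 3.0] and dt = [1, 1], so the resulting rate vector is [2.0, 3.0], one element shorter than the input.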
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow::array::Float64Array;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{Float32Vector, Int64Vector};
use super::*;
#[test]
fn test_rate_function() {
let rate = RateFunction::default();
assert_eq!("prom_rate", rate.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
rate.return_type(&[]).unwrap()
);
assert!(matches!(rate.signature(),
Signature {
type_signature: TypeSignature::Uniform(2, valid_types),
volatility: Volatility::Immutable
} if valid_types == ConcreteDataType::numerics()
));
let values = vec![1.0, 3.0, 6.0];
let ts = vec![0, 1, 2];
let args: Vec<VectorRef> = vec![
Arc::new(Float32Vector::from_vec(values)),
Arc::new(Int64Vector::from_vec(ts)),
];
let vector = rate.eval(FunctionContext::default(), &args).unwrap();
let arr = vector.to_arrow_array();
let expect = Arc::new(Float64Array::from_vec(vec![2.0, 3.0]));
let res = arrow::compute::comparison::eq(&*arr, &*expect);
res.iter().for_each(|x| assert!(matches!(x, Some(true))));
}
}


@@ -14,9 +14,9 @@
use std::sync::Arc;
use arrow::array::PrimitiveArray;
use arrow::compute::cast::primitive_to_primitive;
use arrow::datatypes::DataType::Float64;
use datatypes::arrow::array::PrimitiveArray;
use datatypes::arrow::compute::cast::primitive_to_primitive;
use datatypes::arrow::datatypes::DataType::Float64;
use datatypes::data_type::DataType;
use datatypes::prelude::ScalarVector;
use datatypes::type_id::LogicalTypeId;


@@ -17,11 +17,11 @@
use std::fmt;
use std::sync::Arc;
use arrow::compute::arithmetics;
use arrow::datatypes::DataType as ArrowDatatype;
use arrow::scalar::PrimitiveScalar;
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute::arithmetics;
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::arrow::scalar::PrimitiveScalar;
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{TimestampVector, VectorRef};
use snafu::ResultExt;


@@ -1,5 +1,5 @@
[package]
name = "common-insert"
name = "common-grpc-expr"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
@@ -8,10 +8,12 @@ license = "Apache-2.0"
api = { path = "../../api" }
async-trait = "0.1"
common-base = { path = "../base" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-grpc = { path = "../grpc" }
common-query = { path = "../query" }
common-telemetry = { path = "../telemetry" }
common-time = { path = "../time" }
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }


@@ -0,0 +1,234 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::alter_expr::Kind;
use api::v1::{AlterExpr, CreateExpr, DropColumns};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};
use crate::error::{
ColumnNotFoundSnafu, CreateSchemaSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
MissingTimestampColumnSnafu, Result,
};
/// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
match expr.kind {
Some(Kind::AddColumns(add_columns)) => {
let add_column_requests = add_columns
.add_columns
.into_iter()
.map(|ac| {
let column_def = ac.column_def.context(MissingFieldSnafu {
field: "column_def",
})?;
let schema =
column_def
.try_as_column_schema()
.context(InvalidColumnDefSnafu {
column: &column_def.name,
})?;
Ok(AddColumnRequest {
column_schema: schema,
is_key: ac.is_key,
})
})
.collect::<Result<Vec<_>>>()?;
let alter_kind = AlterKind::AddColumns {
columns: add_column_requests,
};
let request = AlterTableRequest {
catalog_name: expr.catalog_name,
schema_name: expr.schema_name,
table_name: expr.table_name,
alter_kind,
};
Ok(Some(request))
}
Some(Kind::DropColumns(DropColumns { drop_columns })) => {
let alter_kind = AlterKind::DropColumns {
names: drop_columns.into_iter().map(|c| c.name).collect(),
};
let request = AlterTableRequest {
catalog_name: expr.catalog_name,
schema_name: expr.schema_name,
table_name: expr.table_name,
alter_kind,
};
Ok(Some(request))
}
None => Ok(None),
}
}
pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
let column_schemas = expr
.column_defs
.iter()
.map(|x| {
x.try_as_column_schema()
.context(InvalidColumnDefSnafu { column: &x.name })
})
.collect::<Result<Vec<ColumnSchema>>>()?;
ensure!(
column_schemas
.iter()
.any(|column| column.name == expr.time_index),
MissingTimestampColumnSnafu {
msg: format!("CreateExpr: {:?}", expr)
}
);
let column_schemas = column_schemas
.into_iter()
.map(|column_schema| {
if column_schema.name == expr.time_index {
column_schema.with_time_index(true)
} else {
column_schema
}
})
.collect::<Vec<_>>();
Ok(Arc::new(
SchemaBuilder::try_from(column_schemas)
.context(CreateSchemaSnafu)?
.build()
.context(CreateSchemaSnafu)?,
))
}
pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
let schema = create_table_schema(&expr)?;
let primary_key_indices = expr
.primary_keys
.iter()
.map(|key| {
schema
.column_index_by_name(key)
.context(ColumnNotFoundSnafu {
column_name: key,
table_name: &expr.table_name,
})
})
.collect::<Result<Vec<usize>>>()?;
let catalog_name = expr
.catalog_name
.unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
let schema_name = expr
.schema_name
.unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
let region_ids = if expr.region_ids.is_empty() {
vec![0]
} else {
expr.region_ids
};
Ok(CreateTableRequest {
id: table_id,
catalog_name,
schema_name,
table_name: expr.table_name,
desc: expr.desc,
schema,
region_numbers: region_ids,
primary_key_indices,
create_if_not_exists: expr.create_if_not_exists,
table_options: expr.table_options,
})
}
#[cfg(test)]
mod tests {
use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
use datatypes::prelude::ConcreteDataType;
use super::*;
#[test]
fn test_alter_expr_to_request() {
let expr = AlterExpr {
catalog_name: None,
schema_name: None,
table_name: "monitor".to_string(),
kind: Some(Kind::AddColumns(AddColumns {
add_columns: vec![AddColumn {
column_def: Some(ColumnDef {
name: "mem_usage".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: false,
default_constraint: None,
}),
is_key: false,
}],
})),
};
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
assert_eq!(None, alter_request.catalog_name);
assert_eq!(None, alter_request.schema_name);
assert_eq!("monitor".to_string(), alter_request.table_name);
let add_column = match alter_request.alter_kind {
AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
_ => unreachable!(),
};
assert!(!add_column.is_key);
assert_eq!("mem_usage", add_column.column_schema.name);
assert_eq!(
ConcreteDataType::float64_datatype(),
add_column.column_schema.data_type
);
}
#[test]
fn test_drop_column_expr() {
let expr = AlterExpr {
catalog_name: Some("test_catalog".to_string()),
schema_name: Some("test_schema".to_string()),
table_name: "monitor".to_string(),
kind: Some(Kind::DropColumns(DropColumns {
drop_columns: vec![DropColumn {
name: "mem_usage".to_string(),
}],
})),
};
let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
assert_eq!("monitor".to_string(), alter_request.table_name);
let mut drop_names = match alter_request.alter_kind {
AlterKind::DropColumns { names } => names,
_ => unreachable!(),
};
assert_eq!(1, drop_names.len());
assert_eq!("mem_usage".to_string(), drop_names.pop().unwrap());
}
}

View File

@@ -22,7 +22,7 @@ use snafu::{Backtrace, ErrorCompat};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Column {} not found in table {}", column_name, table_name))]
#[snafu(display("Column `{}` not found in table `{}`", column_name, table_name))]
ColumnNotFound {
column_name: String,
table_name: String,
@@ -57,8 +57,8 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Missing timestamp column in request"))]
MissingTimestampColumn { backtrace: Backtrace },
#[snafu(display("Missing timestamp column, msg: {}", msg))]
MissingTimestampColumn { msg: String, backtrace: Backtrace },
#[snafu(display("Invalid column proto: {}", err_msg))]
InvalidColumnProto {
@@ -70,6 +70,26 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, backtrace: Backtrace },
#[snafu(display("Invalid column default constraint, source: {}", source))]
ColumnDefaultConstraint {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Invalid column proto definition, column: {}, source: {}",
column,
source
))]
InvalidColumnDef {
column: String,
#[snafu(backtrace)]
source: api::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -87,6 +107,9 @@ impl ErrorExt for Error {
| Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
Error::CreateVector { .. } => StatusCode::InvalidArguments,
Error::MissingField { .. } => StatusCode::InvalidArguments,
Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
Error::InvalidColumnDef { source, .. } => source.status_code(),
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {

View File

@@ -14,11 +14,9 @@
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::ops::Deref;
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::codec::InsertBatch;
use api::v1::column::{SemanticType, Values};
use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
use common_base::BitVec;
@@ -35,9 +33,8 @@ use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequ
use table::Table;
use crate::error::{
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
MissingTimestampColumnSnafu, Result,
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu,
IllegalInsertDataSnafu, InvalidColumnProtoSnafu, MissingTimestampColumnSnafu, Result,
};
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
@@ -52,35 +49,25 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnD
}
}
pub fn find_new_columns(
schema: &SchemaRef,
insert_batches: &[InsertBatch],
) -> Result<Option<AddColumns>> {
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
let mut columns_to_add = Vec::default();
let mut new_columns: HashSet<String> = HashSet::default();
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if schema.column_schema_by_name(column_name).is_none() && !new_columns.contains(column_name)
{
if schema.column_schema_by_name(column_name).is_none()
&& !new_columns.contains(column_name)
{
let column_def = Some(build_column_def(column_name, *datatype, true));
columns_to_add.push(AddColumn {
column_def,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
});
new_columns.insert(column_name.to_string());
}
let column_def = Some(build_column_def(column_name, *datatype, true));
columns_to_add.push(AddColumn {
column_def,
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
});
new_columns.insert(column_name.to_string());
}
}
@@ -201,89 +188,84 @@ pub fn build_create_expr_from_insertion(
schema_name: &str,
table_id: Option<TableId>,
table_name: &str,
insert_batches: &[InsertBatch],
columns: &[Column],
) -> Result<CreateExpr> {
let mut new_columns: HashSet<String> = HashSet::default();
let mut column_defs = Vec::default();
let mut primary_key_indices = Vec::default();
let mut timestamp_index = usize::MAX;
for InsertBatch { columns, row_count } in insert_batches {
if *row_count == 0 || columns.is_empty() {
continue;
}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if !new_columns.contains(column_name) {
let mut is_nullable = true;
match *semantic_type {
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
TIMESTAMP_SEMANTIC_TYPE => {
ensure!(
timestamp_index == usize::MAX,
DuplicatedTimestampColumnSnafu {
exists: &columns[timestamp_index].column_name,
duplicated: column_name,
}
);
timestamp_index = column_defs.len();
// Timestamp column must not be null.
is_nullable = false;
}
_ => {}
for Column {
column_name,
semantic_type,
datatype,
..
} in columns
{
if !new_columns.contains(column_name) {
let mut is_nullable = true;
match *semantic_type {
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
TIMESTAMP_SEMANTIC_TYPE => {
ensure!(
timestamp_index == usize::MAX,
DuplicatedTimestampColumnSnafu {
exists: &columns[timestamp_index].column_name,
duplicated: column_name,
}
);
timestamp_index = column_defs.len();
// Timestamp column must not be null.
is_nullable = false;
}
let column_def = build_column_def(column_name, *datatype, is_nullable);
column_defs.push(column_def);
new_columns.insert(column_name.to_string());
_ => {}
}
let column_def = build_column_def(column_name, *datatype, is_nullable);
column_defs.push(column_def);
new_columns.insert(column_name.to_string());
}
ensure!(timestamp_index != usize::MAX, MissingTimestampColumnSnafu);
let timestamp_field_name = columns[timestamp_index].column_name.clone();
let primary_keys = primary_key_indices
.iter()
.map(|idx| columns[*idx].column_name.clone())
.collect::<Vec<_>>();
let expr = CreateExpr {
catalog_name: Some(catalog_name.to_string()),
schema_name: Some(schema_name.to_string()),
table_name: table_name.to_string(),
desc: Some("Created on insertion".to_string()),
column_defs,
time_index: timestamp_field_name,
primary_keys,
create_if_not_exists: true,
table_options: Default::default(),
table_id,
region_ids: vec![0], // TODO(hl): region id should be allocated by frontend
};
return Ok(expr);
}
IllegalInsertDataSnafu.fail()
ensure!(
timestamp_index != usize::MAX,
MissingTimestampColumnSnafu { msg: table_name }
);
let timestamp_field_name = columns[timestamp_index].column_name.clone();
let primary_keys = primary_key_indices
.iter()
.map(|idx| columns[*idx].column_name.clone())
.collect::<Vec<_>>();
let expr = CreateExpr {
catalog_name: Some(catalog_name.to_string()),
schema_name: Some(schema_name.to_string()),
table_name: table_name.to_string(),
desc: Some("Created on insertion".to_string()),
column_defs,
time_index: timestamp_field_name,
primary_keys,
create_if_not_exists: true,
table_options: Default::default(),
table_id,
region_ids: vec![0], // TODO(hl): region id should be allocated by frontend
};
Ok(expr)
}
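A note on the rewritten loop above: `usize::MAX` acts as a "no timestamp column seen yet" sentinel, a second timestamp column trips `DuplicatedTimestampColumnSnafu`, and finishing the loop still at the sentinel trips the new `MissingTimestampColumnSnafu { msg: table_name }`. A self-contained sketch of the same sentinel scheme, with a toy `Semantic` enum and plain `String` errors standing in for the snafu machinery:

#[derive(Clone, Copy, PartialEq)]
enum Semantic {
    Tag,
    Field,
    Timestamp,
}

fn find_timestamp_index(semantics: &[Semantic]) -> Result<usize, String> {
    // usize::MAX is the "not found yet" sentinel.
    let mut timestamp_index = usize::MAX;
    for (i, s) in semantics.iter().enumerate() {
        if *s == Semantic::Timestamp {
            if timestamp_index != usize::MAX {
                // A second timestamp column is an error.
                return Err(format!(
                    "duplicated timestamp columns at {} and {}",
                    timestamp_index, i
                ));
            }
            timestamp_index = i;
        }
    }
    if timestamp_index == usize::MAX {
        return Err("missing timestamp column".to_string());
    }
    Ok(timestamp_index)
}

fn main() {
    let cols = [Semantic::Tag, Semantic::Field, Semantic::Timestamp];
    assert_eq!(find_timestamp_index(&cols), Ok(2));
    assert!(find_timestamp_index(&[Semantic::Tag]).is_err());
}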
pub fn insertion_expr_to_request(
catalog_name: &str,
schema_name: &str,
table_name: &str,
insert_batches: Vec<InsertBatch>,
insert_batches: Vec<(Vec<Column>, u32)>,
table: Arc<dyn Table>,
) -> Result<InsertRequest> {
let schema = table.schema();
let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());
for InsertBatch { columns, row_count } in insert_batches {
for (columns, row_count) in insert_batches {
for Column {
column_name,
values,
@@ -329,14 +311,6 @@ pub fn insertion_expr_to_request(
})
}
#[inline]
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
bytes_vec
.iter()
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
.collect()
}
fn add_values_to_builder(
builder: &mut VectorBuilder,
values: Values,
@@ -463,9 +437,8 @@ mod tests {
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::codec::InsertBatch;
use api::v1::column::{self, SemanticType, Values};
use api::v1::{insert_expr, Column, ColumnDataType};
use api::v1::{Column, ColumnDataType};
use common_base::BitVec;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
@@ -479,11 +452,12 @@ mod tests {
use table::Table;
use super::{
build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
build_create_expr_from_insertion, convert_values, insertion_expr_to_request, is_null,
TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
};
use crate::error;
use crate::error::ColumnDataTypeSnafu;
use crate::insert::find_new_columns;
#[inline]
fn build_column_schema(
@@ -508,11 +482,10 @@ mod tests {
assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
let mock_batch_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
let insert_batch = mock_insert_batch();
let create_expr =
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
.unwrap();
assert_eq!(table_id, create_expr.table_id);
@@ -598,9 +571,9 @@ mod tests {
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
let mock_insert_bytes = mock_insert_batches();
let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
let insert_batch = mock_insert_batch();
let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();
assert_eq!(2, add_columns.add_columns.len());
let host_column = &add_columns.add_columns[0];
@@ -630,10 +603,7 @@ mod tests {
fn test_insertion_expr_to_request() {
let table: Arc<dyn Table> = Arc::new(DemoTable {});
let values = insert_expr::Values {
values: mock_insert_batches(),
};
let insert_batches = insert_batches(&values.values).unwrap();
let insert_batches = vec![mock_insert_batch()];
let insert_req =
insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();
@@ -731,7 +701,7 @@ mod tests {
}
}
fn mock_insert_batches() -> Vec<Vec<u8>> {
fn mock_insert_batch() -> (Vec<Column>, u32) {
let row_count = 2;
let host_vals = column::Values {
@@ -782,10 +752,9 @@ mod tests {
datatype: ColumnDataType::Timestamp as i32,
};
let insert_batch = InsertBatch {
columns: vec![host_column, cpu_column, mem_column, ts_column],
(
vec![host_column, cpu_column, mem_column, ts_column],
row_count,
};
vec![insert_batch.into()]
)
}
}

View File

@@ -1,3 +1,4 @@
#![feature(assert_matches)]
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,9 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod alter;
pub mod error;
mod insert;
pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
pub use insert::{
build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
find_new_columns, insert_batches, insertion_expr_to_request,
find_new_columns, insertion_expr_to_request,
};

View File

@@ -12,30 +12,16 @@ common-error = { path = "../error" }
common-query = { path = "../query" }
common-recordbatch = { path = "../recordbatch" }
common-runtime = { path = "../runtime" }
datatypes = { path = "../../datatypes" }
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.0", features = ["full"] }
tonic = "0.8"
tower = "0.4"
[dependencies.arrow]
package = "arrow2"
version = "0.10"
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]
[dev-dependencies]
criterion = "0.4"
rand = "0.8"

View File

@@ -14,9 +14,7 @@
use std::any::Any;
use api::DecodeError;
use common_error::prelude::{ErrorExt, StatusCode};
use datafusion::error::DataFusionError;
use snafu::{Backtrace, ErrorCompat, Snafu};
pub type Result<T> = std::result::Result<T, Error>;
@@ -24,33 +22,9 @@ pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Unexpected empty physical plan type: {}", name))]
EmptyPhysicalPlan { name: String, backtrace: Backtrace },
#[snafu(display("Unexpected empty physical expr: {}", name))]
EmptyPhysicalExpr { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported datafusion execution plan: {}", name))]
UnsupportedDfPlan { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported datafusion physical expr: {}", name))]
UnsupportedDfExpr { name: String, backtrace: Backtrace },
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, backtrace: Backtrace },
#[snafu(display("Failed to new datafusion projection exec, source: {}", source))]
NewProjection {
source: DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Failed to decode physical plan node, source: {}", source))]
DecodePhysicalPlanNode {
source: DecodeError,
backtrace: Backtrace,
},
#[snafu(display(
"Write type mismatch, column name: {}, expected: {}, actual: {}",
column_name,
@@ -89,17 +63,8 @@ pub enum Error {
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::EmptyPhysicalPlan { .. }
| Error::EmptyPhysicalExpr { .. }
| Error::MissingField { .. }
| Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
StatusCode::Unsupported
}
Error::NewProjection { .. }
| Error::DecodePhysicalPlanNode { .. }
| Error::CreateChannel { .. }
| Error::Conversion { .. } => StatusCode::Internal,
Error::MissingField { .. } | Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
Error::CreateChannel { .. } | Error::Conversion { .. } => StatusCode::Internal,
Error::CollectRecordBatches { source } => source.status_code(),
Error::ColumnDataType { source } => source.status_code(),
}
@@ -126,50 +91,6 @@ mod tests {
None
}
#[test]
fn test_empty_physical_plan_error() {
let e = throw_none_option()
.context(EmptyPhysicalPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_empty_physical_expr_error() {
let e = throw_none_option()
.context(EmptyPhysicalExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_unsupported_df_plan_error() {
let e = throw_none_option()
.context(UnsupportedDfPlanSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_unsupported_df_expr_error() {
let e = throw_none_option()
.context(UnsupportedDfExprSnafu { name: "test" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unsupported);
}
#[test]
fn test_missing_field_error() {
let e = throw_none_option()
@@ -181,33 +102,6 @@ mod tests {
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
}
#[test]
fn test_new_projection_error() {
fn throw_df_error() -> StdResult<DataFusionError> {
Err(DataFusionError::NotImplemented("".to_string()))
}
let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_decode_physical_plan_node_error() {
fn throw_decode_error() -> StdResult<DecodeError> {
Err(DecodeError::new("test"))
}
let e = throw_decode_error()
.context(DecodePhysicalPlanNodeSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_type_mismatch_error() {
let e = throw_none_option()

View File

@@ -14,10 +14,7 @@
pub mod channel_manager;
pub mod error;
pub mod physical;
pub mod select;
pub mod writer;
pub use error::Error;
pub use physical::plan::{DefaultAsPlanImpl, MockExecution};
pub use physical::AsExcutionPlan;

View File

@@ -1,100 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::result::Result;
use std::sync::Arc;
use api::v1::codec;
use datafusion::physical_plan::expressions::Column as DfColumn;
use datafusion::physical_plan::PhysicalExpr as DfPhysicalExpr;
use snafu::OptionExt;
use crate::error::{EmptyPhysicalExprSnafu, Error, UnsupportedDfExprSnafu};
// grpc -> datafusion (physical expr)
pub(crate) fn parse_grpc_physical_expr(
proto: &codec::PhysicalExprNode,
) -> Result<Arc<dyn DfPhysicalExpr>, Error> {
let expr_type = proto.expr_type.as_ref().context(EmptyPhysicalExprSnafu {
name: format!("{:?}", proto),
})?;
// TODO(fys): impl other physical expr
let pexpr: Arc<dyn DfPhysicalExpr> = match expr_type {
codec::physical_expr_node::ExprType::Column(c) => {
let pcol = DfColumn::new(&c.name, c.index as usize);
Arc::new(pcol)
}
};
Ok(pexpr)
}
// datafusion -> grpc (physical expr)
pub(crate) fn parse_df_physical_expr(
df_expr: Arc<dyn DfPhysicalExpr>,
) -> Result<codec::PhysicalExprNode, Error> {
let expr = df_expr.as_any();
// TODO(fys): impl other physical expr
if let Some(expr) = expr.downcast_ref::<DfColumn>() {
Ok(codec::PhysicalExprNode {
expr_type: Some(codec::physical_expr_node::ExprType::Column(
codec::PhysicalColumn {
name: expr.name().to_string(),
index: expr.index() as u64,
},
)),
})
} else {
UnsupportedDfExprSnafu {
name: df_expr.to_string(),
}
.fail()?
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::v1::codec::physical_expr_node::ExprType::Column;
use api::v1::codec::{PhysicalColumn, PhysicalExprNode};
use datafusion::physical_plan::expressions::Column as DfColumn;
use datafusion::physical_plan::PhysicalExpr;
use crate::physical::expr::{parse_df_physical_expr, parse_grpc_physical_expr};
#[test]
fn test_column_convert() {
// mock df_column_expr
let df_column = DfColumn::new("name", 11);
let df_column_clone = df_column.clone();
let df_expr = Arc::new(df_column) as Arc<dyn PhysicalExpr>;
// mock grpc_column_expr
let grpc_expr = PhysicalExprNode {
expr_type: Some(Column(PhysicalColumn {
name: "name".to_owned(),
index: 11,
})),
};
let result = parse_df_physical_expr(df_expr).unwrap();
assert_eq!(grpc_expr, result);
let result = parse_grpc_physical_expr(&grpc_expr).unwrap();
let df_column = result.as_any().downcast_ref::<DfColumn>().unwrap();
assert_eq!(df_column_clone, df_column.to_owned());
}
}

View File

@@ -1,280 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::Deref;
use std::result::Result;
use std::sync::Arc;
use api::v1::codec::physical_plan_node::PhysicalPlanType;
use api::v1::codec::{MockInputExecNode, PhysicalPlanNode, ProjectionExecNode};
use arrow::array::{PrimitiveArray, Utf8Array};
use arrow::datatypes::{DataType, Field, Schema};
use async_trait::async_trait;
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::field_util::SchemaExt;
use datafusion::physical_plan::memory::MemoryStream;
use datafusion::physical_plan::projection::ProjectionExec;
use datafusion::physical_plan::{
ExecutionPlan, PhysicalExpr, SendableRecordBatchStream, Statistics,
};
use datafusion::record_batch::RecordBatch;
use snafu::{OptionExt, ResultExt};
use crate::error::{
DecodePhysicalPlanNodeSnafu, EmptyPhysicalPlanSnafu, Error, MissingFieldSnafu,
NewProjectionSnafu, UnsupportedDfPlanSnafu,
};
use crate::physical::{expr, AsExcutionPlan, ExecutionPlanRef};
pub struct DefaultAsPlanImpl {
pub bytes: Vec<u8>,
}
impl AsExcutionPlan for DefaultAsPlanImpl {
type Error = Error;
// Vec<u8> -> PhysicalPlanNode -> ExecutionPlanRef
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
let physicalplan_node: PhysicalPlanNode = self
.bytes
.deref()
.try_into()
.context(DecodePhysicalPlanNodeSnafu)?;
physicalplan_node.try_into_physical_plan()
}
// ExecutionPlanRef -> PhysicalPlanNode -> Vec<u8>
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
where
Self: Sized,
{
let bytes: Vec<u8> = PhysicalPlanNode::try_from_physical_plan(plan)?.into();
Ok(DefaultAsPlanImpl { bytes })
}
}
impl AsExcutionPlan for PhysicalPlanNode {
type Error = Error;
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
let plan = self
.physical_plan_type
.as_ref()
.context(EmptyPhysicalPlanSnafu {
name: format!("{:?}", self),
})?;
// TODO(fys): impl other physical plan type
match plan {
PhysicalPlanType::Projection(projection) => {
let input = if let Some(input) = &projection.input {
input.as_ref().try_into_physical_plan()?
} else {
MissingFieldSnafu { field: "input" }.fail()?
};
let exprs = projection
.expr
.iter()
.zip(projection.expr_name.iter())
.map(|(expr, name)| {
Ok((expr::parse_grpc_physical_expr(expr)?, name.to_string()))
})
.collect::<Result<Vec<(Arc<dyn PhysicalExpr>, String)>, Error>>()?;
let projection =
ProjectionExec::try_new(exprs, input).context(NewProjectionSnafu)?;
Ok(Arc::new(projection))
}
PhysicalPlanType::Mock(mock) => Ok(Arc::new(MockExecution {
name: mock.name.to_string(),
})),
}
}
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
where
Self: Sized,
{
let plan = plan.as_any();
if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
let input = PhysicalPlanNode::try_from_physical_plan(exec.input().to_owned())?;
let expr = exec
.expr()
.iter()
.map(|expr| expr::parse_df_physical_expr(expr.0.clone()))
.collect::<Result<Vec<_>, Error>>()?;
let expr_name = exec.expr().iter().map(|expr| expr.1.clone()).collect();
Ok(PhysicalPlanNode {
physical_plan_type: Some(PhysicalPlanType::Projection(Box::new(
ProjectionExecNode {
input: Some(Box::new(input)),
expr,
expr_name,
},
))),
})
} else if let Some(exec) = plan.downcast_ref::<MockExecution>() {
Ok(PhysicalPlanNode {
physical_plan_type: Some(PhysicalPlanType::Mock(MockInputExecNode {
name: exec.name.clone(),
})),
})
} else {
UnsupportedDfPlanSnafu {
name: format!("{:?}", plan),
}
.fail()?
}
}
}
// TODO(fys): use "test" feature to enable it
#[derive(Debug)]
pub struct MockExecution {
name: String,
}
impl MockExecution {
pub fn new(name: String) -> Self {
Self { name }
}
}
#[async_trait]
impl ExecutionPlan for MockExecution {
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn schema(&self) -> arrow::datatypes::SchemaRef {
let field1 = Field::new("id", DataType::UInt32, false);
let field2 = Field::new("name", DataType::Utf8, false);
let field3 = Field::new("age", DataType::UInt32, false);
Arc::new(arrow::datatypes::Schema::new(vec![field1, field2, field3]))
}
fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning {
unimplemented!()
}
fn output_ordering(
&self,
) -> Option<&[datafusion::physical_plan::expressions::PhysicalSortExpr]> {
unimplemented!()
}
fn children(&self) -> Vec<ExecutionPlanRef> {
unimplemented!()
}
fn with_new_children(
&self,
_children: Vec<ExecutionPlanRef>,
) -> datafusion::error::Result<ExecutionPlanRef> {
unimplemented!()
}
async fn execute(
&self,
_partition: usize,
_runtime: Arc<RuntimeEnv>,
) -> datafusion::error::Result<SendableRecordBatchStream> {
let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
let name_array = Arc::new(Utf8Array::<i32>::from_slice([
"zhangsan", "lisi", "wangwu", "Tony", "Mike",
]));
let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::UInt32, false),
Field::new("name", DataType::Utf8, false),
Field::new("age", DataType::UInt32, false),
]));
let record_batch =
RecordBatch::try_new(schema, vec![id_array, name_array, age_array]).unwrap();
let data: Vec<RecordBatch> = vec![record_batch];
let projection = Some(vec![0, 1, 2]);
let stream = MemoryStream::try_new(data, self.schema(), projection).unwrap();
Ok(Box::pin(stream))
}
fn statistics(&self) -> Statistics {
todo!()
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::v1::codec::PhysicalPlanNode;
use datafusion::physical_plan::expressions::Column;
use datafusion::physical_plan::projection::ProjectionExec;
use crate::physical::plan::{DefaultAsPlanImpl, MockExecution};
use crate::physical::{AsExcutionPlan, ExecutionPlanRef};
#[test]
fn test_convert_df_projection_with_bytes() {
let projection_exec = mock_df_projection();
let bytes = DefaultAsPlanImpl::try_from_physical_plan(projection_exec).unwrap();
let exec = bytes.try_into_physical_plan().unwrap();
verify_df_porjection(exec);
}
#[test]
fn test_convert_df_with_grpc_projection() {
let projection_exec = mock_df_projection();
let projection_node = PhysicalPlanNode::try_from_physical_plan(projection_exec).unwrap();
let exec = projection_node.try_into_physical_plan().unwrap();
verify_df_porjection(exec);
}
fn mock_df_projection() -> Arc<ProjectionExec> {
let mock_input = Arc::new(MockExecution {
name: "mock_input".to_string(),
});
let column1 = Arc::new(Column::new("id", 0));
let column2 = Arc::new(Column::new("name", 1));
Arc::new(
ProjectionExec::try_new(
vec![(column1, "id".to_string()), (column2, "name".to_string())],
mock_input,
)
.unwrap(),
)
}
fn verify_df_porjection(exec: ExecutionPlanRef) {
let projection_exec = exec.as_any().downcast_ref::<ProjectionExec>().unwrap();
let mock_input = projection_exec
.input()
.as_any()
.downcast_ref::<MockExecution>()
.unwrap();
assert_eq!("mock_input", mock_input.name);
assert_eq!(2, projection_exec.expr().len());
assert_eq!("id", projection_exec.expr()[0].1);
assert_eq!("name", projection_exec.expr()[1].1);
}
}

View File

@@ -19,12 +19,12 @@ use api::result::{build_err_result, ObjectResultBuilder};
use api::v1::codec::SelectResult;
use api::v1::column::{SemanticType, Values};
use api::v1::{Column, ObjectResult};
use arrow::array::{Array, BooleanArray, PrimitiveArray};
use common_base::BitVec;
use common_error::prelude::ErrorExt;
use common_error::status_code::StatusCode;
use common_query::Output;
use common_recordbatch::{util, RecordBatches, SendableRecordBatchStream};
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
use datatypes::arrow_array::{BinaryArray, StringArray};
use datatypes::schema::SchemaRef;
use snafu::{OptionExt, ResultExt};
@@ -47,13 +47,9 @@ pub async fn to_object_result(output: std::result::Result<Output, impl ErrorExt>
}
}
async fn collect(stream: SendableRecordBatchStream) -> Result<ObjectResult> {
let schema = stream.schema();
let recordbatches = util::collect(stream)
let recordbatches = RecordBatches::try_collect(stream)
.await
.and_then(|batches| RecordBatches::try_new(schema, batches))
.context(error::CollectRecordBatchesSnafu)?;
let object_result = build_result(recordbatches)?;
Ok(object_result)
}
@@ -136,7 +132,8 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
}
macro_rules! convert_arrow_array_to_grpc_vals {
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{
use datatypes::arrow::datatypes::{DataType, TimeUnit};
match $data_type {
$(
$Type => {
@@ -155,7 +152,7 @@ macro_rules! convert_arrow_array_to_grpc_vals {
)+
_ => unimplemented!(),
}
};
}};
}
pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
@@ -164,7 +161,6 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
}
let data_type = arrays[0].data_type();
use arrow::datatypes::DataType;
convert_arrow_array_to_grpc_vals!(
data_type, arrays,
@@ -192,7 +188,7 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
(DataType::Date32, PrimitiveArray<i32>, date_values, |x| {*x as i32}),
(DataType::Date64, PrimitiveArray<i64>, datetime_values,|x| {*x as i64}),
(DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
(DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
)
}
@@ -200,11 +196,10 @@ pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
mod tests {
use std::sync::Arc;
use arrow::array::{Array, BooleanArray, PrimitiveArray};
use arrow::datatypes::{DataType, Field};
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::field_util::SchemaExt;
use datatypes::arrow::datatypes::Schema as ArrowSchema;
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::arrow_array::StringArray;
use datatypes::schema::Schema;
use datatypes::vectors::{UInt32Vector, VectorRef};

View File

@@ -14,7 +14,6 @@
use std::collections::HashMap;
use api::v1::codec::InsertBatch;
use api::v1::column::{SemanticType, Values};
use api::v1::{Column, ColumnDataType};
use common_base::BitVec;
@@ -24,12 +23,14 @@ use crate::error::{Result, TypeMismatchSnafu};
type ColumnName = String;
type RowCount = u32;
// TODO(fys): will remove in the future.
#[derive(Default)]
pub struct LinesWriter {
column_name_index: HashMap<ColumnName, usize>,
null_masks: Vec<BitVec>,
batch: InsertBatch,
batch: (Vec<Column>, RowCount),
lines: usize,
}
@@ -171,20 +172,20 @@ impl LinesWriter {
pub fn commit(&mut self) {
let batch = &mut self.batch;
batch.row_count += 1;
batch.1 += 1;
for i in 0..batch.columns.len() {
for i in 0..batch.0.len() {
let null_mask = &mut self.null_masks[i];
if batch.row_count as usize > null_mask.len() {
if batch.1 as usize > null_mask.len() {
null_mask.push(true);
}
}
}
pub fn finish(mut self) -> InsertBatch {
pub fn finish(mut self) -> (Vec<Column>, RowCount) {
let null_masks = self.null_masks;
for (i, null_mask) in null_masks.into_iter().enumerate() {
let columns = &mut self.batch.columns;
let columns = &mut self.batch.0;
columns[i].null_mask = null_mask.into_vec();
}
self.batch
@@ -204,9 +205,9 @@ impl LinesWriter {
let batch = &mut self.batch;
let to_insert = self.lines;
let mut null_mask = BitVec::with_capacity(to_insert);
null_mask.extend(BitVec::repeat(true, batch.row_count as usize));
null_mask.extend(BitVec::repeat(true, batch.1 as usize));
self.null_masks.push(null_mask);
batch.columns.push(Column {
batch.0.push(Column {
column_name: column_name.to_string(),
semantic_type: semantic_type.into(),
values: Some(Values::with_capacity(datatype, to_insert)),
@@ -217,7 +218,7 @@ impl LinesWriter {
new_idx
}
};
(column_idx, &mut self.batch.columns[column_idx])
(column_idx, &mut self.batch.0[column_idx])
}
}
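To make the tuple rewrite above easier to follow: `commit` bumps the shared row count (now `batch.1`) and back-fills a null bit for every column that received no value in that row. A minimal sketch with `Vec<bool>` standing in for `common_base::BitVec` and a hypothetical `SketchWriter` in place of `LinesWriter`:

struct SketchWriter {
    row_count: u32,
    null_masks: Vec<Vec<bool>>, // one mask per column
}

impl SketchWriter {
    fn write_value(&mut self, column: usize) {
        // A value is present for this column in the current row: not null.
        self.null_masks[column].push(false);
    }

    fn commit(&mut self) {
        self.row_count += 1;
        for mask in &mut self.null_masks {
            // Any column whose mask is shorter than the row count saw no
            // value in the committed row, so mark it null.
            if (self.row_count as usize) > mask.len() {
                mask.push(true);
            }
        }
    }
}

fn main() {
    let mut w = SketchWriter { row_count: 0, null_masks: vec![vec![], vec![]] };
    w.write_value(0); // only column 0 has a value in row 1
    w.commit();
    assert_eq!(w.null_masks[0], vec![false]);
    assert_eq!(w.null_masks[1], vec![true]);
}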
@@ -282,9 +283,9 @@ mod tests {
writer.commit();
let insert_batch = writer.finish();
assert_eq!(3, insert_batch.row_count);
assert_eq!(3, insert_batch.1);
let columns = insert_batch.columns;
let columns = insert_batch.0;
assert_eq!(9, columns.len());
let column = &columns[0];

View File

@@ -18,10 +18,6 @@ datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
statrs = "0.15"
[dependencies.arrow]
package = "arrow2"
version = "0.10"
[dev-dependencies]
common-base = { path = "../base" }
tokio = { version = "1.0", features = ["full"] }

View File

@@ -80,7 +80,7 @@ impl From<ColumnarValue> for DfColumnarValue {
mod tests {
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::vectors::BooleanVector;
use super::*;

View File

@@ -14,9 +14,11 @@
use std::any::Any;
use arrow::datatypes::DataType as ArrowDatatype;
use arrow::error::ArrowError;
use common_error::prelude::*;
use datafusion_common::DataFusionError;
use datatypes::arrow;
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use statrs::StatsError;
@@ -26,6 +28,11 @@ common_error::define_opaque_error!(Error);
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum InnerError {
#[snafu(display("Fail to cast array to {:?}, source: {}", typ, source))]
TypeCast {
source: ArrowError,
typ: arrow::datatypes::DataType,
},
#[snafu(display("Fail to execute function, source: {}", source))]
ExecuteFunction {
source: DataFusionError,
@@ -51,6 +58,12 @@ pub enum InnerError {
source: DataTypeError,
},
#[snafu(display("Fail to cast arrow array into vector: {}", source))]
FromArrowArray {
#[snafu(backtrace)]
source: DataTypeError,
},
#[snafu(display("Fail to cast arrow array into vector: {:?}, {}", data_type, source))]
IntoVector {
#[snafu(backtrace)]
@@ -138,13 +151,16 @@ impl ErrorExt for InnerError {
InnerError::InvalidInputs { source, .. }
| InnerError::IntoVector { source, .. }
| InnerError::FromScalarValue { source }
| InnerError::ConvertArrowSchema { source } => source.status_code(),
| InnerError::ConvertArrowSchema { source }
| InnerError::FromArrowArray { source } => source.status_code(),
InnerError::ExecuteRepeatedly { .. }
| InnerError::GeneralDataFusion { .. }
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
InnerError::UnsupportedInputDataType { .. } | InnerError::TypeCast { .. } => {
StatusCode::InvalidArguments
}
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
InnerError::ExecutePhysicalPlan { source } => source.status_code(),
@@ -180,7 +196,7 @@ impl From<BoxedError> for Error {
#[cfg(test)]
mod tests {
use arrow::error::ArrowError;
use datatypes::arrow::error::ArrowError;
use snafu::GenerateImplicitData;
use super::*;

View File

@@ -14,8 +14,8 @@
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use datafusion_expr::ReturnTypeFunction as DfReturnTypeFunction;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::vectors::VectorRef;
use snafu::ResultExt;

View File

@@ -17,10 +17,10 @@
use std::fmt::Debug;
use std::sync::Arc;
use arrow::array::ArrayRef;
use common_time::timestamp::TimeUnit;
use datafusion_common::Result as DfResult;
use datafusion_expr::Accumulator as DfAccumulator;
use datatypes::arrow::array::ArrayRef;
use datatypes::prelude::*;
use datatypes::value::ListValue;
use datatypes::vectors::{Helper as VectorHelper, VectorRef};
@@ -266,9 +266,9 @@ fn try_convert_list_value(list: ListValue) -> Result<ScalarValue> {
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType;
use common_base::bytes::{Bytes, StringBytes};
use datafusion_common::ScalarValue;
use datatypes::arrow::datatypes::DataType;
use datatypes::value::{ListValue, OrderedFloat};
use super::*;

View File

@@ -72,12 +72,12 @@ pub fn create_aggregate_function(
mod tests {
use std::sync::Arc;
use arrow::array::BooleanArray;
use arrow::datatypes::DataType;
use datafusion_expr::{
ColumnarValue as DfColumnarValue, ScalarUDF as DfScalarUDF,
TypeSignature as DfTypeSignature,
};
use datatypes::arrow::array::BooleanArray;
use datatypes::arrow::datatypes::DataType;
use datatypes::prelude::*;
use datatypes::vectors::{BooleanVector, VectorRef};

View File

@@ -19,11 +19,11 @@
use std::fmt::{self, Debug, Formatter};
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use datafusion_expr::{
AccumulatorFunctionImplementation as DfAccumulatorFunctionImplementation,
AggregateUDF as DfAggregateUdf, StateTypeFunction as DfStateTypeFunction,
};
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::*;
use crate::function::{

View File

@@ -13,7 +13,7 @@
// limitations under the License.
//! Udf module contains foundational types that are used to represent UDFs.
//! It's modifed from datafusion.
//! It's modified from datafusion.
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

View File

@@ -17,7 +17,7 @@ use std::fmt::Debug;
use std::sync::Arc;
use async_trait::async_trait;
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
use common_recordbatch::adapter::{AsyncRecordBatchStreamAdapter, DfRecordBatchStreamAdapter};
use common_recordbatch::{DfSendableRecordBatchStream, SendableRecordBatchStream};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
@@ -39,7 +39,6 @@ pub type PhysicalPlanRef = Arc<dyn PhysicalPlan>;
/// creating the actual `async` [`SendableRecordBatchStream`]s
/// of [`RecordBatch`] that incrementally compute the operator's
/// output from its input partition.
#[async_trait]
pub trait PhysicalPlan: Debug + Send + Sync {
/// Returns the physical plan as [`Any`](std::any::Any) so that it can be
/// downcast to a specific implementation.
@@ -61,7 +60,7 @@ pub trait PhysicalPlan: Debug + Send + Sync {
fn with_new_children(&self, children: Vec<PhysicalPlanRef>) -> Result<PhysicalPlanRef>;
/// Creates an RecordBatch stream.
async fn execute(
fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
@@ -84,7 +83,6 @@ impl PhysicalPlanAdapter {
}
}
#[async_trait]
impl PhysicalPlan for PhysicalPlanAdapter {
fn as_any(&self) -> &dyn Any {
self
@@ -118,18 +116,15 @@ impl PhysicalPlan for PhysicalPlanAdapter {
Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
}
async fn execute(
fn execute(
&self,
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> Result<SendableRecordBatchStream> {
let stream = self
.df_plan
.execute(partition, runtime)
.await
.context(error::DataFusionExecutionPlanSnafu)?;
let stream = RecordBatchStreamAdapter::try_new(stream)
.context(error::ConvertDfRecordBatchStreamSnafu)?;
let df_plan = self.df_plan.clone();
let stream = Box::pin(async move { df_plan.execute(partition, runtime).await });
let stream = AsyncRecordBatchStreamAdapter::new(self.schema(), stream);
Ok(Box::pin(stream))
}
}
@@ -187,7 +182,7 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
partition: usize,
runtime: Arc<RuntimeEnv>,
) -> DfResult<DfSendableRecordBatchStream> {
let stream = self.0.execute(partition, runtime).await?;
let stream = self.0.execute(partition, runtime)?;
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
}
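The adapter's `execute` is now synchronous: rather than awaiting the DataFusion plan, it clones what the async block needs and packs the call into a boxed future via `Box::pin(async move { ... })`. A stripped-down sketch of that move, with `i32` standing in for a record batch stream and `execute_sync` as a made-up name (futures crate only); the `AsyncRecordBatchStreamAdapter` shown further below is what eventually drives such a future.

use std::future::Future;
use std::pin::Pin;

type FutureValue = Pin<Box<dyn Future<Output = Result<i32, String>> + Send>>;

fn execute_sync() -> FutureValue {
    // Clone whatever the async block needs, then move it in;
    // nothing is awaited here, the caller drives the future later.
    let plan_id = 42;
    Box::pin(async move { Ok(plan_id) })
}

fn main() {
    assert_eq!(futures::executor::block_on(execute_sync()), Ok(42));
}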
@@ -199,7 +194,6 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
#[cfg(test)]
mod test {
use arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::arrow_print;
use datafusion::datasource::TableProvider as DfTableProvider;
@@ -209,6 +203,7 @@ mod test {
use datafusion::prelude::ExecutionContext;
use datafusion_common::field_util::SchemaExt;
use datafusion_expr::Expr;
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::schema::Schema;
use datatypes::vectors::Int32Vector;
@@ -250,7 +245,6 @@ mod test {
schema: SchemaRef,
}
#[async_trait]
impl PhysicalPlan for MyExecutionPlan {
fn as_any(&self) -> &dyn Any {
self
@@ -272,7 +266,7 @@ mod test {
unimplemented!()
}
async fn execute(
fn execute(
&self,
_partition: usize,
_runtime: Arc<RuntimeEnv>,

View File

@@ -15,9 +15,9 @@
//! Signature module contains foundational types that are used to represent signatures, types,
//! and return types of functions.
//! Copied and modified from datafusion.
use arrow::datatypes::DataType as ArrowDataType;
pub use datafusion::physical_plan::functions::Volatility;
use datafusion_expr::{Signature as DfSignature, TypeSignature as DfTypeSignature};
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
@@ -53,7 +53,7 @@ pub struct Signature {
}
#[inline]
fn concret_types_to_arrow_types(ts: Vec<ConcreteDataType>) -> Vec<ArrowDataType> {
fn concrete_types_to_arrow_types(ts: Vec<ConcreteDataType>) -> Vec<ArrowDataType> {
ts.iter().map(ConcreteDataType::as_arrow_type).collect()
}
@@ -118,14 +118,14 @@ impl From<TypeSignature> for DfTypeSignature {
fn from(type_signature: TypeSignature) -> DfTypeSignature {
match type_signature {
TypeSignature::Variadic(types) => {
DfTypeSignature::Variadic(concret_types_to_arrow_types(types))
DfTypeSignature::Variadic(concrete_types_to_arrow_types(types))
}
TypeSignature::VariadicEqual => DfTypeSignature::VariadicEqual,
TypeSignature::Uniform(n, types) => {
DfTypeSignature::Uniform(n, concret_types_to_arrow_types(types))
DfTypeSignature::Uniform(n, concrete_types_to_arrow_types(types))
}
TypeSignature::Exact(types) => {
DfTypeSignature::Exact(concret_types_to_arrow_types(types))
DfTypeSignature::Exact(concrete_types_to_arrow_types(types))
}
TypeSignature::Any(n) => DfTypeSignature::Any(n),
TypeSignature::OneOf(ts) => {
@@ -143,7 +143,7 @@ impl From<Signature> for DfSignature {
#[cfg(test)]
mod tests {
use arrow::datatypes::DataType;
use datatypes::arrow::datatypes::DataType;
use super::*;

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
@@ -19,8 +20,10 @@ use std::task::{Context, Poll};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datafusion_common::DataFusionError;
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
use datatypes::schema::{Schema, SchemaRef};
use futures::ready;
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -28,6 +31,14 @@ use crate::{
DfSendableRecordBatchStream, RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream,
};
type FutureStream = Pin<
Box<
dyn std::future::Future<
Output = std::result::Result<DfSendableRecordBatchStream, DataFusionError>,
> + Send,
>,
>;
/// Greptime SendableRecordBatchStream -> DataFusion RecordBatchStream
pub struct DfRecordBatchStreamAdapter {
stream: SendableRecordBatchStream,
@@ -104,3 +115,71 @@ impl Stream for RecordBatchStreamAdapter {
self.stream.size_hint()
}
}
enum AsyncRecordBatchStreamAdapterState {
Uninit(FutureStream),
Inited(std::result::Result<DfSendableRecordBatchStream, DataFusionError>),
}
pub struct AsyncRecordBatchStreamAdapter {
schema: SchemaRef,
state: AsyncRecordBatchStreamAdapterState,
}
impl AsyncRecordBatchStreamAdapter {
pub fn new(schema: SchemaRef, stream: FutureStream) -> Self {
Self {
schema,
state: AsyncRecordBatchStreamAdapterState::Uninit(stream),
}
}
}
impl RecordBatchStream for AsyncRecordBatchStreamAdapter {
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
}
impl Stream for AsyncRecordBatchStreamAdapter {
type Item = Result<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
loop {
match &mut self.state {
AsyncRecordBatchStreamAdapterState::Uninit(stream_future) => {
self.state = AsyncRecordBatchStreamAdapterState::Inited(ready!(Pin::new(
stream_future
)
.poll(cx)));
continue;
}
AsyncRecordBatchStreamAdapterState::Inited(stream) => match stream {
Ok(stream) => {
return Poll::Ready(ready!(Pin::new(stream).poll_next(cx)).map(|df| {
Ok(RecordBatch {
schema: self.schema(),
df_recordbatch: df.context(error::PollStreamSnafu)?,
})
}));
}
Err(e) => {
return Poll::Ready(Some(
error::CreateRecordBatchesSnafu {
reason: format!("Read error {:?} from stream", e),
}
.fail()
.map_err(|e| e.into()),
))
}
},
}
}
}
// This is not supported for lazy stream.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(0, None)
}
}
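The adapter above is a small state machine: it starts `Uninit` holding the boxed future, flips to `Inited` once that future resolves, and from then on forwards the inner stream's items (or surfaces the stored error). A self-contained sketch of the same two-state design over `i32` items with simplified `String` errors; `LazyStream` and `State` are illustrative names, not this crate's types.

use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};

use futures::stream::BoxStream;
use futures::{ready, Stream, StreamExt};

type FutureStream =
    Pin<Box<dyn Future<Output = Result<BoxStream<'static, i32>, String>> + Send>>;

enum State {
    Uninit(FutureStream),
    Inited(Result<BoxStream<'static, i32>, String>),
}

struct LazyStream {
    state: State,
}

impl Stream for LazyStream {
    type Item = Result<i32, String>;

    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        loop {
            match &mut self.state {
                // First poll(s): drive the future until the inner stream exists.
                State::Uninit(fut) => {
                    let inner = ready!(fut.as_mut().poll(cx));
                    self.state = State::Inited(inner);
                }
                // Afterwards: forward items from the inner stream.
                State::Inited(Ok(stream)) => {
                    return Poll::Ready(ready!(stream.as_mut().poll_next(cx)).map(Ok));
                }
                // Or keep surfacing the creation error.
                State::Inited(Err(e)) => return Poll::Ready(Some(Err(e.clone()))),
            }
        }
    }
}

fn main() {
    let fut: FutureStream = Box::pin(async { Ok(futures::stream::iter(vec![1, 2, 3]).boxed()) });
    let lazy = LazyStream { state: State::Uninit(fut) };
    let items = futures::executor::block_on(lazy.collect::<Vec<_>>());
    assert_eq!(items, vec![Ok(1), Ok(2), Ok(3)]);
}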

View File

@@ -21,12 +21,13 @@ use std::pin::Pin;
use std::sync::Arc;
use datafusion::arrow_print;
use datafusion::physical_plan::memory::MemoryStream;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::VectorRef;
use datatypes::schema::{Schema, SchemaRef};
use error::Result;
use futures::task::{Context, Poll};
use futures::Stream;
use futures::{Stream, TryStreamExt};
pub use recordbatch::RecordBatch;
use snafu::ensure;
@@ -79,6 +80,12 @@ impl RecordBatches {
Ok(Self { schema, batches })
}
pub async fn try_collect(stream: SendableRecordBatchStream) -> Result<Self> {
let schema = stream.schema();
let batches = stream.try_collect::<Vec<_>>().await?;
Ok(Self { schema, batches })
}
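`RecordBatches::try_collect` above leans on `futures::TryStreamExt::try_collect`, which gathers `Ok` items into a collection and short-circuits on the first `Err`. A minimal, self-contained demonstration against an in-memory stream of results:

use futures::stream::{self, TryStreamExt};

fn main() {
    let s = stream::iter(vec![Ok::<i32, String>(1), Ok(2), Ok(3)]);
    let collected: Result<Vec<i32>, String> = futures::executor::block_on(s.try_collect());
    assert_eq!(collected, Ok(vec![1, 2, 3]));
}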
#[inline]
pub fn empty() -> Self {
Self {
@@ -132,6 +139,19 @@ impl RecordBatches {
index: 0,
})
}
pub fn into_df_stream(self) -> DfSendableRecordBatchStream {
let df_record_batches = self
.batches
.into_iter()
.map(|batch| batch.df_recordbatch)
.collect();
// unwrap safety: `MemoryStream::try_new` won't fail
Box::pin(
MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None)
.unwrap(),
)
}
}
pub struct SimpleRecordBatchStream {

View File

@@ -23,6 +23,7 @@ use snafu::ResultExt;
use crate::error::{self, Result};
// TODO(yingwen): We should hold vectors in the RecordBatch.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
pub schema: SchemaRef,
@@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
} else {
let mut row = Vec::with_capacity(self.columns);
// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
for col in 0..self.columns {
let column_array = self.record_batch.df_recordbatch.column(col);
match arrow_array_get(column_array.as_ref(), self.row_cursor)

View File

@@ -9,9 +9,11 @@ bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"

View File

@@ -0,0 +1,77 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use datafusion::logical_plan::DFSchemaRef;
use substrait_proto::protobuf::extensions::simple_extension_declaration::{
ExtensionFunction, MappingType,
};
use substrait_proto::protobuf::extensions::SimpleExtensionDeclaration;
#[derive(Default)]
pub struct ConvertorContext {
scalar_fn_names: HashMap<String, u32>,
scalar_fn_map: HashMap<u32, String>,
df_schema: Option<DFSchemaRef>,
}
impl ConvertorContext {
pub fn register_scalar_fn<S: AsRef<str>>(&mut self, name: S) -> u32 {
if let Some(anchor) = self.scalar_fn_names.get(name.as_ref()) {
return *anchor;
}
let next_anchor = self.scalar_fn_map.len() as _;
self.scalar_fn_map
.insert(next_anchor, name.as_ref().to_string());
self.scalar_fn_names
.insert(name.as_ref().to_string(), next_anchor);
next_anchor
}
pub fn register_scalar_with_anchor<S: AsRef<str>>(&mut self, name: S, anchor: u32) {
self.scalar_fn_map.insert(anchor, name.as_ref().to_string());
self.scalar_fn_names
.insert(name.as_ref().to_string(), anchor);
}
pub fn find_scalar_fn(&self, anchor: u32) -> Option<&str> {
self.scalar_fn_map.get(&anchor).map(|s| s.as_str())
}
pub fn generate_function_extension(&self) -> Vec<SimpleExtensionDeclaration> {
let mut result = Vec::with_capacity(self.scalar_fn_map.len());
for (anchor, name) in &self.scalar_fn_map {
let declaration = SimpleExtensionDeclaration {
mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
extension_uri_reference: 0,
function_anchor: *anchor,
name: name.clone(),
})),
};
result.push(declaration);
}
result
}
pub(crate) fn set_df_schema(&mut self, schema: DFSchemaRef) {
debug_assert!(self.df_schema.is_none());
self.df_schema.get_or_insert(schema);
}
pub(crate) fn df_schema(&self) -> Option<&DFSchemaRef> {
self.df_schema.as_ref()
}
}
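`ConvertorContext` above is essentially a bidirectional interner: names map to dense `u32` anchors and back, and re-registering a name returns its existing anchor. A compact sketch of that idea (`FnRegistry` is a made-up name):

use std::collections::HashMap;

#[derive(Default)]
struct FnRegistry {
    by_name: HashMap<String, u32>,
    by_anchor: HashMap<u32, String>,
}

impl FnRegistry {
    fn register(&mut self, name: &str) -> u32 {
        // Registration is idempotent: a known name keeps its anchor.
        if let Some(anchor) = self.by_name.get(name) {
            return *anchor;
        }
        // Anchors are dense: the next one is simply the current map size.
        let anchor = self.by_anchor.len() as u32;
        self.by_anchor.insert(anchor, name.to_string());
        self.by_name.insert(name.to_string(), anchor);
        anchor
    }
}

fn main() {
    let mut reg = FnRegistry::default();
    let a = reg.register("eq");
    let b = reg.register("lt");
    assert_eq!(a, reg.register("eq")); // same anchor on re-registration
    assert_ne!(a, b);
}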

View File

@@ -0,0 +1,762 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::VecDeque;
use std::str::FromStr;
use datafusion::logical_plan::{Column, Expr};
use datafusion_expr::{expr_fn, lit, BuiltinScalarFunction, Operator};
use datatypes::schema::Schema;
use snafu::{ensure, OptionExt};
use substrait_proto::protobuf::expression::field_reference::ReferenceType as FieldReferenceType;
use substrait_proto::protobuf::expression::reference_segment::{
ReferenceType as SegReferenceType, StructField,
};
use substrait_proto::protobuf::expression::{
FieldReference, Literal, ReferenceSegment, RexType, ScalarFunction,
};
use substrait_proto::protobuf::function_argument::ArgType;
use substrait_proto::protobuf::Expression;
use crate::context::ConvertorContext;
use crate::error::{
EmptyExprSnafu, InvalidParametersSnafu, MissingFieldSnafu, Result, UnsupportedExprSnafu,
};
use crate::types::{literal_type_to_scalar_value, scalar_value_as_literal_type};
/// Convert substrait's `Expression` to DataFusion's `Expr`.
pub(crate) fn to_df_expr(
ctx: &ConvertorContext,
expression: Expression,
schema: &Schema,
) -> Result<Expr> {
let expr_rex_type = expression.rex_type.context(EmptyExprSnafu)?;
match expr_rex_type {
RexType::Literal(l) => {
let t = l.literal_type.context(MissingFieldSnafu {
field: "LiteralType",
plan: "Literal",
})?;
let v = literal_type_to_scalar_value(t)?;
Ok(lit(v))
}
RexType::Selection(selection) => convert_selection_rex(*selection, schema),
RexType::ScalarFunction(scalar_fn) => convert_scalar_function(ctx, scalar_fn, schema),
RexType::WindowFunction(_)
| RexType::IfThen(_)
| RexType::SwitchExpression(_)
| RexType::SingularOrList(_)
| RexType::MultiOrList(_)
| RexType::Cast(_)
| RexType::Subquery(_)
| RexType::Enum(_) => UnsupportedExprSnafu {
name: format!("substrait expression {:?}", expr_rex_type),
}
.fail()?,
}
}
/// Convert Substrait's `FieldReference` - `DirectReference` - `StructField` to Datafusion's
/// `Column` expr.
pub fn convert_selection_rex(selection: FieldReference, schema: &Schema) -> Result<Expr> {
if let Some(FieldReferenceType::DirectReference(direct_ref)) = selection.reference_type
&& let Some(SegReferenceType::StructField(field)) = direct_ref.reference_type {
let column_name = schema.column_name_by_index(field.field as _).to_string();
Ok(Expr::Column(Column {
relation: None,
name: column_name,
}))
} else {
InvalidParametersSnafu {
reason: "Only support direct struct reference in Selection Rex",
}
.fail()
}
}
pub fn convert_scalar_function(
ctx: &ConvertorContext,
scalar_fn: ScalarFunction,
schema: &Schema,
) -> Result<Expr> {
// convert argument
let mut inputs = VecDeque::with_capacity(scalar_fn.arguments.len());
for arg in scalar_fn.arguments {
if let Some(ArgType::Value(sub_expr)) = arg.arg_type {
inputs.push_back(to_df_expr(ctx, sub_expr, schema)?);
} else {
InvalidParametersSnafu {
reason: "Only value expression arg is supported to be function argument",
}
.fail()?;
}
}
// convert this scalar function
// map function name
let anchor = scalar_fn.function_reference;
let fn_name = ctx
.find_scalar_fn(anchor)
.with_context(|| InvalidParametersSnafu {
reason: format!("Unregistered scalar function reference: {}", anchor),
})?;
// convenient util
let ensure_arg_len = |expected: usize| -> Result<()> {
ensure!(
inputs.len() == expected,
InvalidParametersSnafu {
reason: format!(
"Invalid number of scalar function {}, expected {} but found {}",
fn_name,
expected,
inputs.len()
)
}
);
Ok(())
};
// construct DataFusion expr
let expr = match fn_name {
// begin binary exprs, with the same order of DF `Operator`'s definition.
"eq" | "equal" => {
ensure_arg_len(2)?;
inputs.pop_front().unwrap().eq(inputs.pop_front().unwrap())
}
"not_eq" | "not_equal" => {
ensure_arg_len(2)?;
inputs
.pop_front()
.unwrap()
.not_eq(inputs.pop_front().unwrap())
}
"lt" => {
ensure_arg_len(2)?;
inputs.pop_front().unwrap().lt(inputs.pop_front().unwrap())
}
"lt_eq" | "lte" => {
ensure_arg_len(2)?;
inputs
.pop_front()
.unwrap()
.lt_eq(inputs.pop_front().unwrap())
}
"gt" => {
ensure_arg_len(2)?;
inputs.pop_front().unwrap().gt(inputs.pop_front().unwrap())
}
"gt_eq" | "gte" => {
ensure_arg_len(2)?;
inputs
.pop_front()
.unwrap()
.gt_eq(inputs.pop_front().unwrap())
}
"plus" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::Plus,
inputs.pop_front().unwrap(),
)
}
"minus" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::Minus,
inputs.pop_front().unwrap(),
)
}
"multiply" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::Multiply,
inputs.pop_front().unwrap(),
)
}
"divide" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::Divide,
inputs.pop_front().unwrap(),
)
}
"modulo" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::Modulo,
inputs.pop_front().unwrap(),
)
}
"and" => {
ensure_arg_len(2)?;
expr_fn::and(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
}
"or" => {
ensure_arg_len(2)?;
expr_fn::or(inputs.pop_front().unwrap(), inputs.pop_front().unwrap())
}
"like" => {
ensure_arg_len(2)?;
inputs
.pop_front()
.unwrap()
.like(inputs.pop_front().unwrap())
}
"not_like" => {
ensure_arg_len(2)?;
inputs
.pop_front()
.unwrap()
.not_like(inputs.pop_front().unwrap())
}
"is_distinct_from" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::IsDistinctFrom,
inputs.pop_front().unwrap(),
)
}
"is_not_distinct_from" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::IsNotDistinctFrom,
inputs.pop_front().unwrap(),
)
}
"regex_match" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::RegexMatch,
inputs.pop_front().unwrap(),
)
}
"regex_i_match" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::RegexIMatch,
inputs.pop_front().unwrap(),
)
}
"regex_not_match" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::RegexNotMatch,
inputs.pop_front().unwrap(),
)
}
"regex_not_i_match" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::RegexNotIMatch,
inputs.pop_front().unwrap(),
)
}
"bitwise_and" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::BitwiseAnd,
inputs.pop_front().unwrap(),
)
}
"bitwise_or" => {
ensure_arg_len(2)?;
expr_fn::binary_expr(
inputs.pop_front().unwrap(),
Operator::BitwiseOr,
inputs.pop_front().unwrap(),
)
}
// end binary exprs
// start other direct expr, with the same order of DF `Expr`'s definition.
"not" => {
ensure_arg_len(1)?;
inputs.pop_front().unwrap().not()
}
"is_not_null" => {
ensure_arg_len(1)?;
inputs.pop_front().unwrap().is_not_null()
}
"is_null" => {
ensure_arg_len(1)?;
inputs.pop_front().unwrap().is_null()
}
"negative" => {
ensure_arg_len(1)?;
Expr::Negative(Box::new(inputs.pop_front().unwrap()))
}
// skip GetIndexedField, unimplemented.
"between" => {
ensure_arg_len(3)?;
Expr::Between {
expr: Box::new(inputs.pop_front().unwrap()),
negated: false,
low: Box::new(inputs.pop_front().unwrap()),
high: Box::new(inputs.pop_front().unwrap()),
}
}
"not_between" => {
ensure_arg_len(3)?;
Expr::Between {
expr: Box::new(inputs.pop_front().unwrap()),
negated: true,
low: Box::new(inputs.pop_front().unwrap()),
high: Box::new(inputs.pop_front().unwrap()),
}
}
// skip Case, which is covered by substrait::SwitchExpression.
// skip Cast and TryCast, which are covered by substrait::Cast.
"sort" | "sort_des" => {
ensure_arg_len(1)?;
Expr::Sort {
expr: Box::new(inputs.pop_front().unwrap()),
asc: false,
nulls_first: false,
}
}
"sort_asc" => {
ensure_arg_len(1)?;
Expr::Sort {
expr: Box::new(inputs.pop_front().unwrap()),
asc: true,
nulls_first: false,
}
}
// those are datafusion built-in "scalar functions".
"abs"
| "acos"
| "asin"
| "atan"
| "atan2"
| "ceil"
| "cos"
| "exp"
| "floor"
| "ln"
| "log"
| "log10"
| "log2"
| "power"
| "pow"
| "round"
| "signum"
| "sin"
| "sqrt"
| "tan"
| "trunc"
| "coalesce"
| "make_array"
| "ascii"
| "bit_length"
| "btrim"
| "char_length"
| "character_length"
| "concat"
| "concat_ws"
| "chr"
| "current_date"
| "current_time"
| "date_part"
| "datepart"
| "date_trunc"
| "datetrunc"
| "date_bin"
| "initcap"
| "left"
| "length"
| "lower"
| "lpad"
| "ltrim"
| "md5"
| "nullif"
| "octet_length"
| "random"
| "regexp_replace"
| "repeat"
| "replace"
| "reverse"
| "right"
| "rpad"
| "rtrim"
| "sha224"
| "sha256"
| "sha384"
| "sha512"
| "digest"
| "split_part"
| "starts_with"
| "strpos"
| "substr"
| "to_hex"
| "to_timestamp"
| "to_timestamp_millis"
| "to_timestamp_micros"
| "to_timestamp_seconds"
| "now"
| "translate"
| "trim"
| "upper"
| "uuid"
| "regexp_match"
| "struct"
| "from_unixtime"
| "arrow_typeof" => Expr::ScalarFunction {
fun: BuiltinScalarFunction::from_str(fn_name).unwrap(),
args: inputs.into(),
},
// skip ScalarUDF, unimplemented.
// skip AggregateFunction, which is covered by substrait::AggregateRel
// skip WindowFunction, which is covered by substrait's WindowFunction
// skip AggregateUDF, unimplemented.
// skip InList, unimplemented
// skip Wildcard, unimplemented.
// end other direct expr
_ => UnsupportedExprSnafu {
name: format!("scalar function {}", fn_name),
}
.fail()?,
};
Ok(expr)
}
/// Convert DataFusion's `Expr` to substrait's `Expression`
pub fn expression_from_df_expr(
ctx: &mut ConvertorContext,
expr: &Expr,
schema: &Schema,
) -> Result<Expression> {
let expression = match expr {
// Don't merge them with other unsupported expr arms to preserve the ordering.
Expr::Alias(..) => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
Expr::Column(column) => {
let field_reference = convert_column(column, schema)?;
Expression {
rex_type: Some(RexType::Selection(Box::new(field_reference))),
}
}
// Don't merge them with other unsupported expr arms to preserve the ordering.
Expr::ScalarVariable(..) => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
Expr::Literal(v) => {
let t = scalar_value_as_literal_type(v)?;
let l = Literal {
nullable: true,
type_variation_reference: 0,
literal_type: Some(t),
};
Expression {
rex_type: Some(RexType::Literal(l)),
}
}
Expr::BinaryExpr { left, op, right } => {
let left = expression_from_df_expr(ctx, left, schema)?;
let right = expression_from_df_expr(ctx, right, schema)?;
let arguments = utils::expression_to_argument(vec![left, right]);
let op_name = utils::name_df_operator(op);
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
Expr::Not(e) => {
let arg = expression_from_df_expr(ctx, e, schema)?;
let arguments = utils::expression_to_argument(vec![arg]);
let op_name = "not";
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
Expr::IsNotNull(e) => {
let arg = expression_from_df_expr(ctx, e, schema)?;
let arguments = utils::expression_to_argument(vec![arg]);
let op_name = "is_not_null";
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
Expr::IsNull(e) => {
let arg = expression_from_df_expr(ctx, e, schema)?;
let arguments = utils::expression_to_argument(vec![arg]);
let op_name = "is_null";
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
Expr::Negative(e) => {
let arg = expression_from_df_expr(ctx, e, schema)?;
let arguments = utils::expression_to_argument(vec![arg]);
let op_name = "negative";
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
// Don't merge them with other unsupported expr arms to preserve the ordering.
Expr::GetIndexedField { .. } => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
Expr::Between {
expr,
negated,
low,
high,
} => {
let expr = expression_from_df_expr(ctx, expr, schema)?;
let low = expression_from_df_expr(ctx, low, schema)?;
let high = expression_from_df_expr(ctx, high, schema)?;
let arguments = utils::expression_to_argument(vec![expr, low, high]);
let op_name = if *negated { "not_between" } else { "between" };
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
// Don't merge them with other unsupported expr arms to preserve the ordering.
Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
Expr::Sort {
expr,
asc,
nulls_first: _,
} => {
let expr = expression_from_df_expr(ctx, expr, schema)?;
let arguments = utils::expression_to_argument(vec![expr]);
let op_name = if *asc { "sort_asc" } else { "sort_des" };
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
Expr::ScalarFunction { fun, args } => {
let arguments = utils::expression_to_argument(
args.iter()
.map(|e| expression_from_df_expr(ctx, e, schema))
.collect::<Result<Vec<_>>>()?,
);
let op_name = utils::name_builtin_scalar_function(fun);
let function_reference = ctx.register_scalar_fn(op_name);
utils::build_scalar_function_expression(function_reference, arguments)
}
// Don't merge them with other unsupported expr arms to preserve the ordering.
Expr::ScalarUDF { .. }
| Expr::AggregateFunction { .. }
| Expr::WindowFunction { .. }
| Expr::AggregateUDF { .. }
| Expr::InList { .. }
| Expr::Wildcard => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
};
Ok(expression)
}
/// Convert DataFusion's `Column` expr into substrait's `FieldReference` -
/// `DirectReference` - `StructField`.
pub fn convert_column(column: &Column, schema: &Schema) -> Result<FieldReference> {
let column_name = &column.name;
let field_index =
schema
.column_index_by_name(column_name)
.with_context(|| MissingFieldSnafu {
field: format!("{:?}", column),
plan: format!("schema: {:?}", schema),
})?;
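// Only flat (non-nested) column references are produced; `child` is left as `None`.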
Ok(FieldReference {
reference_type: Some(FieldReferenceType::DirectReference(ReferenceSegment {
reference_type: Some(SegReferenceType::StructField(Box::new(StructField {
field: field_index as _,
child: None,
}))),
})),
root_type: None,
})
}
/// Utilities specific to this `DataFusion::Expr` and `Substrait::Expression` conversion.
mod utils {
use datafusion_expr::{BuiltinScalarFunction, Operator};
use substrait_proto::protobuf::expression::{RexType, ScalarFunction};
use substrait_proto::protobuf::function_argument::ArgType;
use substrait_proto::protobuf::{Expression, FunctionArgument};
pub(crate) fn name_df_operator(op: &Operator) -> &str {
match op {
Operator::Eq => "equal",
Operator::NotEq => "not_equal",
Operator::Lt => "lt",
Operator::LtEq => "lte",
Operator::Gt => "gt",
Operator::GtEq => "gte",
Operator::Plus => "plus",
Operator::Minus => "minus",
Operator::Multiply => "multiply",
Operator::Divide => "divide",
Operator::Modulo => "modulo",
Operator::And => "and",
Operator::Or => "or",
Operator::Like => "like",
Operator::NotLike => "not_like",
Operator::IsDistinctFrom => "is_distinct_from",
Operator::IsNotDistinctFrom => "is_not_distinct_from",
Operator::RegexMatch => "regex_match",
Operator::RegexIMatch => "regex_i_match",
Operator::RegexNotMatch => "regex_not_match",
Operator::RegexNotIMatch => "regex_not_i_match",
Operator::BitwiseAnd => "bitwise_and",
Operator::BitwiseOr => "bitwise_or",
}
}
/// Convert list of [Expression] to [FunctionArgument] vector.
pub(crate) fn expression_to_argument<I: IntoIterator<Item = Expression>>(
expressions: I,
) -> Vec<FunctionArgument> {
expressions
.into_iter()
.map(|expr| FunctionArgument {
arg_type: Some(ArgType::Value(expr)),
})
.collect()
}
/// Convenient builder for [Expression]
pub(crate) fn build_scalar_function_expression(
function_reference: u32,
arguments: Vec<FunctionArgument>,
) -> Expression {
Expression {
rex_type: Some(RexType::ScalarFunction(ScalarFunction {
function_reference,
arguments,
output_type: None,
..Default::default()
})),
}
}
pub(crate) fn name_builtin_scalar_function(fun: &BuiltinScalarFunction) -> &str {
match fun {
BuiltinScalarFunction::Abs => "abs",
BuiltinScalarFunction::Acos => "acos",
BuiltinScalarFunction::Asin => "asin",
BuiltinScalarFunction::Atan => "atan",
BuiltinScalarFunction::Ceil => "ceil",
BuiltinScalarFunction::Cos => "cos",
BuiltinScalarFunction::Digest => "digest",
BuiltinScalarFunction::Exp => "exp",
BuiltinScalarFunction::Floor => "floor",
BuiltinScalarFunction::Ln => "ln",
BuiltinScalarFunction::Log => "log",
BuiltinScalarFunction::Log10 => "log10",
BuiltinScalarFunction::Log2 => "log2",
BuiltinScalarFunction::Round => "round",
BuiltinScalarFunction::Signum => "signum",
BuiltinScalarFunction::Sin => "sin",
BuiltinScalarFunction::Sqrt => "sqrt",
BuiltinScalarFunction::Tan => "tan",
BuiltinScalarFunction::Trunc => "trunc",
BuiltinScalarFunction::Array => "make_array",
BuiltinScalarFunction::Ascii => "ascii",
BuiltinScalarFunction::BitLength => "bit_length",
BuiltinScalarFunction::Btrim => "btrim",
BuiltinScalarFunction::CharacterLength => "character_length",
BuiltinScalarFunction::Chr => "chr",
BuiltinScalarFunction::Concat => "concat",
BuiltinScalarFunction::ConcatWithSeparator => "concat_ws",
BuiltinScalarFunction::DatePart => "date_part",
BuiltinScalarFunction::DateTrunc => "date_trunc",
BuiltinScalarFunction::InitCap => "initcap",
BuiltinScalarFunction::Left => "left",
BuiltinScalarFunction::Lpad => "lpad",
BuiltinScalarFunction::Lower => "lower",
BuiltinScalarFunction::Ltrim => "ltrim",
BuiltinScalarFunction::MD5 => "md5",
BuiltinScalarFunction::NullIf => "nullif",
BuiltinScalarFunction::OctetLength => "octet_length",
BuiltinScalarFunction::Random => "random",
BuiltinScalarFunction::RegexpReplace => "regexp_replace",
BuiltinScalarFunction::Repeat => "repeat",
BuiltinScalarFunction::Replace => "replace",
BuiltinScalarFunction::Reverse => "reverse",
BuiltinScalarFunction::Right => "right",
BuiltinScalarFunction::Rpad => "rpad",
BuiltinScalarFunction::Rtrim => "rtrim",
BuiltinScalarFunction::SHA224 => "sha224",
BuiltinScalarFunction::SHA256 => "sha256",
BuiltinScalarFunction::SHA384 => "sha384",
BuiltinScalarFunction::SHA512 => "sha512",
BuiltinScalarFunction::SplitPart => "split_part",
BuiltinScalarFunction::StartsWith => "starts_with",
BuiltinScalarFunction::Strpos => "strpos",
BuiltinScalarFunction::Substr => "substr",
BuiltinScalarFunction::ToHex => "to_hex",
BuiltinScalarFunction::ToTimestamp => "to_timestamp",
BuiltinScalarFunction::ToTimestampMillis => "to_timestamp_millis",
BuiltinScalarFunction::ToTimestampMicros => "to_timestamp_micros",
BuiltinScalarFunction::ToTimestampSeconds => "to_timestamp_seconds",
BuiltinScalarFunction::Now => "now",
BuiltinScalarFunction::Translate => "translate",
BuiltinScalarFunction::Trim => "trim",
BuiltinScalarFunction::Upper => "upper",
BuiltinScalarFunction::RegexpMatch => "regexp_match",
}
}
}
#[cfg(test)]
mod test {
use datatypes::schema::ColumnSchema;
use super::*;
#[test]
fn expr_round_trip() {
let expr = expr_fn::and(
expr_fn::col("column_a").lt_eq(expr_fn::col("column_b")),
expr_fn::col("column_a").gt(expr_fn::col("column_b")),
);
let schema = Schema::new(vec![
ColumnSchema::new(
"column_a",
datatypes::data_type::ConcreteDataType::int64_datatype(),
true,
),
ColumnSchema::new(
"column_b",
datatypes::data_type::ConcreteDataType::float64_datatype(),
true,
),
]);
let mut ctx = ConvertorContext::default();
let substrait_expr = expression_from_df_expr(&mut ctx, &expr, &schema).unwrap();
let converted_expr = to_df_expr(&ctx, substrait_expr, &schema).unwrap();
assert_eq!(expr, converted_expr);
}
}
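For reference, a minimal one-way encoding sketch in the same spirit as the round-trip test above (reusing `ConvertorContext`, `Schema`, `ColumnSchema` and `expr_fn` exactly as the test does; illustrative only, not part of the source):

fn encode_one_way() -> Result<()> {
    let schema = Schema::new(vec![ColumnSchema::new(
        "column_a",
        datatypes::data_type::ConcreteDataType::int64_datatype(),
        true,
    )]);
    // "gt" is registered on the context as a scalar function extension during conversion.
    let expr = expr_fn::col("column_a").gt(expr_fn::col("column_a"));
    let mut ctx = ConvertorContext::default();
    let expression = expression_from_df_expr(&mut ctx, &expr, &schema)?;
    assert!(expression.rex_type.is_some());
    Ok(())
}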

View File

@@ -17,76 +17,135 @@ use std::sync::Arc;
use bytes::{Buf, Bytes, BytesMut};
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use common_telemetry::debug;
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::datasource::TableProvider;
use datafusion::logical_plan::plan::Filter;
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
use datafusion::physical_plan::project_schema;
use prost::Message;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::protobuf::expression::MaskExpression;
use substrait_proto::protobuf::extensions::simple_extension_declaration::MappingType;
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
use substrait_proto::protobuf::read_rel::{NamedTable, ReadType};
use substrait_proto::protobuf::rel::RelType;
use substrait_proto::protobuf::{PlanRel, ReadRel, Rel};
use substrait_proto::protobuf::{FilterRel, Plan, PlanRel, ReadRel, Rel};
use table::table::adapter::DfTableProviderAdapter;
use crate::context::ConvertorContext;
use crate::df_expr::{expression_from_df_expr, to_df_expr};
use crate::error::{
DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
};
use crate::schema::{from_schema, to_schema};
use crate::SubstraitPlan;
pub struct DFLogicalSubstraitConvertor {
catalog_manager: CatalogManagerRef,
}
pub struct DFLogicalSubstraitConvertor;
impl SubstraitPlan for DFLogicalSubstraitConvertor {
type Error = Error;
type Plan = LogicalPlan;
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error> {
let plan_rel = PlanRel::decode(message).context(DecodeRelSnafu)?;
let rel = match plan_rel.rel_type.context(EmptyPlanSnafu)? {
PlanRelType::Rel(rel) => rel,
PlanRelType::Root(_) => UnsupportedPlanSnafu {
name: "Root Relation",
}
.fail()?,
};
self.convert_rel(rel)
fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,
) -> Result<Self::Plan, Self::Error> {
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
self.convert_plan(plan, catalog_manager)
}
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
let rel = self.convert_plan(plan)?;
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
let plan = self.convert_df_plan(plan)?;
let mut buf = BytesMut::new();
plan_rel.encode(&mut buf).context(EncodeRelSnafu)?;
plan.encode(&mut buf).context(EncodeRelSnafu)?;
Ok(buf.freeze())
}
}
impl DFLogicalSubstraitConvertor {
pub fn new(catalog_manager: CatalogManagerRef) -> Self {
Self { catalog_manager }
}
}
impl DFLogicalSubstraitConvertor {
pub fn convert_rel(&self, rel: Rel) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(read_rel),
RelType::Filter(_filter_rel) => UnsupportedPlanSnafu {
name: "Filter Relation",
fn convert_plan(
&self,
mut plan: Plan,
catalog_manager: CatalogManagerRef,
) -> Result<LogicalPlan, Error> {
// prepare convertor context
let mut ctx = ConvertorContext::default();
for simple_ext in plan.extensions {
if let Some(MappingType::ExtensionFunction(function_extension)) =
simple_ext.mapping_type
{
ctx.register_scalar_with_anchor(
function_extension.name,
function_extension.function_anchor,
);
} else {
debug!("Encounter unsupported substrait extension {:?}", simple_ext);
}
}
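// Anchors registered here let `to_df_expr` map function references back to names;
// unknown anchors fail with "Unregistered scalar function reference".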
// extract rel
let rel = if let Some(PlanRel { rel_type }) = plan.relations.pop()
&& let Some(PlanRelType::Rel(rel)) = rel_type {
rel
} else {
UnsupportedPlanSnafu {
name: "Emply or non-Rel relation",
}
.fail()?
};
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
}
fn rel_to_logical_plan(
&self,
ctx: &mut ConvertorContext,
rel: Box<Rel>,
catalog_manager: CatalogManagerRef,
) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
// build logical plan
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
RelType::Filter(filter) => {
let FilterRel {
common: _,
input,
condition,
advanced_extension: _,
} = *filter;
let input = input.context(MissingFieldSnafu {
field: "input",
plan: "Filter",
})?;
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
let condition = condition.context(MissingFieldSnafu {
field: "condition",
plan: "Filter",
})?;
let schema = ctx.df_schema().context(InvalidParametersSnafu {
reason: "the underlying TableScan plan should have included a table schema",
})?;
let schema = schema
.clone()
.try_into()
.context(error::ConvertDfSchemaSnafu)?;
let predicate = to_df_expr(ctx, *condition, &schema)?;
LogicalPlan::Filter(Filter { predicate, input })
}
.fail()?,
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
}
@@ -127,14 +186,18 @@ impl DFLogicalSubstraitConvertor {
name: "Cross Relation",
}
.fail()?,
}?;
};
Ok(logical_plan)
}
fn convert_read_rel(&self, read_rel: Box<ReadRel>) -> Result<LogicalPlan, Error> {
fn convert_read_rel(
&self,
ctx: &mut ConvertorContext,
read_rel: Box<ReadRel>,
catalog_manager: CatalogManagerRef,
) -> Result<LogicalPlan, Error> {
// Extract the catalog, schema and table names from NamedTable, assuming its first three names are the catalog, schema and table name, in that order.
let read_type = read_rel.read_type.context(MissingFieldSnafu {
field: "read_type",
plan: "Read",
@@ -168,8 +231,7 @@ impl DFLogicalSubstraitConvertor {
.map(|mask_expr| self.convert_mask_expression(mask_expr));
// Get table handle from catalog manager
let table_ref = self
.catalog_manager
let table_ref = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.map_err(BoxedError::new)
.context(InternalSnafu)?
@@ -178,31 +240,40 @@ impl DFLogicalSubstraitConvertor {
})?;
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
// Get schema directly from the table, and compare it with the schema retrived from substrait proto.
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
let stored_schema = adapter.schema();
let retrived_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
let retrived_arrow_schema = retrived_schema.arrow_schema();
let retrieved_schema = to_schema(read_rel.base_schema.unwrap_or_default())?;
let retrieved_arrow_schema = retrieved_schema.arrow_schema();
ensure!(
stored_schema.fields == retrived_arrow_schema.fields,
same_schema_without_metadata(&stored_schema, retrieved_arrow_schema),
SchemaNotMatchSnafu {
substrait_schema: retrived_arrow_schema.clone(),
substrait_schema: retrieved_arrow_schema.clone(),
storage_schema: stored_schema
}
);
// Convert filter
let filters = if let Some(filter) = read_rel.filter {
vec![to_df_expr(ctx, *filter, &retrieved_schema)?]
} else {
vec![]
};
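// Mirrors the encode path, where `convert_table_scan_plan` folds the scan's filters into a single conjunction.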
// Calculate the projected schema
let projected_schema = project_schema(&stored_schema, projection.as_ref())
.context(DFInternalSnafu)?
.to_dfschema_ref()
.context(DFInternalSnafu)?;
// TODO(ruihang): Support filters and limit
ctx.set_df_schema(projected_schema.clone());
// TODO(ruihang): Support limit
Ok(LogicalPlan::TableScan(TableScan {
table_name,
table_name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
source: adapter,
projection,
projected_schema,
filters: vec![],
filters,
limit: None,
}))
}
@@ -219,16 +290,42 @@ impl DFLogicalSubstraitConvertor {
}
impl DFLogicalSubstraitConvertor {
pub fn convert_plan(&self, plan: LogicalPlan) -> Result<Rel, Error> {
match plan {
fn logical_plan_to_rel(
&self,
ctx: &mut ConvertorContext,
plan: Arc<LogicalPlan>,
) -> Result<Rel, Error> {
Ok(match &*plan {
LogicalPlan::Projection(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Projection",
}
.fail()?,
LogicalPlan::Filter(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Filter",
LogicalPlan::Filter(filter) => {
let input = Some(Box::new(
self.logical_plan_to_rel(ctx, filter.input.clone())?,
));
let schema = plan
.schema()
.clone()
.try_into()
.context(error::ConvertDfSchemaSnafu)?;
let condition = Some(Box::new(expression_from_df_expr(
ctx,
&filter.predicate,
&schema,
)?));
let rel = FilterRel {
common: None,
input,
condition,
advanced_extension: None,
};
Rel {
rel_type: Some(RelType::Filter(Box::new(rel))),
}
}
.fail()?,
LogicalPlan::Window(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical Window",
}
@@ -258,10 +355,10 @@ impl DFLogicalSubstraitConvertor {
}
.fail()?,
LogicalPlan::TableScan(table_scan) => {
let read_rel = self.convert_table_scan_plan(table_scan)?;
Ok(Rel {
let read_rel = self.convert_table_scan_plan(ctx, table_scan)?;
Rel {
rel_type: Some(RelType::Read(Box::new(read_rel))),
})
}
}
LogicalPlan::EmptyRelation(_) => UnsupportedPlanSnafu {
name: "DataFusion Logical EmptyRelation",
@@ -284,10 +381,36 @@ impl DFLogicalSubstraitConvertor {
),
}
.fail()?,
}
})
}
pub fn convert_table_scan_plan(&self, table_scan: TableScan) -> Result<ReadRel, Error> {
fn convert_df_plan(&self, plan: LogicalPlan) -> Result<Plan, Error> {
let mut ctx = ConvertorContext::default();
let rel = self.logical_plan_to_rel(&mut ctx, Arc::new(plan))?;
// convert extension
let extensions = ctx.generate_function_extension();
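// Scalar function names registered on `ctx` during conversion become extension declarations,
// matching what the decode path reads back.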
// assemble PlanRel
let plan_rel = PlanRel {
rel_type: Some(PlanRelType::Rel(rel)),
};
Ok(Plan {
extension_uris: vec![],
extensions,
relations: vec![plan_rel],
advanced_extensions: None,
expected_type_urls: vec![],
})
}
pub fn convert_table_scan_plan(
&self,
ctx: &mut ConvertorContext,
table_scan: &TableScan,
) -> Result<ReadRel, Error> {
let provider = table_scan
.source
.as_any()
@@ -308,15 +431,32 @@ impl DFLogicalSubstraitConvertor {
// assemble projection
let projection = table_scan
.projection
.map(|proj| self.convert_schema_projection(&proj));
.as_ref()
.map(|x| self.convert_schema_projection(x));
// assemble base (unprojected) schema using Table's schema.
let base_schema = from_schema(&provider.table().schema())?;
// make conjunction over a list of filters and convert the result to substrait
let filter = if let Some(conjunction) = table_scan
.filters
.iter()
.cloned()
.reduce(|accum, expr| accum.and(expr))
{
Some(Box::new(expression_from_df_expr(
ctx,
&conjunction,
&provider.table().schema(),
)?))
} else {
None
};
let read_rel = ReadRel {
common: None,
base_schema: Some(base_schema),
filter: None,
filter,
projection,
advanced_extension: None,
read_type: Some(read_type),
@@ -342,6 +482,13 @@ impl DFLogicalSubstraitConvertor {
}
}
fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
lhs.fields.len() == rhs.fields.len()
&& lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
x.name == y.name && x.data_type == y.data_type && x.is_nullable == y.is_nullable
})
}
#[cfg(test)]
mod test {
use catalog::local::{LocalCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
@@ -393,10 +540,10 @@ mod test {
}
async fn logical_plan_round_trip(plan: LogicalPlan, catalog: CatalogManagerRef) {
let convertor = DFLogicalSubstraitConvertor::new(catalog);
let convertor = DFLogicalSubstraitConvertor;
let proto = convertor.encode(plan.clone()).unwrap();
let tripped_plan = convertor.decode(proto).unwrap();
let tripped_plan = convertor.decode(proto, catalog).unwrap();
assert_eq!(format!("{:?}", plan), format!("{:?}", tripped_plan));
}
@@ -418,6 +565,7 @@ mod test {
.await
.unwrap();
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
let projection = vec![1, 3, 5];
let df_schema = adapter.schema().to_dfschema().unwrap();
let projected_fields = projection
@@ -428,7 +576,10 @@ mod test {
Arc::new(DFSchema::new_with_metadata(projected_fields, Default::default()).unwrap());
let table_scan_plan = LogicalPlan::TableScan(TableScan {
table_name: DEFAULT_TABLE_NAME.to_string(),
table_name: format!(
"{}.{}.{}",
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, DEFAULT_TABLE_NAME
),
source: adapter,
projection: Some(projection),
projected_schema,

View File

@@ -23,10 +23,10 @@ use snafu::{Backtrace, ErrorCompat, Snafu};
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Unsupported physical expr: {}", name))]
#[snafu(display("Unsupported physical plan: {}", name))]
UnsupportedPlan { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported physical plan: {}", name))]
#[snafu(display("Unsupported expr: {}", name))]
UnsupportedExpr { name: String, backtrace: Backtrace },
#[snafu(display("Unsupported concrete type: {:?}", ty))]
@@ -81,7 +81,7 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Table quering not found: {}", name))]
#[snafu(display("Table querying not found: {}", name))]
TableNotFound { name: String, backtrace: Backtrace },
#[snafu(display("Cannot convert plan doesn't belong to GreptimeDB"))]
@@ -99,6 +99,12 @@ pub enum Error {
storage_schema: datafusion::arrow::datatypes::SchemaRef,
backtrace: Backtrace,
},
#[snafu(display("Failed to convert DataFusion schema, source: {}", source))]
ConvertDfSchema {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -120,6 +126,7 @@ impl ErrorExt for Error {
| Error::TableNotFound { .. }
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
Error::ConvertDfSchema { source } => source.status_code(),
}
}

View File

@@ -12,12 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(let_chains)]
mod context;
mod df_expr;
mod df_logical;
pub mod error;
mod schema;
mod types;
use bytes::{Buf, Bytes};
use catalog::CatalogManagerRef;
pub use crate::df_logical::DFLogicalSubstraitConvertor;
@@ -26,7 +31,11 @@ pub trait SubstraitPlan {
type Plan;
fn decode<B: Buf + Send>(&self, message: B) -> Result<Self::Plan, Self::Error>;
fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,
) -> Result<Self::Plan, Self::Error>;
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error>;
}

View File

@@ -12,17 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Methods that perform convertion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
//! Methods that perform conversion between Substrait's type ([Type](SType)) and GreptimeDB's type ([ConcreteDataType]).
//!
Substrait uses [type variation](https://substrait.io/types/type_variations/) to express different "logical types".
//! Current we only have variations on integer types. Variation 0 (system prefered) are the same with base types, which
Currently we only have variations on integer types. Variation 0 (system preferred) is the same as the base type, which
is a signed integer (i.e. I8 -> [i8]), and Variation 1 stands for the unsigned integer (i.e. I8 -> [u8]).
use datafusion::scalar::ScalarValue;
use datatypes::prelude::ConcreteDataType;
use substrait_proto::protobuf::expression::literal::LiteralType;
use substrait_proto::protobuf::r#type::{self as s_type, Kind, Nullability};
use substrait_proto::protobuf::Type as SType;
use substrait_proto::protobuf::{Type as SType, Type};
use crate::error::{Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
use crate::error::{self, Result, UnsupportedConcreteTypeSnafu, UnsupportedSubstraitTypeSnafu};
macro_rules! substrait_kind {
($desc:ident, $concrete_ty:ident) => {{
@@ -134,3 +136,67 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Re
Ok(SType { kind })
}
pub(crate) fn scalar_value_as_literal_type(v: &ScalarValue) -> Result<LiteralType> {
Ok(if v.is_null() {
LiteralType::Null(Type { kind: None })
} else {
match v {
ScalarValue::Boolean(Some(v)) => LiteralType::Boolean(*v),
ScalarValue::Float32(Some(v)) => LiteralType::Fp32(*v),
ScalarValue::Float64(Some(v)) => LiteralType::Fp64(*v),
ScalarValue::Int8(Some(v)) => LiteralType::I8(*v as i32),
ScalarValue::Int16(Some(v)) => LiteralType::I16(*v as i32),
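// Substrait carries I8/I16 literals in an i32 field; `literal_type_to_scalar_value` narrows them back.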
ScalarValue::Int32(Some(v)) => LiteralType::I32(*v),
ScalarValue::Int64(Some(v)) => LiteralType::I64(*v),
ScalarValue::LargeUtf8(Some(v)) => LiteralType::String(v.clone()),
ScalarValue::LargeBinary(Some(v)) => LiteralType::Binary(v.clone()),
// TODO(LFC): Implement other conversions: ScalarValue => LiteralType
_ => {
return error::UnsupportedExprSnafu {
name: format!("{:?}", v),
}
.fail()
}
}
})
}
pub(crate) fn literal_type_to_scalar_value(t: LiteralType) -> Result<ScalarValue> {
Ok(match t {
LiteralType::Null(Type { kind: Some(kind) }) => match kind {
Kind::Bool(_) => ScalarValue::Boolean(None),
Kind::I8(_) => ScalarValue::Int8(None),
Kind::I16(_) => ScalarValue::Int16(None),
Kind::I32(_) => ScalarValue::Int32(None),
Kind::I64(_) => ScalarValue::Int64(None),
Kind::Fp32(_) => ScalarValue::Float32(None),
Kind::Fp64(_) => ScalarValue::Float64(None),
Kind::String(_) => ScalarValue::LargeUtf8(None),
Kind::Binary(_) => ScalarValue::LargeBinary(None),
// TODO(LFC): Implement other conversions: Kind => ScalarValue
_ => {
return error::UnsupportedSubstraitTypeSnafu {
ty: format!("{:?}", kind),
}
.fail()
}
},
LiteralType::Boolean(v) => ScalarValue::Boolean(Some(v)),
LiteralType::I8(v) => ScalarValue::Int8(Some(v as i8)),
LiteralType::I16(v) => ScalarValue::Int16(Some(v as i16)),
LiteralType::I32(v) => ScalarValue::Int32(Some(v)),
LiteralType::I64(v) => ScalarValue::Int64(Some(v)),
LiteralType::Fp32(v) => ScalarValue::Float32(Some(v)),
LiteralType::Fp64(v) => ScalarValue::Float64(Some(v)),
LiteralType::String(v) => ScalarValue::LargeUtf8(Some(v)),
LiteralType::Binary(v) => ScalarValue::LargeBinary(Some(v)),
// TODO(LFC): Implement other conversions: LiteralType => ScalarValue
_ => {
return error::UnsupportedSubstraitTypeSnafu {
ty: format!("{:?}", t),
}
.fail()
}
})
}
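On the supported non-null kinds, the two helpers are inverses of each other; a minimal sketch (crate-internal, as both helpers are `pub(crate)`; note that nulls do not round-trip yet, since the encoder emits `Type { kind: None }`):

fn literal_round_trip() -> Result<()> {
    let v = ScalarValue::Int32(Some(5));
    let literal = scalar_value_as_literal_type(&v)?; // LiteralType::I32(5)
    let back = literal_type_to_scalar_value(literal)?; // ScalarValue::Int32(Some(5))
    assert_eq!(v, back);
    Ok(())
}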

View File

@@ -147,6 +147,18 @@ impl From<i64> for Timestamp {
}
}
impl From<Timestamp> for i64 {
fn from(t: Timestamp) -> Self {
t.value
}
}
impl From<Timestamp> for serde_json::Value {
fn from(d: Timestamp) -> Self {
serde_json::Value::String(d.to_iso8601_string())
}
}
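// JSON serialization renders a timestamp as its ISO 8601 string, e.g. `Timestamp::new(1, TimeUnit::Second)`
// becomes "1970-01-01 00:00:01+0000"; see `test_serialize_to_json_value` below.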
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeUnit {
Second,
@@ -197,6 +209,7 @@ impl Hash for Timestamp {
#[cfg(test)]
mod tests {
use chrono::Offset;
use serde_json::Value;
use super::*;
@@ -318,4 +331,39 @@ mod tests {
let ts = Timestamp::from_millis(ts_millis);
assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
}
#[test]
fn test_serialize_to_json_value() {
assert_eq!(
"1970-01-01 00:00:01+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Second)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Millisecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.000001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Microsecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.000000001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Nanosecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
}
}

View File

@@ -11,21 +11,22 @@ python = ["dep:script"]
[dependencies]
api = { path = "../api" }
async-trait = "0.1"
axum = "0.6.0-rc.2"
axum-macros = "0.3.0-rc.1"
axum = "0.6"
axum-macros = "0.3"
backon = "0.2"
catalog = { path = "../catalog" }
common-base = { path = "../common/base" }
common-catalog = { path = "../common/catalog" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
common-insert = { path = "../common/insert" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
"simd",
] }
datatypes = { path = "../datatypes" }
futures = "0.3"
@@ -34,60 +35,32 @@ log-store = { path = "../log-store" }
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv", features = ["mock"] }
metrics = "0.20"
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
query = { path = "../query" }
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
servers = { path = "../servers" }
session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] }
sql = { path = "../sql" }
storage = { path = "../storage" }
store-api = { path = "../store-api" }
substrait = { path = "../common/substrait" }
table = { path = "../table" }
mito = { path = "../mito", features = ["test"] }
tokio = { version = "1.18", features = ["full"] }
tokio-stream = { version = "0.1", features = ["net"] }
tonic = "0.8"
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
frontend = { path = "../frontend" }
[dependencies.arrow]
package = "arrow2"
version = "0.10"
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]
[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }
common-query = { path = "../common/query" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
"simd",
] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
tempdir = "0.3"
[dev-dependencies.arrow]
package = "arrow2"
version = "0.10"
features = [
"io_csv",
"io_json",
"io_parquet",
"io_parquet_compression",
"io_ipc",
"ahash",
"compute",
"serde_types",
]

Some files were not shown because too many files have changed in this diff.