Compare commits

...

65 Commits

Author SHA1 Message Date
shuiyisong
179ff728df refactor: merge servers::context into session (#811)
* refactor: move context to session

* chore: add unit test

* chore: add pg, opentsdb, influxdb and prometheus to channel enum
2022-12-31 00:00:04 +08:00
Yingwen
4d56d896ca feat: Implement delete for the storage engine (#777)
* docs: Fix incorrect comment of Vector::only_null

* feat: Add delete to WriteRequest and WriteBatch

* feat: Filter deleted rows

* fix: Fix panic after reopening engine

This is detected by adding a reopen step to the delete test for region.

* fix: Fix OpType::min_type()

* test: Add delete absent key test

* chore: Address CR comments
2022-12-30 17:12:18 +08:00
discord9
6fe205f3b5 chore: Update RustPython(With GC) (#809)
* chore: use newest RustPython

* chore: use Garbage collected RustPython Fork

* style: format toml
2022-12-30 16:55:43 +08:00
LFC
d13de0aeba refactor: remove AdminExpr, make DDL expressions as normal GRPC requests (#808)
* refactor: remove AdminExpr, make DDL expressions as normal GRPC requests
2022-12-30 16:47:45 +08:00
zyy17
11194f37d4 build: install ca-certificates in docker image building (#807)
refactor: install ca-certificates in docker image building

Signed-off-by: zyy17 <zyylsxm@gmail.com>

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-12-30 14:56:39 +08:00
LFC
de6803d253 feat: handle InsertRequest(formerly InsertExpr) in new Arrow Flight (#800)
feat: handle InsertRequest(formerly InsertExpr) in new Arrow Flight interface
2022-12-30 10:24:09 +08:00
Ruihang Xia
d0ef3aa9eb docs: align Jeremy Clarkson to the right side (#804)
docs: align Jeremy Clarkson to right side

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-29 16:55:38 +08:00
LFC
04df80e640 fix: further ease the restriction of executing SQLs in new GRPC interface (#797)
* fix: carry not recordbatch result in FlightData, to allow executing SQLs other than selection in new GRPC interface

* Update src/datanode/src/instance/flight/stream.rs

Co-authored-by: Jiachun Feng <jiachun_feng@proton.me>
2022-12-28 16:43:21 +08:00
fys
76236646ef chore: extract some functions from "bootstrap_meta_srv" function (#795)
refactor: bootstrap of meta
2022-12-28 14:29:52 +08:00
LFC
26848f9f5c feat: Replace SelectResult with FlightData (#776)
* feat: replace SelectResult with FlightData

* Update tests/runner/src/env.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-28 10:22:46 +08:00
Ruihang Xia
90990584b7 feat: Prom SeriesNormalize plan (#787)
* feat: impl SeriesNormalize plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* some tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: add metrics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add license header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* make time index column a parameter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* precompute time index column index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sign the TODO

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-27 22:59:53 +08:00
LFC
a14ec94653 fix: ease the restriction of the original "SelectExpr" (#794)
fix: ease the restriction of the original "SelectExpr" since we used to pass SQLs other than selection in the related GRPC interface
2022-12-27 16:50:12 +08:00
Ruihang Xia
26a3e93ca7 chore: utilize workspace deps in more places (#792)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-27 16:26:59 +08:00
elijah
3978931b8e feat: support parsing the RENAME TABLE statements in the parser (#780)
* feat: add parsing `alter rename table` syntax to the parser

* chore: fix clippy

* chore: add test for parser

* fix: add test for parsing RENAME keyword

* chore: remove unused code

* fix: parse table name object

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: fmt code

Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-12-27 14:53:40 +08:00
shuiyisong
d589de63ef feat: pub auth_mysql & add auth boxed err (#788)
* chore: minor openup

* chore: open up auth_mysql and return ()

* chore: typo change

* chore: change according to ci

* chore: change according to ci

* chore: remove tonic status in auth error
2022-12-27 11:04:05 +08:00
LFC
7829e4a219 feat: Implement Arrow Flight Service (except gRPC server) for selection (#768)
* feat: Implement Arrow Flight Service (but not the GRPC server) for selection

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-26 16:41:10 +08:00
Mike Yang
bc9a46dbb7 feat: support varbinary (#767)
feat: support varbinary for table creation and record insertion
2022-12-26 13:14:12 +08:00
Ruihang Xia
a61e96477b docs: RFC of promql (#779)
* docs: RFC of promql

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* docs: change styles, list drawback of misusing arrow

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-26 13:12:24 +08:00
Yingwen
f8500e54c1 refactor: Remove PutOperation and Simplify WriteRequest API (#775)
* chore: Remove unused MutationExtra

* refactor(storage): Refactor Mutation and Payload

Change Mutation from enum to a struct that holds op type and record
batches so the encoder doesn't need to convert the mutation into record
batch. Now the Payload is no longer an enum; it just holds the data, to
be serialized to the WAL, of the WriteBatch. The encoder and decoder
now deal with the Payload instead of the WriteBatch, so we could hold
more information not necessary to be stored to the WAL in the
WriteBatch.

This commit also merge variants in write_batch::Error to storage::Error
as some variants of them denote the same error.

* test(storage): Pass all tests in storage

* chore: Remove unused codes then format codes

* test(storage): Fix test_put_unknown_column test

* style(storage): Fix clippy

* chore: Remove some unused codes

* chore: Rebase upstream and fix clippy

* chore(storage): Remove unused codes

* chore(storage): Update comments

* feat: Remove PayloadType from wal.proto

* chore: Address CR comments

* chore: Remove unused write_batch.proto
2022-12-26 13:11:24 +08:00
discord9
e85780b5e4 refactor: rename some mod.rs to <MOD_NAME>.rs (#784)
* refactor: rename `mod.rs` to <MOD_NAME>.rs

* refactor: not rename mod.rs in benches/
2022-12-26 12:48:34 +08:00
Ning Sun
11bdb33d37 feat: sql query interceptor and plugin refactoring (#773)
* feat: let instance hold plugins

* feat: add sql query interceptor definition

* docs: add comments to key apis

* feat: add implementation for pre-parsing and post-parsing

* feat: add post_execute hook

* test: add tests for interceptor

* chore: add license header

* fix: clippy error

* Update src/cmd/src/frontend.rs

Co-authored-by: LFC <bayinamine@gmail.com>

* refactor: batching post_parsing calls

* refactor: rename AnyMap2 to Plugins

* feat: call pre_execute with logical plan empty at the moment

Co-authored-by: LFC <bayinamine@gmail.com>
2022-12-23 15:22:12 +08:00
LFC
1daba75e7b refactor: use "USE" keyword (#785)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-23 14:29:47 +08:00
LFC
dc52a51576 chore: upgrade to Arrow 29.0 and use workspace package and dependencies (#782)
* chore: upgrade to Arrow 29.0 and use workspace package and dependencies

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-23 14:28:37 +08:00
Ruihang Xia
26af9e6214 ci: setup secrets for setup-protoc job (#783)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-23 11:36:39 +08:00
fys
e07791c5e8 chore: make election mod public (#781) 2022-12-22 17:32:35 +08:00
Yingwen
b6d29afcd1 ci: Use lld for coverage (#778)
* ci: Use lld for coverage

* style: Fix clippy
2022-12-22 16:10:37 +08:00
LFC
ea9af42091 chore: upgrade Rust to nightly 2022-12-20 (#772)
* chore: upgrade Rust to nightly 2022-12-20

* chore: upgrade Rust to nightly 2022-12-20

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-21 19:32:30 +08:00
shuiyisong
d0ebcc3b5a chore: open userinfo constructor (#774) 2022-12-21 17:58:43 +08:00
LFC
77182f5024 chore: upgrade Arrow to version 28, and DataFusion to 15 (#771)
Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-21 17:02:11 +08:00
Ning Sun
539ead5460 feat: check database existence on http api (#764)
* feat: check database existence on http api

* Update src/servers/src/http/handler.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* feat: use database not found status code

* test: add assertion for status code

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-21 10:28:45 +08:00
Ruihang Xia
bc0e4e2cb0 fix: fill NULL based on row_count (#765)
* fix: fill NULL based on row_count

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* simplify code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: replace set_len with resize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-20 12:12:48 +08:00
Ruihang Xia
7d29670c86 fix: consider null mask in sqlness display util (#763)
* fix: consider null mask in sqlness display util

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix test case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change placeholder to null

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-19 14:20:28 +08:00
LFC
afd88dd53a fix: test_dist_table_scan block (#761)
* fix: `test_dist_table_scan` block

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-19 11:20:51 +08:00
Ning Sun
efd85df6be feat: add schema check on postgres startup (#758)
* feat: add schema check on postgres startup

* chore: update pgwire to 0.6.3

* test: add test for unspecified db
2022-12-19 10:53:44 +08:00
Ning Sun
ea1896493b feat: allow multiple sql statements in query string (#699)
* feat: allow multiple sql statement in query string

* test: add a test for multiple statement call

* feat: add temporary workaround for standalone mode

* fix: resolve sql parser issue temporarily

* Update src/datanode/src/instance/sql.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: adopt new sql handler

* refactor: revert changes in query engine

* refactor: assume sql-statement 1-1 on datanode

* test: use frontend for integration test

* refactor: add statement execution api for explicit single statement call

* fix: typo

* refactor: rename query method

* test: add test case for error

* test: data type change adoption

* chore: add todo from review

* chore: remove obsolete comments

* fix: resolve issues

Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-12-16 19:50:20 +08:00
Jiachun Feng
66bca11401 refactor: remove optional from the protos (#756) 2022-12-16 15:47:51 +08:00
Yingwen
7c16a4a17b refactor(storage): Move write_batch::codec to a separate file (#757)
* refactor(storage): Move write_batch::codec to a separate file

* chore: move new_test_batch to write_batch mod
2022-12-16 15:32:59 +08:00
dennis zhuang
28bd7404ad feat: change column's default property to nullable (#751)
* feat: change column's default property to nullable

* chore: use all instead of any

* fix: compile error

* fix: dependencies order in cargo
2022-12-16 11:17:01 +08:00
Lei, HUANG
0653301754 feat: replace arrow2 with official implementation 🎉 (#753)
* chore: kick off. change datafusion/arrow/parquet to target version

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: replace one last datafusion dep

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: arrow_array switch to arrow

* chore: update dep of binary vector

* chore: fix wrong merge commit

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: Switch to datatypes2

* feat: Make recordbatch compile

* chore: sort Cargo.toml

* feat: Fix common::recordbatch compiler errors

* feat: Fix recordbatch test compiling issue

* fix: api crate (#708)

* fix: rename ConcreteDataType::timestamp_millis_type to ConcreteDataType::timestamp_millisecond_type. fix other warnings regarding timestamp

* fix: revert changes in datatypes2

* fix: helper

* chore: delete datatypes based on arrow2

* feat: Fix some compiler errors in common::query (#710)

* feat: Fix some compiler errors in common::query

* feat: test_collect use vectors api

* fix: common-query subcrate (#712)

* fix: record batch adapter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix error enum

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: Fix common::query compiler errors (#713)

* feat: Move conversion to ScalarValue to value.rs

* fix: Fix common::query compiler errors

This commit also make InnerError pub(crate)

* feat: Implements diff accumulator using WrapperType (#715)

* feat: Remove usage of opaque error from common::recordbatch

* feat: Remove opaque error from common::query

* feat: Fix diff compiler errors

Now common_function just use common_query's Error and Result. Adds
a LargestType associated type to LogicalPrimitiveType to get the largest
type a logical primitive type can cast to.

* feat: Remove LargestType from NativeType trait

* chore: Update comments

* feat: Restrict Scalar::RefType of WrapperType to itself

Add trait bound `for<'a> Scalar<RefType<'a> = Self>` to WrapperType

* chore: Address CR comments

* chore: Format codes

* fix: fix compile error for mean/polyval/pow/interp ops

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Revert "fix: fix compile error for mean/polyval/pow/interp ops"

This reverts commit fb0b4eb826.

* fix: Fix compiler errors in argmax/rate/median/norm_cdf (#716)

* fix: Fix compiler errors in argmax/rate/median/norm_cdf

* chore: Address CR comments

* fix: fix compile error for mean/polyval/pow/interp ops (#717)

* fix: fix compile error for mean/polyval/pow/interp ops

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* simplify type bounds

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: fix argmin/percentile/clip/interp/scipy_stats_norm_pdf errors (#718)

fix: fix argmin/percentile/clip/interp/scipy_stats_norm_pdf compiler errors

* fix: fix other compile error in common-function (#719)

* further fixing

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix all compile errors in common function

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: Fix tests and clippy for common-function subcrate (#726)

* further fixing

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix all compile errors in common function

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* revert test changes

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: row group pruning (#725)

* fix: row group pruning

* chore: use macro to simplify stats implementation

* fix: CR comments

* fix: row group metadata length mismatch

* fix: simplify code

* fix: Fix common::grpc compiler errors (#722)

* fix: Fix common::grpc compiler errors

This commit refactors RecordBatch and holds vectors in the RecordBatch
struct, so we don't need to cast the array to vector when doing
serialization or iterating the batch.

Now we use the vector API instead of the arrow API in grpc crate.

* chore: Address CR comments

* fix common record batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: Fix compile error in server subcrate (#727)

* fix: Fix compile error in server subcrate

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused type alias

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* explicitly panic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/storage/src/sst/parquet.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: Fix common grpc expr (#730)

* fix compile errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename fn names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix warnings in common-time

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: pre-cast to avoid tremendous match arms (#734)

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: upgrade storage crate to arrow and parquet official impl (#738)

* fix: compile errors

* fix: parquet reader and writer

* fix: parquet reader and writer

* fix: WriteBatch IPC encode/decode

* fix: clippy errors in storage subcrate

* chore: remove suspicious unwrap

* fix: some cr comments

* fix: CR comments

* fix: CR comments

* fix: Fix compiler errors in catalog and mito crates (#742)

* fix: Fix compiler errors in mito

* fix: Fix compiler errors in catalog crate

* style: Fix clippy

* chore: Fix use

* Merge pull request #745

* fix nyc-taxi and util

* Merge branch 'replace-arrow2' into fix-others

* fix substrait

* fix warnings and error in test

* fix: Fix imports in optimizer.rs

* fix: errors in optimizer

* fix: remove unwrap

* fix: Fix compiler errors in query crate (#746)

* fix: Fix compiler errors in state.rs

* fix: fix compiler errors in state

* feat: upgrade sqlparser to 0.26

* fix: fix datafusion engine compiler errors

* fix: Fix some tests in query crate

* fix: Fix all warnings in tests

* feat: Remove `Type` from timestamp's type name

* fix: fix query tests

Now datafusion already supports median, so this commit also remove the
median function

* style: Fix clippy

* feat: Remove RecordBatch::pretty_print

* chore: Address CR comments

* Update src/query/src/query_engine/state.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* fix: frontend compile errors (#747)

fix: fix compile errors in frontend

* fix: Fix compiler errors in script crate (#749)

* fix: Fix compiler errors in state.rs

* fix: fix compiler errors in state

* feat: upgrade sqlparser to 0.26

* fix: fix datafusion engine compiler errors

* fix: Fix some tests in query crate

* fix: Fix all warnings in tests

* feat: Remove `Type` from timestamp's type name

* fix: fix query tests

Now datafusion already supports median, so this commit also remove the
median function

* style: Fix clippy

* feat: Remove RecordBatch::pretty_print

* chore: Address CR comments

* feat: Add column_by_name to RecordBatch

* feat: modify select_from_rb

* feat: Fix some compiler errors in vector.rs

* feat: Fix more compiler errors in vector.rs

* fix: fix table.rs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: Fix compiler errors in coprocessor

* fix: Fix some compiler errors

* fix: Fix compiler errors in script

* chore: Remove unused imports and format code

* test: disable interval tests

* test: Fix test_compile_execute test

* style: Fix clippy

* feat: Support interval

* feat: Add RecordBatch::columns and fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* fix: Fix All The Tests! (#752)

* fix: Fix several tests compile errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: some compile errors in tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: compile errors in frontend tests

* fix: compile errors in frontend tests

* test: Fix tests in api and common-query

* test: Fix test in sql crate

* fix: resolve substrait error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: add more test

* test: Fix tests in servers

* fix instance_test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* test: Fix tests in tests-integration

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>

* fix: clippy errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2022-12-15 18:49:12 +08:00
LFC
61d8bc2ea1 refactor(frontend): minor changes around FrontendInstance constructor (#748)
* refactor: minor changes in some testing codes

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-15 14:34:40 +08:00
Ruihang Xia
e3785fca70 docs: change logo in readme automatically based on github theme (#743)
* docs: adaptive logo on theme

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* switch logos

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* align center

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust style

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use new logo image

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-14 19:32:51 +08:00
shuiyisong
fda9e80cbf feat: impl static_user_provider (#739)
* feat: add MemUserProvider and impl auth

* feat: impl user_provider option in fe and standalone mode

* chore: add file impl for mem provider

* chore: remove mem opts

* chore: minor change

* chore: refac pg server to use user_provider as indicator for using pwd auth

* chore: fix test

* chore: extract common code

* chore: add unit test

* chore: rebase develop

* chore: add user provider to http server

* chore: minor rename

* chore: change to ref when convert to anymap

* chore: fix according to clippy

* chore: remove clone on startcommand

* chore: fix cr issue

* chore: update tempdir use

* chore: change TryFrom to normal func while parsing anymap

* chore: minor change

* chore: remove to_lowercase
2022-12-14 16:38:29 +08:00
Lei, HUANG
756c068166 feat: logstore compaction (#740)
* feat: add benchmark for wal

* add bin

* feat: impl wal compaction

* chore: This reverts commit ef9f2326

* chore: This reverts commit 9142ec0e

* fix: remove empty files

* fix: failing tests

* fix: CR comments

* fix: Mark log as stable after writer applies manifest

* fix: some cr comments and namings

* chore: rename all stable_xxx to obsolete_xxx

* chore: error message
2022-12-14 16:15:29 +08:00
dennis zhuang
6a4e2e5975 feat: promql crate and skeleton (#720)
* feat: adds promql crate

* feat: adds promql-parser dependency and rfc doc

* fix: dependencies order in servers crate

* fix: forgot error.rs

* fix: comment

* fix: license header

* fix: remove docs/rfc/20221207_promql.md
2022-12-13 17:08:22 +08:00
Lei, HUANG
9ad6ddb26e fix: remove useless metaclient field from datanode Instance (#744) 2022-12-13 14:26:26 +08:00
fys
c5661ee362 feat: support http basic authentication (#733)
* feat: support http auth

* add some unit test and log

* fix

* cr

* remove unused #[derive(Clone)]
2022-12-13 10:44:33 +08:00
zyy17
9b093463cc feat: add Makefile to aggregate the commands that developers always use (#736)
* feat: add Makefile to aggregate the commands that developers always use

* refactor: add 'clean' and 'unit-test' target

* refactor: add sqlness-test target and modify some descriptions format

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-12-12 13:03:49 +08:00
zyy17
61e0f1a11c refactor: add tls option in frontend cli options (#735)
* refactor: add tls option in frontend cli options

* fix: add 'Eq' trait for fixing clippy error

* fix: remove redundant clone

Signed-off-by: zyy17 <zyylsxm@gmail.com>
2022-12-12 10:02:17 +08:00
Ning Sun
249ebc6937 feat: update pgwire and refactor pg auth handler (#732) 2022-12-09 17:01:55 +08:00
elijah
c1b8981f61 refactor(mito): change the table path to schema/table_id (#728)
refactor: change the table path to `schema/table_id`
2022-12-09 12:59:16 +08:00
Jiachun Feng
949cd3e3af feat: move_value & delete_route (#707)
* feat: move_value & delete_route

* chore: minor refactor

* chore: refactor unit test of metaclient

* chore: map to kv

* Update src/meta-srv/src/service/router.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/meta-srv/src/service/router.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: by code review

Co-authored-by: Yingwen <realevenyag@gmail.com>
2022-12-09 11:07:48 +08:00
SSebo
b26982c5d7 feat: support timestamp new syntax (#697)
* feat: support timestamp new syntax

* fix: not null at end of new time stamp index syntax

* chore: simplify code
2022-12-09 10:52:14 +08:00
fys
4fdf26810c feat: support auth in frontend (#688)
* feat: add UserProvider trait

* chore: minor fix

* support pg mysql

* refactor and add some logs

* chore: add license

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
2022-12-08 11:51:52 +08:00
dennis zhuang
7f59758e69 feat: bump opendal version to 0.22 (#721)
* feat: bump opendal version to 0.22

* fix: LoggingLayer
2022-12-08 11:19:21 +08:00
Zheming Li
a521ab5041 fix: set default value when fail to get git info instead of panic (#696)
fix: set default value when fail to git info instead of panic
2022-12-07 13:16:27 +08:00
LFC
833216d317 refactor: directly invoke Datanode methods in standalone mode (part 1) (#694)
* refactor: directly invoke Datanode methods in standalone mode

* test: add more unit tests

* fix: get rid of `println` in testing codes

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-07 11:37:59 +08:00
Ruihang Xia
90c832b33d refactor: drop support of physical plan query interface (#714)
* refactor: drop support of physical plan query interface

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: collapse server/grpc sub-module

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: remove unused errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-06 19:23:32 +08:00
LFC
8959dbcef8 feat: Substrait logical plan (#704)
* feat: use Substrait logical plan to query data from Datanode in Frontend in distributed mode

* fix: resolve PR comments

* fix: resolve PR comments

* fix: resolve PR comments

Co-authored-by: luofucong <luofucong@greptime.com>
2022-12-06 19:21:57 +08:00
discord9
2034b40f33 chore: update RustPython dependence(With a tweaked fork) (#655)
* refactor: update RsPy

* depend: add `rustpython-pylib`

* feat: add_frozen stdlib for every vm init

* feat: limit stdlib to a selected few

* chore: use `rev` instead of `branch` in depend

* refactor: rename to allow_list

* feat: use opt level one

* doc: add username for TODO&change optimize to 0

* style: fmt .toml
2022-12-06 14:15:00 +08:00
SSebo
55e6be7af1 fix: test_server_require_secure_client_secure (#701) 2022-12-06 10:38:54 +08:00
discord9
f9bfb121db feat: add rate() udf (#508)
* feat: rewrite `rate` UDF

* feat: rename to `prom_rate`

* refactor: solve conflict&add license

* refactor: import arrow
2022-12-06 10:30:13 +08:00
Ruihang Xia
6fb413ae50 ci: add toml format linter (#706)
* chore: run taplo format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* ci: add workflow to check toml

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rerun formatter with indent to 4 spaces

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update check command

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2022-12-05 20:03:10 +08:00
Ruihang Xia
beb07fc895 feat: new datatypes subcrate based on the official arrow (#705)
* feat: Init datatypes2 crate

* chore: Remove some unimplemented types

* feat: Implements PrimitiveType and PrimitiveVector for datatypes2 (#633)

* feat: Implement primitive types and vectors

* feat: Implement a wrapper type

* feat: Remove VectorType from ScalarRef

* feat: Move some trait bound from NativeType to WrapperType

* feat: pub use  primitive vectors and builders

* feat: Returns error in try_from when type mismatch

* feat: Impl PartialEq for some vectors

* test: Pass vector tests

* chore: Add license header

* test: Pass more vector tests

* feat: Implement some methods of vector Helper

* test: Pass more tests

* style: Fix clippy

* chore: Add license header

* feat: Remove IntoValueRef trait

* feat: Add NativeType trait bound to WrapperType::Native

* docs: Explain what is wrapper type

* chore: Fix typos

* refactor: LogicalPrimitiveType::type_name returns str

* feat: Implements DateType and DateVector (#651)

* feat: Implement DateType and DateVector

* test: Pass more value and data type tests

* chore: Address CR comments

* test: Skip list value test

* feat: datatypes2 datetime (#661)

* feat: impl DateTime type and vector

* fix: add license header

* fix: CR comments and add more tests

* fix: customized serialization for wrapper type

* feat: Implements NullType and NullVector (#658)

* feat: Implements NullType and NullVector

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* feat: Implements StringType and StringVector (#659)

* feat: implement string vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more test and from

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* cover NUL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: impl datatypes2/timestamp (#686)

* feat: add timestamp datatype and vectors

* fix: cr comments and reformat code

* chore: add some tests

* feat: Implements ListType and ListVector (#681)

* feat: Implement ListType and ListVector

* test: Pass more tests

* style: Fix clippy

* chore: Fix comment

* chore: Address CR comments

* feat: impl constant vector (#680)

* feat: impl constant vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* rename fn names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove println

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* feat: Implements Validity (#684)

* feat: Implements Validity

* chore: remove pub from sub mod in vectors

* feat: Implements schema for datatypes2 (#695)

* feat: Add is_timestamp_compatible to DataType

* feat: Implement ColumnSchema and Schema

* feat: Impl RawSchema

* chore: Remove useless codes and run more tests

* chore: Fix clippy

* feat: Impl from_arrow_time_unit and pass schema tests

* chore: add more tests for timestamp (#702)

* chore: add more tests for timestamp

* chore: add replicate test for timestamps

* feat: Implements helper methods for vectors/values (#703)

* feat: Implement helper methods for vectors/values

* chore: Address CR comments

* chore: add more test for timestamp

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
2022-12-05 19:59:23 +08:00
Ning Sun
4275e47bdb refactor: use updated mysql_async client (#698) 2022-12-05 11:18:32 +08:00
dennis zhuang
6720bc5f7c fix: validate create table request in mito engine (#690)
* fix: validate create table request in mito engine

* fix: comment

* chore: remove TIMESTAMP_INDEX in system.rs
2022-12-05 11:01:43 +08:00
447 changed files with 18870 additions and 15757 deletions

View File

@@ -24,7 +24,7 @@ on:
name: Code coverage
env:
RUST_TOOLCHAIN: nightly-2022-07-14
RUST_TOOLCHAIN: nightly-2022-12-20
jobs:
coverage:
@@ -34,6 +34,11 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
- name: Install toolchain
uses: dtolnay/rust-toolchain@master
with:
@@ -48,6 +53,7 @@ jobs:
- name: Collect coverage data
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}

View File

@@ -23,7 +23,7 @@ on:
name: CI
env:
RUST_TOOLCHAIN: nightly-2022-07-14
RUST_TOOLCHAIN: nightly-2022-12-20
jobs:
typos:
@@ -41,6 +41,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -49,6 +51,23 @@ jobs:
- name: Run cargo check
run: cargo check --workspace --all-targets
toml:
name: Toml Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install taplo
run: cargo install taplo-cli --version ^0.8 --locked
- name: Run taplo
run: taplo format --check --option "indent_string= "
# Use coverage to run test.
# test:
# name: Test Suite
@@ -64,6 +83,8 @@ jobs:
# path: ./llvm
# key: llvm
# - uses: arduino/setup-protoc@v1
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# - uses: KyleMayes/install-llvm-action@v1
# with:
# version: "14.0"
@@ -97,6 +118,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -114,6 +137,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}

View File

@@ -10,7 +10,7 @@ on:
name: Release
env:
RUST_TOOLCHAIN: nightly-2022-07-14
RUST_TOOLCHAIN: nightly-2022-12-20
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha

2799
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -26,6 +26,7 @@ members = [
"src/meta-srv",
"src/mito",
"src/object-store",
"src/promql",
"src/query",
"src/script",
"src/servers",
@@ -34,9 +35,35 @@ members = [
"src/storage",
"src/store-api",
"src/table",
"tests-integration"
,
"tests/runner"]
"tests-integration",
"tests/runner",
]
[workspace.package]
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
arrow = "29.0"
arrow-flight = "29.0"
arrow-schema = { version = "29.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
# TODO(LFC): Use released Datafusion when it officially depends on Arrow 29.0
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
futures = "0.3"
parquet = "29.0"
paste = "1.0"
serde = { version = "1.0", features = ["derive"] }
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.28"
tokio = { version = "1", features = ["full"] }
[profile.release]
debug = true

67
Makefile Normal file
View File

@@ -0,0 +1,67 @@
IMAGE_REGISTRY ?= greptimedb
IMAGE_TAG ?= latest
##@ Build
.PHONY: build
build: ## Build debug version greptime.
cargo build
.PHONY: release
release: ## Build release version greptime.
cargo build --release
.PHONY: clean
clean: ## Clean the project.
cargo clean
.PHONY: fmt
fmt: ## Format all the Rust code.
cargo fmt --all
.PHONY: docker-image
docker-image: ## Build docker image.
docker build --network host -f docker/Dockerfile -t ${IMAGE_REGISTRY}:${IMAGE_TAG} .
##@ Test
.PHONY: unit-test
unit-test: ## Run unit test.
cargo test --workspace
.PHONY: integration-test
integration-test: ## Run integation test.
cargo test integration
.PHONY: sqlness-test
sqlness-test: ## Run sqlness test.
cargo run --bin sqlness-runner
.PHONY: check
check: ## Cargo check all the targets.
cargo check --workspace --all-targets
.PHONY: clippy
clippy: ## Check clippy rules.
cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
.PHONY: fmt-check
fmt-check: ## Check code format.
cargo fmt --all -- --check
##@ General
# The help target prints out all targets with their descriptions organized
# beneath their categories. The categories are represented by '##@' and the
# target descriptions by '##'. The awk command is responsible for reading the
# entire set of makefiles included in this invocation, looking for lines of the
# file as xyz: ## something, and then pretty-format the target and help. Then,
# if there's a line with ##@ something, that gets pretty-printed as a category.
# More info on the usage of ANSI control characters for terminal formatting:
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
# More info on the awk command:
# https://linuxcommand.org/lc3_adv_awk.php
.PHONY: help
help: ## Display help messages.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

View File

@@ -1,7 +1,12 @@
<p align="center">
<img src="/docs/logo-text-padding.png" alt="GreptimeDB Logo" width="400px"></img>
<picture>
<source media="(prefers-color-scheme: light)" srcset="/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="/docs/logo-text-padding.png" width="400px">
</picture>
</p>
<h3 align="center">
The next-generation hybrid timeseries/analytics processing database in the cloud
</h3>

View File

@@ -1,14 +1,14 @@
[package]
name = "benchmarks"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
arrow = "10"
arrow.workspace = true
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
parquet = { version = "*" }
tokio = { version = "1.21", features = ["full"] }
parquet.workspace = true
tokio.workspace = true

View File

@@ -15,26 +15,21 @@
//! Use the taxi trip records from New York City dataset to bench. You can download the dataset from
//! [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
#![feature(once_cell)]
#![allow(clippy::print_stdout)]
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
use arrow::datatypes::{DataType, Float64Type, Int64Type};
use arrow::record_batch::RecordBatch;
use clap::Parser;
use client::admin::Admin;
use client::api::v1::column::Values;
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::{Client, Database, Select};
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, TableId};
use client::{Client, Database};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
use parquet::file::reader::FileReader;
use parquet::file::serialized_reader::SerializedFileReader;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::task::JoinSet;
const DATABASE_NAME: &str = "greptime";
@@ -86,21 +81,25 @@ async fn write_data(
pb_style: ProgressStyle,
) -> u128 {
let file = std::fs::File::open(&path).unwrap();
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
let row_num = file_reader.metadata().file_metadata().num_rows();
let record_batch_reader = ParquetFileArrowReader::new(file_reader)
.get_record_reader(batch_size)
let record_batch_reader_builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
let row_num = record_batch_reader_builder
.metadata()
.file_metadata()
.num_rows();
let record_batch_reader = record_batch_reader_builder
.with_batch_size(batch_size)
.build()
.unwrap();
let progress_bar = mpb.add(ProgressBar::new(row_num as _));
progress_bar.set_style(pb_style);
progress_bar.set_message(format!("{:?}", path));
progress_bar.set_message(format!("{path:?}"));
let mut total_rpc_elapsed_ms = 0;
for record_batch in record_batch_reader {
let record_batch = record_batch.unwrap();
let (columns, row_count) = convert_record_batch(record_batch);
let insert_expr = InsertExpr {
let request = InsertRequest {
schema_name: "public".to_string(),
table_name: TABLE_NAME.to_string(),
region_number: 0,
@@ -108,16 +107,13 @@ async fn write_data(
row_count,
};
let now = Instant::now();
db.insert(insert_expr).await.unwrap();
db.insert(request).await.unwrap();
let elapsed = now.elapsed();
total_rpc_elapsed_ms += elapsed.as_millis();
progress_bar.inc(row_count as _);
}
progress_bar.finish_with_message(format!(
"file {:?} done in {}ms",
path, total_rpc_elapsed_ms
));
progress_bar.finish_with_message(format!("file {path:?} done in {total_rpc_elapsed_ms}ms",));
total_rpc_elapsed_ms
}
@@ -210,133 +206,134 @@ fn build_values(column: &ArrayRef) -> Values {
| DataType::FixedSizeList(_, _)
| DataType::LargeList(_)
| DataType::Struct(_)
| DataType::Union(_, _)
| DataType::Union(_, _, _)
| DataType::Dictionary(_, _)
| DataType::Decimal(_, _)
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
| DataType::Map(_, _) => todo!(),
}
}
fn create_table_expr() -> CreateExpr {
CreateExpr {
catalog_name: Some(CATALOG_NAME.to_string()),
schema_name: Some(SCHEMA_NAME.to_string()),
fn create_table_expr() -> CreateTableExpr {
CreateTableExpr {
catalog_name: CATALOG_NAME.to_string(),
schema_name: SCHEMA_NAME.to_string(),
table_name: TABLE_NAME.to_string(),
desc: None,
desc: "".to_string(),
column_defs: vec![
ColumnDef {
name: "VendorID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "tpep_pickup_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "tpep_dropoff_datetime".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "passenger_count".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "trip_distance".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "RatecodeID".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "store_and_fwd_flag".to_string(),
datatype: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "PULocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "DOLocationID".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "payment_type".to_string(),
datatype: ColumnDataType::Int64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "fare_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "extra".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "mta_tax".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "tip_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "tolls_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "improvement_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "total_amount".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "congestion_surcharge".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "airport_fee".to_string(),
datatype: ColumnDataType::Float64 as i32,
is_nullable: true,
default_constraint: None,
default_constraint: vec![],
},
],
time_index: "tpep_pickup_datetime".to_string(),
@@ -344,7 +341,7 @@ fn create_table_expr() -> CreateExpr {
create_if_not_exists: false,
table_options: Default::default(),
region_ids: vec![0],
table_id: Some(0),
table_id: Some(TableId { id: 0 }),
}
}
@@ -353,25 +350,23 @@ fn query_set() -> HashMap<String, String> {
ret.insert(
"count_all".to_string(),
format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
);
ret.insert(
"fare_amt_by_passenger".to_string(),
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count")
);
ret
}
async fn do_write(args: &Args, client: &Client) {
let admin = Admin::new("admin", client.clone());
async fn do_write(args: &Args, db: &Database) {
let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
let mut write_jobs = JoinSet::new();
let create_table_result = admin.create(create_table_expr()).await;
println!("Create table result: {:?}", create_table_result);
let create_table_result = db.create(create_table_expr()).await;
println!("Create table result: {create_table_result:?}");
let progress_bar_style = ProgressStyle::with_template(
"[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
@@ -385,7 +380,7 @@ async fn do_write(args: &Args, client: &Client) {
let batch_size = args.batch_size;
for _ in 0..args.thread_num {
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let db = db.clone();
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
@@ -394,7 +389,7 @@ async fn do_write(args: &Args, client: &Client) {
while write_jobs.join_next().await.is_some() {
file_progress.inc(1);
if let Some(path) = file_list.pop() {
let db = Database::new(DATABASE_NAME, client.clone());
let db = db.clone();
let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone();
write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
@@ -404,10 +399,10 @@ async fn do_write(args: &Args, client: &Client) {
async fn do_query(num_iter: usize, db: &Database) {
for (query_name, query) in query_set() {
println!("Running query: {}", query);
println!("Running query: {query}");
for i in 0..num_iter {
let now = Instant::now();
let _res = db.select(Select::Sql(query.clone())).await.unwrap();
let _res = db.sql(&query).await.unwrap();
let elapsed = now.elapsed();
println!(
"query {}, iteration {}: {}ms",
@@ -429,13 +424,13 @@ fn main() {
.unwrap()
.block_on(async {
let client = Client::with_urls(vec![&args.endpoint]);
let db = Database::new(DATABASE_NAME, client);
if !args.skip_write {
do_write(&args, &client).await;
do_write(&args, &db).await;
}
if !args.skip_read {
let db = Database::new(DATABASE_NAME, client.clone());
do_query(args.iter_num, &db).await;
}
})

View File

@@ -24,6 +24,8 @@ RUN cargo build --release
# TODO(zyy17): Maybe should use the more secure container image.
FROM ubuntu:22.04 as base
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
WORKDIR /greptime
COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH

View File

@@ -1,5 +1,7 @@
FROM ubuntu:22.04
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
ARG TARGETARCH
ADD $TARGETARCH/greptime /greptime/bin/

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,175 @@
---
Feature Name: "promql-in-rust"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/596
Date: 2022-12-20
Author: "Ruihang Xia <waynestxia@gmail.com>"
---
Rewrite PromQL in Rust
----------------------
# Summary
A Rust native implementation of PromQL, for GreptimeDB.
# Motivation
Prometheus and its query language PromQL prevail in the cloud-native observability area, which is an important scenario for time series databases like GreptimeDB. We already have support for its remote read and write protocols. Users can now integrate GreptimeDB as the storage backend of an existing Prometheus deployment, but cannot run PromQL queries directly on GreptimeDB like SQL.
This RFC proposes to add support for PromQL. Because it was created in Go, we can't use the existing code easily. For interoperability, performance and extendability, porting its logic to Rust is a good choice.
# Details
## Overview
One of the goals is to make use of our existing basic operators, execution model and runtime to reduce the work. So the entire proposal is built on top of Apache Arrow DataFusion. The rewritten PromQL logic is manifested as `Expr` or `Execution Plan` in DataFusion. And both the intermediate data structure and the result are in the format of `Arrow`'s `RecordBatch`.
The following sections are organized in a top-down manner. They start with the evaluation procedure, then introduce the building blocks of our new PromQL operation, follow with an explanation of the data model, and end with an example logic plan.
*This RFC is heavily related to Prometheus and PromQL. It won't repeat some basic concepts of them.*
## Evaluation
The original implementation is like an interpreter of parsed PromQL AST. It has two characteristics: (1) Operations are evaluated in place after they are parsed to AST. And some key parameters are separated from the AST because they are not present in the query, but come from other places like another field in the HTTP payload. (2) Calculation is performed per timestamp. You can see this pattern many times:
```go
for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {}
```
These bring out two differences in the proposed implementation. First, to make it more general and clear, the evaluation procedure is reorganized into several phases (and is the same as DataFusion's). And second, data are evaluated by time series (corresponding to "columnar calculation", if we think of the timestamp as the row number).
```
Logic
Query AST Plan
─────────► Parser ───────► Logical ────────► Physical ────┐
Planner Planner │
◄───────────────────────────── Executor ◄────────────────┘
Evaluation Result Execution
Plan
```
- Parser
Provided by [`promql-parser`](https://github.com/GreptimeTeam/promql-parser) crate. Same as the original implementation.
- Logical Planner
Generates a logical plan with all the needed parameters. It should accept something like `EvalStmt` in Go's implementation, which contains query time range, evaluation interval and lookback range.
Another important thing done here is assembling the logic plan, with all the operations baked in logically. Like what's the filter and time range to read, how the data then flows through a selector into a binary operation, etc. Or what's the output schema of every single step. The generated logic plan is deterministic without variables, and can be `EXPLAIN`ed clearly.
- Physical Planner
This step converts a logic plan into an evaluable execution plan. There are not many special things here compared with the previous step, except when a query is going to be executed in a distributed manner. In this case, a logic plan will be divided into several parts and sent to several nodes. One physical planner only sees its own part.
- Executor
As its name shows, this step calculates data to result. And all new calculation logic, the implementation of PromQL in Rust, is placed here. And the rewritten functions are using `RecordBatch` and `Array` from `Arrow` as the intermediate data structure.
Each "batch" contains only data from single time series. This is from the underlying storage implementation. Though it's not a requirement of this RFC, having this property can simplify some functions.
Another thing to mention is that the rewritten functions aren't aware of timestamp or value columns; they are defined only based on the input data types. For example, the `increase()` function in PromQL calculates the unbiased delta of data, and its implementation here only does this single thing. Let's compare the signatures of the two implementations:
- Go
```go
func funcIncrease(vals []parser.Value, args parser.Expressions) Vector {}
```
- Rust
```rust
fn prom_increase(input: Array) -> Array {}
```
Some unimportant parameters are omitted. The original Go version only writes the logic for `Point`'s value, either float or histogram. But the proposed rewritten one accepts a generic `Array` as input, which can be any type that suits, from `i8` to `u64` to `TimestampNanosecond`.
## Plan and Expression
They are structures to express logic from PromQL. The proposed implementation is built on top of DataFusion, thus our plan and expression are in form of `ExtensionPlan` and `ScalarUDF`. The only difference between them in this context is the return type: plan returns a record batch while expression returns a single column.
This RFC proposes to add four new plans, they are fundamental building blocks that mainly handle data selection logic in PromQL, for the following calculation expressions.
- `SeriesNormalize`
Sort data inside one series on the timestamp column, and apply the "offset" bias if there is one. This plan usually comes after `TableScan` (or `TableScan` and `Filter`) plan.
- `VectorManipulator` and `MatrixManipulator`
Corresponding to `InstantSelector` and `RangeSelector`. We don't calculate timestamp by timestamp, thus use "vector" instead of "instant"; this image shows the difference. And "matrix" is another name for "range vector", used to avoid confusion with our "vector". The following section will detail how they are implemented using Arrow.
![instant_and_vector](instant-and-vector.png)
Due to "interval" parameter in PromQL, data after "selector" (or "manipulator" here) are usually shorter than input. And we have to modify the entire record batch to shorten both timestamp, value and tag columns. So they are formed as plan.
- `PromAggregator`
The carrier of aggregator expressions. This should not be very different from the DataFusion built-in `Aggregate` plan, except PromQL can use "group without" to do reverse selection.
PromQL has around 70 expressions and functions. But luckily we can reuse lots of them from DataFusion, like unary expressions, binary expressions and aggregators. We only need to implement those PromQL-specific expressions, like `rate` or `percentile`. The following table lists some typical functions in PromQL, and their signatures in the proposed implementation. Other functions should be similar.
| Name | In Param(s) | Out Param(s) | Explain |
|-------------------- |------------------------------------------------------ |-------------- |-------------------- |
| instant_delta | Matrix T | Array T | idelta in PromQL |
| increase | Matrix T | Array T | increase in PromQL |
| extrapolate_factor | - Matrix T<br>- Array Timestamp<br>- Array Timestamp | Array T | * |
*: *`extrapolate_factor` is one of the "dark sides" in PromQL. In short it's a translation of this [paragraph](https://github.com/prometheus/prometheus/blob/0372e259baf014bbade3134fd79bcdfd8cbdef2c/promql/functions.go#L134-L159)*
To reuse this common calculation logic, we can break it into several expressions, and assemble them in the logic planning phase. For example, `rate()` in PromQL can be represented as `increase / extrapolate_factor`.
## Data Model
This part explains how data is represented. Following the data model in GreptimeDB, all the data are stored as table, with tag columns, timestamp column and value column. Table to record batch is very straightforward. So an instant vector can be thought of as a row (though as said before, we don't use instant vectors) in the table. Given four basic types in PromQL: scalar, string, instant vector and range vector, only the last "range vector" need some tricks to adapt our columnar calculation.
Range vector is some sort of matrix; it consists of small one-dimension vectors, with each being an input of a range function. And applying a range function to a range vector can be thought of as a kind of convolution.
![range-vector-with-matrix](range-vector-with-matrix.png)
(Left is an illustration of range vector. Notice the Y-axis has no meaning, it's just put different pieces separately. The right side is an imagined "matrix" as range function. Multiplying the left side to it can get a one-dimension "matrix" with four elements. That's the evaluation result of a range vector.)
To adapt this range vector to record batch, it should be represented by a column. This RFC proposes to use `DictionaryArray` from Arrow to represent range vector, or `Matrix`. This is "misusing" `DictionaryArray` to ship some additional information about an array. Because the range vector is sliding over one series, we only need to know the `offset` and `length` of each slide to reconstruct the matrix from an array:
![matrix-from-array](matrix-from-array.png)
The length is not fixed; it depends on the input's timestamp. A PoC implementation of `Matrix` and `increase()` can be found in [this repo](https://github.com/waynexia/corroding-prometheus).
## Example
The logic plan of PromQL query
```promql
# start: 2022-12-20T10:00:00
# end: 2022-12-21T10:00:00
# interval: 1m
# lookback: 30s
sum (rate(request_duration[5m])) by (idc)
```
looks like
<!-- title: 'PromAggregator: \naggr = sum, column = idc'
operator: prom
inputs:
- title: 'Matrix Manipulator: \ninterval = 1m, range = 5m, expr = div(increase(value), extrapolate_factor(timestamp))'
operator: prom
inputs:
- title: 'Series Normalize: \noffset = 0'
operator: prom
inputs:
- title: 'Filter: \ntimestamp > 2022-12-20T10:00:00 && timestamp < 2022-12-21T10:00:00'
operator: filter
inputs:
- title: 'Table Scan: \ntable = request_duration, timestamp > 2022-12-20T10:00:00 && timestamp < 2022-12-21T10:00:00'
operator: scan -->
![example](example.png)
# Drawbacks
Human beings are always error-prone. Rewriting from the ground up is harder and requires more attention to ensure correctness than translating line-by-line. And, since the evaluator's architecture is different, it might be painful to catch up with PromQL's breaking updates (if any) in the future.
Misusing Arrow's `DictionaryArray` as Matrix is another point. This hack needs some `unsafe` function calls to bypass Arrow's check. And though Arrow's API is stable, this is still an undocumented behavior.
# Alternatives
There are a few alternatives we've considered:
- Wrap the existing PromQL's implementation via FFI, and import it to GreptimeDB.
- Translate its evaluator engine line-by-line, rather than rewrite one.
- Integrate the Prometheus server into GreptimeDB via RPC, making it a detached execution engine for PromQL.
The first and second options make a separate execution engine in GreptimeDB. They may alleviate the pain of rewriting, but will have a negative impact on later evolution, such as resource management. And introducing another deployment component, as in the last option, will bring a complex deployment architecture.
And all of them are more or less redundant in data transportation that affects performance and resources. The proposed built-in executing procedure is also easy to integrate and expose to the existing SQL interface GreptimeDB currently provides. Some concepts in PromQL like sliding windows (range vector in PromQL) are very convenient and ergonomic in analyzing series data. This makes it not only a PromQL evaluator, but also an enhancement to our query system.

View File

@@ -1 +1 @@
nightly-2022-07-14
nightly-2022-12-20

View File

@@ -1,11 +1,11 @@
[package]
name = "api"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
arrow-flight.workspace = true
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }

View File

@@ -20,8 +20,6 @@ fn main() {
.file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
.compile(
&[
"greptime/v1/select.proto",
"greptime/v1/physical_plan.proto",
"greptime/v1/greptime.proto",
"greptime/v1/meta/common.proto",
"greptime/v1/meta/heartbeat.proto",

View File

@@ -32,7 +32,10 @@ message Column {
repeated int32 date_values = 14;
repeated int64 datetime_values = 15;
repeated int64 ts_millis_values = 16;
repeated int64 ts_second_values = 16;
repeated int64 ts_millisecond_values = 17;
repeated int64 ts_microsecond_values = 18;
repeated int64 ts_nanosecond_values = 19;
}
// The array of non-null values in this column.
//
@@ -56,7 +59,7 @@ message ColumnDef {
string name = 1;
ColumnDataType datatype = 2;
bool is_nullable = 3;
optional bytes default_constraint = 4;
bytes default_constraint = 4;
}
enum ColumnDataType {
@@ -75,5 +78,8 @@ enum ColumnDataType {
STRING = 12;
DATE = 13;
DATETIME = 14;
TIMESTAMP = 15;
TIMESTAMP_SECOND = 15;
TIMESTAMP_MILLISECOND = 16;
TIMESTAMP_MICROSECOND = 17;
TIMESTAMP_NANOSECOND = 18;
}

View File

@@ -6,17 +6,8 @@ message RequestHeader {
string tenant = 1;
}
message ExprHeader {
uint32 version = 1;
}
message ResultHeader {
uint32 version = 1;
uint32 code = 2;
string err_msg = 3;
}
message MutateResult {
uint32 success = 1;
uint32 failure = 2;
}

View File

@@ -2,6 +2,7 @@ syntax = "proto3";
package greptime.v1;
import "greptime/v1/ddl.proto";
import "greptime/v1/column.proto";
import "greptime/v1/common.proto";
@@ -15,30 +16,21 @@ message DatabaseResponse {
}
message ObjectExpr {
ExprHeader header = 1;
oneof expr {
InsertExpr insert = 2;
SelectExpr select = 3;
UpdateExpr update = 4;
DeleteExpr delete = 5;
oneof request {
InsertRequest insert = 1;
QueryRequest query = 2;
DdlRequest ddl = 3;
}
}
// TODO(fys): Only support sql now, and will support promql etc in the future
message SelectExpr {
oneof expr {
message QueryRequest {
oneof query {
string sql = 1;
bytes logical_plan = 2;
PhysicalPlan physical_plan = 15;
}
}
message PhysicalPlan {
bytes original_ql = 1;
bytes plan = 2;
}
message InsertExpr {
message InsertRequest {
string schema_name = 1;
string table_name = 2;
@@ -47,26 +39,18 @@ message InsertExpr {
// The row_count of all columns, which include null and non-null values.
//
// Note: the row_count of all columns in a InsertExpr must be same.
// Note: the row_count of all columns in a InsertRequest must be same.
uint32 row_count = 4;
// The region number of current insert request.
uint32 region_number = 5;
}
// TODO(jiachun)
message UpdateExpr {}
// TODO(jiachun)
message DeleteExpr {}
message ObjectResult {
ResultHeader header = 1;
oneof result {
SelectResult select = 2;
MutateResult mutate = 3;
}
repeated bytes flight_data = 2;
}
message SelectResult {
bytes raw_data = 1;
message FlightDataExt {
uint32 affected_rows = 1;
}

View File

@@ -5,50 +5,35 @@ package greptime.v1;
import "greptime/v1/column.proto";
import "greptime/v1/common.proto";
message AdminRequest {
string name = 1;
repeated AdminExpr exprs = 2;
}
message AdminResponse {
repeated AdminResult results = 1;
}
message AdminExpr {
ExprHeader header = 1;
// "Data Definition Language" requests, that create, modify or delete the database structures but not the data.
// `DdlRequest` could carry more information than plain SQL, for example, the "table_id" in `CreateTableExpr`.
// So create a new DDL expr if you need it.
message DdlRequest {
oneof expr {
CreateExpr create = 2;
CreateDatabaseExpr create_database = 1;
CreateTableExpr create_table = 2;
AlterExpr alter = 3;
CreateDatabaseExpr create_database = 4;
DropTableExpr drop_table = 5;
DropTableExpr drop_table = 4;
}
}
message AdminResult {
ResultHeader header = 1;
oneof result {
MutateResult mutate = 2;
}
}
// TODO(hl): rename to CreateTableExpr
message CreateExpr {
optional string catalog_name = 1;
optional string schema_name = 2;
message CreateTableExpr {
string catalog_name = 1;
string schema_name = 2;
string table_name = 3;
optional string desc = 4;
string desc = 4;
repeated ColumnDef column_defs = 5;
string time_index = 6;
repeated string primary_keys = 7;
bool create_if_not_exists = 8;
map<string, string> table_options = 9;
optional uint32 table_id = 10;
TableId table_id = 10;
repeated uint32 region_ids = 11;
}
message AlterExpr {
optional string catalog_name = 1;
optional string schema_name = 2;
string catalog_name = 1;
string schema_name = 2;
string table_name = 3;
oneof kind {
AddColumns add_columns = 4;
@@ -62,6 +47,11 @@ message DropTableExpr {
string table_name = 3;
}
message CreateDatabaseExpr {
//TODO(hl): maybe rename to schema_name?
string database_name = 1;
}
message AddColumns {
repeated AddColumn add_columns = 1;
}
@@ -79,7 +69,6 @@ message DropColumn {
string name = 1;
}
message CreateDatabaseExpr {
//TODO(hl): maybe rename to schema_name?
string database_name = 1;
message TableId {
uint32 id = 1;
}

View File

@@ -2,7 +2,6 @@ syntax = "proto3";
package greptime.v1;
import "greptime/v1/admin.proto";
import "greptime/v1/common.proto";
import "greptime/v1/database.proto";
@@ -12,11 +11,9 @@ service Greptime {
message BatchRequest {
RequestHeader header = 1;
repeated AdminRequest admins = 2;
repeated DatabaseRequest databases = 3;
repeated DatabaseRequest databases = 2;
}
message BatchResponse {
repeated AdminResponse admins = 1;
repeated DatabaseResponse databases = 2;
repeated DatabaseResponse databases = 1;
}

View File

@@ -5,6 +5,8 @@ package greptime.v1.meta;
import "greptime/v1/meta/common.proto";
service Router {
rpc Create(CreateRequest) returns (RouteResponse) {}
// Fetch routing information for tables. The smallest unit is the complete
// routing information(all regions) of a table.
//
@@ -26,7 +28,14 @@ service Router {
//
rpc Route(RouteRequest) returns (RouteResponse) {}
rpc Create(CreateRequest) returns (RouteResponse) {}
rpc Delete(DeleteRequest) returns (RouteResponse) {}
}
// Request message of the `Router.Create` rpc; the rpc answers with a `RouteResponse`.
message CreateRequest {
RequestHeader header = 1;
// The table this request is about.
TableName table_name = 2;
// Partitions supplied for the table.
// NOTE(review): presumably the table's initial partition rules — confirm against the meta service.
repeated Partition partitions = 3;
}
message RouteRequest {
@@ -35,6 +44,12 @@ message RouteRequest {
repeated TableName table_names = 2;
}
// Request message of the `Router.Delete` rpc; the rpc answers with a `RouteResponse`.
message DeleteRequest {
RequestHeader header = 1;
// The table this request is about.
TableName table_name = 2;
}
message RouteResponse {
ResponseHeader header = 1;
@@ -42,13 +57,6 @@ message RouteResponse {
repeated TableRoute table_routes = 3;
}
message CreateRequest {
RequestHeader header = 1;
TableName table_name = 2;
repeated Partition partitions = 3;
}
message TableRoute {
Table table = 1;
repeated RegionRoute region_routes = 2;

View File

@@ -20,6 +20,9 @@ service Store {
// DeleteRange deletes the given range from the key-value store.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
// MoveValue atomically renames the key to the given updated key.
rpc MoveValue(MoveValueRequest) returns (MoveValueResponse);
}
message RangeRequest {
@@ -136,3 +139,21 @@ message DeleteRangeResponse {
// returned.
repeated KeyValue prev_kvs = 3;
}
// Request message of the `Store.MoveValue` rpc, which atomically renames
// `from_key` to `to_key`.
message MoveValueRequest {
RequestHeader header = 1;
// If from_key does not exist, return the value of to_key (if it exists).
// If from_key exists, move the value of from_key to to_key (i.e. rename),
// and return the value.
bytes from_key = 2;
bytes to_key = 3;
}
// Response message of the `Store.MoveValue` rpc.
message MoveValueResponse {
ResponseHeader header = 1;
// If from_key does not exist, return the value of to_key (if it exists).
// If from_key exists, return the value of from_key.
KeyValue kv = 2;
}

View File

@@ -1,33 +0,0 @@
syntax = "proto3";
package greptime.v1.codec;
message PhysicalPlanNode {
oneof PhysicalPlanType {
ProjectionExecNode projection = 1;
MockInputExecNode mock = 99;
// TODO(fys): impl other physical plan node
}
}
message ProjectionExecNode {
PhysicalPlanNode input = 1;
repeated PhysicalExprNode expr = 2;
repeated string expr_name = 3;
}
message PhysicalExprNode {
oneof ExprType {
PhysicalColumn column = 1;
// TODO(fys): impl other physical expr node
}
}
message PhysicalColumn {
string name = 1;
uint64 index = 2;
}
message MockInputExecNode {
string name = 1;
}

View File

@@ -1,10 +0,0 @@
syntax = "proto3";
package greptime.v1.codec;
import "greptime/v1/column.proto";
message SelectResult {
repeated Column columns = 1;
uint32 row_count = 2;
}

View File

@@ -15,6 +15,7 @@
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;
@@ -56,7 +57,16 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
ColumnDataType::Timestamp => ConcreteDataType::timestamp_millis_datatype(),
ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
ColumnDataType::TimestampMillisecond => {
ConcreteDataType::timestamp_millisecond_datatype()
}
ColumnDataType::TimestampMicrosecond => {
ConcreteDataType::timestamp_microsecond_datatype()
}
ColumnDataType::TimestampNanosecond => {
ConcreteDataType::timestamp_nanosecond_datatype()
}
}
}
}
@@ -81,7 +91,12 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ConcreteDataType::String(_) => ColumnDataType::String,
ConcreteDataType::Date(_) => ColumnDataType::Date,
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
ConcreteDataType::Timestamp(_) => ColumnDataType::Timestamp,
ConcreteDataType::Timestamp(unit) => match unit {
TimestampType::Second(_) => ColumnDataType::TimestampSecond,
TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
},
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
}
@@ -153,8 +168,20 @@ impl Values {
datetime_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::Timestamp => Values {
ts_millis_values: Vec::with_capacity(capacity),
ColumnDataType::TimestampSecond => Values {
ts_second_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::TimestampMillisecond => Values {
ts_millisecond_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::TimestampMicrosecond => Values {
ts_microsecond_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::TimestampNanosecond => Values {
ts_nanosecond_values: Vec::with_capacity(capacity),
..Default::default()
},
}
@@ -187,9 +214,12 @@ impl Column {
Value::Binary(val) => values.binary_values.push(val.to_vec()),
Value::Date(val) => values.date_values.push(val.val()),
Value::DateTime(val) => values.datetime_values.push(val.val()),
Value::Timestamp(val) => values
.ts_millis_values
.push(val.convert_to(TimeUnit::Millisecond)),
Value::Timestamp(val) => match val.unit() {
TimeUnit::Second => values.ts_second_values.push(val.value()),
TimeUnit::Millisecond => values.ts_millisecond_values.push(val.value()),
TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
},
Value::List(_) => unreachable!(),
});
self.null_mask = null_mask.into_vec();
@@ -200,7 +230,10 @@ impl Column {
mod tests {
use std::sync::Arc;
use datatypes::vectors::BooleanVector;
use datatypes::vectors::{
BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector,
};
use super::*;
@@ -258,8 +291,8 @@ mod tests {
let values = values.datetime_values;
assert_eq!(2, values.capacity());
let values = Values::with_capacity(ColumnDataType::Timestamp, 2);
let values = values.ts_millis_values;
let values = Values::with_capacity(ColumnDataType::TimestampMillisecond, 2);
let values = values.ts_millisecond_values;
assert_eq!(2, values.capacity());
}
@@ -326,8 +359,8 @@ mod tests {
ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
);
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
ColumnDataTypeWrapper(ColumnDataType::Timestamp).into()
ConcreteDataType::timestamp_millisecond_datatype(),
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
);
}
@@ -394,8 +427,8 @@ mod tests {
ConcreteDataType::datetime_datatype().try_into().unwrap()
);
assert_eq!(
ColumnDataTypeWrapper(ColumnDataType::Timestamp),
ConcreteDataType::timestamp_millis_datatype()
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
ConcreteDataType::timestamp_millisecond_datatype()
.try_into()
.unwrap()
);
@@ -412,7 +445,48 @@ mod tests {
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
"Failed to create column datatype from List(ListType { item_type: Boolean(BooleanType) })"
);
}
#[test]
fn test_column_put_timestamp_values() {
    // Start from a column with no values at all; every timestamp unit pushed
    // below is expected to show up in the value list dedicated to that unit.
    let mut column = Column {
        column_name: "test".to_string(),
        semantic_type: 0,
        values: Some(Values::default()),
        null_mask: Vec::new(),
        datatype: 0,
    };

    // Nanosecond precision.
    let nanos = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
    column.push_vals(3, nanos);
    let pushed = column.values.as_ref().unwrap();
    assert_eq!(vec![1, 2, 3], pushed.ts_nanosecond_values);

    // Millisecond precision.
    let millis = Arc::new(TimestampMillisecondVector::from_vec(vec![4, 5, 6]));
    column.push_vals(3, millis);
    let pushed = column.values.as_ref().unwrap();
    assert_eq!(vec![4, 5, 6], pushed.ts_millisecond_values);

    // Microsecond precision.
    let micros = Arc::new(TimestampMicrosecondVector::from_vec(vec![7, 8, 9]));
    column.push_vals(3, micros);
    let pushed = column.values.as_ref().unwrap();
    assert_eq!(vec![7, 8, 9], pushed.ts_microsecond_values);

    // Second precision.
    let seconds = Arc::new(TimestampSecondVector::from_vec(vec![10, 11, 12]));
    column.push_vals(3, seconds);
    let pushed = column.values.as_ref().unwrap();
    assert_eq!(vec![10, 11, 12], pushed.ts_second_values);
}

View File

@@ -12,30 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_error::prelude::ErrorExt;
use arrow_flight::FlightData;
use prost::Message;
use crate::v1::codec::SelectResult;
use crate::v1::{
admin_result, object_result, AdminResult, MutateResult, ObjectResult, ResultHeader,
SelectResult as SelectResultRaw,
};
use crate::v1::{ObjectResult, ResultHeader};
pub const PROTOCOL_VERSION: u32 = 1;
pub type Success = u32;
pub type Failure = u32;
#[derive(Default)]
pub struct ObjectResultBuilder {
version: u32,
code: u32,
err_msg: Option<String>,
result: Option<Body>,
}
pub enum Body {
Mutate((Success, Failure)),
Select(SelectResult),
flight_data: Option<Vec<FlightData>>,
}
impl ObjectResultBuilder {
@@ -62,13 +51,8 @@ impl ObjectResultBuilder {
self
}
pub fn mutate_result(mut self, success: u32, failure: u32) -> Self {
self.result = Some(Body::Mutate((success, failure)));
self
}
pub fn select_result(mut self, select_result: SelectResult) -> Self {
self.result = Some(Body::Select(select_result));
pub fn flight_data(mut self, flight_data: Vec<FlightData>) -> Self {
self.flight_data = Some(flight_data);
self
}
@@ -79,92 +63,24 @@ impl ObjectResultBuilder {
err_msg: self.err_msg.unwrap_or_default(),
});
let result = match self.result {
Some(Body::Mutate((success, failure))) => {
Some(object_result::Result::Mutate(MutateResult {
success,
failure,
}))
}
Some(Body::Select(select)) => Some(object_result::Result::Select(SelectResultRaw {
raw_data: select.into(),
})),
None => None,
};
ObjectResult { header, result }
}
}
pub fn build_err_result(err: &impl ErrorExt) -> ObjectResult {
ObjectResultBuilder::new()
.status_code(err.status_code() as u32)
.err_msg(err.to_string())
.build()
}
#[derive(Debug)]
pub struct AdminResultBuilder {
version: u32,
code: u32,
err_msg: Option<String>,
mutate: Option<(Success, Failure)>,
}
impl AdminResultBuilder {
pub fn status_code(mut self, code: u32) -> Self {
self.code = code;
self
}
pub fn err_msg(mut self, err_msg: String) -> Self {
self.err_msg = Some(err_msg);
self
}
pub fn mutate_result(mut self, success: u32, failure: u32) -> Self {
self.mutate = Some((success, failure));
self
}
pub fn build(self) -> AdminResult {
let header = Some(ResultHeader {
version: self.version,
code: self.code,
err_msg: self.err_msg.unwrap_or_default(),
});
let result = if let Some((success, failure)) = self.mutate {
Some(admin_result::Result::Mutate(MutateResult {
success,
failure,
}))
let flight_data = if let Some(flight_data) = self.flight_data {
flight_data
.into_iter()
.map(|x| x.encode_to_vec())
.collect::<Vec<Vec<u8>>>()
} else {
None
vec![]
};
AdminResult { header, result }
}
}
impl Default for AdminResultBuilder {
fn default() -> Self {
Self {
version: PROTOCOL_VERSION,
code: 0,
err_msg: None,
mutate: None,
ObjectResult {
header,
flight_data,
}
}
}
#[cfg(test)]
mod tests {
use common_error::status_code::StatusCode;
use super::*;
use crate::error::UnknownColumnDataTypeSnafu;
use crate::v1::{object_result, MutateResult};
#[test]
fn test_object_result_builder() {
@@ -172,32 +88,10 @@ mod tests {
.version(101)
.status_code(500)
.err_msg("Failed to read this file!".to_string())
.mutate_result(100, 20)
.build();
let header = obj_result.header.unwrap();
assert_eq!(101, header.version);
assert_eq!(500, header.code);
assert_eq!("Failed to read this file!", header.err_msg);
let result = obj_result.result.unwrap();
assert_eq!(
object_result::Result::Mutate(MutateResult {
success: 100,
failure: 20,
}),
result
);
}
#[test]
fn test_build_err_result() {
let err = UnknownColumnDataTypeSnafu { datatype: 1 }.build();
let err_result = build_err_result(&err);
let header = err_result.header.unwrap();
let result = err_result.result;
assert_eq!(PROTOCOL_VERSION, header.version);
assert_eq!(StatusCode::InvalidArguments as u32, header.code);
assert!(result.is_none());
}
}

View File

@@ -15,7 +15,6 @@
pub use prost::DecodeError;
use prost::Message;
use crate::v1::codec::{PhysicalPlanNode, SelectResult};
use crate::v1::meta::TableRouteValue;
macro_rules! impl_convert_with_bytes {
@@ -36,81 +35,4 @@ macro_rules! impl_convert_with_bytes {
};
}
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(TableRouteValue);
#[cfg(test)]
mod tests {
use std::ops::Deref;
use crate::v1::codec::*;
use crate::v1::{column, Column};
const SEMANTIC_TAG: i32 = 0;
#[test]
fn test_convert_select_result() {
let select_result = mock_select_result();
let bytes: Vec<u8> = select_result.into();
let result: SelectResult = bytes.deref().try_into().unwrap();
assert_eq!(8, result.row_count);
assert_eq!(1, result.columns.len());
let column = &result.columns[0];
assert_eq!("foo", column.column_name);
assert_eq!(SEMANTIC_TAG, column.semantic_type);
assert_eq!(vec![1], column.null_mask);
assert_eq!(
vec![2, 3, 4, 5, 6, 7, 8],
column.values.as_ref().unwrap().i32_values
);
}
#[should_panic]
#[test]
fn test_convert_select_result_wrong() {
let select_result = mock_select_result();
let mut bytes: Vec<u8> = select_result.into();
// modify some bytes
bytes[0] = 0b1;
bytes[1] = 0b1;
let result: SelectResult = bytes.deref().try_into().unwrap();
assert_eq!(8, result.row_count);
assert_eq!(1, result.columns.len());
let column = &result.columns[0];
assert_eq!("foo", column.column_name);
assert_eq!(SEMANTIC_TAG, column.semantic_type);
assert_eq!(vec![1], column.null_mask);
assert_eq!(
vec![2, 3, 4, 5, 6, 7, 8],
column.values.as_ref().unwrap().i32_values
);
}
fn mock_select_result() -> SelectResult {
let values = column::Values {
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
..Default::default()
};
let null_mask = vec![1];
let column = Column {
column_name: "foo".to_string(),
semantic_type: SEMANTIC_TAG,
values: Some(values),
null_mask,
..Default::default()
};
SelectResult {
columns: vec![column],
row_count: 8,
}
}
}

View File

@@ -17,9 +17,5 @@ tonic::include_proto!("greptime.v1");
pub const GREPTIME_FD_SET: &[u8] = tonic::include_file_descriptor_set!("greptime_fd");
pub mod codec {
tonic::include_proto!("greptime.v1.codec");
}
mod column_def;
pub mod meta;

View File

@@ -23,12 +23,13 @@ impl ColumnDef {
pub fn try_as_column_schema(&self) -> Result<ColumnSchema> {
let data_type = ColumnDataTypeWrapper::try_new(self.datatype)?;
let constraint = match &self.default_constraint {
None => None,
Some(v) => Some(
ColumnDefaultConstraint::try_from(&v[..])
let constraint = if self.default_constraint.is_empty() {
None
} else {
Some(
ColumnDefaultConstraint::try_from(self.default_constraint.as_slice())
.context(error::ConvertColumnDefaultConstraintSnafu { column: &self.name })?,
),
)
};
ColumnSchema::new(&self.name, data_type.into(), self.is_nullable)

View File

@@ -145,10 +145,12 @@ gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(DeleteRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
gen_set_header!(MoveValueRequest);
#[cfg(test)]
mod tests {

View File

@@ -1,14 +1,13 @@
[package]
name = "catalog"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
api = { path = "../api" }
arc-swap = "1.0"
async-stream = "0.3"
async-stream.workspace = true
async-trait = "0.1"
backoff = { version = "0.4", features = ["tokio"] }
common-catalog = { path = "../common/catalog" }
@@ -19,9 +18,7 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util = "0.3"
@@ -33,7 +30,7 @@ serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }
tokio = { version = "1.18", features = ["full"] }
tokio.workspace = true
[dev-dependencies]
chrono = "0.4"
@@ -42,4 +39,4 @@ mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
storage = { path = "../storage" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true

View File

@@ -17,7 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::arrow;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};
@@ -51,14 +51,12 @@ pub enum Error {
SystemCatalog { msg: String, backtrace: Backtrace },
#[snafu(display(
"System catalog table type mismatch, expected: binary, found: {:?} source: {}",
"System catalog table type mismatch, expected: binary, found: {:?}",
data_type,
source
))]
SystemCatalogTypeMismatch {
data_type: arrow::datatypes::DataType,
#[snafu(backtrace)]
source: datatypes::error::Error,
data_type: ConcreteDataType,
backtrace: Backtrace,
},
#[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
@@ -222,10 +220,11 @@ impl ErrorExt for Error {
| Error::ValueDeserialize { .. }
| Error::Io { .. } => StatusCode::StorageUnavailable,
Error::RegisterTable { .. } => StatusCode::Internal,
Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
StatusCode::Internal
}
Error::ReadSystemCatalog { source, .. } => source.status_code(),
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
Error::InvalidCatalogValue { source, .. } => source.status_code(),
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
@@ -265,7 +264,6 @@ impl From<Error> for DataFusionError {
#[cfg(test)]
mod tests {
use common_error::mock::MockError;
use datatypes::arrow::datatypes::DataType;
use snafu::GenerateImplicitData;
use super::*;
@@ -314,11 +312,8 @@ mod tests {
assert_eq!(
StatusCode::Internal,
Error::SystemCatalogTypeMismatch {
data_type: DataType::Boolean,
source: datatypes::error::Error::UnsupportedArrowType {
arrow_type: DataType::Boolean,
backtrace: Backtrace::generate()
}
data_type: ConcreteDataType::binary_datatype(),
backtrace: Backtrace::generate(),
}
.status_code()
);

View File

@@ -15,65 +15,56 @@
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use common_catalog::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableInfo, TableId, TableVersion};
use crate::consts::{
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
};
use crate::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
const CATALOG_KEY_PREFIX: &str = "__c";
const SCHEMA_KEY_PREFIX: &str = "__s";
const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-({})$",
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
"^{CATALOG_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-({})-({})$",
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
"^{SCHEMA_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-({})-({})-({})$",
TABLE_GLOBAL_KEY_PREFIX,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN
"^{TABLE_GLOBAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{}-({})-({})-({})-([0-9]+)$",
TABLE_REGIONAL_KEY_PREFIX,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN,
ALPHANUMERICS_NAME_PATTERN
"^{TABLE_REGIONAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-([0-9]+)$"
))
.unwrap();
}
pub fn build_catalog_prefix() -> String {
format!("{}-", CATALOG_KEY_PREFIX)
format!("{CATALOG_KEY_PREFIX}-")
}
pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
format!("{SCHEMA_KEY_PREFIX}-{}-", catalog_name.as_ref())
}
pub fn build_table_global_prefix(
@@ -81,8 +72,7 @@ pub fn build_table_global_prefix(
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_GLOBAL_KEY_PREFIX,
"{TABLE_GLOBAL_KEY_PREFIX}-{}-{}-",
catalog_name.as_ref(),
schema_name.as_ref()
)
@@ -137,7 +127,7 @@ impl TableGlobalKey {
/// Table global info contains necessary info for a datanode to create table regions, including
/// table id, table meta(schema...), region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableGlobalValue {
/// Id of datanode that created the global table info kv. only for debugging.
pub node_id: u64,
@@ -377,7 +367,7 @@ mod tests {
table_info,
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableGlobalValue::parse(&serialized).unwrap();
let deserialized = TableGlobalValue::parse(serialized).unwrap();
assert_eq!(value, deserialized);
}
}

View File

@@ -29,6 +29,7 @@ use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
pub mod error;
pub mod helper;
pub mod local;
pub mod remote;
pub mod schema;
@@ -156,7 +157,7 @@ pub struct RegisterSchemaRequest {
/// Formats table fully-qualified name
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
format!("{}.{}.{}", catalog, schema, table)
format!("{catalog}.{schema}.{table}")
}
pub trait CatalogProviderFactory {
@@ -186,8 +187,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
.await
.with_context(|_| CreateTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
catalog_name, schema_name, table_name, table_id,
"{catalog_name}.{schema_name}.{table_name}, id: {table_id}",
),
})?;
manager
@@ -199,7 +199,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
table: table.clone(),
})
.await?;
info!("Created and registered system table: {}", table_name);
info!("Created and registered system table: {table_name}");
table
};
if let Some(hook) = req.open_hook {

View File

@@ -145,27 +145,34 @@ impl LocalCatalogManager {
/// Convert `RecordBatch` to a vector of `Entry`.
fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
ensure!(
rb.df_recordbatch.columns().len() >= 6,
rb.num_columns() >= 6,
SystemCatalogSnafu {
msg: format!("Length mismatch: {}", rb.df_recordbatch.columns().len())
msg: format!("Length mismatch: {}", rb.num_columns())
}
);
let entry_type = UInt8Vector::try_from_arrow_array(&rb.df_recordbatch.columns()[0])
.with_context(|_| SystemCatalogTypeMismatchSnafu {
data_type: rb.df_recordbatch.columns()[ENTRY_TYPE_INDEX]
.data_type()
.clone(),
let entry_type = rb
.column(ENTRY_TYPE_INDEX)
.as_any()
.downcast_ref::<UInt8Vector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
})?;
let key = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[1])
.with_context(|_| SystemCatalogTypeMismatchSnafu {
data_type: rb.df_recordbatch.columns()[KEY_INDEX].data_type().clone(),
let key = rb
.column(KEY_INDEX)
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(KEY_INDEX).data_type(),
})?;
let value = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[3])
.with_context(|_| SystemCatalogTypeMismatchSnafu {
data_type: rb.df_recordbatch.columns()[VALUE_INDEX].data_type().clone(),
let value = rb
.column(VALUE_INDEX)
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(VALUE_INDEX).data_type(),
})?;
let mut res = Vec::with_capacity(rb.num_rows());
@@ -331,7 +338,7 @@ impl CatalogManager for LocalCatalogManager {
let schema = catalog
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
schema_info: format!("{catalog_name}.{schema_name}"),
})?;
{
@@ -445,7 +452,7 @@ impl CatalogManager for LocalCatalogManager {
let schema = catalog
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
schema_info: format!("{catalog_name}.{schema_name}"),
})?;
schema.table(table_name)
}

View File

@@ -20,10 +20,6 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, info};
use futures::Stream;
use futures_util::StreamExt;
@@ -39,6 +35,10 @@ use crate::error::{
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
};
use crate::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
@@ -331,10 +331,7 @@ impl RemoteCatalogManager {
.open_table(&context, request)
.await
.with_context(|_| OpenTableSnafu {
table_info: format!(
"{}.{}.{}, id:{}",
catalog_name, schema_name, table_name, table_id
),
table_info: format!("{catalog_name}.{schema_name}.{table_name}, id:{table_id}"),
})? {
Some(table) => {
info!(
@@ -355,7 +352,7 @@ impl RemoteCatalogManager {
.clone()
.try_into()
.context(InvalidTableSchemaSnafu {
table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
table_info: format!("{catalog_name}.{schema_name}.{table_name}"),
schema: meta.schema.clone(),
})?;
let req = CreateTableRequest {
@@ -477,7 +474,7 @@ impl CatalogManager for RemoteCatalogManager {
let schema = catalog
.schema(schema_name)?
.with_context(|| SchemaNotFoundSnafu {
schema_info: format!("{}.{}", catalog_name, schema_name),
schema_info: format!("{catalog_name}.{schema_name}"),
})?;
schema.table(table_name)
}

View File

@@ -21,14 +21,13 @@ use common_catalog::consts::{
SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
};
use common_query::logical_plan::Expr;
use common_query::physical_plan::{PhysicalPlanRef, RuntimeEnv};
use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::timestamp::Timestamp;
use common_time::util;
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
@@ -43,7 +42,6 @@ use crate::error::{
pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable {
@@ -63,7 +61,7 @@ impl Table for SystemCatalogTable {
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_projection: Option<&Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::Result<PhysicalPlanRef> {
@@ -111,7 +109,7 @@ impl SystemCatalogTable {
desc: Some("System catalog table".to_string()),
schema: schema.clone(),
region_numbers: vec![0],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
create_if_not_exists: true,
table_options: HashMap::new(),
};
@@ -128,13 +126,14 @@ impl SystemCatalogTable {
/// Create a stream of all entries inside system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None;
let ctx = SessionContext::new();
let scan = self
.table
.scan(&full_projection, &[], None)
.scan(full_projection, &[], None)
.await
.context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, Arc::new(RuntimeEnv::default()))
.execute(0, ctx.task_ctx())
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream)
}
@@ -162,7 +161,7 @@ fn build_system_catalog_schema() -> Schema {
),
ColumnSchema::new(
"timestamp".to_string(),
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
@@ -173,12 +172,12 @@ fn build_system_catalog_schema() -> Schema {
),
ColumnSchema::new(
"gmt_created".to_string(),
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
ColumnSchema::new(
"gmt_modified".to_string(),
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
];
@@ -198,7 +197,7 @@ pub fn build_table_insert_request(full_table_name: String, table_id: TableId) ->
}
pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
let full_schema_name = format!("{}.{}", catalog_name, schema_name);
let full_schema_name = format!("{catalog_name}.{schema_name}");
build_insert_request(
EntryType::Schema,
full_schema_name.as_bytes(),
@@ -223,7 +222,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
// Timestamp in key part is intentionally left to 0
columns_values.insert(
"timestamp".to_string(),
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(0)])) as _,
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
);
columns_values.insert(
@@ -231,18 +230,15 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
Arc::new(BinaryVector::from_slice(&[value])) as _,
);
let now = util::current_time_millis();
columns_values.insert(
"gmt_created".to_string(),
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
util::current_time_millis(),
)])) as _,
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
);
columns_values.insert(
"gmt_modified".to_string(),
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
util::current_time_millis(),
)])) as _,
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
);
InsertRequest {
@@ -394,7 +390,7 @@ mod tests {
if let Entry::Catalog(e) = entry {
assert_eq!("some_catalog", e.catalog_name);
} else {
panic!("Unexpected type: {:?}", entry);
panic!("Unexpected type: {entry:?}");
}
}
@@ -411,7 +407,7 @@ mod tests {
assert_eq!("some_catalog", e.catalog_name);
assert_eq!("some_schema", e.schema_name);
} else {
panic!("Unexpected type: {:?}", entry);
panic!("Unexpected type: {entry:?}");
}
}
@@ -430,7 +426,7 @@ mod tests {
assert_eq!("some_table", e.table_name);
assert_eq!(42, e.table_id);
} else {
panic!("Unexpected type: {:?}", entry);
panic!("Unexpected type: {entry:?}");
}
}

View File

@@ -26,9 +26,9 @@ use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream};
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use futures::Stream;
use snafu::ResultExt;
@@ -77,7 +77,7 @@ impl Table for Tables {
async fn scan(
&self,
_projection: &Option<Vec<usize>>,
_projection: Option<&Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::error::Result<PhysicalPlanRef> {
@@ -149,26 +149,33 @@ fn tables_to_record_batch(
engine: &str,
) -> Vec<VectorRef> {
let mut catalog_vec =
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
let mut schema_vec =
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
let mut table_name_vec =
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
let mut engine_vec =
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
for table_name in table_names {
catalog_vec.push(&Value::String(catalog_name.into()));
schema_vec.push(&Value::String(schema_name.into()));
table_name_vec.push(&Value::String(table_name.into()));
engine_vec.push(&Value::String(engine.into()));
// Safety: All these vectors are string type.
catalog_vec
.push_value_ref(ValueRef::String(catalog_name))
.unwrap();
schema_vec
.push_value_ref(ValueRef::String(schema_name))
.unwrap();
table_name_vec
.push_value_ref(ValueRef::String(&table_name))
.unwrap();
engine_vec.push_value_ref(ValueRef::String(engine)).unwrap();
}
vec![
catalog_vec.finish(),
schema_vec.finish(),
table_name_vec.finish(),
engine_vec.finish(),
catalog_vec.to_vector(),
schema_vec.to_vector(),
table_name_vec.to_vector(),
engine_vec.to_vector(),
]
}
@@ -340,9 +347,7 @@ fn build_schema_for_tables() -> Schema {
#[cfg(test)]
mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::physical_plan::RuntimeEnv;
use datatypes::arrow::array::Utf8Array;
use datatypes::arrow::datatypes::DataType;
use common_query::physical_plan::SessionContext;
use futures_util::StreamExt;
use table::table::numbers::NumbersTable;
@@ -365,57 +370,48 @@ mod tests {
.unwrap();
let tables = Tables::new(catalog_list, "test_engine".to_string());
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
let mut tables_stream = tables_stream
.execute(0, Arc::new(RuntimeEnv::default()))
.unwrap();
let tables_stream = tables.scan(None, &[], None).await.unwrap();
let session_ctx = SessionContext::new();
let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
if let Some(t) = tables_stream.next().await {
let batch = t.unwrap().df_recordbatch;
let batch = t.unwrap();
assert_eq!(1, batch.num_rows());
assert_eq!(4, batch.num_columns());
assert_eq!(&DataType::Utf8, batch.column(0).data_type());
assert_eq!(&DataType::Utf8, batch.column(1).data_type());
assert_eq!(&DataType::Utf8, batch.column(2).data_type());
assert_eq!(&DataType::Utf8, batch.column(3).data_type());
assert_eq!(
ConcreteDataType::string_datatype(),
batch.column(0).data_type()
);
assert_eq!(
ConcreteDataType::string_datatype(),
batch.column(1).data_type()
);
assert_eq!(
ConcreteDataType::string_datatype(),
batch.column(2).data_type()
);
assert_eq!(
ConcreteDataType::string_datatype(),
batch.column(3).data_type()
);
assert_eq!(
"greptime",
batch
.column(0)
.as_any()
.downcast_ref::<Utf8Array<i32>>()
.unwrap()
.value(0)
batch.column(0).get_ref(0).as_string().unwrap().unwrap()
);
assert_eq!(
"public",
batch
.column(1)
.as_any()
.downcast_ref::<Utf8Array<i32>>()
.unwrap()
.value(0)
batch.column(1).get_ref(0).as_string().unwrap().unwrap()
);
assert_eq!(
"test_table",
batch
.column(2)
.as_any()
.downcast_ref::<Utf8Array<i32>>()
.unwrap()
.value(0)
batch.column(2).get_ref(0).as_string().unwrap().unwrap()
);
assert_eq!(
"test_engine",
batch
.column(3)
.as_any()
.downcast_ref::<Utf8Array<i32>>()
.unwrap()
.value(0)
batch.column(3).get_ref(0).as_string().unwrap().unwrap()
);
} else {
panic!("Record batch should not be empty!")

View File

@@ -69,8 +69,7 @@ mod tests {
assert!(
err.to_string()
.contains("Table `greptime.public.test_table` already exists"),
"Actual error message: {}",
err
"Actual error message: {err}",
);
}

View File

@@ -189,10 +189,10 @@ impl TableEngine for MockTableEngine {
unimplemented!()
}
fn get_table<'a>(
fn get_table(
&self,
_ctx: &EngineContext,
table_ref: &'a TableReference,
table_ref: &TableReference,
) -> table::Result<Option<TableRef>> {
futures::executor::block_on(async {
Ok(self
@@ -204,7 +204,7 @@ impl TableEngine for MockTableEngine {
})
}
fn table_exists<'a>(&self, _ctx: &EngineContext, table_ref: &'a TableReference) -> bool {
fn table_exists(&self, _ctx: &EngineContext, table_ref: &TableReference) -> bool {
futures::executor::block_on(async {
self.tables
.read()

View File

@@ -22,12 +22,12 @@ mod tests {
use std::collections::HashSet;
use std::sync::Arc;
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use catalog::remote::{
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
};
use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use datatypes::schema::Schema;
use futures_util::StreamExt;
use table::engine::{EngineContext, TableEngineRef};

View File

@@ -1,13 +1,12 @@
[package]
name = "client"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
api = { path = "../api" }
async-stream = "0.3"
async-stream.workspace = true
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
@@ -15,20 +14,18 @@ common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion.workspace = true
datatypes = { path = "../datatypes" }
enum_dispatch = "0.3"
parking_lot = "0.12"
rand = "0.8"
snafu = { version = "0.7", features = ["backtraces"] }
snafu.workspace = true
tonic = "0.8"
[dev-dependencies]
datanode = { path = "../datanode" }
substrait = { path = "../common/substrait" }
tokio = { version = "1.0", features = ["full"] }
tokio.workspace = true
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@@ -1,106 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::*;
use client::{Client, Database};
/// Installs a global `tracing` formatter subscriber and then invokes [`run`].
///
/// # Panics
///
/// Panics if the subscriber cannot be installed as the global default.
fn main() {
    let subscriber = tracing_subscriber::FmtSubscriber::builder().finish();
    tracing::subscriber::set_global_default(subscriber).unwrap();

    run();
}
/// Builds a `Database` handle named `greptime` on top of a `Client` pointed at
/// `127.0.0.1:3001`, then inserts the sample rows from [`insert_data`] into
/// table `demo` of schema `public` (region 0).
///
/// # Panics
///
/// Panics if the insert request returns an error.
#[tokio::main]
async fn run() {
    let db = Database::new("greptime", Client::with_urls(vec!["127.0.0.1:3001"]));

    let (columns, row_count) = insert_data();
    let request = InsertExpr {
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
        region_number: 0,
        columns,
        row_count,
    };

    db.insert(request).await.unwrap();
}
/// Builds the sample payload for the insert example: the four columns
/// `host`, `cpu`, `memory` and `ts`, paired with a row count of 4.
///
/// `cpu` and `memory` carry only three values for the four rows and are the
/// only columns with a non-zero `null_mask`. Presumably bit `i` of the mask
/// marks row `i` as null, so `2` and `4` would null out rows 1 and 2
/// respectively (TODO confirm against the protocol definition).
fn insert_data() -> (Vec<Column>, u32) {
    const SEMANTIC_TAG: i32 = 0;
    const SEMANTIC_FIELD: i32 = 1;
    const SEMANTIC_TS: i32 = 2;

    let host = Column {
        column_name: "host".to_string(),
        semantic_type: SEMANTIC_TAG,
        values: Some(column::Values {
            string_values: ["host1", "host2", "host3", "host4"]
                .iter()
                .map(|name| name.to_string())
                .collect(),
            ..Default::default()
        }),
        null_mask: vec![0],
        ..Default::default()
    };

    let cpu = Column {
        column_name: "cpu".to_string(),
        semantic_type: SEMANTIC_FIELD,
        values: Some(column::Values {
            f64_values: vec![0.31, 0.41, 0.2],
            ..Default::default()
        }),
        null_mask: vec![2],
        ..Default::default()
    };

    let memory = Column {
        column_name: "memory".to_string(),
        semantic_type: SEMANTIC_FIELD,
        values: Some(column::Values {
            f64_values: vec![0.1, 0.2, 0.3],
            ..Default::default()
        }),
        null_mask: vec![4],
        ..Default::default()
    };

    let ts = Column {
        column_name: "ts".to_string(),
        semantic_type: SEMANTIC_TS,
        values: Some(column::Values {
            i64_values: vec![100, 101, 102, 103],
            ..Default::default()
        }),
        null_mask: vec![0],
        ..Default::default()
    };

    let rows = 4;
    (vec![host, cpu, memory, ts], rows)
}

View File

@@ -12,8 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::{ColumnDataType, ColumnDef, CreateExpr};
use client::admin::Admin;
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, TableId};
use client::{Client, Database};
use prost_09::Message;
use substrait_proto::protobuf::plan_rel::RelType as PlanRelType;
@@ -33,41 +32,41 @@ fn main() {
async fn run() {
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
let create_table_expr = CreateExpr {
catalog_name: Some("greptime".to_string()),
schema_name: Some("public".to_string()),
let create_table_expr = CreateTableExpr {
catalog_name: "greptime".to_string(),
schema_name: "public".to_string(),
table_name: "test_logical_dist_exec".to_string(),
desc: None,
desc: "".to_string(),
column_defs: vec![
ColumnDef {
name: "timestamp".to_string(),
datatype: ColumnDataType::Timestamp as i32,
datatype: ColumnDataType::TimestampMillisecond as i32,
is_nullable: false,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "key".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
default_constraint: vec![],
},
ColumnDef {
name: "value".to_string(),
datatype: ColumnDataType::Uint64 as i32,
is_nullable: false,
default_constraint: None,
default_constraint: vec![],
},
],
time_index: "timestamp".to_string(),
primary_keys: vec!["key".to_string()],
create_if_not_exists: false,
table_options: Default::default(),
table_id: Some(1024),
table_id: Some(TableId { id: 1024 }),
region_ids: vec![0],
};
let admin = Admin::new("create table", client.clone());
let result = admin.create(create_table_expr).await.unwrap();
let db = Database::new("create table", client.clone());
let result = db.create(create_table_expr).await.unwrap();
event!(Level::INFO, "create table result: {:#?}", result);
let logical = mock_logical_plan();

View File

@@ -1,51 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use client::{Client, Database};
use common_grpc::MockExecution;
use datafusion::physical_plan::expressions::Column;
use datafusion::physical_plan::projection::ProjectionExec;
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
use tracing::{event, Level};
/// Installs a global `tracing` formatter subscriber and then invokes [`run`].
///
/// # Panics
///
/// Panics if the subscriber cannot be installed as the global default.
fn main() {
    let subscriber = tracing_subscriber::FmtSubscriber::builder().finish();
    tracing::subscriber::set_global_default(subscriber).unwrap();

    run();
}
/// Submits the plan from [`mock_physical_plan`] through a `Database` handle
/// named `greptime` (client pointed at `127.0.0.1:3001`) and logs the outcome
/// at `INFO` level. A failed request is logged, not unwrapped.
#[tokio::main]
async fn run() {
    let db = Database::new("greptime", Client::with_urls(vec!["127.0.0.1:3001"]));

    let outcome = db.physical_plan(mock_physical_plan(), None).await;

    event!(Level::INFO, "result: {:#?}", outcome);
}
/// Builds a projection plan over a `MockExecution` input named
/// `mock_input_exec`, selecting column `id` (index 0) and column `age`
/// (index 2) under their own names.
///
/// # Panics
///
/// Panics if `ProjectionExec::try_new` rejects the projection.
fn mock_physical_plan() -> Arc<dyn ExecutionPlan> {
    // (column name, column index) pairs to project; each keeps its name as the alias.
    let projected = [("id", 0), ("age", 2)]
        .iter()
        .map(|&(name, index)| {
            let column = Arc::new(Column::new(name, index)) as Arc<dyn PhysicalExpr>;
            (column, name.to_string())
        })
        .collect();

    let source =
        Arc::new(MockExecution::new("mock_input_exec".to_string())) as Arc<dyn ExecutionPlan>;

    Arc::new(ProjectionExec::try_new(projected, source).unwrap())
}

View File

@@ -1,34 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use client::{Client, Database, Select};
use tracing::{event, Level};
/// Installs a global `tracing` formatter subscriber and then invokes [`run`].
///
/// # Panics
///
/// Panics if the subscriber cannot be installed as the global default.
fn main() {
    let subscriber = tracing_subscriber::FmtSubscriber::builder().finish();
    tracing::subscriber::set_global_default(subscriber).unwrap();

    run();
}
/// Runs `select * from demo` through a `Database` handle named `greptime`
/// (client pointed at `127.0.0.1:3001`) and logs the result at `INFO` level.
///
/// # Panics
///
/// Panics if the select request returns an error.
#[tokio::main]
async fn run() {
    let db = Database::new("greptime", Client::with_urls(vec!["127.0.0.1:3001"]));

    let query = Select::Sql("select * from demo".to_string());
    let rows = db.select(query).await.unwrap();

    event!(Level::INFO, "result: {:#?}", rows);
}

View File

@@ -1,137 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::*;
use common_error::prelude::StatusCode;
use common_query::Output;
use snafu::prelude::*;
use crate::database::PROTOCOL_VERSION;
use crate::{error, Client, Result};
/// Handle for sending administrative expressions (create / alter / drop table,
/// create database) through a [`Client`].
#[derive(Debug, Clone)]
pub struct Admin {
    /// Value placed in the `name` field of every outgoing `AdminRequest`.
    name: String,
    /// Client used to send the requests.
    client: Client,
}
impl Admin {
pub fn new(name: impl Into<String>, client: Client) -> Self {
Self {
name: name.into(),
client,
}
}
pub async fn create(&self, expr: CreateExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let expr = AdminExpr {
header: Some(header),
expr: Some(admin_expr::Expr::Create(expr)),
};
self.do_request(expr).await
}
pub async fn do_request(&self, expr: AdminExpr) -> Result<AdminResult> {
// `remove(0)` is safe because of `do_requests`'s invariants.
Ok(self.do_requests(vec![expr]).await?.remove(0))
}
pub async fn alter(&self, expr: AlterExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let expr = AdminExpr {
header: Some(header),
expr: Some(admin_expr::Expr::Alter(expr)),
};
self.do_request(expr).await
}
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let expr = AdminExpr {
header: Some(header),
expr: Some(admin_expr::Expr::DropTable(expr)),
};
self.do_request(expr).await
}
/// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.
async fn do_requests(&self, exprs: Vec<AdminExpr>) -> Result<Vec<AdminResult>> {
let expr_count = exprs.len();
let req = AdminRequest {
name: self.name.clone(),
exprs,
};
let resp = self.client.admin(req).await?;
let results = resp.results;
ensure!(
results.len() == expr_count,
error::MissingResultSnafu {
name: "admin_results",
expected: expr_count,
actual: results.len(),
}
);
Ok(results)
}
pub async fn create_database(&self, expr: CreateDatabaseExpr) -> Result<AdminResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
let expr = AdminExpr {
header: Some(header),
expr: Some(admin_expr::Expr::CreateDatabase(expr)),
};
Ok(self.do_requests(vec![expr]).await?.remove(0))
}
}
pub fn admin_result_to_output(admin_result: AdminResult) -> Result<Output> {
let header = admin_result.header.context(error::MissingHeaderSnafu)?;
if !StatusCode::is_success(header.code) {
return error::DatanodeSnafu {
code: header.code,
msg: header.err_msg,
}
.fail();
}
let result = admin_result.result.context(error::MissingResultSnafu {
name: "result".to_string(),
expected: 1_usize,
actual: 0_usize,
})?;
let output = match result {
admin_result::Result::Mutate(mutate) => {
if mutate.failure != 0 {
return error::MutateFailureSnafu {
failure: mutate.failure,
}
.fail();
}
Output::AffectedRows(mutate.success as usize)
}
};
Ok(output)
}

View File

@@ -104,20 +104,6 @@ impl Client {
self.inner.set_peers(urls);
}
pub async fn admin(&self, req: AdminRequest) -> Result<AdminResponse> {
let req = BatchRequest {
admins: vec![req],
..Default::default()
};
let mut res = self.batch(req).await?;
res.admins.pop().context(error::MissingResultSnafu {
name: "admins",
expected: 1_usize,
actual: 0_usize,
})
}
pub async fn database(&self, req: DatabaseRequest) -> Result<DatabaseResponse> {
let req = BatchRequest {
databases: vec![req],

View File

@@ -12,32 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::codec::SelectResult as GrpcSelectResult;
use api::v1::column::SemanticType;
use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::{
object_expr, object_result, select_expr, DatabaseRequest, ExprHeader, InsertExpr,
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, PhysicalPlan,
SelectExpr,
object_expr, query_request, AlterExpr, CreateTableExpr, DatabaseRequest, DdlRequest,
DropTableExpr, InsertRequest, ObjectExpr, ObjectResult as GrpcObjectResult, QueryRequest,
};
use common_error::status_code::StatusCode;
use common_grpc::{AsExecutionPlan, DefaultAsPlanImpl};
use common_grpc_expr::column_to_vector;
use common_grpc::flight::{
flight_messages_to_recordbatches, raw_flight_data_to_message, FlightMessage,
};
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::physical_plan::ExecutionPlan;
use datatypes::prelude::*;
use datatypes::schema::{ColumnSchema, Schema};
use common_recordbatch::RecordBatches;
use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{
ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
};
use crate::error::{ConvertFlightDataSnafu, DatanodeSnafu, IllegalFlightMessagesSnafu};
use crate::{error, Client, Result};
pub const PROTOCOL_VERSION: u32 = 1;
#[derive(Clone, Debug)]
pub struct Database {
name: String,
@@ -56,83 +46,63 @@ impl Database {
&self.name
}
pub async fn insert(&self, insert: InsertExpr) -> Result<ObjectResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
pub async fn insert(&self, request: InsertRequest) -> Result<RpcOutput> {
let expr = ObjectExpr {
header: Some(header),
expr: Some(object_expr::Expr::Insert(insert)),
request: Some(object_expr::Request::Insert(request)),
};
self.object(expr).await?.try_into()
}
pub async fn batch_insert(&self, insert_exprs: Vec<InsertExpr>) -> Result<Vec<ObjectResult>> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
pub async fn sql(&self, sql: &str) -> Result<RpcOutput> {
let query = QueryRequest {
query: Some(query_request::Query::Sql(sql.to_string())),
};
let obj_exprs = insert_exprs
.into_iter()
.map(|expr| ObjectExpr {
header: Some(header.clone()),
expr: Some(object_expr::Expr::Insert(expr)),
})
.collect();
self.objects(obj_exprs)
.await?
.into_iter()
.map(|result| result.try_into())
.collect()
self.do_query(query).await
}
pub async fn select(&self, expr: Select) -> Result<ObjectResult> {
let select_expr = match expr {
Select::Sql(sql) => SelectExpr {
expr: Some(select_expr::Expr::Sql(sql)),
},
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<RpcOutput> {
let query = QueryRequest {
query: Some(query_request::Query::LogicalPlan(logical_plan)),
};
self.do_select(select_expr).await
self.do_query(query).await
}
pub async fn physical_plan(
&self,
physical: Arc<dyn ExecutionPlan>,
original_ql: Option<String>,
) -> Result<ObjectResult> {
let plan = DefaultAsPlanImpl::try_from_physical_plan(physical.clone())
.context(EncodePhysicalSnafu { physical })?
.bytes;
let original_ql = original_ql.unwrap_or_default();
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::PhysicalPlan(PhysicalPlan {
original_ql: original_ql.into_bytes(),
plan,
})),
};
self.do_select(select_expr).await
}
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
let select_expr = SelectExpr {
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
};
self.do_select(select_expr).await
}
async fn do_select(&self, select_expr: SelectExpr) -> Result<ObjectResult> {
let header = ExprHeader {
version: PROTOCOL_VERSION,
};
async fn do_query(&self, request: QueryRequest) -> Result<RpcOutput> {
let expr = ObjectExpr {
header: Some(header),
expr: Some(object_expr::Expr::Select(select_expr)),
request: Some(object_expr::Request::Query(request)),
};
let obj_result = self.object(expr).await?;
obj_result.try_into()
}
/// Sends a create-table expression as a DDL request and converts the
/// response into an [`RpcOutput`].
pub async fn create(&self, expr: CreateTableExpr) -> Result<RpcOutput> {
    let ddl = DdlRequest {
        expr: Some(DdlExpr::CreateTable(expr)),
    };
    let request = ObjectExpr {
        request: Some(object_expr::Request::Ddl(ddl)),
    };

    let response = self.object(request).await?;
    response.try_into()
}
/// Sends an alter-table expression as a DDL request and converts the
/// response into an [`RpcOutput`].
pub async fn alter(&self, expr: AlterExpr) -> Result<RpcOutput> {
    let ddl = DdlRequest {
        expr: Some(DdlExpr::Alter(expr)),
    };
    let request = ObjectExpr {
        request: Some(object_expr::Request::Ddl(ddl)),
    };

    let response = self.object(request).await?;
    response.try_into()
}
/// Sends a drop-table expression as a DDL request and converts the
/// response into an [`RpcOutput`].
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<RpcOutput> {
    let ddl = DdlRequest {
        expr: Some(DdlExpr::DropTable(expr)),
    };
    let request = ObjectExpr {
        request: Some(object_expr::Request::Ddl(ddl)),
    };

    let response = self.object(request).await?;
    response.try_into()
}
pub async fn object(&self, expr: ObjectExpr) -> Result<GrpcObjectResult> {
let res = self.objects(vec![expr]).await?.pop().unwrap();
Ok(res)
@@ -162,12 +132,12 @@ impl Database {
}
#[derive(Debug)]
pub enum ObjectResult {
Select(GrpcSelectResult),
Mutate(GrpcMutateResult),
pub enum RpcOutput {
RecordBatches(RecordBatches),
AffectedRows(usize),
}
impl TryFrom<api::v1::ObjectResult> for ObjectResult {
impl TryFrom<api::v1::ObjectResult> for RpcOutput {
type Error = error::Error;
fn try_from(object_result: api::v1::ObjectResult) -> std::result::Result<Self, Self::Error> {
@@ -180,92 +150,50 @@ impl TryFrom<api::v1::ObjectResult> for ObjectResult {
.fail();
}
let obj_result = object_result.result.context(error::MissingResultSnafu {
name: "result".to_string(),
expected: 1_usize,
actual: 0_usize,
})?;
Ok(match obj_result {
object_result::Result::Select(select) => {
let result = (*select.raw_data).try_into().context(DecodeSelectSnafu)?;
ObjectResult::Select(result)
}
object_result::Result::Mutate(mutate) => ObjectResult::Mutate(mutate),
})
}
}
let flight_messages = raw_flight_data_to_message(object_result.flight_data)
.context(ConvertFlightDataSnafu)?;
pub enum Select {
Sql(String),
}
impl TryFrom<ObjectResult> for Output {
type Error = error::Error;
fn try_from(value: ObjectResult) -> Result<Self> {
let output = match value {
ObjectResult::Select(select) => {
let vectors = select
.columns
.iter()
.map(|column| {
column_to_vector(column, select.row_count).context(ColumnToVectorSnafu)
})
.collect::<Result<Vec<VectorRef>>>()?;
let column_schemas = select
.columns
.iter()
.zip(vectors.iter())
.map(|(column, vector)| {
let datatype = vector.data_type();
// nullable or not, does not affect the output
let mut column_schema =
ColumnSchema::new(&column.column_name, datatype, true);
if column.semantic_type == SemanticType::Timestamp as i32 {
column_schema = column_schema.with_time_index(true);
}
column_schema
})
.collect::<Vec<ColumnSchema>>();
let schema = Arc::new(Schema::try_new(column_schemas).context(ConvertSchemaSnafu)?);
let recordbatches = if vectors.is_empty() {
RecordBatches::try_new(schema, vec![])
} else {
RecordBatch::new(schema, vectors)
.and_then(|batch| RecordBatches::try_new(batch.schema.clone(), vec![batch]))
let output = if let Some(FlightMessage::AffectedRows(rows)) = flight_messages.get(0) {
ensure!(
flight_messages.len() == 1,
IllegalFlightMessagesSnafu {
reason: "Expect 'AffectedRows' Flight messages to be one and only!"
}
.context(error::CreateRecordBatchesSnafu)?;
Output::RecordBatches(recordbatches)
}
ObjectResult::Mutate(mutate) => {
if mutate.failure != 0 {
return error::MutateFailureSnafu {
failure: mutate.failure,
}
.fail();
}
Output::AffectedRows(mutate.success as usize)
}
);
RpcOutput::AffectedRows(*rows)
} else {
let recordbatches = flight_messages_to_recordbatches(flight_messages)
.context(ConvertFlightDataSnafu)?;
RpcOutput::RecordBatches(recordbatches)
};
Ok(output)
}
}
/// Maps each [`RpcOutput`] variant onto the `Output` variant of the same name,
/// moving the payload across unchanged.
impl From<RpcOutput> for Output {
    fn from(value: RpcOutput) -> Self {
        match value {
            RpcOutput::RecordBatches(batches) => Self::RecordBatches(batches),
            RpcOutput::AffectedRows(rows) => Self::AffectedRows(rows),
        }
    }
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::Column;
use common_grpc::select::{null_mask, values};
use common_grpc_expr::column_to_vector;
use datatypes::prelude::{Vector, VectorRef};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Float32Vector, Float64Vector,
Int16Vector, Int32Vector, Int64Vector, Int8Vector, StringVector, UInt16Vector,
UInt32Vector, UInt64Vector, UInt8Vector,
};
use super::*;
#[test]
fn test_column_to_vector() {
let mut column = create_test_column(Arc::new(BooleanVector::from(vec![true])));
@@ -341,12 +269,11 @@ mod tests {
fn create_test_column(vector: VectorRef) -> Column {
let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
let array = vector.to_arrow_array();
Column {
column_name: "test".to_string(),
semantic_type: 1,
values: Some(values(&[array.clone()]).unwrap()),
null_mask: null_mask(&vec![array], vector.len()),
values: Some(values(&[vector.clone()]).unwrap()),
null_mask: null_mask(&[vector.clone()], vector.len()),
datatype: wrapper.datatype() as i32,
}
}

View File

@@ -13,19 +13,15 @@
// limitations under the License.
use std::any::Any;
use std::sync::Arc;
use api::serde::DecodeError;
use common_error::prelude::*;
use datafusion::physical_plan::ExecutionPlan;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Connect failed to {}, source: {}", url, source))]
ConnectFailed {
url: String,
source: tonic::transport::Error,
#[snafu(display("Illegal Flight messages, reason: {}", reason))]
IllegalFlightMessages {
reason: String,
backtrace: Backtrace,
},
@@ -46,34 +42,21 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Fail to decode select result, source: {}", source))]
DecodeSelect { source: DecodeError },
#[snafu(display("Error occurred on the data node, code: {}, msg: {}", code, msg))]
Datanode { code: u32, msg: String },
#[snafu(display("Failed to encode physical plan: {:?}, source: {}", physical, source))]
EncodePhysical {
physical: Arc<dyn ExecutionPlan>,
#[snafu(display("Failed to convert FlightData, source: {}", source))]
ConvertFlightData {
#[snafu(backtrace)]
source: common_grpc::Error,
},
#[snafu(display("Mutate result has failure {}", failure))]
MutateFailure { failure: u32, backtrace: Backtrace },
#[snafu(display("Column datatype error, source: {}", source))]
ColumnDataType {
#[snafu(backtrace)]
source: api::error::Error,
},
#[snafu(display("Failed to create RecordBatches, source: {}", source))]
CreateRecordBatches {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Illegal GRPC client state: {}", err_msg))]
IllegalGrpcClientState {
err_msg: String,
@@ -83,12 +66,6 @@ pub enum Error {
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, backtrace: Backtrace },
#[snafu(display("Failed to convert schema, source: {}", source))]
ConvertSchema {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display(
"Failed to create gRPC channel, peer address: {}, source: {}",
addr,
@@ -99,12 +76,6 @@ pub enum Error {
#[snafu(backtrace)]
source: common_grpc::error::Error,
},
#[snafu(display("Failed to convert column to vector, source: {}", source))]
ColumnToVector {
#[snafu(backtrace)]
source: common_grpc_expr::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -112,21 +83,17 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ConnectFailed { .. }
Error::IllegalFlightMessages { .. }
| Error::MissingResult { .. }
| Error::MissingHeader { .. }
| Error::TonicStatus { .. }
| Error::DecodeSelect { .. }
| Error::Datanode { .. }
| Error::EncodePhysical { .. }
| Error::MutateFailure { .. }
| Error::ColumnDataType { .. }
| Error::MissingField { .. } => StatusCode::Internal,
Error::ConvertSchema { source } => source.status_code(),
Error::CreateRecordBatches { source } => source.status_code(),
Error::CreateChannel { source, .. } => source.status_code(),
Error::CreateChannel { source, .. } | Error::ConvertFlightData { source } => {
source.status_code()
}
Error::IllegalGrpcClientState { .. } => StatusCode::Unexpected,
Error::ColumnToVector { source, .. } => source.status_code(),
}
}

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod admin;
mod client;
mod database;
mod error;
@@ -21,5 +20,5 @@ pub mod load_balance;
pub use api;
pub use self::client::Client;
pub use self::database::{Database, ObjectResult, Select};
pub use self::database::{Database, RpcOutput};
pub use self::error::{Error, Result};

View File

@@ -1,15 +1,16 @@
[package]
name = "cmd"
version = "0.1.0"
edition = "2021"
version.workspace = true
edition.workspace = true
license.workspace = true
default-run = "greptime"
license = "Apache-2.0"
[[bin]]
name = "greptime"
path = "src/bin/greptime.rs"
[dependencies]
anymap = "1.0.0-beta.2"
clap = { version = "3.1", features = ["derive"] }
common-error = { path = "../common/error" }
common-telemetry = { path = "../common/telemetry", features = [
@@ -17,17 +18,17 @@ common-telemetry = { path = "../common/telemetry", features = [
] }
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
futures.workspace = true
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
serde = "1.0"
serde.workspace = true
servers = { path = "../servers" }
snafu = { version = "0.7", features = ["backtraces"] }
snafu.workspace = true
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"
[dev-dependencies]
serde = "1.0"
serde.workspace = true
tempdir = "0.3"
[build-dependencies]

View File

@@ -12,8 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
const DEFAULT_VALUE: &str = "unknown";
fn main() {
build_data::set_GIT_BRANCH();
build_data::set_GIT_COMMIT();
build_data::set_GIT_DIRTY();
println!(
"cargo:rustc-env=GIT_COMMIT={}",
build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
);
println!(
"cargo:rustc-env=GIT_BRANCH={}",
build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
);
println!(
"cargo:rustc-env=GIT_DIRTY={}",
build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
);
}

View File

@@ -77,7 +77,9 @@ fn print_version() -> &'static str {
"\ncommit: ",
env!("GIT_COMMIT"),
"\ndirty: ",
env!("GIT_DIRTY")
env!("GIT_DIRTY"),
"\nversion: ",
env!("CARGO_PKG_VERSION")
)
}

View File

@@ -25,12 +25,6 @@ pub enum Error {
source: datanode::error::Error,
},
#[snafu(display("Failed to build frontend, source: {}", source))]
BuildFrontend {
#[snafu(backtrace)]
source: frontend::error::Error,
},
#[snafu(display("Failed to start frontend, source: {}", source))]
StartFrontend {
#[snafu(backtrace)]
@@ -61,6 +55,12 @@ pub enum Error {
#[snafu(display("Illegal config: {}", msg))]
IllegalConfig { msg: String, backtrace: Backtrace },
#[snafu(display("Illegal auth config: {}", source))]
IllegalAuthConfig {
#[snafu(backtrace)]
source: servers::auth::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -75,7 +75,7 @@ impl ErrorExt for Error {
StatusCode::InvalidArguments
}
Error::IllegalConfig { .. } => StatusCode::InvalidArguments,
Error::BuildFrontend { source, .. } => source.status_code(),
Error::IllegalAuthConfig { .. } => StatusCode::InvalidArguments,
}
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use clap::Parser;
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
@@ -20,12 +22,15 @@ use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::Plugins;
use meta_client::MetaClientOpts;
use servers::auth::UserProviderRef;
use servers::http::HttpOptions;
use servers::Mode;
use servers::tls::{TlsMode, TlsOption};
use servers::{auth, Mode};
use snafu::ResultExt;
use crate::error::{self, Result};
use crate::error::{self, IllegalAuthConfigSnafu, Result};
use crate::toml_loader;
#[derive(Parser)]
@@ -71,21 +76,41 @@ pub struct StartCommand {
influxdb_enable: Option<bool>,
#[clap(long)]
metasrv_addr: Option<String>,
#[clap(long)]
tls_mode: Option<TlsMode>,
#[clap(long)]
tls_cert_path: Option<String>,
#[clap(long)]
tls_key_path: Option<String>,
#[clap(long)]
user_provider: Option<String>,
}
impl StartCommand {
async fn run(self) -> Result<()> {
let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
let opts: FrontendOptions = self.try_into()?;
let mut frontend = Frontend::new(
opts.clone(),
Instance::try_new(&opts)
.await
.context(error::StartFrontendSnafu)?,
);
let mut instance = Instance::try_new_distributed(&opts)
.await
.context(error::StartFrontendSnafu)?;
instance.set_plugins(plugins.clone());
let mut frontend = Frontend::new(opts, instance, plugins);
frontend.start().await.context(error::StartFrontendSnafu)
}
}
/// Builds the frontend [`Plugins`] container from the `user_provider` option.
///
/// With `None`, an empty container is returned. With `Some(..)`, the string is
/// handed to `auth::user_provider_from_option` and the resulting provider is
/// registered under the `UserProviderRef` type key.
///
/// # Errors
///
/// Returns the `IllegalAuthConfig` error when `auth::user_provider_from_option`
/// fails for the given option string.
pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<Plugins> {
    let mut plugins = Plugins::new();

    // Nothing to register when no provider was configured.
    let option = match user_provider {
        Some(option) => option,
        None => return Ok(plugins),
    };

    let user_provider_ref =
        auth::user_provider_from_option(option).context(IllegalAuthConfigSnafu)?;
    plugins.insert::<UserProviderRef>(user_provider_ref);

    Ok(plugins)
}
impl TryFrom<StartCommand> for FrontendOptions {
type Error = error::Error;
@@ -96,6 +121,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
FrontendOptions::default()
};
let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
if let Some(addr) = cmd.http_addr {
opts.http_options = Some(HttpOptions {
addr,
@@ -111,12 +138,14 @@ impl TryFrom<StartCommand> for FrontendOptions {
if let Some(addr) = cmd.mysql_addr {
opts.mysql_options = Some(MysqlOptions {
addr,
tls: tls_option.clone(),
..Default::default()
});
}
if let Some(addr) = cmd.postgres_addr {
opts.postgres_options = Some(PostgresOptions {
addr,
tls: tls_option,
..Default::default()
});
}
@@ -147,6 +176,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
mod tests {
use std::time::Duration;
use servers::auth::{Identity, Password, UserProviderRef};
use super::*;
#[test]
@@ -160,6 +191,10 @@ mod tests {
influxdb_enable: Some(false),
config_file: None,
metasrv_addr: None,
tls_mode: None,
tls_cert_path: None,
tls_key_path: None,
user_provider: None,
};
let opts: FrontendOptions = command.try_into().unwrap();
@@ -209,11 +244,14 @@ mod tests {
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
metasrv_addr: None,
tls_mode: None,
tls_cert_path: None,
tls_key_path: None,
user_provider: None,
};
let fe_opts = FrontendOptions::try_from(command).unwrap();
assert_eq!(Mode::Distributed, fe_opts.mode);
assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
assert_eq!(
"127.0.0.1:4000".to_string(),
fe_opts.http_options.as_ref().unwrap().addr
@@ -223,4 +261,34 @@ mod tests {
fe_opts.http_options.as_ref().unwrap().timeout
);
}
#[tokio::test]
async fn test_try_from_start_command_to_anymap() {
    // Only `user_provider` is relevant to this test; all other options stay unset.
    let command = StartCommand {
        user_provider: Some("static_user_provider:cmd:test=test".to_string()),
        http_addr: None,
        grpc_addr: None,
        mysql_addr: None,
        postgres_addr: None,
        opentsdb_addr: None,
        influxdb_enable: None,
        config_file: None,
        metasrv_addr: None,
        tls_mode: None,
        tls_cert_path: None,
        tls_key_path: None,
    };

    // Loading the plugins must succeed for the provider option above.
    let loaded = load_frontend_plugins(&command.user_provider);
    assert!(loaded.is_ok());
    let plugins = loaded.unwrap();

    // The user provider must have been registered in the plugin map.
    let registered = plugins.get::<UserProviderRef>();
    assert!(registered.is_some());
    let user_provider = registered.unwrap();

    // Authenticating with the credentials from the option string must succeed.
    let auth_result = user_provider
        .auth(Identity::UserId("test", None), Password::PlainText("test"))
        .await;
    assert!(auth_result.is_ok());
}
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use clap::Parser;
use common_telemetry::info;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
@@ -24,15 +26,15 @@ use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prometheus::PrometheusOptions;
use frontend::Plugins;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use tokio::try_join;
use crate::error::{
BuildFrontendSnafu, Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu,
};
use crate::error::{Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu};
use crate::frontend::load_frontend_plugins;
use crate::toml_loader;
#[derive(Parser)]
@@ -104,7 +106,6 @@ impl StandaloneOptions {
influxdb_options: self.influxdb_options,
prometheus_options: self.prometheus_options,
mode: self.mode,
datanode_rpc_addr: "127.0.0.1:3001".to_string(),
meta_client_opts: None,
}
}
@@ -137,12 +138,21 @@ struct StartCommand {
config_file: Option<String>,
#[clap(short = 'm', long = "memory-catalog")]
enable_memory_catalog: bool,
#[clap(long)]
tls_mode: Option<TlsMode>,
#[clap(long)]
tls_cert_path: Option<String>,
#[clap(long)]
tls_key_path: Option<String>,
#[clap(long)]
user_provider: Option<String>,
}
impl StartCommand {
async fn run(self) -> Result<()> {
let enable_memory_catalog = self.enable_memory_catalog;
let config_file = self.config_file.clone();
let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
let fe_opts = FrontendOptions::try_from(self)?;
let dn_opts: DatanodeOptions = {
let mut opts: StandaloneOptions = if let Some(path) = config_file {
@@ -162,7 +172,7 @@ impl StartCommand {
let mut datanode = Datanode::new(dn_opts.clone())
.await
.context(StartDatanodeSnafu)?;
let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;
let mut frontend = build_frontend(fe_opts, plugins, datanode.get_instance()).await?;
// Start the datanode instance before starting services, to avoid requests coming in before internal components are started.
datanode
@@ -171,11 +181,7 @@ impl StartCommand {
.context(StartDatanodeSnafu)?;
info!("Datanode instance started");
try_join!(
async { datanode.start_services().await.context(StartDatanodeSnafu) },
async { frontend.start().await.context(StartFrontendSnafu) }
)?;
frontend.start().await.context(StartFrontendSnafu)?;
Ok(())
}
}
@@ -183,20 +189,13 @@ impl StartCommand {
/// Build frontend instance in standalone mode
async fn build_frontend(
fe_opts: FrontendOptions,
dn_opts: &DatanodeOptions,
plugins: Arc<Plugins>,
datanode_instance: InstanceRef,
) -> Result<Frontend<FeInstance>> {
let grpc_server_addr = &dn_opts.rpc_addr;
info!(
"Build frontend with datanode gRPC addr: {}",
grpc_server_addr
);
let mut frontend_instance = FeInstance::try_new(&fe_opts)
.await
.context(BuildFrontendSnafu)?;
frontend_instance.set_catalog_manager(datanode_instance.catalog_manager().clone());
let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
frontend_instance.set_script_handler(datanode_instance);
Ok(Frontend::new(fe_opts, frontend_instance))
frontend_instance.set_plugins(plugins.clone());
Ok(Frontend::new(fe_opts, frontend_instance, plugins))
}
impl TryFrom<StartCommand> for FrontendOptions {
@@ -225,8 +224,7 @@ impl TryFrom<StartCommand> for FrontendOptions {
if addr == datanode_grpc_addr {
return IllegalConfigSnafu {
msg: format!(
"gRPC listen address conflicts with datanode reserved gRPC addr: {}",
datanode_grpc_addr
"gRPC listen address conflicts with datanode reserved gRPC addr: {datanode_grpc_addr}",
),
}
.fail();
@@ -261,6 +259,18 @@ impl TryFrom<StartCommand> for FrontendOptions {
opts.influxdb_options = Some(InfluxdbOptions { enable: true });
}
let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
if let Some(mut mysql_options) = opts.mysql_options {
mysql_options.tls = tls_option.clone();
opts.mysql_options = Some(mysql_options);
}
if let Some(mut postgres_options) = opts.postgres_options {
postgres_options.tls = tls_option;
opts.postgres_options = Some(postgres_options);
}
Ok(opts)
}
}
@@ -269,6 +279,8 @@ impl TryFrom<StartCommand> for FrontendOptions {
mod tests {
use std::time::Duration;
use servers::auth::{Identity, Password, UserProviderRef};
use super::*;
#[test]
@@ -285,11 +297,14 @@ mod tests {
)),
influxdb_enable: false,
enable_memory_catalog: false,
tls_mode: None,
tls_cert_path: None,
tls_key_path: None,
user_provider: None,
};
let fe_opts = FrontendOptions::try_from(cmd).unwrap();
assert_eq!(Mode::Standalone, fe_opts.mode);
assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
assert_eq!(
"127.0.0.1:4000".to_string(),
fe_opts.http_options.as_ref().unwrap().addr
@@ -309,4 +324,33 @@ mod tests {
assert_eq!(2, fe_opts.mysql_options.as_ref().unwrap().runtime_size);
assert!(fe_opts.influxdb_options.as_ref().unwrap().enable);
}
#[tokio::test]
async fn test_try_from_start_command_to_anymap() {
    // Only `user_provider` is relevant to this test; all other options stay unset.
    let command = StartCommand {
        user_provider: Some("static_user_provider:cmd:test=test".to_string()),
        http_addr: None,
        rpc_addr: None,
        mysql_addr: None,
        postgres_addr: None,
        opentsdb_addr: None,
        config_file: None,
        influxdb_enable: false,
        enable_memory_catalog: false,
        tls_mode: None,
        tls_cert_path: None,
        tls_key_path: None,
    };

    // Loading the plugins must succeed for the provider option above.
    let loaded = load_frontend_plugins(&command.user_provider);
    assert!(loaded.is_ok());
    let plugins = loaded.unwrap();

    // The user provider must have been registered in the plugin map.
    let registered = plugins.get::<UserProviderRef>();
    assert!(registered.is_some());
    let user_provider = registered.unwrap();

    // Authenticating with the credentials from the option string must succeed.
    let auth_result = user_provider
        .auth(Identity::UserId("test", None), Password::PlainText("test"))
        .await;
    assert!(auth_result.is_ok());
}
}

View File

@@ -1,8 +1,8 @@
[package]
name = "common-base"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
bitvec = "1.0"
@@ -10,5 +10,4 @@ bytes = { version = "1.1", features = ["serde"] }
common-error = { path = "../error" }
paste = "1.0"
serde = { version = "1.0", features = ["derive"] }
snafu = { version = "0.7", features = ["backtraces"] }
snafu.workspace = true

View File

@@ -1,8 +1,8 @@
[package]
name = "common-catalog"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1"
@@ -11,10 +11,9 @@ common-telemetry = { path = "../telemetry" }
datatypes = { path = "../../datatypes" }
lazy_static = "1.4"
regex = "1.6"
serde = "1.0"
serde.workspace = true
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }
[dev-dependencies]
chrono = "0.4"

View File

@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
/// scripts table id
pub const SCRIPTS_TABLE_ID: u32 = 1;
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";

View File

@@ -14,10 +14,3 @@
pub mod consts;
pub mod error;
mod helper;
pub use helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
TableGlobalValue, TableRegionalKey, TableRegionalValue,
};

View File

@@ -1,8 +1,8 @@
[package]
name = "common-error"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
snafu = { version = "0.7", features = ["backtraces"] }

View File

@@ -131,7 +131,7 @@ mod tests {
assert!(ErrorCompat::backtrace(&err).is_some());
let msg = format!("{:?}", err);
let msg = format!("{err:?}");
assert!(msg.contains("\nBacktrace:\n"));
let fmt_msg = format!("{:?}", DebugFormat::new(&err));
assert_eq!(msg, fmt_msg);
@@ -151,7 +151,7 @@ mod tests {
assert!(err.as_any().downcast_ref::<MockError>().is_some());
assert!(err.source().is_some());
let msg = format!("{:?}", err);
let msg = format!("{err:?}");
assert!(msg.contains("\nBacktrace:\n"));
assert!(msg.contains("Caused by"));

View File

@@ -31,11 +31,11 @@ impl<'a, E: ErrorExt + ?Sized> fmt::Debug for DebugFormat<'a, E> {
write!(f, "{}.", self.0)?;
if let Some(source) = self.0.source() {
// Source error use debug format for more verbose info.
write!(f, " Caused by: {:?}", source)?;
write!(f, " Caused by: {source:?}")?;
}
if let Some(backtrace) = self.0.backtrace_opt() {
// Add a newline to separate causes and backtrace.
write!(f, "\nBacktrace:\n{}", backtrace)?;
write!(f, "\nBacktrace:\n{backtrace}")?;
}
Ok(())

View File

@@ -51,6 +51,7 @@ pub enum StatusCode {
TableNotFound = 4001,
TableColumnNotFound = 4002,
TableColumnExists = 4003,
DatabaseNotFound = 4004,
// ====== End of catalog related status code =======
// ====== Begin of storage related status code =====
@@ -62,6 +63,19 @@ pub enum StatusCode {
/// Runtime resources exhausted, like creating threads failed.
RuntimeResourcesExhausted = 6000,
// ====== End of server related status code =======
// ====== Begin of auth related status code =====
/// User does not exist
UserNotFound = 7000,
/// Unsupported password type
UnsupportedPasswordType = 7001,
/// Username and password do not match
UserPasswordMismatch = 7002,
/// HTTP authorization header not found
AuthHeaderNotFound = 7003,
/// Invalid http authorization header
InvalidAuthHeader = 7004,
// ====== End of auth related status code =====
}
impl StatusCode {
@@ -73,7 +87,7 @@ impl StatusCode {
impl fmt::Display for StatusCode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The current debug format is suitable to display.
write!(f, "{:?}", self)
write!(f, "{self:?}")
}
}
@@ -82,7 +96,7 @@ mod tests {
use super::*;
fn assert_status_code_display(code: StatusCode, msg: &str) {
let code_msg = format!("{}", code);
let code_msg = format!("{code}");
assert_eq!(msg, code_msg);
}

View File

@@ -1,8 +1,8 @@
[package]
name = "common-function-macro"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[lib]
proc-macro = true
@@ -15,5 +15,5 @@ syn = "1.0"
arc-swap = "1.0"
common-query = { path = "../query" }
datatypes = { path = "../../datatypes" }
snafu = { version = "0.7", features = ["backtraces"] }
snafu.workspace = true
static_assertions = "1.1.0"

View File

@@ -1,8 +1,8 @@
[package]
edition = "2021"
name = "common-function"
version = "0.1.0"
license = "Apache-2.0"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
arc-swap = "1.0"
@@ -11,14 +11,14 @@ common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-query = { path = "../query" }
common-time = { path = "../time" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datafusion.workspace = true
datatypes = { path = "../../datatypes" }
libc = "0.2"
num = "0.4"
num-traits = "0.2"
once_cell = "1.10"
paste = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
snafu.workspace = true
statrs = "0.15"
[dev-dependencies]

View File

@@ -12,5 +12,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
pub mod scalars;

View File

@@ -23,6 +23,5 @@ pub(crate) mod test;
mod timestamp;
pub mod udf;
pub use aggregate::MedianAccumulatorCreator;
pub use function::{Function, FunctionRef};
pub use function_registry::{FunctionRegistry, FUNCTION_REGISTRY};

View File

@@ -16,7 +16,6 @@ mod argmax;
mod argmin;
mod diff;
mod mean;
mod median;
mod percentile;
mod polyval;
mod scipy_stats_norm_cdf;
@@ -29,7 +28,6 @@ pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use median::MedianAccumulatorCreator;
pub use percentile::PercentileAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
@@ -88,7 +86,6 @@ impl AggregateFunctions {
};
}
register_aggr_func!("median", 1, MedianAccumulatorCreator);
register_aggr_func!("diff", 1, DiffAccumulatorCreator);
register_aggr_func!("mean", 1, MeanAccumulatorCreator);
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);

View File

@@ -20,24 +20,22 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
// return the index of the max value
#[derive(Debug, Default)]
pub struct Argmax<T>
where
T: Primitive + PartialOrd,
{
pub struct Argmax<T> {
max: Option<T>,
n: u64,
}
impl<T> Argmax<T>
where
T: Primitive + PartialOrd,
T: PartialOrd + Copy,
{
fn update(&mut self, value: T, index: u64) {
if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
@@ -49,8 +47,7 @@ where
impl<T> Accumulator for Argmax<T>
where
T: Primitive + PartialOrd,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.max {
@@ -66,10 +63,10 @@ where
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
@@ -93,8 +90,8 @@ where
let max = &states[0];
let index = &states[1];
let max: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(max) };
let index: &<u64 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
@@ -122,7 +119,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmax::<$S>::default()))
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -154,7 +151,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
@@ -166,21 +163,19 @@ mod test {
// test update one not-null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
assert!(argmax.update_batch(&v).is_ok());
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
// test update one null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Option::<i32>::None,
]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
assert!(argmax.update_batch(&v).is_ok());
assert_eq!(Value::Null, argmax.evaluate().unwrap());
// test update no null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
@@ -190,7 +185,7 @@ mod test {
// test update null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
@@ -201,7 +196,7 @@ mod test {
// test update with constant vector
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
assert!(argmax.update_batch(&v).is_ok());

View File

@@ -20,23 +20,20 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
#[derive(Debug, Default)]
pub struct Argmin<T>
where
T: Primitive + PartialOrd,
{
pub struct Argmin<T> {
min: Option<T>,
n: u32,
}
impl<T> Argmin<T>
where
T: Primitive + PartialOrd,
T: Copy + PartialOrd,
{
fn update(&mut self, value: T, index: u32) {
match self.min {
@@ -56,8 +53,7 @@ where
impl<T> Accumulator for Argmin<T>
where
T: Primitive + PartialOrd,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.min {
@@ -75,10 +71,10 @@ where
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
@@ -102,8 +98,8 @@ where
let min = &states[0];
let index = &states[1];
let min: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(min) };
let index: &<u32 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
@@ -131,7 +127,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmin::<$S>::default()))
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
@@ -175,21 +171,19 @@ mod test {
// test update one not-null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
assert!(argmin.update_batch(&v).is_ok());
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update one null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Option::<i32>::None,
]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
assert!(argmin.update_batch(&v).is_ok());
assert_eq!(Value::Null, argmin.evaluate().unwrap());
// test update no null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
@@ -199,7 +193,7 @@ mod test {
// test update null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
@@ -210,7 +204,7 @@ mod test {
// test update with constant vector
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
assert!(argmin.update_batch(&v).is_ok());

View File

@@ -22,40 +22,32 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::PrimitiveType;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, ListVector};
use datatypes::vectors::{ConstantVector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
// I is the input type, O is the output type.
#[derive(Debug, Default)]
pub struct Diff<T, SubT>
where
T: Primitive + AsPrimitive<SubT>,
SubT: Primitive + std::ops::Sub<Output = SubT>,
{
values: Vec<T>,
_phantom: PhantomData<SubT>,
pub struct Diff<I, O> {
values: Vec<I>,
_phantom: PhantomData<O>,
}
impl<T, SubT> Diff<T, SubT>
where
T: Primitive + AsPrimitive<SubT>,
SubT: Primitive + std::ops::Sub<Output = SubT>,
{
fn push(&mut self, value: T) {
impl<I, O> Diff<I, O> {
fn push(&mut self, value: I) {
self.values.push(value);
}
}
impl<T, SubT> Accumulator for Diff<T, SubT>
impl<I, O> Accumulator for Diff<I, O>
where
T: Primitive + AsPrimitive<SubT>,
for<'a> T: Scalar<RefType<'a> = T>,
SubT: Primitive + std::ops::Sub<Output = SubT>,
for<'a> SubT: Scalar<RefType<'a> = SubT>,
I: WrapperType,
O: WrapperType,
I::Native: AsPrimitive<O::Native>,
O::Native: std::ops::Sub<Output = O::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -65,7 +57,7 @@ where
.collect::<Vec<Value>>();
Ok(vec![Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
I::LogicalType::build_data_type(),
))])
}
@@ -78,12 +70,12 @@ where
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &<I as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
@@ -109,8 +101,9 @@ where
),
})?;
for state in states.values_iter() {
let state = state.context(FromScalarValueSnafu)?;
self.update_batch(&[state])?
if let Some(state) = state.context(FromScalarValueSnafu)? {
self.update_batch(&[state])?;
}
}
Ok(())
}
@@ -122,11 +115,14 @@ where
let diff = self
.values
.windows(2)
.map(|x| (x[1].as_() - x[0].as_()).into())
.map(|x| {
let native = x[1].into_native().as_() - x[0].into_native().as_();
O::from_native(native).into()
})
.collect::<Vec<Value>>();
let diff = Value::List(ListValue::new(
Some(Box::new(diff)),
SubT::default().into().data_type(),
O::LogicalType::build_data_type(),
));
Ok(diff)
}
@@ -143,7 +139,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Diff::<$S,<$S as Primitive>::LargestType>::default()))
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(ConcreteDataType::list_datatype(PrimitiveType::<<$S as Primitive>::LargestType>::default().into()))
Ok(ConcreteDataType::list_datatype($S::default().into()))
},
{
unreachable!()
@@ -177,7 +173,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(vec![ConcreteDataType::list_datatype(PrimitiveType::<$S>::default().into())])
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
},
{
unreachable!()
@@ -188,9 +184,10 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
@@ -201,21 +198,19 @@ mod test {
// test update one not-null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
assert!(diff.update_batch(&v).is_ok());
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update one null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Option::<i32>::None,
]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
assert!(diff.update_batch(&v).is_ok());
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update no null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
@@ -232,7 +227,7 @@ mod test {
// test update null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
@@ -251,7 +246,7 @@ mod test {
// test update with constant vector
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
4,
))];
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];

View File

@@ -22,16 +22,14 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::{ConstantVector, Float64Vector, UInt64Vector};
use datatypes::types::WrapperType;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt};
#[derive(Debug, Default)]
pub struct Mean<T>
where
T: Primitive + AsPrimitive<f64>,
{
pub struct Mean<T> {
sum: f64,
n: u64,
_phantom: PhantomData<T>,
@@ -39,11 +37,12 @@ where
impl<T> Mean<T>
where
T: Primitive + AsPrimitive<f64>,
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
#[inline(always)]
fn push(&mut self, value: T) {
self.sum += value.as_();
self.sum += value.into_native().as_();
self.n += 1;
}
@@ -56,8 +55,8 @@ where
impl<T> Accumulator for Mean<T>
where
T: Primitive + AsPrimitive<f64>,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
Ok(vec![self.sum.into(), self.n.into()])
@@ -73,10 +72,10 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
@@ -150,7 +149,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Mean::<$S>::default()))
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
},
{
let err_msg = format!(
@@ -182,7 +181,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
@@ -194,21 +193,19 @@ mod test {
// test update one not-null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
assert!(mean.update_batch(&v).is_ok());
assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
// test update one null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Option::<i32>::None,
]))];
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
assert!(mean.update_batch(&v).is_ok());
assert_eq!(Value::Null, mean.evaluate().unwrap());
// test update no null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
@@ -218,7 +215,7 @@ mod test {
// test update null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
@@ -230,7 +227,7 @@ mod test {
// test update with constant vector
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
assert!(mean.update_batch(&v).is_ok());

View File

@@ -1,289 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::sync::Arc;
use common_function_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
};
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::OrdPrimitive;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, ListVector};
use datatypes::with_match_primitive_type_id;
use num::NumCast;
use snafu::{ensure, OptionExt, ResultExt};
// This median calculation algorithm's details can be found at
// https://leetcode.cn/problems/find-median-from-data-stream/
//
// Basically, it uses two heaps: a max-heap and a min-heap. The max-heap stores the numbers that
// are not greater than the median, and the min-heap stores the greater ones. In a stream of
// numbers, whenever a number arrives, we adjust the heaps' tops so that either one top is the
// median or both tops can be averaged to get the median.
//
// The time complexity to update the median is O(logn), O(1) to get the median; and the space
// complexity is O(n). (Ignore the costs for heap expansion.)
//
// From an algorithmic point of view, [quick select](https://en.wikipedia.org/wiki/Quickselect) might be
// better. But to use quick select here, we need a mutable self in the final calculation (`evaluate`)
// to swap stored numbers in the states vector. Though we could make our `evaluate` receive
// `&mut self`, DataFusion calls our accumulator with `&self` (see `DfAccumulatorAdaptor`). That
// means we would have to introduce some kind of interior mutability, and the overhead is not negligible.
//
// TODO(LFC): Use quick select to get median when we can modify DataFusion's code, and benchmark with two-heap algorithm.
/// Accumulator state for the streaming median: every pushed value lives in exactly one of two
/// heaps. `push` keeps the heaps balanced so that `not_greater` holds either the same number of
/// values as `greater` or exactly one more.
#[derive(Debug, Default)]
pub struct Median<T>
where
T: Primitive,
{
// Values strictly greater than the top of `not_greater`. Wrapping in `Reverse` turns the
// max-oriented `BinaryHeap` into a min-heap, so `peek` yields the smallest of them.
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
// Values not greater than the median; `peek` yields the largest of them.
not_greater: BinaryHeap<OrdPrimitive<T>>,
}
impl<T> Median<T>
where
    T: Primitive,
{
    /// Inserts `value` into the two-heap structure and rebalances it.
    ///
    /// After the call, `not_greater` holds either as many values as `greater` or exactly one
    /// more, which is the shape `evaluate` relies on.
    fn push(&mut self, value: T) {
        let item = OrdPrimitive::<T>(value);

        // Copy the current top of the lower half out so the borrow ends before we mutate.
        // The very first value always seeds the lower half.
        let lower_top = match self.not_greater.peek() {
            Some(top) => *top,
            None => {
                self.not_greater.push(item);
                return;
            }
        };

        // The `unwrap`s below cannot fail: each `pop` directly follows a `push` on the same heap.
        if item <= lower_top {
            self.not_greater.push(item);
            let lower_overfull = self.not_greater.len() > self.greater.len() + 1;
            if lower_overfull {
                let moved = self.not_greater.pop().unwrap();
                self.greater.push(Reverse(moved));
            }
        } else {
            self.greater.push(Reverse(item));
            let upper_overfull = self.greater.len() > self.not_greater.len();
            if upper_overfull {
                let Reverse(moved) = self.greater.pop().unwrap();
                self.not_greater.push(moved);
            }
        }
    }
}
// UDAFs are built using the trait `Accumulator`, that offers DataFusion the necessary functions
// to use them.
impl<T> Accumulator for Median<T>
where
T: Primitive,
for<'a> T: Scalar<RefType<'a> = T>,
{
// This function serializes our state to `ScalarValue`, which DataFusion uses to pass this
// state between execution stages. Note that this can be arbitrary data.
//
// The `ScalarValue`s returned here will be passed in as argument `states: &[VectorRef]` to
// `merge_batch` function.
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.greater
.iter()
.map(|x| &x.0)
.chain(self.not_greater.iter())
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
))])
}
// DataFusion calls this function to update the accumulator's state for a batch of inputs rows.
// It is expected this function to update the accumulator's state.
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
// This is a unary accumulator, so only one column is provided.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
// merge states from other accumulators (returned by `state()` method).
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
// The states here are returned by the `state` method. Since we only returned a vector
// with one value in that method, `states[0]` is fine.
let states = &states[0];
let states = states
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
states.vector_type_name()
),
})?;
for state in states.values_iter() {
let state = state.context(FromScalarValueSnafu)?;
// merging state is simply accumulate stored numbers from others', so just call update
self.update_batch(&[state])?
}
Ok(())
}
// DataFusion expects this function to return the final value of this aggregator.
fn evaluate(&self) -> Result<Value> {
if self.not_greater.is_empty() {
assert!(
self.greater.is_empty(),
"not expected in two-heap median algorithm, there must be a bug when implementing it"
);
return Ok(Value::Null);
}
// unwrap is safe because we checked not_greater heap's len above
let not_greater = *self.not_greater.peek().unwrap();
let median = if self.not_greater.len() > self.greater.len() {
not_greater.into()
} else {
// unwrap is safe because greater heap len >= not_greater heap len, which is > 0
let greater = self.greater.peek().unwrap();
// the following three NumCast's `unwrap`s are safe because T is primitive
let not_greater_v: f64 = NumCast::from(not_greater.as_primitive()).unwrap();
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
let median: T = NumCast::from((not_greater_v + greater_v) / 2.0).unwrap();
median.into()
};
Ok(median)
}
}
/// Factory that builds `Median` accumulators for the "MEDIAN" aggregate function.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MedianAccumulatorCreator {}

impl AggregateFunctionCreator for MedianAccumulatorCreator {
    /// Returns a closure that instantiates a `Median` accumulator matching the primitive type
    /// of the first input column, or fails for unsupported data types.
    fn creator(&self) -> AccumulatorCreatorFunction {
        Arc::new(move |types: &[ConcreteDataType]| {
            let input_type = &types[0];
            with_match_primitive_type_id!(
                input_type.logical_type_id(),
                |$S| {
                    Ok(Box::new(Median::<$S>::default()))
                },
                {
                    let err_msg = format!(
                        "\"MEDIAN\" aggregate function not support data type {:?}",
                        input_type.logical_type_id(),
                    );
                    CreateAccumulatorSnafu { err_msg }.fail()?
                }
            )
        })
    }

    /// The output type is the type of the single input column.
    fn output_type(&self) -> Result<ConcreteDataType> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 1, InvalidInputStateSnafu);
        // The length check above guarantees there is a first element, so `unwrap` is safe.
        let only_input = input_types.into_iter().next().unwrap();
        Ok(only_input)
    }

    /// The intermediate state is a single list whose items have the output type.
    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
        let item_type = self.output_type()?;
        Ok(vec![ConcreteDataType::list_datatype(item_type)])
    }
}
#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;

    use super::*;

    /// Builds a nullable `i32` column from the given values.
    fn int32_column(values: Vec<Option<i32>>) -> VectorRef {
        Arc::new(PrimitiveVector::<i32>::from(values))
    }

    /// Feeds `input` into a fresh `Median::<i32>`, checks the update succeeded, and returns the
    /// evaluated median.
    fn median_of(input: &[VectorRef]) -> Value {
        let mut median = Median::<i32>::default();
        assert!(median.update_batch(input).is_ok());
        median.evaluate().unwrap()
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must leave both heaps untouched.
        let mut median = Median::<i32>::default();
        assert!(median.update_batch(&[]).is_ok());
        assert!(median.not_greater.is_empty());
        assert!(median.greater.is_empty());
        assert_eq!(Value::Null, median.evaluate().unwrap());

        // A single non-null value is its own median.
        let single = vec![int32_column(vec![Some(42)])];
        assert_eq!(Value::Int32(42), median_of(&single));

        // A single null value contributes nothing.
        let single_null = vec![int32_column(vec![Option::<i32>::None])];
        assert_eq!(Value::Null, median_of(&single_null));

        // A batch without nulls.
        let no_nulls = vec![int32_column(vec![Some(-1i32), Some(1), Some(2)])];
        assert_eq!(Value::Int32(1), median_of(&no_nulls));

        // A batch containing a null, which is skipped.
        let with_null = vec![int32_column(vec![Some(-2i32), None, Some(3), Some(4)])];
        assert_eq!(Value::Int32(3), median_of(&with_null));

        // A constant vector repeating the same value.
        let constant: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            10,
        ))];
        assert_eq!(Value::Int32(4), median_of(&constant));
    }
}

View File

@@ -26,7 +26,7 @@ use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::OrdPrimitive;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num::NumCast;
use snafu::{ensure, OptionExt, ResultExt};
@@ -44,15 +44,15 @@ use snafu::{ensure, OptionExt, ResultExt};
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
// i+g = (q-alpha)/(n-alpha-beta+1)
// Below, q is the quantile value, n is the sample size and alpha and beta are constants. The following formula gives an interpolation i + g of where the quantile would be in the sorted sample.
// With i being the floor and g the fractional part of the result.
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
// With 'i' being the floor and 'g' the fractional part of the result.
// the default method is linear where
// alpha = 1
// beta = 1
#[derive(Debug, Default)]
pub struct Percentile<T>
where
T: Primitive,
T: WrapperType,
{
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
not_greater: BinaryHeap<OrdPrimitive<T>>,
@@ -62,7 +62,7 @@ where
impl<T> Percentile<T>
where
T: Primitive,
T: WrapperType,
{
fn push(&mut self, value: T) {
let value = OrdPrimitive::<T>(value);
@@ -93,8 +93,7 @@ where
impl<T> Accumulator for Percentile<T>
where
T: Primitive,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -107,7 +106,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.p.into(),
])
@@ -129,14 +128,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -209,10 +208,11 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
@@ -259,7 +259,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Percentile::<$S>::default()))
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -292,7 +292,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
@@ -307,8 +307,8 @@ mod test {
// test update one not-null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)])),
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
Arc::new(Int32Vector::from(vec![Some(42)])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
@@ -316,8 +316,8 @@ mod test {
// test update one null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::Null, percentile.evaluate().unwrap());
@@ -325,12 +325,8 @@ mod test {
// test update no null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -342,13 +338,8 @@ mod test {
// test update null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -362,13 +353,10 @@ mod test {
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(PrimitiveVector::<f64>::from(vec![
Some(100.0_f64),
Some(100.0_f64),
])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
@@ -376,12 +364,8 @@ mod test {
// test left border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(0.0_f64),
Some(0.0_f64),
Some(0.0_f64),
@@ -393,12 +377,8 @@ mod test {
// test medium
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(50.0_f64),
Some(50.0_f64),
Some(50.0_f64),
@@ -410,12 +390,8 @@ mod test {
// test right border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -431,12 +407,8 @@ mod test {
// >> 6.400000000000
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(10i32),
Some(7),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(40.0_f64),
Some(40.0_f64),
Some(40.0_f64),
@@ -451,12 +423,8 @@ mod test {
// >> 9.7000000000000011
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(10i32),
Some(7),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(95.0_f64),
Some(95.0_f64),
Some(95.0_f64),

View File

@@ -23,9 +23,9 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::PrimitiveType;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Int64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -34,8 +34,10 @@ use snafu::{ensure, OptionExt, ResultExt};
#[derive(Debug, Default)]
pub struct Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
values: Vec<T>,
// DataFusion casts constant in into i64 type.
@@ -45,8 +47,10 @@ where
impl<T, PolyT> Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
fn push(&mut self, value: T) {
self.values.push(value);
@@ -55,11 +59,11 @@ where
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum<PolyT>,
for<'a> T: Scalar<RefType<'a> = T>,
for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
i64: AsPrimitive<PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -70,7 +74,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -91,10 +95,10 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
@@ -103,7 +107,7 @@ where
});
let x = &values[1];
let x = VectorHelper::check_get_scalar::<i64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -172,12 +176,14 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
@@ -196,7 +202,7 @@ where
.values
.iter()
.enumerate()
.map(|(i, &value)| value.as_() * (x.pow((len - 1 - i) as u32)).as_())
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
.sum();
Ok(polyval.into())
}
@@ -213,7 +219,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Polyval::<$S,<$S as Primitive>::LargestType>::default()))
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -234,7 +240,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
with_match_primitive_type_id!(
input_type,
|$S| {
Ok(PrimitiveType::<<$S as Primitive>::LargestType>::default().into())
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
},
{
unreachable!()
@@ -254,7 +260,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
@@ -268,8 +274,8 @@ mod test {
// test update one not-null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Some(3)])),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
Arc::new(Int32Vector::from(vec![Some(3)])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
@@ -277,8 +283,8 @@ mod test {
// test update one null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Null, polyval.evaluate().unwrap());
@@ -286,12 +292,8 @@ mod test {
// test update no null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(3),
Some(0),
Some(1),
])),
Arc::new(PrimitiveVector::<i64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
@@ -303,13 +305,8 @@ mod test {
// test update null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(3),
Some(0),
None,
Some(1),
])),
Arc::new(PrimitiveVector::<i64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
@@ -323,10 +320,10 @@ mod test {
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(5_i64), Some(5_i64)])),
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());

View File

@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
pub struct ScipyStatsNormCdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
impl<T> ScipyStatsNormCdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
@@ -52,8 +46,8 @@ where
impl<T> Accumulator for ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType + std::iter::Sum<T>,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -64,7 +58,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -86,14 +80,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
@@ -160,19 +154,19 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
let mean = values.clone().mean();
let std_dev = values.std_dev();
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
@@ -198,7 +192,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormCdf::<$S>::default()))
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -230,7 +224,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
@@ -244,12 +238,8 @@ mod test {
// test update no null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
@@ -264,13 +254,8 @@ mod test {
// test update null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),

View File

@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
pub struct ScipyStatsNormPdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
impl<T> ScipyStatsNormPdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
@@ -52,8 +46,8 @@ where
impl<T> Accumulator for ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType,
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -64,7 +58,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -86,14 +80,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
@@ -160,19 +154,20 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
let mean = values.clone().mean();
let std_dev = values.std_dev();
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
@@ -198,7 +193,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormPdf::<$S>::default()))
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -230,7 +225,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
@@ -244,12 +239,8 @@ mod test {
// test update no null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
@@ -264,13 +255,8 @@ mod test {
// test update null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),

View File

@@ -14,10 +14,10 @@
use std::iter;
use common_query::error::Result;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::vectors::{ConstantVector, Helper};
use crate::error::Result;
use crate::scalars::expression::ctx::EvalContext;
pub fn scalar_binary_op<L: Scalar, R: Scalar, O: Scalar, F>(
@@ -36,10 +36,9 @@ where
let result = match (l.is_const(), r.is_const()) {
(false, true) => {
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(right.inner()) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
let b = right.get_data(0);
let it = left.iter_data().map(|a| f(a, b, ctx));
@@ -47,8 +46,8 @@ where
}
(false, false) => {
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
let it = left
.iter_data()
@@ -58,25 +57,22 @@ where
}
(true, false) => {
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
let left: &<L as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(left.inner()) };
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
let a = left.get_data(0);
let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
let it = right.iter_data().map(|b| f(a, b, ctx));
<O as Scalar>::VectorType::from_owned_iterator(it)
}
(true, true) => {
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
let left: &<L as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(left.inner()) };
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
let a = left.get_data(0);
let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(right.inner()) };
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
let b = right.get_data(0);
let it = iter::repeat(a)

View File

@@ -13,8 +13,7 @@
// limitations under the License.
use chrono_tz::Tz;
use crate::error::Error;
use common_query::error::Error;
pub struct EvalContext {
_tz: Tz,

View File

@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::error::{self, Result};
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use snafu::ResultExt;
use crate::error::{GetScalarVectorSnafu, Result};
use crate::scalars::expression::ctx::EvalContext;
/// TODO: remove the allow_unused when it's used.
@@ -28,7 +29,7 @@ pub fn scalar_unary_op<L: Scalar, O: Scalar, F>(
where
F: Fn(Option<L::RefType<'_>>, &mut EvalContext) -> Option<O>,
{
let left = VectorHelper::check_get_scalar::<L>(l).context(GetScalarVectorSnafu)?;
let left = Helper::check_get_scalar::<L>(l).context(error::GetScalarVectorSnafu)?;
let it = left.iter_data().map(|a| f(a, ctx));
let result = <O as Scalar>::VectorType::from_owned_iterator(it);

View File

@@ -16,12 +16,11 @@ use std::fmt;
use std::sync::Arc;
use chrono_tz::Tz;
use common_query::error::Result;
use common_query::prelude::Signature;
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::VectorRef;
use crate::error::Result;
#[derive(Clone)]
pub struct FunctionContext {
pub tz: Tz,

View File

@@ -13,10 +13,12 @@
// limitations under the License.
mod pow;
mod rate;
use std::sync::Arc;
pub use pow::PowFunction;
pub use rate::RateFunction;
use crate::scalars::function_registry::FunctionRegistry;
@@ -25,5 +27,6 @@ pub(crate) struct MathFunction;
impl MathFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(PowFunction::default()));
registry.register(Arc::new(RateFunction::default()))
}
}

View File

@@ -15,15 +15,16 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::LogicalPrimitiveType;
use datatypes::vectors::VectorRef;
use datatypes::with_match_primitive_type_id;
use num::traits::Pow;
use num_traits::AsPrimitive;
use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};
@@ -46,7 +47,7 @@ impl Function for PowFunction {
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
let col = scalar_binary_op::<$S, $T, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
Ok(Arc::new(col))
},{
unreachable!()

View File

@@ -0,0 +1,106 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use common_query::error::{self, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute::kernels::{arithmetic, cast};
use datatypes::arrow::datatypes::DataType;
use datatypes::prelude::*;
use datatypes::vectors::{Helper, VectorRef};
use snafu::ResultExt;
use crate::scalars::function::{Function, FunctionContext};
/// Scalar function that turns a sequence of adjacent data points into
/// per-interval rates (registered under the name `prom_rate`).
#[derive(Debug, Clone, Default)]
pub struct RateFunction;
impl fmt::Display for RateFunction {
    /// Renders the function as the fixed label `RATE`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("RATE")
    }
}
impl Function for RateFunction {
fn name(&self) -> &str {
"prom_rate"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::float64_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
let val = &columns[0].to_arrow_array();
let val_0 = val.slice(0, val.len() - 1);
let val_1 = val.slice(1, val.len() - 1);
let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
let ts = &columns[1].to_arrow_array();
let ts_0 = ts.slice(0, ts.len() - 1);
let ts_1 = ts.slice(1, ts.len() - 1);
let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
})?;
let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
})?;
let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
Ok(v)
}
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use common_query::prelude::TypeSignature;
    use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};

    use super::*;

    /// Checks the metadata reported by `RateFunction` and evaluates it on a
    /// three-point sample.
    #[test]
    fn test_rate_function() {
        let func = RateFunction::default();

        assert_eq!("prom_rate", func.name());
        assert_eq!(
            ConcreteDataType::float64_datatype(),
            func.return_type(&[]).unwrap()
        );
        assert!(matches!(func.signature(),
            Signature {
                type_signature: TypeSignature::Uniform(2, valid_types),
                volatility: Volatility::Immutable
            } if valid_types == ConcreteDataType::numerics()
        ));

        // Values 1, 3, 6 sampled at timestamps 0, 1, 2 give rates 2 and 3.
        let columns: Vec<VectorRef> = vec![
            Arc::new(Float32Vector::from_vec(vec![1.0, 3.0, 6.0])),
            Arc::new(Int64Vector::from_vec(vec![0, 1, 2])),
        ];
        let actual = func.eval(FunctionContext::default(), &columns).unwrap();
        let expected: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
        assert_eq!(expected, actual);
    }
}

View File

@@ -13,7 +13,6 @@
// limitations under the License.
mod clip;
#[allow(unused)]
mod interp;
use std::sync::Arc;

View File

@@ -15,14 +15,15 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::{Scalar, VectorRef};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::ArrowPrimitiveType;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::*;
use datatypes::vectors::PrimitiveVector;
use paste::paste;
use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};
@@ -34,25 +35,32 @@ macro_rules! define_eval {
($O: ident) => {
paste! {
fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
with_match_primitive_type_id!(columns[2].data_type().logical_type_id(), |$R| {
// clip(a, min, max) is equals to min(max(a, min), max)
let col: VectorRef = Arc::new(scalar_binary_op::<$S, $T, $O, _>(&columns[0], &columns[1], scalar_max, &mut EvalContext::default())?);
let col = scalar_binary_op::<$O, $R, $O, _>(&col, &columns[2], scalar_min, &mut EvalContext::default())?;
Ok(Arc::new(col))
}, {
unreachable!()
})
}, {
unreachable!()
})
}, {
unreachable!()
})
fn cast_vector(input: &VectorRef) -> VectorRef {
Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
).unwrap()) as _
}
let operator_1 = cast_vector(&columns[0]);
let operator_2 = cast_vector(&columns[1]);
let operator_3 = cast_vector(&columns[2]);
// clip(a, min, max) is equal to min(max(a, min), max)
let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
&operator_1,
&operator_2,
scalar_max,
&mut EvalContext::default(),
)?);
let col = scalar_binary_op::<$O, $O, $O, _>(
&col,
&operator_3,
scalar_min,
&mut EvalContext::default(),
)?;
Ok(Arc::new(col))
}
}
}
};
}
define_eval!(i64);
@@ -108,27 +116,23 @@ pub fn max<T: PartialOrd>(input: T, max: T) -> T {
}
#[inline]
fn scalar_min<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
S: AsPrimitive<O>,
T: AsPrimitive<O>,
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(min(left.as_(), right.as_())),
(Some(left), Some(right)) => Some(min(left, right)),
_ => None,
}
}
#[inline]
fn scalar_max<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
S: AsPrimitive<O>,
T: AsPrimitive<O>,
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(max(left.as_(), right.as_())),
(Some(left), Some(right)) => Some(max(left, right)),
_ => None,
}
}
@@ -143,11 +147,15 @@ impl fmt::Display for ClipFunction {
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, Float32Vector, Int32Vector, UInt32Vector};
use datatypes::vectors::{
ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
UInt32Vector, UInt8Vector,
};
use super::*;
#[test]
fn test_clip_function() {
fn test_clip_signature() {
let clip = ClipFunction::default();
assert_eq!("clip", clip.name());
@@ -190,16 +198,21 @@ mod tests {
volatility: Volatility::Immutable
} if valid_types == ConcreteDataType::numerics()
));
}
#[test]
fn test_clip_fn_signed() {
let clip = ClipFunction::default();
// eval with signed integers
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![3])),
Arc::new(Int8Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![6])),
Arc::new(Int16Vector::from_vec(vec![6])),
10,
)),
];
@@ -217,16 +230,21 @@ mod tests {
assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
}
}
}
#[test]
fn test_clip_fn_unsigned() {
let clip = ClipFunction::default();
// eval with unsigned integers
let args: Vec<VectorRef> = vec![
Arc::new(UInt32Vector::from_values(0..10)),
Arc::new(UInt8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![6])),
Arc::new(UInt16Vector::from_vec(vec![6])),
10,
)),
];
@@ -244,12 +262,17 @@ mod tests {
assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
}
}
}
#[test]
fn test_clip_fn_float() {
let clip = ClipFunction::default();
// eval with floats
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_values(0..10)),
Arc::new(Int8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![3])),
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(

View File

@@ -14,41 +14,18 @@
use std::sync::Arc;
use datatypes::arrow::array::PrimitiveArray;
use datatypes::arrow::compute::cast::primitive_to_primitive;
use datatypes::arrow::datatypes::DataType::Float64;
use common_query::error::{self, Result};
use datatypes::arrow::compute::cast;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ScalarVector;
use datatypes::type_id::LogicalTypeId;
use datatypes::value::Value;
use datatypes::vectors::{Float64Vector, PrimitiveVector, Vector, VectorRef};
use datatypes::{arrow, with_match_primitive_type_id};
use snafu::{ensure, Snafu};
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display(
"The length of the args is not enough, expect at least: {}, have: {}",
expect,
actual,
))]
ArgsLenNotEnough { expect: usize, actual: usize },
#[snafu(display("The sample {} is empty", name))]
SampleEmpty { name: String },
#[snafu(display(
"The length of the len1: {} don't match the length of the len2: {}",
len1,
len2,
))]
LenNotEquals { len1: usize, len2: usize },
}
pub type Result<T> = std::result::Result<T, Error>;
use datatypes::vectors::{Float64Vector, Vector, VectorRef};
use datatypes::with_match_primitive_type_id;
use snafu::{ensure, ResultExt};
/* Search for the largest number in xp that is smaller than x. */
fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize {
fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
for i in 0..xp.len() {
if x < xp.get(i) {
return i - 1;
@@ -58,7 +35,7 @@ fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize
}
/* Search for the largest number in xp that is smaller than x. */
fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usize {
fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
let mut left = 0;
let mut right = xp.len();
/* If len <= 4 use linear search. */
@@ -77,27 +54,33 @@ fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usiz
left - 1
}
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<PrimitiveVector<f64>> {
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
let tmp = arg.to_arrow_array();
let from = tmp.as_any().downcast_ref::<PrimitiveArray<$S>>().expect("cast failed");
let array = primitive_to_primitive(from, &Float64);
Ok(PrimitiveVector::new(array))
let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
typ: ArrowDataType::Float64,
})?;
// Safety: array has been cast to Float64Array.
Ok(Float64Vector::try_from_arrow_array(array).unwrap())
},{
unreachable!()
})
}
/// https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491
#[allow(unused)]
pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
let mut left = None;
let mut right = None;
ensure!(
args.len() >= 3,
ArgsLenNotEnoughSnafu {
expect: 3_usize,
actual: args.len()
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
3,
args.len()
),
}
);
@@ -109,9 +92,12 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
if args.len() > 3 {
ensure!(
args.len() == 5,
ArgsLenNotEnoughSnafu {
expect: 5_usize,
actual: args.len()
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
5,
args.len()
),
}
);
@@ -123,14 +109,32 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
.get_data(0);
}
ensure!(x.len() != 0, SampleEmptySnafu { name: "x" });
ensure!(xp.len() != 0, SampleEmptySnafu { name: "xp" });
ensure!(fp.len() != 0, SampleEmptySnafu { name: "fp" });
ensure!(
x.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample x is empty",
}
);
ensure!(
xp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample xp is empty",
}
);
ensure!(
fp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample fp is empty",
}
);
ensure!(
xp.len() == fp.len(),
LenNotEqualsSnafu {
len1: xp.len(),
len2: fp.len(),
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the len1: {} don't match the length of the len2: {}",
xp.len(),
fp.len()
),
}
);
@@ -147,7 +151,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
let res;
if xp.len() == 1 {
res = x
let datas = x
.iter_data()
.map(|x| {
if Value::from(x) < xp.get(0) {
@@ -158,7 +162,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
fp.get_data(0)
}
})
.collect::<Float64Vector>();
.collect::<Vec<_>>();
res = Float64Vector::from(datas);
} else {
let mut j = 0;
/* only pre-calculate slopes if there are relatively few of them. */
@@ -185,7 +190,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
}
slopes = Some(slopes_tmp);
}
res = x
let datas = x
.iter_data()
.map(|x| match x {
Some(xi) => {
@@ -248,7 +253,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
}
_ => None,
})
.collect::<Float64Vector>();
.collect::<Vec<_>>();
res = Float64Vector::from(datas);
}
Ok(Arc::new(res) as _)
}
@@ -257,8 +263,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
mod tests {
use std::sync::Arc;
use datatypes::prelude::ScalarVectorBuilder;
use datatypes::vectors::{Int32Vector, Int64Vector, PrimitiveVectorBuilder};
use datatypes::vectors::{Int32Vector, Int64Vector};
use super::*;
#[test]
@@ -338,15 +343,11 @@ mod tests {
Arc::new(Int64Vector::from_vec(fp.clone())),
];
let vector = interp(&args).unwrap();
assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
assert!(matches!(vector.get(0), Value::Float64(v) if v == x[0]));
// x=None output:Null
let input = [None, Some(0.0), Some(0.3)];
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
for v in input {
builder.push(v);
}
let x = builder.finish();
let input = vec![None, Some(0.0), Some(0.3)];
let x = Float64Vector::from(input);
let args: Vec<VectorRef> = vec![
Arc::new(x),
Arc::new(Int64Vector::from_vec(xp)),

View File

@@ -15,11 +15,11 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};

View File

@@ -17,16 +17,17 @@
use std::fmt;
use std::sync::Arc;
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::error::{
ArrowComputeSnafu, IntoVectorSnafu, Result, TypeCastSnafu, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute::arithmetics;
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::arrow::scalar::PrimitiveScalar;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::{DataType as ArrowDatatype, Int64Type};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{TimestampVector, VectorRef};
use datatypes::vectors::{TimestampMillisecondVector, VectorRef};
use snafu::ResultExt;
use crate::error::Result;
use crate::scalars::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
@@ -40,7 +41,7 @@ impl Function for FromUnixtimeFunction {
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::timestamp_millis_datatype())
Ok(ConcreteDataType::timestamp_millisecond_datatype())
}
fn signature(&self) -> Signature {
@@ -56,14 +57,18 @@ impl Function for FromUnixtimeFunction {
ConcreteDataType::Int64(_) => {
let array = columns[0].to_arrow_array();
// Our timestamp vector's time unit is millisecond
let array = arithmetics::mul_scalar(
&*array,
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
);
let array = compute::multiply_scalar_dyn::<Int64Type>(&array, 1000i64)
.context(ArrowComputeSnafu)?;
let arrow_datatype = &self.return_type(&[]).unwrap().as_arrow_type();
Ok(Arc::new(
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
data_type: ArrowDatatype::Int64,
TimestampMillisecondVector::try_from_arrow_array(
compute::cast(&array, arrow_datatype).context(TypeCastSnafu {
typ: ArrowDatatype::Int64,
})?,
)
.context(IntoVectorSnafu {
data_type: arrow_datatype.clone(),
})?,
))
}
@@ -71,8 +76,7 @@ impl Function for FromUnixtimeFunction {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail()
.map_err(|e| e.into()),
.fail(),
}
}
}
@@ -96,7 +100,7 @@ mod tests {
let f = FromUnixtimeFunction::default();
assert_eq!("from_unixtime", f.name());
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
f.return_type(&[]).unwrap()
);

View File

@@ -19,7 +19,8 @@ use common_query::prelude::{
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
};
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::{ConcreteDataType, VectorHelper};
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use snafu::ResultExt;
use crate::scalars::function::{FunctionContext, FunctionRef};
@@ -47,7 +48,7 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
let args: Result<Vec<_>, DataTypeError> = args
.iter()
.map(|arg| match arg {
ColumnarValue::Scalar(v) => VectorHelper::try_from_scalar_value(v.clone(), rows),
ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
ColumnarValue::Vector(v) => Ok(v.clone()),
})
.collect();
@@ -126,12 +127,7 @@ mod tests {
assert_eq!(4, vec.len());
for i in 0..4 {
assert_eq!(
i == 0 || i == 3,
vec.get_data(i).unwrap(),
"failed at {}",
i
)
assert_eq!(i == 0 || i == 3, vec.get_data(i).unwrap(), "Failed at {i}",)
}
}
_ => unreachable!(),

View File

@@ -1,12 +1,12 @@
[package]
name = "common-grpc-expr"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
api = { path = "../../api" }
async-trait = "0.1"
async-trait.workspace = true
common-base = { path = "../base" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }

Some files were not shown because too many files have changed in this diff Show More