Compare commits


112 Commits

Author SHA1 Message Date
Lei, HUANG
2ed98ff558 fix: some cr comments 2024-02-20 14:10:57 +08:00
Lei, HUANG
b46386d52a feat: data buffer and related structs 2024-02-19 22:57:25 +08:00
Yingwen
43fd87e051 feat: Defines structs in the merge tree memtable (#3326)
* chore: define mods

* feat: memtable struct

* feat: define structs inside the tree
2024-02-19 11:43:19 +00:00
Zhenchi
40f43de27d fix(index): encode string type to original data to enable fst regex to work (#3324)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-19 10:52:19 +00:00
Zhenchi
4810c91a64 refactor(index): move option segment_row_count from WriteOptions to IndexOptions (#3307)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-19 08:03:41 +00:00
JeremyHi
6668d6b042 fix: split write metadata request (#3311)
* feat: add txn_helper

* fix: split the create metadata requests to avoid exceeding the txn limit

* fix: add license header

* chore: some improve
2024-02-19 07:33:09 +00:00
JeremyHi
aa569f7d6b feat: batch get physical table routes (#3319)
* feat: batch get physical table routes

* chore: by comment
2024-02-19 06:51:34 +00:00
dennis zhuang
8b73067815 feat: impl partitions and region_peers information schema (#3278)
* feat: impl partitions table

* fix: typo

* feat: impl region_peers information schema

* chore: rename region_peers to greptime_region_peers

* chore: rename statuses to upper case

* fix: comments

* chore: update partition result

* chore: remove redundant checking

* refactor: replace 42 with constant

* feat: fetch region routes in batch
2024-02-19 06:47:14 +00:00
liyang
1851c20c13 ci: add build artifacts needs in notification (#3320) 2024-02-19 06:42:09 +00:00
tison
29f11d7b7e ci: upgrade actions to avoid node16 deprecation warning (#3317)
* ci: upgrade actions to avoid node16 deprecantion warning

Signed-off-by: tison <wander4096@gmail.com>

* try delete size label action

Signed-off-by: tison <wander4096@gmail.com>

* one more action

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 15:20:26 +00:00
Ruihang Xia
72cd443ba3 feat: organize tracing on query path (#3310)
* feat: organize tracing on query path

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* warp json conversion to TracingContext's methods

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unnecessary .trace()

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/query/src/dist_plan/merge_scan.rs

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-18 15:04:57 +00:00
dependabot[bot]
df6260d525 fix: bump libgit2-sys from 0.16.1+1.7.1 to 0.16.2+1.7.2 (#3316)
build(deps): bump libgit2-sys from 0.16.1+1.7.1 to 0.16.2+1.7.2

Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.16.1+1.7.1 to 0.16.2+1.7.2.
- [Changelog](https://github.com/rust-lang/git2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/git2-rs/commits)

---
updated-dependencies:
- dependency-name: libgit2-sys
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-18 13:25:00 +00:00
Ruihang Xia
94fd51c263 ci: run CI jobs in draft PR (#3314)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: tison <wander4096@gmail.com>
2024-02-18 13:14:57 +00:00
liyang
3bc0c4feda ci: add release result send to slack (#3312)
* feat: add notify release result to slack

* chore: change build image output result name

---------

Co-authored-by: tison <wander4096@gmail.com>
2024-02-18 13:13:04 +00:00
tison
2a26c01412 fix: commit_short sqlness test case (#3313)
* ci: debug sqlness jobs

Signed-off-by: tison <wander4096@gmail.com>

* fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* fixup! fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* fixup! fixup! fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* revert uploading

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 11:51:24 +00:00
tison
4e04a4e48f build: support build without git (#3309)
* build: support build without git

Signed-off-by: tison <wander4096@gmail.com>

* chore

Signed-off-by: tison <wander4096@gmail.com>

* address comment

Signed-off-by: tison <wander4096@gmail.com>

* fix syntax

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 10:30:01 +00:00
tison
b889d57b32 build(deps): Upgrade raft-engine dependency cascading (#3305)
* build(deps): Upgrade raft-engine dependency

Signed-off-by: tison <wander4096@gmail.com>

* genlock and upgrade toolchain

Signed-off-by: tison <wander4096@gmail.com>

* revert toolchain upgrade

Signed-off-by: tison <wander4096@gmail.com>

* Revert "revert toolchain upgrade"

This reverts commit 1c6dd9d7ba.

* toolchain backward and correct dep attr

Signed-off-by: tison <wander4096@gmail.com>

* clippy

Signed-off-by: tison <wander4096@gmail.com>

* revert all

Signed-off-by: tison <wander4096@gmail.com>

* redo

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 02:31:59 +00:00
Zhenchi
f9ce2708d3 feat(mito): add options to ignore building index for specific column ids (#3295)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-16 08:50:41 +00:00
Zhenchi
34050ea8b5 fix(index): sanitize S3 upload buffer size (#3300)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-16 06:45:31 +00:00
liyang
31ace9dd5c fix: $TARGET_BIN not found when docker run the image (#3297)
* fix: TARGET_BIN path

* chore: return available versions

* refactor: Use ENV instead of ARG in ci dockerfile

* chore: add TARGET_BIN ENV pass to ENTRYPOINT

* chore: add TARGET_BIN ENV pass to ENTRYPOINT

* chore: update entrypoint

* chore: update entrypoint
2024-02-15 11:33:05 +00:00
Cancai Cai
2a971b0fff chore: update link to official website link (#3299) 2024-02-13 13:32:46 +00:00
Ruihang Xia
2f98fa0d97 fix: correct the case sensitivity behavior for PromQL (#3296)
* fix: correct the case sensitivity behavior for PromQL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove debug code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* consolidate sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* drop table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-08 03:26:32 +00:00
Hudson C. Dalprá
6b4be3a1cc fix(util): join_path function should not trim leading / (#3280)
* fix(util): join_path function should not trim leading `/`

Signed-off-by: Hudson C. Dalpra <dalpra.hcd@gmail.com>

* fix(util): making required changes at join_path function

* fix(util): added unit tests to match function comments

---------

Signed-off-by: Hudson C. Dalpra <dalpra.hcd@gmail.com>
2024-02-07 10:05:04 +00:00
Zhenchi
141ed51dcc feat(mito): adjust seg size of inverted index to finer granularity instead of row group level (#3289)
* feat(mito): adjust seg size of inverted index to finer granularity instead of row group level

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: wrong metric

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: more suitable name

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: BitVec instead

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-07 08:20:00 +00:00
dennis zhuang
e5ec65988b feat: administration functions (#3236)
* feat: adds database() function to return current db

* refactor: refactor meta src and client with new protos

* feat: impl migrate_region and query_procedure_state for procedure service/client

* fix: format

* temp commit

* feat: impl migrate_region SQL function

* chore: clean code for review

* fix: license header

* fix: toml format

* chore: update proto dependency

* chore: apply suggestion

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: apply suggestion

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: apply suggestion

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* chore: apply suggestion

Co-authored-by: fys <40801205+fengys1996@users.noreply.github.com>

* chore: print key when parsing procedure id fails

* chore: comment

* chore: comment for MigrateRegionFunction

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: JeremyHi <jiachun_feng@proton.me>
Co-authored-by: fys <40801205+fengys1996@users.noreply.github.com>
2024-02-07 01:12:32 +00:00
Zhenchi
dbf62f3273 chore(index): add BiError to fulfil the requirement of returning two errors (#3291)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-06 16:03:03 +00:00
Ruihang Xia
e4cd294ac0 feat: put all filter exprs in a filter plan separately (#3288)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-06 09:32:26 +00:00
fys
74bfb09195 feat: support cache for batch_get in CachedMetaKvBackend (#3277)
* feat: support cache for batch_get in CachedMetaKvBackend.

* add doc of CachedMetaKvBackend

* fix: cr

* fix: correct some words

* fix cr
2024-02-06 03:15:03 +00:00
shuiyisong
4cbdf64d52 chore: start plugins during standalone startup & comply with current catalog while changing database (#3282)
* chore: start plugins in standalone

* chore: respect current catalog in use statement for mysql

* chore: reduce unnecessory convert to string

* chore: reduce duplicate code
2024-02-06 02:41:37 +00:00
Weny Xu
96f32a166a chore: share cache corss jobs (#3284) 2024-02-05 09:30:22 +00:00
Weny Xu
770da02810 fix: fix incorrect StatusCode parsing (#3281)
* fix: fix incorrect StatusCode parsing

* chore: apply suggestions from CR
2024-02-05 08:06:43 +00:00
discord9
c62c67cf18 feat: Basic Definitions for Expression&Functions for Dataflow (#3267)
* added expression&func

* fix: EvalError derive&imports

* chore: add header

* feat: variadic func

* chore: minor adjust

* feat: accum

* feat: use accum for eval func

* feat: montonic min/max as accumulative

* feat: support min/max Date&DateTime

* chore: fix compile error&add test(WIP)

* test: sum, count, min, max

* feat: remove trait impl for EvalError

* chore: remove all impl retain only type definitions

* refactor: nest datatypes errors

* fix: remove `.build()`

* fix: not derive Clone

* docs: add comment for types

* feat: more func&remove `CurrentDatabase`
2024-02-05 07:49:34 +00:00
Ruihang Xia
51feec2579 feat: use simple filter to prune memtable (#3269)
* switch on clippy warnings

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: use simple filter to prune memtable

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deadcode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refine util function

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-04 11:35:55 +00:00
LFC
902570abf6 ci: fix nightly build (#3276)
* ci: fix nightly build

* ci: fix nightly build
2024-02-03 03:20:47 +00:00
shuiyisong
6ab3a88042 fix: use fe_opts after setup_frontend_plugins in standalone (#3275)
* chore: modify standalone startup opts

* chore: move frontend and datanode options
2024-02-02 10:36:08 +00:00
discord9
e89f5dc908 feat: support fraction part in timestamp (#3272)
* feat: support fraction part in timestamp

* test: with timezone
2024-02-01 08:51:26 +00:00
LFC
e375060b73 refactor: add same SST files (#3270)
* Make adding same SST file multiple times possible, instead of panic there.

* Update src/mito2/src/sst/version.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-31 07:21:30 +00:00
LFC
50d16d6330 ci: build GreptimeDB binary for later use (#3244)
* ci: build GreptimeDB binaries for later use

* debug CI

* try larger runner host

* Revert "try larger runner host"

This reverts commit 03c18c0f51.

* fix: resolve PR comments

* revert some unrelated action yamls

* fix CI

* use artifact upload v4 for faster upload and download speed
2024-01-31 03:02:27 +00:00
tison
60e760b168 fix: cli export database default value (#3259)
Signed-off-by: tison <wander4096@gmail.com>
2024-01-30 09:56:05 +00:00
dennis zhuang
43ef0820c0 fix: SQL insertion and column default constraint aware of timezone (#3266)
* fix: missing timezone when parsing sql value to greptimedb value

* test: sql_value_to_value

* fix: column default constraint missing timezone

* test: column def default constraint  with timezone

* test: adds sqlness test for default constraint aware of timezone

* fix: typo

* chore: comment
2024-01-30 09:15:38 +00:00
Ruihang Xia
e0e635105e feat: initial configuration for grafana dashboard (#3263)
* feat: initial configuration for grafana dashboard

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* lift up dir

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update readme

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add paths to CI config

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak config details

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add OpenDAL traffic panel

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove sync count

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update grafana/README.md

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-30 09:11:50 +00:00
discord9
0a9361a63c feat: basic types for Dataflow Framework (#3186)
* feat: basic types for Dataflow Framework

* docs: license header

* chore: add name in todo, remove deprecated code

* chore: typo

* test: linear&repr unit test

* refactor: avoid mod.rs

* feat: Relation Type

* feat: unmat funcs

* feat: simple temporal filter(sliding window)

* refactor: impl Snafu for EvalError

* feat: cast as type

* feat: temporal filter

* feat: time index in RelationType

* refactor: move EvalError to expr

* refactor: error handling for func

* chore: fmt&comment

* make EvalError pub again

* refactor: move ScalarExpr to scalar.rs

* refactor: remove mod.rs for relation

* chore: slim down PR size

* chore: license header

* chore: per review

* chore: more fix per review

* chore: even more fix per review

* chore: fmt

* chore: fmt

* feat: impl From/Into ProtoRow instead

* chore: use cast not cast_with_opt&`Key` struct

* chore: new_unchecked

* feat: `Key::subset_of` method

* chore: toml in order
2024-01-30 07:48:22 +00:00
Weny Xu
ddbd0abe3b fix(Copy From): fix incorrect type casts (#3264)
* refactor: refactor RecordBatchStreamTypeAdapter

* fix(Copy From): fix incorrect type casts

* fix: unit tests

* chore: add comment
2024-01-30 07:16:36 +00:00
Ruihang Xia
a079955d38 chore: adjust storage engine related metrics (#3261)
* chore: adjust metrics to metric engine and mito engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust more mito bucket

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-30 06:43:03 +00:00
JeremyHi
e5a2b0463a feat: Only allow inserts and deletes operations to be executed in parallel (#3257)
* feat: Only allow inserts and deletes operations to be executed in parallel.

* feat: add comment
2024-01-29 11:27:06 +00:00
Weny Xu
691b649f67 chore: switch to free machine (#3256)
* chore: switch to free machine

* chore: switch sqlness runner to 4core
2024-01-29 08:35:12 +00:00
Ruihang Xia
9a28a1eb5e fix: decouple columns in projection and prune (#3253)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-29 08:29:21 +00:00
LFC
fc25b7c4ff build: make binary name a Dockerfile "ARG" (#3254)
* build: make binary name a Dockerfile "ARG"

* Update Dockerfile
2024-01-29 08:00:40 +00:00
Ning Sun
d43c638515 ci: merge doc label actor and checker tasks (#3252) 2024-01-29 07:09:11 +00:00
shuiyisong
91c8c62d6f chore: adjust MySQL connection log (#3251)
* chore: adjust mysql connection log level

* chore: adjust import
2024-01-29 06:53:50 +00:00
JeremyHi
3201aea360 feat: create tables in batch on prom write (#3246)
* feat: create tables in batch on prom write

* feat: add logic table ids to log

* fix: miss tabble ids in response
2024-01-26 12:47:24 +00:00
Ruihang Xia
7da8f22cda fix: IntermediateWriter closes underlying writer twice (#3248)
* fix: IntermediateWriter closes underlying writer twice

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* close writer manually on error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 10:03:50 +00:00
Ning Sun
14b233c486 test: align chrono and some forked test deps to upstream (#3245)
* test: update chrono and its tests

* chore: switch some deps to upstream version

* test: update timestamp range in sqlness tests
2024-01-26 07:39:51 +00:00
discord9
d5648c18c1 docs: RFC of Dataflow Framework (#3185)
* docs: RFC of Dataflow Framework

* docs: middle layer&metadata store

* chore: fix typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* docs: add figure

* chore: use mermaid instead

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 07:13:28 +00:00
Ruihang Xia
b0c3be35fb feat: don't map semantic type in metric engine (#3243)
* feat: don't map semantic type in metric engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove duplicate set_null

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 03:50:05 +00:00
Ruihang Xia
5617b284c5 feat: return request outdated error on handling alter (#3239)
* feat: return request outdated error on handling alter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tonic code mapping

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy, add comment

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix deadloop

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update UT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Update log message

* Update src/common/meta/src/ddl/alter_table.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-01-26 03:37:46 +00:00
Weny Xu
f99b08796d feat: add insert/select generator & translator (#3240)
* feat: add insert into expr generator & translator

* feat: add select expr generator & translator

* chore: apply suggestions from CR

* fix: fix unit tests
2024-01-26 02:59:17 +00:00
JeremyHi
1fab7ab75a feat: batch create ddl (#3194)
* feat: batch ddl to region request

* feat: return table ids

chore: by comment

chore: remove wal_options

chore: create logical tables lock key

feat: get metadata in procedure

* chore: by comment
2024-01-26 02:43:57 +00:00
Yingwen
3fa070a0cc fix: init parquet reader metrics twice (#3242) 2024-01-26 01:54:51 +00:00
Weny Xu
8bade8f8e4 fix: fix create table ddl return incorrect table id (#3232)
* fix: fix create table ddl return incorrect table id

* refactor: refactor param of Status::done_with_output
2024-01-25 13:58:43 +00:00
Wei
6c2f0c9f53 feat: read metadata from write cache (#3224)
* feat: read meta from write cache

* test: add case

* chore: cr comment

* chore: clippy

* chore: code style

* feat: put metadata to sst cache
2024-01-25 11:39:41 +00:00
LFC
2d57bf0d2a ci: adding DOCKER_BUILD_ROOT docker arg for dev build (#3241)
Update Dockerfile
2024-01-25 09:05:27 +00:00
Weny Xu
673a4bd4ef feat: add pg create alter table expr translator (#3206)
* feat: add pg create table expr translator

* feat: add pg alter table expr translator

* refactor: refactor MappedGenerator

* chore: apply suggestions from CR
2024-01-25 08:00:42 +00:00
LFC
fca44098dc ci: make git's "safe.directory" accept all (#3234)
* Update Dockerfile

* Update Dockerfile

* Update Dockerfile
2024-01-25 07:32:03 +00:00
Ning Sun
1bc4f25de2 feat: http sql api return schema on empty resultset (#3237)
* feat: return schema on empty resultset

* refactor: make schema a required field in http output

* test: update integration test and add schema output
2024-01-25 06:44:28 +00:00
Ning Sun
814924f0b6 fix: security update for shlex and h2 (#3227) 2024-01-24 13:31:34 +00:00
ZonaHe
b0a8046179 feat: update dashboard to v0.4.7 (#3229) 2024-01-24 08:50:21 +00:00
dennis zhuang
7323e9b36f feat: change Range Query’s default align behavior aware of timezone (#3219)
* feat: change Range Query’s default align behavior to 1970-01-01 00:00:00 aware of timezone

* test: test with +23:00 timezone
2024-01-24 08:17:57 +00:00
Weny Xu
f82ddc9491 fix: fix MockInstance rebuild issue (#3218)
* fix: fix MockInstance rebuild issue

* chore: apply suggestions from CR
2024-01-24 07:52:47 +00:00
Ning Sun
1711ad4631 feat: add Arrow IPC output format for http rest api (#3177)
* feat: add arrow format output for sql api

* refactor: remove unwraps

* test: add test for arrow format

* chore: update cargo toml format

* fix: resolve lint warrnings

* fix: ensure outputs size is one
2024-01-24 06:10:05 +00:00
LFC
f81e37f508 refactor: make http server built flexibly (#3225)
* refactor: make http server built flexibly

* Apply suggestions from code review

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix: resolve PR comments

* Fix CI.

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-24 03:45:08 +00:00
Weny Xu
d75cf86467 fix: only register region keeper while creating physical table (#3223)
fix: only register region keeper during create physical table
2024-01-23 09:42:32 +00:00
Weny Xu
26535f577d feat: enable concurrent write (#3214)
* feat: enable concurrent write

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-23 09:20:12 +00:00
Ruihang Xia
8485c9af33 feat: read column and region info from state cache (#3222)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-23 09:10:27 +00:00
Weny Xu
19413eb345 refactor!: rename initialize_region_in_background to init_regions_in_background (#3216)
refactor!: change initialize_region_in_background to init_regions_in_background
2024-01-23 04:33:48 +00:00
Weny Xu
007b63dd9d fix: fix default value cannot accept negative number (#3217)
* fix: fix default value cannot accept negative number

* chore: apply suggestions from CR
2024-01-23 03:33:13 +00:00
Weny Xu
364754afa2 feat: add create alter table expr translator (#3203)
* feat: add create table expr translator

* feat: add alter table expr translator

* refactor: expose mod

* refactor: expr generator

* chore: ignore typos check for lorem_words

* feat: add string map helper functions

* chore: remove unit tests
2024-01-23 02:53:42 +00:00
Ruihang Xia
31787f4bfd feat!: switch prom remote write to metric engine (#3198)
* feat: switch prom remote write to metric engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* read physical table name from url

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove physical table from header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix merge error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add with_metric_engine option to config remote write behavior

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* check parameter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add specific config param

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* default with_metric_engine to true

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update UT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-22 14:48:28 +00:00
dennis zhuang
6a12c27e78 feat: make query be aware of timezone setting (#3175)
* feat: let TypeConversionRule aware query context timezone setting

* chore: don't optimize explain command

* feat: parse string into timestamp with timezone

* fix: compile error

* chore: check the scalar value type in predicate

* chore: remove mut for engine context

* chore: return none if the scalar value is utf8 in time range predicate

* fix: some fixme

* feat: let Date and DateTime parsing from string value be aware of timezone

* chore: tweak

* test: add datetime from_str test with timezone

* feat: construct function context from query context

* test: add timezone test for to_unixtime and date_format function

* fix: typo

* chore: apply suggestion

* test: adds string with timezone

* chore: apply CR suggestion

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* chore: apply suggestion

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-01-22 14:14:03 +00:00
shuiyisong
2bf4b08a6b chore: change default factor to compute memory size (#3211)
* chore: change default factor to compute memory size

* chore: update doc

* chore: update comment in example config

* chore: extract factor to const and update comments

* chore: update comment by cr suggestion

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-22 09:03:29 +00:00
Ruihang Xia
8cc7129397 fix: remove __name__ matcher from processed matcher list (#3213)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-22 08:50:28 +00:00
Ruihang Xia
278e4c8c30 feat: lazy initialize vector builder on write (#3210)
* feat: lazy initialize vector builder on write

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* avoid using ConstantVector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* simplify expression

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/metric-engine/src/engine/create.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-01-22 07:00:04 +00:00
Weny Xu
de13de1454 feat: introduce information schema provider cache (#3208)
fix: introduce information schema provider cache
2024-01-22 06:41:39 +00:00
Lei, HUANG
3834ea7422 feat: copy database from (#3164)
* wip: impl COPY DATABASE FROM parser

* wip: impl copy database from

* wip: add some ut

* wip: add continue_on_error option

* test: add sqlness cases for copy database

* fix: trailing newline

* fix: typo

* fix: some cr comments

* chore: resolve confilicts

* fix: some cr comments
2024-01-22 06:33:54 +00:00
Weny Xu
966875ee11 chore: bump opendal to v0.44.2 (#3209) 2024-01-21 11:47:18 +00:00
Wei
e5a8831fa0 refactor: read parquet metadata (#3199)
* feat: MetadataLoader

* refactor code

* chore: clippy

* chore: cr comment

* chore: add TODO

* chore: cr comment

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: clippy

---------

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-21 07:21:29 +00:00
Weny Xu
4278c858f3 feat: make procedure able to return output (#3201)
* feat: make procedure able to return output

* refactor: change Output to Any
2024-01-21 06:56:45 +00:00
Weny Xu
986f3bb07d refactor: reduce number of parquet metadata reads and enable reader buffer (#3197)
refactor: reduce reading parquet metadata times and enable read buffer
2024-01-19 12:26:38 +00:00
Weny Xu
440cd00ad0 feat(tests-fuzz): add CreateTableExprGenerator & AlterTableExprGenerator (#3182)
* feat(tests-fuzz): add CreateTableExprGenerator

* refactor: move Column to root of ir mod

* feat: add AlterTableExprGenerator

* feat: add Serialize and Deserialize derive

* chore: refactor the AlterExprGenerator
2024-01-19 09:39:28 +00:00
dennis zhuang
5e89472b2e feat: adds parse options for SQL parser (#3193)
* feat: adds parse options for parser and timezone to scan request

* chore: remove timezone in ScanRequest

* feat: remove timezone in parse options and adds type checking to parititon columns

* fix: comment

* chore: apply suggestions

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: format

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-19 09:16:36 +00:00
Yiran
632edd05e5 docs: update SDK links in README.md (#3156)
* docs: update SDK links in README.md

* chore: typo
2024-01-19 08:54:43 +00:00
Zhenchi
2e4c48ae7a fix(index): S3 EntityTooSmall error (#3192)
* fix(index): S3 `EntityTooSmall` error

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: config api

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-19 02:57:07 +00:00
Ruihang Xia
cde5a36f5e feat: precise filter for mito parquet reader (#3178)
* impl SimpleFilterEvaluator

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* time index and field filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finish parquet filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove empty Batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix fmt

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update metric

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use projected schema from batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* correct naming

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unnecessary error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-18 06:59:48 +00:00
niebayes
63205907fb refactor: introduce common-wal to aggregate wal stuff (#3171)
* refactor: aggregate wal configs

* refactor: move wal options to common-wal

* chore: slim Cargo.toml

* fix: add missing crates

* fix: format

* chore: update comments

* chore: add testing feature gate for test_util

* fix: apply suggestions from code review

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix: apply suggestions from code review

* fix: compiling

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-18 03:49:37 +00:00
Wei
3d7d2fdb4a feat: auto config cache size according to memory size (#3165)
* feat: auto config cache and buffer size according to mem size

* feat: utils

* refactor: add util function to common config

* refactor: check cgroups

* refactor: code

* fix: test

* fix: test

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Dennis Zhuang <killme2008@gmail.com>

* chore: remove default comment

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Dennis Zhuang <killme2008@gmail.com>
2024-01-17 14:35:35 +00:00
LFC
3cfd60e139 refactor: expose region edit in mito engine (#3179)
* refactor: expose region edit in mito engine

* feat: add a method for editing region directly

* fix: resolve PR comments

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: resolve PR comments

* fix: resolve PR comments

* fix: resolve PR comments

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-17 14:25:08 +00:00
shuiyisong
a29b9f71be chore: carry metrics in flight metadata from datanode to frontend (#3113)
* chore: carry metrics in flight metadata from datanode to frontend

* chore: fix typo

* fix: ignore metric flight message on client

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: add cr comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: add cr comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update proto

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-17 11:38:03 +00:00
shuiyisong
ae160c2def fix: change back GREPTIME_DB_HEADER_NAME header key name (#3184)
fix: change back dbname header key
2024-01-17 09:30:32 +00:00
Ruihang Xia
fbd0197794 refactor: remove TableEngine (#3181)
* refactor: remove TableEngine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/table/src/table_reference.rs

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-01-17 04:18:37 +00:00
dennis zhuang
204b9433b8 feat: adds date_format function (#3167)
* feat: adds date_format function

* fix: compile error

* chore: use system timezone for FunctionContext and EvalContext

* test: as_formatted_string

* test: sqlness test

* chore: rename function
2024-01-17 03:24:40 +00:00
niebayes
d020a3db23 chore: expose promql test to distributed instance (#3176) 2024-01-17 02:44:55 +00:00
JeremyHi
c6c4ea5e64 feat: tables stream with CatalogManager (#3180)
* feat: add tables for CatalogManager

* feat: replace table with tables

* Update src/catalog/src/information_schema/columns.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/columns.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/tables.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/tables.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* feat: tables for MemoryCatalogManager

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-01-17 02:31:15 +00:00
Weny Xu
7a1b856dfb feat: add tests-fuzz crate (#3173) 2024-01-16 09:02:09 +00:00
JeremyHi
c2edaffa5c feat: let tables API return a stream (#3170) 2024-01-15 12:36:39 +00:00
tison
189df91882 docs: Update README.md (#3168)
* docs: Update README.md

Complying with ASF policy we should refer to Apache projects in their full form in the first and most prominent usage.

* Update README.md

* Update README.md
2024-01-15 10:54:53 +00:00
tison
3ef86aac97 docs: add tracking issue for inverted-index RFC (#3169) 2024-01-15 10:54:35 +00:00
Wei
07de65d2ac test: engine with write cache (#3163)
* feat: write cache test for engine

* chore: unused

* chore: comment

* refactor: super to crate

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: clippy

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-15 10:02:53 +00:00
Ning Sun
1294d6f6e1 feat: upgrade pgwire to 0.19 (#3157)
* feat: upgrade pgwire to 0.19

* fix: update pgwire to 0.19.1
2024-01-15 09:14:08 +00:00
Zhenchi
6f07d69155 feat(mito): enable inverted index (#3158)
* feat(mito): enable inverted index

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* accidentally resolved the incorrect filtering issue within the Metric Engine

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/access_layer.rs

* Update src/mito2/src/test_util/scheduler_util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: format -> join_dir

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: move intermediate_manager from arg of write_and_upload_sst to field of WriteCache

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: add IndexerBuidler

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix clippy

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-15 09:08:07 +00:00
WU Jingdi
816d94892c feat: support HTTP&gRPC&pg set timezone (#3125)
* feat: support HTTP&gRPC&pg set timezone

* chore: fix code advice

* chore: fix code advice
2024-01-15 06:29:31 +00:00
LFC
93f28c2a37 refactor: make grpc service able to be added dynamically (#3160) 2024-01-15 04:33:27 +00:00
Eugene Tolbakov
ca4d690424 feat: add modulo function (#3147)
* feat: add modulo function

* fix: address CR feedback
2024-01-13 00:24:25 +00:00
559 changed files with 24854 additions and 5217 deletions

View File

@@ -53,7 +53,7 @@ runs:
         uses: docker/setup-buildx-action@v2
     - name: Download amd64 artifacts
-      uses: actions/download-artifact@v3
+      uses: actions/download-artifact@v4
       with:
         name: ${{ inputs.amd64-artifact-name }}
@@ -66,7 +66,7 @@ runs:
           mv ${{ inputs.amd64-artifact-name }} amd64
     - name: Download arm64 artifacts
-      uses: actions/download-artifact@v3
+      uses: actions/download-artifact@v4
       if: ${{ inputs.arm64-artifact-name }}
       with:
         name: ${{ inputs.arm64-artifact-name }}

View File

@@ -25,7 +25,7 @@ inputs:
 runs:
   using: composite
   steps:
-    - uses: arduino/setup-protoc@v1
+    - uses: arduino/setup-protoc@v3
     - name: Install rust toolchain
       uses: dtolnay/rust-toolchain@master
@@ -38,7 +38,7 @@ runs:
       uses: Swatinem/rust-cache@v2
     - name: Install Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.10'

View File

@@ -15,7 +15,7 @@ runs:
   # |- greptime-darwin-amd64-v0.5.0.sha256sum/greptime-darwin-amd64-v0.5.0.sha256sum
   # ...
   - name: Download artifacts
-    uses: actions/download-artifact@v3
+    uses: actions/download-artifact@v4
   - name: Create git tag for release
     if: ${{ github.event_name != 'push' }} # Meaning this is a scheduled or manual workflow.

View File

@@ -73,7 +73,7 @@ runs:
   using: composite
   steps:
     - name: Download artifacts
-      uses: actions/download-artifact@v3
+      uses: actions/download-artifact@v4
       with:
         path: ${{ inputs.artifacts-dir }}

View File

@@ -6,7 +6,7 @@ inputs:
     required: true
   target-file:
     description: The path of the target artifact
-    required: true
+    required: false
   version:
     description: Version of the artifact
     required: true
@@ -18,6 +18,7 @@ runs:
   using: composite
   steps:
     - name: Create artifacts directory
+      if: ${{ inputs.target-file != '' }}
      working-directory: ${{ inputs.working-dir }}
      shell: bash
      run: |
@@ -49,15 +50,15 @@ runs:
       run: Get-FileHash ${{ inputs.artifacts-dir }}.tar.gz -Algorithm SHA256 | select -ExpandProperty Hash > ${{ inputs.artifacts-dir }}.sha256sum
     # Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39).
-    # However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
+    # However, when we use 'actions/download-artifact' to download the artifacts, it will be automatically unzipped.
     - name: Upload artifacts
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: ${{ inputs.artifacts-dir }}
         path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.tar.gz
     - name: Upload checksum
-      uses: actions/upload-artifact@v3
+      uses: actions/upload-artifact@v4
       with:
         name: ${{ inputs.artifacts-dir }}.sha256sum
         path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum

View File

@@ -19,8 +19,8 @@ jobs:
   apidoc:
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master

View File

@@ -101,7 +101,7 @@ jobs:
       version: ${{ steps.create-version.outputs.version }}
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
@@ -155,12 +155,12 @@ jobs:
     runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
       - name: Checkout greptimedb
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: ${{ inputs.repository }}
           ref: ${{ inputs.commit }}
@@ -184,12 +184,12 @@ jobs:
     runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
       - name: Checkout greptimedb
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: ${{ inputs.repository }}
           ref: ${{ inputs.commit }}
@@ -216,7 +216,7 @@ jobs:
     outputs:
       build-result: ${{ steps.set-build-result.outputs.build-result }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
@@ -247,7 +247,7 @@ jobs:
     runs-on: ubuntu-20.04
     continue-on-error: true
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
@@ -281,7 +281,7 @@ jobs:
     ]
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
@@ -306,7 +306,7 @@ jobs:
     ]
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 0
@@ -330,14 +330,14 @@ jobs:
     env:
       SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
     steps:
-      - name: Notifiy nightly build successful result
+      - name: Notifiy dev build successful result
        uses: slackapi/slack-github-action@v1.23.0
        if: ${{ needs.release-images-to-dockerhub.outputs.build-result == 'success' }}
        with:
          payload: |
            {"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has completed successfully."}
-      - name: Notifiy nightly build failed result
+      - name: Notifiy dev build failed result
        uses: slackapi/slack-github-action@v1.23.0
        if: ${{ needs.release-images-to-dockerhub.outputs.build-result != 'success' }}
        with:

View File

@@ -9,6 +9,7 @@ on:
       - '.dockerignore'
       - 'docker/**'
       - '.gitignore'
+      - 'grafana/**'
   push:
     branches:
       - main
@@ -19,6 +20,7 @@ on:
       - '.dockerignore'
       - 'docker/**'
       - '.gitignore'
+      - 'grafana/**'
   workflow_dispatch:
 name: CI
@@ -35,20 +37,19 @@ jobs:
     name: Spell Check with Typos
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: crate-ci/typos@v1.13.10
   check:
     name: Check
-    if: github.event.pull_request.draft == false
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ windows-latest-8-cores, ubuntu-20.04 ]
+        os: [ windows-latest, ubuntu-20.04 ]
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
@@ -56,46 +57,86 @@ jobs:
           toolchain: ${{ env.RUST_TOOLCHAIN }}
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          # Shares with `Clippy` job
+          shared-key: "check-lint"
       - name: Run cargo check
         run: cargo check --locked --workspace --all-targets
   toml:
     name: Toml Check
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
           toolchain: stable
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          shared-key: "check-toml"
       - name: Install taplo
-        run: cargo +stable install taplo-cli --version ^0.8 --locked
+        run: cargo +stable install taplo-cli --version ^0.9 --locked
       - name: Run taplo
         run: taplo format --check
-  sqlness:
-    name: Sqlness Test
-    if: github.event.pull_request.draft == false
+  build:
+    name: Build GreptimeDB binaries
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ ubuntu-20.04-8-cores ]
+        os: [ ubuntu-20.04 ]
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - uses: dtolnay/rust-toolchain@master
         with:
           toolchain: ${{ env.RUST_TOOLCHAIN }}
-      - name: Rust Cache
-        uses: Swatinem/rust-cache@v2
+      - uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          shared-key: "build-binaries"
+      - name: Build greptime binaries
+        shell: bash
+        run: cargo build
+      - name: Pack greptime binaries
+        shell: bash
+        run: |
+          mkdir bins && \
+          mv ./target/debug/greptime bins && \
+          mv ./target/debug/sqlness-runner bins
+      - name: Print greptime binaries info
+        run: ls -lh bins
+      - name: Upload artifacts
+        uses: ./.github/actions/upload-artifacts
+        with:
+          artifacts-dir: bins
+          version: current
+  sqlness:
+    name: Sqlness Test
+    needs: build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ ubuntu-20.04 ]
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+      - name: Download pre-built binaries
+        uses: actions/download-artifact@v4
+        with:
+          name: bins
+          path: .
+      - name: Unzip binaries
+        run: tar -xvf ./bins.tar.gz
       - name: Run sqlness
-        run: cargo sqlness
+        run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins
+      # FIXME: Logs cannot be found on failure (or even success). Need to figure out the cause.
       - name: Upload sqlness logs
         if: always()
         uses: actions/upload-artifact@v3
@@ -106,27 +147,27 @@ jobs:
   sqlness-kafka-wal:
     name: Sqlness Test with Kafka Wal
-    if: github.event.pull_request.draft == false
+    needs: build
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ ubuntu-20.04-8-cores ]
+        os: [ ubuntu-20.04 ]
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-      - uses: dtolnay/rust-toolchain@master
-        with:
-          toolchain: ${{ env.RUST_TOOLCHAIN }}
-      - name: Rust Cache
-        uses: Swatinem/rust-cache@v2
+      - uses: actions/checkout@v4
+      - name: Download pre-built binaries
+        uses: actions/download-artifact@v4
+        with:
+          name: bins
+          path: .
+      - name: Unzip binaries
+        run: tar -xvf ./bins.tar.gz
       - name: Setup kafka server
         working-directory: tests-integration/fixtures/kafka
         run: docker compose -f docker-compose-standalone.yml up -d --wait
       - name: Run sqlness
-        run: cargo sqlness -w kafka -k 127.0.0.1:9092
+        run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins
+      # FIXME: Logs cannot be found on failure (or even success). Need to figure out the cause.
       - name: Upload sqlness logs
         if: always()
         uses: actions/upload-artifact@v3
@@ -137,12 +178,11 @@ jobs:
   fmt:
     name: Rustfmt
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
@@ -151,17 +191,19 @@ jobs:
           components: rustfmt
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          shared-key: "check-rust-fmt"
       - name: Run cargo fmt
         run: cargo fmt --all -- --check
   clippy:
     name: Clippy
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
@@ -170,6 +212,10 @@ jobs:
           components: clippy
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
+        with:
+          # Shares across multiple jobs
+          # Shares with `Check` job
+          shared-key: "check-lint"
       - name: Run cargo clippy
         run: cargo clippy --workspace --all-targets -- -D warnings
@@ -178,8 +224,8 @@ jobs:
     runs-on: ubuntu-20.04-8-cores
     timeout-minutes: 60
     steps:
-      - uses: actions/checkout@v3
-      - uses: arduino/setup-protoc@v1
+      - uses: actions/checkout@v4
+      - uses: arduino/setup-protoc@v3
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: KyleMayes/install-llvm-action@v1
@@ -192,12 +238,15 @@ jobs:
           components: llvm-tools-preview
       - name: Rust Cache
         uses: Swatinem/rust-cache@v2
+        with:
+          # Shares cross multiple jobs
+          shared-key: "coverage-test"
       - name: Install latest nextest release
         uses: taiki-e/install-action@nextest
       - name: Install cargo-llvm-cov
         uses: taiki-e/install-action@cargo-llvm-cov
       - name: Install Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.10'
       - name: Install PyArrow Package

View File

@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - name: create an issue in doc repo
-        uses: dacbd/create-issue-action@main
+        uses: dacbd/create-issue-action@v1.2.1
         with:
           owner: GreptimeTeam
           repo: docs
@@ -28,7 +28,7 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - name: create an issue in cloud repo
-        uses: dacbd/create-issue-action@main
+        uses: dacbd/create-issue-action@v1.2.1
         with:
           owner: GreptimeTeam
           repo: greptimedb-cloud

View File

@@ -12,14 +12,14 @@ jobs:
     if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
     runs-on: ubuntu-latest
     steps:
-      - uses: github/issue-labeler@v3.3
+      - uses: github/issue-labeler@v3.4
         with:
           configuration-path: .github/doc-label-config.yml
           enable-versioned-regex: false
           repo-token: ${{ secrets.GITHUB_TOKEN }}
           sync-labels: 1
       - name: create an issue in doc repo
-        uses: dacbd/create-issue-action@main
+        uses: dacbd/create-issue-action@v1.2.1
         if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
         with:
           owner: GreptimeTeam
@@ -29,3 +29,8 @@ jobs:
           body: |
             A document change request is generated from
             ${{ github.event.issue.html_url || github.event.pull_request.html_url }}
+      - name: Check doc labels
+        uses: docker://agilepathway/pull-request-label-checker:latest
+        with:
+          one_of: Doc update required,Doc not needed
+          repo_token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -9,6 +9,7 @@ on:
       - '.dockerignore'
       - 'docker/**'
       - '.gitignore'
+      - 'grafana/**'
   push:
     branches:
       - main
@@ -19,6 +20,7 @@ on:
       - '.dockerignore'
       - 'docker/**'
       - '.gitignore'
+      - 'grafana/**'
   workflow_dispatch:
 name: CI
@@ -31,39 +33,34 @@ jobs:
     name: Spell Check with Typos
     runs-on: ubuntu-20.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: crate-ci/typos@v1.13.10
   check:
     name: Check
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     steps:
       - run: 'echo "No action required"'
   fmt:
     name: Rustfmt
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     steps:
       - run: 'echo "No action required"'
   clippy:
     name: Clippy
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     steps:
       - run: 'echo "No action required"'
   coverage:
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     steps:
       - run: 'echo "No action required"'
   sqlness:
     name: Sqlness Test
-    if: github.event.pull_request.draft == false
     runs-on: ubuntu-20.04
     steps:
       - run: 'echo "No action required"'


@@ -11,6 +11,6 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
name: license-header-check name: license-header-check
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v4
- name: Check License Header - name: Check License Header
uses: korandoru/hawkeye@v3 uses: korandoru/hawkeye@v4


@@ -85,7 +85,7 @@ jobs:
version: ${{ steps.create-version.outputs.version }} version: ${{ steps.create-version.outputs.version }}
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -137,7 +137,7 @@ jobs:
] ]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }} runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -156,7 +156,7 @@ jobs:
] ]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }} runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -179,7 +179,7 @@ jobs:
outputs: outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }} nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -211,7 +211,7 @@ jobs:
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -245,7 +245,7 @@ jobs:
] ]
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -270,7 +270,7 @@ jobs:
] ]
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0


@@ -24,8 +24,8 @@ jobs:
os: [ windows-latest-8-cores ] os: [ windows-latest-8-cores ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4.1.0 - uses: actions/checkout@v4
- uses: arduino/setup-protoc@v1 - uses: arduino/setup-protoc@v3
with: with:
repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master - uses: dtolnay/rust-toolchain@master
@@ -57,8 +57,8 @@ jobs:
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v4.1.0 - uses: actions/checkout@v4
- uses: arduino/setup-protoc@v1 - uses: arduino/setup-protoc@v3
with: with:
repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Rust toolchain - name: Install Rust toolchain
@@ -71,7 +71,7 @@ jobs:
- name: Install Cargo Nextest - name: Install Cargo Nextest
uses: taiki-e/install-action@nextest uses: taiki-e/install-action@nextest
- name: Install Python - name: Install Python
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: '3.10' python-version: '3.10'
- name: Install PyArrow Package - name: Install PyArrow Package


@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0


@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
timeout-minutes: 10 timeout-minutes: 10
steps: steps:
- uses: thehanimo/pr-title-checker@v1.3.4 - uses: thehanimo/pr-title-checker@v1.4.2
with: with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false pass_on_octokit_error: false
@@ -22,7 +22,7 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
timeout-minutes: 10 timeout-minutes: 10
steps: steps:
- uses: thehanimo/pr-title-checker@v1.3.4 - uses: thehanimo/pr-title-checker@v1.4.2
with: with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false pass_on_octokit_error: false


@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-20.04-16-cores runs-on: ubuntu-20.04-16-cores
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0


@@ -114,7 +114,7 @@ jobs:
version: ${{ steps.create-version.outputs.version }} version: ${{ steps.create-version.outputs.version }}
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -168,7 +168,7 @@ jobs:
] ]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }} runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -187,7 +187,7 @@ jobs:
] ]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }} runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -226,7 +226,7 @@ jobs:
] ]
if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -240,6 +240,11 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }} artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
- name: Set build macos result
id: set-build-macos-result
run: |
echo "build-macos-result=success" >> $GITHUB_OUTPUT
build-windows-artifacts: build-windows-artifacts:
name: Build Windows artifacts name: Build Windows artifacts
strategy: strategy:
@@ -262,7 +267,7 @@ jobs:
steps: steps:
- run: git config --global core.autocrlf false - run: git config --global core.autocrlf false
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -276,6 +281,11 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }} artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
- name: Set build windows result
id: set-build-windows-result
run: |
echo "build-windows-result=success" >> $GITHUB_OUTPUT
release-images-to-dockerhub: release-images-to-dockerhub:
name: Build and push images to DockerHub name: Build and push images to DockerHub
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
@@ -286,7 +296,7 @@ jobs:
] ]
runs-on: ubuntu-2004-16-cores runs-on: ubuntu-2004-16-cores
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -299,6 +309,11 @@ jobs:
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
- name: Set build image result
id: set-image-build-result
run: |
echo "build-image-result=success" >> $GITHUB_OUTPUT
release-cn-artifacts: release-cn-artifacts:
name: Release artifacts to CN region name: Release artifacts to CN region
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
@@ -316,7 +331,7 @@ jobs:
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -352,7 +367,7 @@ jobs:
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -375,7 +390,7 @@ jobs:
] ]
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -400,7 +415,7 @@ jobs:
] ]
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
@@ -413,3 +428,29 @@ jobs:
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }} aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
notification:
if: ${{ always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [
release-images-to-dockerhub,
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- name: Notify release successful result
uses: slackapi/slack-github-action@v1.25.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result == 'success' && needs.build-windows-artifacts.outputs.build-windows-result == 'success' && needs.build-macos-artifacts.outputs.build-macos-result == 'success' }}
with:
payload: |
{"text": "GreptimeDB's release version has completed successfully."}
- name: Notify release failed result
uses: slackapi/slack-github-action@v1.25.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result != 'success' || needs.build-windows-artifacts.outputs.build-windows-result != 'success' || needs.build-macos-artifacts.outputs.build-macos-result != 'success' }}
with:
payload: |
{"text": "GreptimeDB's release version has failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/release.yml'."}


@@ -1,25 +0,0 @@
name: size-labeler
on: [pull_request_target]
jobs:
labeler:
runs-on: ubuntu-latest
name: Label the PR size
permissions:
issues: write
pull-requests: write
steps:
- uses: codelytv/pr-size-labeler@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
s_label: 'Size: S'
s_max_size: '100'
m_label: 'Size: M'
m_max_size: '500'
l_label: 'Size: L'
l_max_size: '1000'
xl_label: 'Size: XL'
fail_if_xl: 'false'
message_if_xl: ""
files_to_ignore: 'Cargo.lock'


@@ -1,19 +0,0 @@
name: Check user doc labels
on:
pull_request:
types:
- opened
- reopened
- labeled
- unlabeled
jobs:
check_labels:
name: Check doc labels
runs-on: ubuntu-latest
steps:
- uses: docker://agilepathway/pull-request-label-checker:latest
with:
one_of: Doc update required,Doc not needed
repo_token: ${{ secrets.GITHUB_TOKEN }}

Cargo.lock (generated, 804 changed lines): diff suppressed because it is too large.


@@ -29,9 +29,11 @@ members = [
"src/common/time", "src/common/time",
"src/common/decimal", "src/common/decimal",
"src/common/version", "src/common/version",
"src/common/wal",
"src/datanode", "src/datanode",
"src/datatypes", "src/datatypes",
"src/file-engine", "src/file-engine",
"src/flow",
"src/frontend", "src/frontend",
"src/log-store", "src/log-store",
"src/meta-client", "src/meta-client",
@@ -52,6 +54,7 @@ members = [
"src/store-api", "src/store-api",
"src/table", "src/table",
"src/index", "src/index",
"tests-fuzz",
"tests-integration", "tests-integration",
"tests/runner", "tests/runner",
] ]
@@ -68,15 +71,18 @@ aquamarine = "0.3"
arrow = { version = "47.0" } arrow = { version = "47.0" }
arrow-array = "47.0" arrow-array = "47.0"
arrow-flight = "47.0" arrow-flight = "47.0"
arrow-ipc = "47.0"
arrow-schema = { version = "47.0", features = ["serde"] } arrow-schema = { version = "47.0", features = ["serde"] }
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
axum = { version = "0.6", features = ["headers"] }
base64 = "0.21" base64 = "0.21"
bigdecimal = "0.4.2" bigdecimal = "0.4.2"
bitflags = "2.4.1" bitflags = "2.4.1"
bytemuck = "1.12" bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] } bytes = { version = "1.5", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
dashmap = "5.4" dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
@@ -90,13 +96,14 @@ etcd-client = "0.12"
fst = "0.4.7" fst = "0.4.7"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a31ea166fc015ea7ff111ac94e26c3a5d64364d2" } greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
humantime-serde = "1.1" humantime-serde = "1.1"
itertools = "0.10" itertools = "0.10"
lazy_static = "1.4" lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" } meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
mockall = "0.11.4" mockall = "0.11.4"
moka = "0.12" moka = "0.12"
num_cpus = "1.16"
once_cell = "1.18" once_cell = "1.18"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [ opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
"gen-tonic", "gen-tonic",
@@ -108,7 +115,7 @@ paste = "1.0"
pin-project = "1.0" pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] } prometheus = { version = "0.13.3", features = ["process"] }
prost = "0.12" prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" } raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8" rand = "0.8"
regex = "1.8" regex = "1.8"
regex-automata = { version = "0.2", features = ["transducer"] } regex-automata = { version = "0.2", features = ["transducer"] }
@@ -121,8 +128,10 @@ rskafka = "0.5"
rust_decimal = "1.33" rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
serde_with = "3"
smallvec = { version = "1", features = ["serde"] } smallvec = { version = "1", features = ["serde"] }
snafu = "0.7" snafu = "0.7"
sysinfo = "0.30"
# on branch v0.38.x # on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
"visitor", "visitor",
@@ -155,7 +164,6 @@ common-grpc-expr = { path = "src/common/grpc-expr" }
common-macro = { path = "src/common/macro" } common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" } common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" } common-meta = { path = "src/common/meta" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" } common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" } common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" } common-query = { path = "src/common/query" }
@@ -165,6 +173,7 @@ common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" } common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" } common-time = { path = "src/common/time" }
common-version = { path = "src/common/version" } common-version = { path = "src/common/version" }
common-wal = { path = "src/common/wal" }
datanode = { path = "src/datanode" } datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" } datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" } file-engine = { path = "src/file-engine" }


@@ -65,7 +65,7 @@ endif
build: ## Build debug version greptime. build: ## Build debug version greptime.
cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS} cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS}
.POHNY: build-by-dev-builder .PHONY: build-by-dev-builder
build-by-dev-builder: ## Build greptime by dev-builder. build-by-dev-builder: ## Build greptime by dev-builder.
docker run --network=host \ docker run --network=host \
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \ -v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
@@ -144,11 +144,12 @@ multi-platform-buildx: ## Create buildx multi-platform builder.
docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use
##@ Test ##@ Test
.PHONY: test
test: nextest ## Run unit and integration tests. test: nextest ## Run unit and integration tests.
cargo nextest run ${NEXTEST_OPTS} cargo nextest run ${NEXTEST_OPTS}
.PHONY: nextest ## Install nextest tools. .PHONY: nextest
nextest: nextest: ## Install nextest tools.
cargo --list | grep nextest || cargo install cargo-nextest --locked cargo --list | grep nextest || cargo install cargo-nextest --locked
.PHONY: sqlness-test .PHONY: sqlness-test


@@ -127,12 +127,16 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp) - [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp)
- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl) - [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl)
- [GreptimeDB Go Client](https://github.com/GreptimeTeam/greptimedb-client-go) - [GreptimeDB Go Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-go)
- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java) - [GreptimeDB Java Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-java)
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP) - [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust) - [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
- [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk) - [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk)
### Grafana Dashboard
Our official Grafana dashboard is available at [grafana](./grafana/README.md) directory.
## Project Status ## Project Status
This project is in its early stage and under heavy development. We move fast and This project is in its early stage and under heavy development. We move fast and
@@ -161,18 +165,17 @@ In addition, you may:
## License ## License
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between
open contributions and allowing you to use the software however you want. open contributions and allowing you to use the software however you want.
[1]: <https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE>
## Contributing ## Contributing
Please refer to [contribution guidelines](CONTRIBUTING.md) for more information. Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
## Acknowledgement ## Acknowledgement
- GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion). - GreptimeDB uses [Apache Arrow™](https://arrow.apache.org/) as the memory model and [Apache Parquet™](https://parquet.apache.org/) as the persistent file format.
- [Apache OpenDAL (incubating)](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer. - GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/). - GreptimeDB's meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting. - GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.


@@ -7,7 +7,7 @@ license.workspace = true
[dependencies] [dependencies]
arrow.workspace = true arrow.workspace = true
chrono.workspace = true chrono.workspace = true
clap = { version = "4.0", features = ["derive"] } clap.workspace = true
client.workspace = true client.workspace = true
futures-util.workspace = true futures-util.workspace = true
indicatif = "0.17.1" indicatif = "0.17.1"


@@ -258,7 +258,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
catalog_name: CATALOG_NAME.to_string(), catalog_name: CATALOG_NAME.to_string(),
schema_name: SCHEMA_NAME.to_string(), schema_name: SCHEMA_NAME.to_string(),
table_name: table_name.to_string(), table_name: table_name.to_string(),
desc: "".to_string(), desc: String::default(),
column_defs: vec![ column_defs: vec![
ColumnDef { ColumnDef {
name: "VendorID".to_string(), name: "VendorID".to_string(),


@@ -14,7 +14,7 @@ require_lease_before_startup = false
# Initialize all regions in the background during the startup. # Initialize all regions in the background during the startup.
# By default, it provides services after all regions have been initialized. # By default, it provides services after all regions have been initialized.
initialize_region_in_background = false init_regions_in_background = false
[heartbeat] [heartbeat]
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default. # Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
@@ -94,15 +94,18 @@ compress_manifest = false
max_background_jobs = 4 max_background_jobs = 4
# Interval to auto flush a region if it has not flushed yet. # Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h" auto_flush_interval = "1h"
# Global write buffer size for all regions. # Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
global_write_buffer_size = "1GB" global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G). # Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
global_write_buffer_reject_size = "2GB" global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache. # Cache size for SST metadata. Setting it to 0 to disable the cache.
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
sst_meta_cache_size = "128MB" sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache. # Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
vector_cache_size = "512MB" vector_cache_size = "512MB"
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache. # Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
page_cache_size = "512MB" page_cache_size = "512MB"
# Buffer size for SST writing. # Buffer size for SST writing.
sst_write_buffer_size = "8MB" sst_write_buffer_size = "8MB"
@@ -116,6 +119,25 @@ parallel_scan_channel_size = 32
# Whether to allow stale WAL entries read during replay. # Whether to allow stale WAL entries read during replay.
allow_stale_entries = false allow_stale_entries = false
[region_engine.mito.inverted_index]
# Whether to create the index on flush.
# - "auto": automatically
# - "disable": never
create_on_flush = "auto"
# Whether to create the index on compaction.
# - "auto": automatically
# - "disable": never
create_on_compaction = "auto"
# Whether to apply the index on query
# - "auto": automatically
# - "disable": never
apply_on_query = "auto"
# Memory threshold for performing an external sort during index creation.
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64MB"
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
intermediate_path = ""
# Log options, see `standalone.example.toml` # Log options, see `standalone.example.toml`
# [logging] # [logging]
# dir = "/tmp/greptimedb/logs" # dir = "/tmp/greptimedb/logs"


@@ -57,6 +57,9 @@ enable = true
# Prometheus remote storage options, see `standalone.example.toml`. # Prometheus remote storage options, see `standalone.example.toml`.
[prom_store] [prom_store]
enable = true enable = true
# Whether to store the data from Prometheus remote write in metric engine.
# true by default
with_metric_engine = true
# Metasrv client options, see `datanode.example.toml`. # Metasrv client options, see `datanode.example.toml`.
[meta_client] [meta_client]
@@ -66,6 +69,13 @@ timeout = "3s"
ddl_timeout = "10s" ddl_timeout = "10s"
connect_timeout = "1s" connect_timeout = "1s"
tcp_nodelay = true tcp_nodelay = true
# The configuration about the cache of the Metadata.
# default: 100000
metadata_cache_max_capacity = 100000
# default: 10m
metadata_cache_ttl = "10m"
# default: 5m
metadata_cache_tti = "5m"
# Log options, see `standalone.example.toml` # Log options, see `standalone.example.toml`
# [logging] # [logging]
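The new `metadata_cache_*` options map naturally onto a capacity-, TTL- and TTI-bounded cache. A minimal sketch using the workspace's `moka` crate (sync API); the key/value types and the `new_metadata_cache` helper are illustrative, not the actual meta-client implementation:

```rust
use std::time::Duration;

use moka::sync::Cache;

/// Build a metadata cache bounded by capacity, time-to-live and time-to-idle,
/// mirroring `metadata_cache_max_capacity`, `metadata_cache_ttl` and
/// `metadata_cache_tti` above.
fn new_metadata_cache(max_capacity: u64, ttl: Duration, tti: Duration) -> Cache<String, Vec<u8>> {
    Cache::builder()
        .max_capacity(max_capacity)
        .time_to_live(ttl)
        .time_to_idle(tti)
        .build()
}

fn main() {
    // The documented defaults: 100000 entries, 10m TTL, 5m TTI.
    let cache = new_metadata_cache(
        100_000,
        Duration::from_secs(10 * 60),
        Duration::from_secs(5 * 60),
    );
    cache.insert("table/1".to_string(), b"metadata bytes".to_vec());
    assert!(cache.get("table/1").is_some());
}
```

Entries expire either 10 minutes after insertion (TTL) or 5 minutes after the last access (TTI), whichever comes first.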


@@ -81,6 +81,9 @@ enable = true
[prom_store] [prom_store]
# Whether to enable Prometheus remote write and read in HTTP API, true by default. # Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true enable = true
# Whether to store the data from Prometheus remote write in metric engine.
# true by default
with_metric_engine = true
[wal] [wal]
# Available wal providers: # Available wal providers:
@@ -88,7 +91,25 @@ enable = true
# - "kafka" # - "kafka"
provider = "raft_engine" provider = "raft_engine"
# There're none raft-engine wal config since meta srv only involves in remote wal currently. # Raft-engine wal options.
# WAL data directory
# dir = "/tmp/greptimedb/wal"
# WAL file size in bytes.
file_size = "256MB"
# WAL purge threshold.
purge_threshold = "4GB"
# WAL purge interval in seconds.
purge_interval = "10m"
# WAL read batch size.
read_batch_size = 128
# Whether to sync log file after every write.
sync_write = false
# Whether to reuse logically truncated log files.
enable_log_recycle = true
# Whether to pre-create log files on start up
prefill_log_files = false
# Duration for fsyncing log files.
sync_period = "1000ms"
# Kafka wal options. # Kafka wal options.
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default. # The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
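The raft-engine WAL block above is mostly durations ("10m", "1000ms") and byte sizes. A minimal sketch of how such a section could be deserialized, assuming `serde` and `humantime-serde` from the workspace plus a `toml` parser for the demo; `RaftEngineOptions` and the plain-string size fields are simplifications, not the actual `common-wal` types:

```rust
use std::time::Duration;

use serde::Deserialize;

/// Simplified model of the raft-engine `[wal]` options shown above.
/// Sizes stay as strings here; the real config uses a readable-size type.
#[derive(Debug, Deserialize)]
#[serde(default)]
struct RaftEngineOptions {
    dir: Option<String>,
    file_size: String,
    purge_threshold: String,
    #[serde(with = "humantime_serde")]
    purge_interval: Duration,
    read_batch_size: u32,
    sync_write: bool,
    enable_log_recycle: bool,
    prefill_log_files: bool,
    #[serde(with = "humantime_serde")]
    sync_period: Duration,
}

impl Default for RaftEngineOptions {
    fn default() -> Self {
        Self {
            dir: None,
            file_size: "256MB".to_string(),
            purge_threshold: "4GB".to_string(),
            purge_interval: Duration::from_secs(10 * 60),
            read_batch_size: 128,
            sync_write: false,
            enable_log_recycle: true,
            prefill_log_files: false,
            sync_period: Duration::from_millis(1000),
        }
    }
}

fn main() {
    // Keys left out of the TOML fall back to the documented defaults.
    let options: RaftEngineOptions =
        toml::from_str(r#"purge_interval = "10m""#).expect("valid TOML");
    assert_eq!(options.purge_interval, Duration::from_secs(600));
    assert!(options.enable_log_recycle);
}
```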
@@ -125,25 +146,6 @@ provider = "raft_engine"
# The deadline of retries. # The deadline of retries.
# backoff_deadline = "5mins" # backoff_deadline = "5mins"
# WAL data directory
# dir = "/tmp/greptimedb/wal"
# WAL file size in bytes.
file_size = "256MB"
# WAL purge threshold.
purge_threshold = "4GB"
# WAL purge interval in seconds.
purge_interval = "10m"
# WAL read batch size.
read_batch_size = 128
# Whether to sync log file after every write.
sync_write = false
# Whether to reuse logically truncated log files.
enable_log_recycle = true
# Whether to pre-create log files on start up
prefill_log_files = false
# Duration for fsyncing log files.
sync_period = "1000ms"
# Metadata storage options. # Metadata storage options.
[metadata_store] [metadata_store]
# Kv file size in bytes. # Kv file size in bytes.
@@ -194,15 +196,18 @@ compress_manifest = false
max_background_jobs = 4 max_background_jobs = 4
# Interval to auto flush a region if it has not flushed yet. # Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h" auto_flush_interval = "1h"
# Global write buffer size for all regions. # Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB.
global_write_buffer_size = "1GB" global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G). # Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`
global_write_buffer_reject_size = "2GB" global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache. # Cache size for SST metadata. Setting it to 0 to disable the cache.
# If not set, it's default to 1/32 of OS memory with a max limitation of 128MB.
sst_meta_cache_size = "128MB" sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache. # Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
vector_cache_size = "512MB" vector_cache_size = "512MB"
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache. # Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
# If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
page_cache_size = "512MB" page_cache_size = "512MB"
# Buffer size for SST writing. # Buffer size for SST writing.
sst_write_buffer_size = "8MB" sst_write_buffer_size = "8MB"
@@ -216,6 +221,25 @@ parallel_scan_channel_size = 32
# Whether to allow stale WAL entries read during replay. # Whether to allow stale WAL entries read during replay.
allow_stale_entries = false allow_stale_entries = false
[region_engine.mito.inverted_index]
# Whether to create the index on flush.
# - "auto": automatically
# - "disable": never
create_on_flush = "auto"
# Whether to create the index on compaction.
# - "auto": automatically
# - "disable": never
create_on_compaction = "auto"
# Whether to apply the index on query
# - "auto": automatically
# - "disable": never
apply_on_query = "auto"
# Memory threshold for performing an external sort during index creation.
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64M"
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
intermediate_path = ""
# Log options # Log options
# [logging] # [logging]
# Specify logs directory. # Specify logs directory.
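The updated cache comments in this file (and in the datanode config above) describe memory-proportional defaults: 1/8 of OS memory capped at 1GB for the global write buffer, twice that for the reject threshold, 1/32 capped at 128MB for SST metadata, and 1/16 capped at 512MB for the vector and page caches. A minimal sketch of how such defaults could be derived with the newly added `sysinfo` dependency, assuming `total_memory()` reports bytes (as it does in sysinfo 0.30); `scaled_default` is a hypothetical helper, not the actual mito2 code:

```rust
use sysinfo::System;

const MIB: u64 = 1024 * 1024;
const GIB: u64 = 1024 * MIB;

/// A memory-proportional default with an upper bound.
fn scaled_default(total_bytes: u64, fraction: u64, cap: u64) -> u64 {
    (total_bytes / fraction).min(cap)
}

fn main() {
    let mut sys = System::new_all();
    sys.refresh_memory();
    let total = sys.total_memory();

    // Mirrors the documented fallbacks when the options are unset.
    let global_write_buffer_size = scaled_default(total, 8, GIB);
    let global_write_buffer_reject_size = 2 * global_write_buffer_size;
    let sst_meta_cache_size = scaled_default(total, 32, 128 * MIB);
    let vector_cache_size = scaled_default(total, 16, 512 * MIB);
    let page_cache_size = scaled_default(total, 16, 512 * MIB);

    println!(
        "write buffer: {global_write_buffer_size}, reject: {global_write_buffer_reject_size}, \
         sst meta: {sst_meta_cache_size}, vector: {vector_cache_size}, page: {page_cache_size}"
    );
}
```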


@@ -1,5 +1,11 @@
FROM ubuntu:22.04 FROM ubuntu:22.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
# The binary name of GreptimeDB executable.
# Defaults to "greptime", but sometimes in other projects it might be different.
ARG TARGET_BIN=greptime
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ca-certificates \ ca-certificates \
python3.10 \ python3.10 \
@@ -7,14 +13,16 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
python3-pip \ python3-pip \
curl curl
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt RUN python3 -m pip install -r /etc/greptime/requirements.txt
ARG TARGETARCH ARG TARGETARCH
ADD $TARGETARCH/greptime /greptime/bin/ ADD $TARGETARCH/$TARGET_BIN /greptime/bin/
ENV PATH /greptime/bin/:$PATH ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"] ENV TARGET_BIN=$TARGET_BIN
ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"]


@@ -1,5 +1,8 @@
FROM ubuntu:20.04 FROM ubuntu:20.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
ENV LANG en_US.utf8 ENV LANG en_US.utf8
WORKDIR /greptimedb WORKDIR /greptimedb
@@ -27,10 +30,20 @@ RUN apt-get -y purge python3.8 && \
ln -s /usr/bin/python3.10 /usr/bin/python3 && \ ln -s /usr/bin/python3.10 /usr/bin/python3 && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
RUN git config --global --add safe.directory /greptimedb # Silence all `safe.directory` warnings, to avoid the "detect dubious repository" error when building with submodules.
# Disabling the safe directory check here won't pose extra security issues, because for this dev build
# image we use it solely in our own environments (GitHub Actions VMs, or ECS instances created dynamically by ourselves),
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# behind Git's addition of "safe.directory" in the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There is another solution: add the desired submodules to the safe directory list instead of using a
# wildcard here. However, that requires the git config files and the submodules to be owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker under the user "root", while outside Docker
# it can be a different user that prepared the submodules.
RUN git config --global --add safe.directory *
# Install Python dependencies. # Install Python dependencies.
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt RUN python3 -m pip install -r /etc/greptime/requirements.txt
# Install Rust. # Install Rust.


@@ -1,6 +1,6 @@
--- ---
Feature Name: Inverted Index for SST File Feature Name: Inverted Index for SST File
Tracking Issue: TBD Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2705
Date: 2023-11-03 Date: 2023-11-03
Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>" Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>"
--- ---


@@ -0,0 +1,97 @@
---
Feature Name: Dataflow Framework
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/3187
Date: 2024-01-17
Author: "Discord9 <discord9@163.com>"
---
# Summary
This RFC proposes a lightweight module for executing continuous aggregation queries on a stream of data.
# Motivation
Being able to do continuous aggregation is a very powerful tool. It allows you to do things like:
1. downsample data, e.g., from 1-millisecond to 1-second resolution
2. calculate the average of a stream of data
3. keep a sliding window of data in memory
To do those things while maintaining a low memory footprint, the data needs to be managed in a smart way. Hence, we only store the necessary data in memory, and send/receive data deltas to/from the client.
# Details
## System boundary / What it is and isn't
- GreptimeFlow provides a way to perform continuous aggregation over time-series data.
- It is not a complete stream-processing system; only a necessary subset of functionality is provided.
- Flow processes a configured range of fresh data; data outside this range is dropped directly. It therefore cannot handle datasets with arbitrary (random) timestamps.
- Both sliding windows (e.g., the latest 5m from the present) and fixed windows (every 5m from some starting time) are supported, and these two are the major target scenarios.
- Flow can handle most aggregate operators within one table (e.g., sum, avg, min, max and comparison operators). Others (join, trigger, txn, etc.) are not target features.
## Framework
- Greptime Flow is built on top of [Hydroflow](https://github.com/hydro-project/hydroflow).
- We had three choices for the dataflow/stream-processing framework behind our simple continuous aggregation feature:
1. Build on the timely/differential dataflow crates that [materialize](https://github.com/MaterializeInc/materialize) is based on. This later proved too obscure for simple usage, and customizing memory usage control is hard.
2. Build on a simple dataflow framework written from the ground up, like what [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did; for example, the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/arroyo-datastream/src/lib.rs) takes only about 2000 lines of code. However, it means maintaining another dataflow layer ourselves, which might seem easy in the beginning but could become too burdensome once we need more features.
3. Build on a simple, lower-level dataflow framework maintained by others, like [hydroflow](https://github.com/hydro-project/hydroflow). This approach combines the best of both worlds: it is easy to comprehend and customize, and it offers precisely the features needed for crafting uncomplicated single-node dataflow programs while delivering decent performance.
Hence, we chose the third option. We use a simple logical plan that is agnostic to the underlying dataflow framework, since it only describes what the dataflow graph should do, not how to do it, and we build operators in Hydroflow to execute the plan. The resulting Hydroflow graph is wrapped in an engine that only supports data in/out and a tick event to flush and compute the result. This provides a thin middle layer that is easy to maintain and allows switching to another dataflow framework if necessary.
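A minimal, hand-rolled sketch of that "thin engine" idea: rows go in, a tick folds the buffered deltas into per-window state, and aggregated results come out. It illustrates the concept only and is neither Hydroflow's API nor the real Flownode engine:

```rust
use std::collections::BTreeMap;

/// A toy tick-driven engine: it only supports feeding rows in, ticking,
/// and reading aggregated results out, as described above.
struct Engine {
    /// Rows received since the last tick: (timestamp, value).
    pending: Vec<(i64, f64)>,
    /// Running per-window state: window start -> (sum, count).
    state: BTreeMap<i64, (f64, u64)>,
    window_size: i64,
}

impl Engine {
    fn new(window_size: i64) -> Self {
        Self { pending: Vec::new(), state: BTreeMap::new(), window_size }
    }

    /// Data in: buffer the mirrored insert requests.
    fn push(&mut self, ts: i64, value: f64) {
        self.pending.push((ts, value));
    }

    /// Tick: fold the pending deltas into the per-window state and
    /// emit the updated averages as (window start, avg).
    fn tick(&mut self) -> Vec<(i64, f64)> {
        for (ts, value) in self.pending.drain(..) {
            let window = ts - ts.rem_euclid(self.window_size);
            let entry = self.state.entry(window).or_insert((0.0, 0));
            entry.0 += value;
            entry.1 += 1;
        }
        self.state.iter().map(|(w, (sum, n))| (*w, sum / *n as f64)).collect()
    }
}

fn main() {
    let mut engine = Engine::new(60);
    engine.push(5, 10.0);
    engine.push(70, 30.0);
    let results = engine.tick();
    assert_eq!(results, vec![(0, 10.0), (60, 30.0)]);
}
```

Swapping the body of `tick` for a generated Hydroflow graph is exactly the kind of replacement such a thin wrapper is meant to allow.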
## Deploy mode and protocol
- Greptime Flow is an independent streaming compute component. It can run either within a standalone node or as a dedicated node at the same level as the frontend in distributed mode.
- It accepts insert requests as Rows, the same format used between the frontend and the datanode.
- A new flow job is submitted as a modified SQL query, as Snowflake does, for example: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in Metasrv.
- It also persists results to the frontend in the form of Rows.
- Query plans are encoded with Substrait, the same format used by GreptimeDB's query engine.
- Greptime Flow needs a WAL for recovery; it may be possible to reuse the datanode's.
The workflow is shown in the following diagram:
```mermaid
graph TB
subgraph Flownode["Flownode"]
subgraph Dataflows
df1("Dataflow_1")
df2("Dataflow_2")
end
end
subgraph Frontend["Frontend"]
newLines["Mirror Insert
Create Task From Query
Write result from flow node"]
end
subgraph Datanode["Datanode"]
end
User --> Frontend
Frontend -->|Register Task| Metasrv
Metasrv -->|Read Task Metadata| Frontend
Frontend -->|Create Task| Flownode
Frontend -->|Mirror Insert| Flownode
Flownode -->|Write back| Frontend
Frontend --> Datanode
Datanode --> Frontend
```
## Lifecycle of data
- New data is inserted into the frontend as before. The frontend mirrors insert requests to the flow node if a flow job is configured.
- Depending on the timestamp of the incoming data, the flow either drops it (outdated data) or processes it (fresh data).
- Greptime Flow periodically writes results back through the frontend.
- Those results are then written into a result table stored in the datanode.
- A small table of intermediate state, used to calculate the result, is kept in memory.
## Supported operations
- Greptime Flow accepts a configurable "materialize window"; data points outside that time window are discarded (see the sketch after this list).
- Data within the "materialize window" is queryable and updatable.
- Greptime Flow can handle partitioning if and only if the input query can be transformed into a fully partitioned plan according to the existing commutative rules. Otherwise, the corresponding flow job has to be calculated on a single node.
- Note that Greptime Flow has to see all the data belonging to one partition.
- Deletion and duplicate insertion are not supported at this early stage.
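A sketch of the materialize-window behaviour described above: points older than the configured window (measured from the latest timestamp seen) are dropped on arrival, and state for expired buckets is evicted so only in-window data stays in memory. The names and the "latest timestamp as now" assumption are illustrative, not the actual implementation:

```rust
use std::collections::BTreeMap;

/// Tracks sums per timestamp, but only inside a sliding
/// "materialize window" measured from the latest timestamp seen.
struct MaterializeWindow {
    window: i64,
    latest_ts: i64,
    /// timestamp (seconds) -> running sum
    buckets: BTreeMap<i64, f64>,
}

impl MaterializeWindow {
    fn new(window: i64) -> Self {
        Self { window, latest_ts: i64::MIN, buckets: BTreeMap::new() }
    }

    /// Returns false when the point is outdated and has been discarded.
    fn insert(&mut self, ts: i64, value: f64) -> bool {
        self.latest_ts = self.latest_ts.max(ts);
        let low_bound = self.latest_ts - self.window;
        if ts < low_bound {
            return false; // outside the materialize window: drop directly
        }
        *self.buckets.entry(ts).or_insert(0.0) += value;
        // Evict buckets that have fallen out of the window.
        self.buckets.retain(|bucket_ts, _| *bucket_ts >= low_bound);
        true
    }
}

fn main() {
    let mut win = MaterializeWindow::new(300); // keep the latest 5 minutes
    assert!(win.insert(1_000, 1.0));
    assert!(win.insert(1_200, 2.0)); // advances "now" to 1_200
    assert!(!win.insert(100, 9.0));  // 100 < 1_200 - 300: dropped
    assert_eq!(win.buckets.len(), 2);
}
```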
## Miscellaneous
- Greptime Flow can translate SQL into its own plan; however, only a selected few aggregate functions are supported for now, such as min/max/sum/count/avg.
- Greptime Flow's operators are configurable in terms of the materialize window size, whether to allow delayed incoming data, etc., so the simplest operators can choose not to tolerate any delay in order to save memory.
# Future Work
- Support UDFs that do one-to-one mapping. Preferably, we can reuse the UDF mechanism in GreptimeDB.
- Support the join operator.
- Design syntax to configure operators with different materialize windows and delay tolerances.
- Support a cross-partition merge operator, so that complex query plans that do not necessarily accord with the partitioning rules can communicate between nodes and produce the final materialized result.
- Support duplicate insertion, which can be reverted easily within the current framework, so adding it should be straightforward.
- Support deletion within the "materialize window"; this requires operators like min/max to store all inputs within the materialize window, which might require further optimization.

grafana/README.md (new file, 10 lines)

@@ -0,0 +1,10 @@
Grafana dashboard for GreptimeDB
--------------------------------
GreptimeDB's official Grafana dashboard.
Status note: we are still working on this dashboard, and it is expected to change frequently in the near future. Please feel free to submit your feedback and/or contributions to this dashboard 🤗
# How to use
Open the Grafana dashboard page, choose `New` -> `Import`, and upload the `greptimedb.json` file.

grafana/greptimedb.json (new file, 2399 lines): diff suppressed because it is too large.

@@ -10,11 +10,10 @@ testing = []
[dependencies] [dependencies]
api.workspace = true api.workspace = true
arc-swap = "1.0" arc-swap = "1.0"
arrow-schema.workspace = true
arrow.workspace = true arrow.workspace = true
arrow-schema.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait = "0.1" async-trait = "0.1"
build-data = "0.1"
common-catalog.workspace = true common-catalog.workspace = true
common-error.workspace = true common-error.workspace = true
common-grpc.workspace = true common-grpc.workspace = true
@@ -25,6 +24,7 @@ common-recordbatch.workspace = true
common-runtime.workspace = true common-runtime.workspace = true
common-telemetry.workspace = true common-telemetry.workspace = true
common-time.workspace = true common-time.workspace = true
common-version.workspace = true
dashmap.workspace = true dashmap.workspace = true
datafusion.workspace = true datafusion.workspace = true
datatypes.workspace = true datatypes.workspace = true
@@ -33,7 +33,7 @@ futures-util.workspace = true
itertools.workspace = true itertools.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
meta-client.workspace = true meta-client.workspace = true
moka = { workspace = true, features = ["future"] } moka = { workspace = true, features = ["future", "sync"] }
parking_lot = "0.12" parking_lot = "0.12"
partition.workspace = true partition.workspace = true
paste = "1.0" paste = "1.0"


@@ -41,6 +41,14 @@ pub enum Error {
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failed to list {}.{}'s tables", catalog, schema))]
ListTables {
location: Location,
catalog: String,
schema: String,
source: BoxedError,
},
#[snafu(display("Failed to re-compile script due to internal error"))] #[snafu(display("Failed to re-compile script due to internal error"))]
CompileScriptInternal { CompileScriptInternal {
location: Location, location: Location,
@@ -156,6 +164,15 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("Failed to find table partitions: #{table}"))]
FindPartitions {
source: partition::error::Error,
table: String,
},
#[snafu(display("Failed to find region routes"))]
FindRegionRoutes { source: partition::error::Error },
#[snafu(display("Failed to read system catalog table records"))] #[snafu(display("Failed to read system catalog table records"))]
ReadSystemCatalog { ReadSystemCatalog {
location: Location, location: Location,
@@ -246,11 +263,14 @@ impl ErrorExt for Error {
match self { match self {
Error::InvalidKey { .. } Error::InvalidKey { .. }
| Error::SchemaNotFound { .. } | Error::SchemaNotFound { .. }
| Error::TableNotFound { .. }
| Error::CatalogNotFound { .. } | Error::CatalogNotFound { .. }
| Error::FindPartitions { .. }
| Error::FindRegionRoutes { .. }
| Error::InvalidEntryType { .. } | Error::InvalidEntryType { .. }
| Error::ParallelOpenTable { .. } => StatusCode::Unexpected, | Error::ParallelOpenTable { .. } => StatusCode::Unexpected,
Error::TableNotFound { .. } => StatusCode::TableNotFound,
Error::SystemCatalog { .. } Error::SystemCatalog { .. }
| Error::EmptyValue { .. } | Error::EmptyValue { .. }
| Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable, | Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,
@@ -270,9 +290,9 @@ impl ErrorExt for Error {
StatusCode::InvalidArguments StatusCode::InvalidArguments
} }
Error::ListCatalogs { source, .. } | Error::ListSchemas { source, .. } => { Error::ListCatalogs { source, .. }
source.status_code() | Error::ListSchemas { source, .. }
} | Error::ListTables { source, .. } => source.status_code(),
Error::OpenSystemCatalog { source, .. } Error::OpenSystemCatalog { source, .. }
| Error::CreateSystemCatalog { source, .. } | Error::CreateSystemCatalog { source, .. }
@@ -333,7 +353,7 @@ mod tests {
assert_eq!( assert_eq!(
StatusCode::StorageUnavailable, StatusCode::StorageUnavailable,
Error::SystemCatalog { Error::SystemCatalog {
msg: "".to_string(), msg: String::default(),
location: Location::generate(), location: Location::generate(),
} }
.status_code() .status_code()


@@ -15,7 +15,9 @@
mod columns; mod columns;
mod key_column_usage; mod key_column_usage;
mod memory_table; mod memory_table;
mod partitions;
mod predicate; mod predicate;
mod region_peers;
mod runtime_metrics; mod runtime_metrics;
mod schemata; mod schemata;
mod table_names; mod table_names;
@@ -47,6 +49,8 @@ use self::columns::InformationSchemaColumns;
use crate::error::Result; use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage; use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable}; use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::partitions::InformationSchemaPartitions;
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
use crate::information_schema::runtime_metrics::InformationSchemaMetrics; use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
use crate::information_schema::schemata::InformationSchemaSchemata; use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables; use crate::information_schema::tables::InformationSchemaTables;
@@ -74,6 +78,7 @@ lazy_static! {
TRIGGERS, TRIGGERS,
GLOBAL_STATUS, GLOBAL_STATUS,
SESSION_STATUS, SESSION_STATUS,
PARTITIONS,
]; ];
} }
@@ -156,6 +161,10 @@ impl InformationSchemaProvider {
BUILD_INFO.to_string(), BUILD_INFO.to_string(),
self.build_table(BUILD_INFO).unwrap(), self.build_table(BUILD_INFO).unwrap(),
); );
tables.insert(
REGION_PEERS.to_string(),
self.build_table(REGION_PEERS).unwrap(),
);
} }
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap()); tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
@@ -226,6 +235,14 @@ impl InformationSchemaProvider {
self.catalog_manager.clone(), self.catalog_manager.clone(),
)) as _), )) as _),
RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())), RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())),
PARTITIONS => Some(Arc::new(InformationSchemaPartitions::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
REGION_PEERS => Some(Arc::new(InformationSchemaRegionPeers::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
_ => None, _ => None,
} }
} }
@@ -312,6 +329,7 @@ impl DataSource for InformationTableDataSource {
schema: projected_schema, schema: projected_schema,
stream: Box::pin(stream), stream: Box::pin(stream),
output_ordering: None, output_ordering: None,
metrics: Default::default(),
}; };
Ok(Box::pin(stream)) Ok(Box::pin(stream))


@@ -31,6 +31,7 @@ use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, VectorRef}; use datatypes::vectors::{StringVectorBuilder, VectorRef};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
@@ -57,6 +58,7 @@ const COLUMN_DEFAULT: &str = "column_default";
const IS_NULLABLE: &str = "is_nullable"; const IS_NULLABLE: &str = "is_nullable";
const COLUMN_TYPE: &str = "column_type"; const COLUMN_TYPE: &str = "column_type";
const COLUMN_COMMENT: &str = "column_comment"; const COLUMN_COMMENT: &str = "column_comment";
const INIT_CAPACITY: usize = 42;
impl InformationSchemaColumns { impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self { pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
@@ -153,16 +155,16 @@ impl InformationSchemaColumnsBuilder {
schema, schema,
catalog_name, catalog_name,
catalog_manager, catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(42), catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(42), schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(42), table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_names: StringVectorBuilder::with_capacity(42), column_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
data_types: StringVectorBuilder::with_capacity(42), data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
semantic_types: StringVectorBuilder::with_capacity(42), semantic_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_defaults: StringVectorBuilder::with_capacity(42), column_defaults: StringVectorBuilder::with_capacity(INIT_CAPACITY),
is_nullables: StringVectorBuilder::with_capacity(42), is_nullables: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_types: StringVectorBuilder::with_capacity(42), column_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_comments: StringVectorBuilder::with_capacity(42), column_comments: StringVectorBuilder::with_capacity(INIT_CAPACITY),
} }
} }
@@ -176,21 +178,9 @@ impl InformationSchemaColumnsBuilder {
let predicates = Predicates::from_scan_request(&request); let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? { for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
for table_name in catalog_manager while let Some(table) = stream.try_next().await? {
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices; let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema(); let schema = table.schema();
@@ -207,14 +197,11 @@ impl InformationSchemaColumnsBuilder {
&predicates, &predicates,
&catalog_name, &catalog_name,
&schema_name, &schema_name,
&table_name, &table.table_info().name,
semantic_type, semantic_type,
column, column,
); );
} }
} else {
unreachable!();
}
} }
} }


@@ -23,10 +23,10 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter; use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream; use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef}; use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value; use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder}; use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId}; use store_api::storage::{ScanRequest, TableId};
@@ -44,6 +44,7 @@ const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const ORDINAL_POSITION: &str = "ordinal_position";
+ const INIT_CAPACITY: usize = 42;
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
pub(super) struct InformationSchemaKeyColumnUsage {
@@ -162,9 +163,6 @@ struct InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder,
- referenced_table_schema: StringVectorBuilder,
- referenced_table_name: StringVectorBuilder,
- referenced_column_name: StringVectorBuilder,
}
impl InformationSchemaKeyColumnUsageBuilder {
@@ -177,18 +175,15 @@ impl InformationSchemaKeyColumnUsageBuilder {
schema,
catalog_name,
catalog_manager,
- constraint_catalog: StringVectorBuilder::with_capacity(42),
- constraint_schema: StringVectorBuilder::with_capacity(42),
- constraint_name: StringVectorBuilder::with_capacity(42),
- table_catalog: StringVectorBuilder::with_capacity(42),
- table_schema: StringVectorBuilder::with_capacity(42),
- table_name: StringVectorBuilder::with_capacity(42),
- column_name: StringVectorBuilder::with_capacity(42),
- ordinal_position: UInt32VectorBuilder::with_capacity(42),
- position_in_unique_constraint: UInt32VectorBuilder::with_capacity(42),
- referenced_table_schema: StringVectorBuilder::with_capacity(42),
- referenced_table_name: StringVectorBuilder::with_capacity(42),
- referenced_column_name: StringVectorBuilder::with_capacity(42),
+ constraint_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ constraint_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ constraint_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
+ position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -301,12 +296,15 @@ impl InformationSchemaKeyColumnUsageBuilder {
self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None);
- self.referenced_table_schema.push(None);
- self.referenced_table_name.push(None);
- self.referenced_column_name.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
+ let rows_num = self.table_catalog.len();
+ let null_string_vector = Arc::new(ConstantVector::new(
+     Arc::new(StringVector::from(vec![None as Option<&str>])),
+     rows_num,
+ ));
let columns: Vec<VectorRef> = vec![
Arc::new(self.constraint_catalog.finish()),
Arc::new(self.constraint_schema.finish()),
@@ -317,9 +315,9 @@ impl InformationSchemaKeyColumnUsageBuilder {
Arc::new(self.column_name.finish()),
Arc::new(self.ordinal_position.finish()),
Arc::new(self.position_in_unique_constraint.finish()),
- Arc::new(self.referenced_table_schema.finish()),
- Arc::new(self.referenced_table_name.finish()),
- Arc::new(self.referenced_column_name.finish()),
+ null_string_vector.clone(),
+ null_string_vector.clone(),
+ null_string_vector,
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
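The three `referenced_*` columns are always NULL, so `finish` now materializes them as a `ConstantVector` over a single null `StringVector` repeated for every row, instead of pushing a `None` per row into dedicated builders. A small sketch of that pattern in isolation, assuming the `datatypes` API used in the diff:

use std::sync::Arc;
use datatypes::prelude::VectorRef;
use datatypes::vectors::{ConstantVector, StringVector};

// Build a string column of `len` rows that is entirely NULL without
// allocating one builder slot per row.
fn null_string_column(len: usize) -> VectorRef {
    Arc::new(ConstantVector::new(
        Arc::new(StringVector::from(vec![None as Option<&str>])),
        len,
    ))
}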


@@ -21,8 +21,6 @@ use datatypes::vectors::{Int64Vector, StringVector};
use crate::information_schema::table_names::*;
- const UNKNOWN: &str = "unknown";
/// Find the schema and columns by the table_name, only valid for memory tables.
/// Safety: the user MUST ensure the table schema exists, panic otherwise.
pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
@@ -72,7 +70,9 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
],
),
- BUILD_INFO => (
+ BUILD_INFO => {
+     let build_info = common_version::build_info();
+     (
string_columns(&[
"GIT_BRANCH",
"GIT_COMMIT",
@@ -81,21 +81,16 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
"PKG_VERSION",
]),
vec![
- Arc::new(StringVector::from(vec![
-     build_data::get_git_branch().unwrap_or_else(|_| UNKNOWN.to_string())
- ])),
- Arc::new(StringVector::from(vec![
-     build_data::get_git_commit().unwrap_or_else(|_| UNKNOWN.to_string())
- ])),
- Arc::new(StringVector::from(vec![
-     build_data::get_git_commit_short().unwrap_or_else(|_| UNKNOWN.to_string())
- ])),
- Arc::new(StringVector::from(vec![
-     build_data::get_git_dirty().map_or(UNKNOWN.to_string(), |v| v.to_string())
- ])),
- Arc::new(StringVector::from(vec![option_env!("CARGO_PKG_VERSION")])),
+ Arc::new(StringVector::from(vec![build_info.branch.to_string()])),
+ Arc::new(StringVector::from(vec![build_info.commit.to_string()])),
+ Arc::new(StringVector::from(vec![build_info.commit_short.to_string()])),
+ Arc::new(StringVector::from(vec![build_info.dirty.to_string()])),
+ Arc::new(StringVector::from(vec![build_info.version.to_string()])),
],
- ),
+ )
+ }
CHARACTER_SETS => (
vec![
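The `BUILD_INFO` arm above switches from calling the `build_data` getters while the memory table is built to reading the fields of `common_version::build_info()`. A rough usage sketch; the field names are taken from the diff, and the exact return type of `build_info()` is an assumption here:

// Sketch only: `build_info()` is assumed to return a struct exposing
// `branch`, `commit`, `commit_short`, `dirty` and `version`, as used above.
fn log_build_info() {
    let build_info = common_version::build_info();
    println!(
        "branch={} commit={} ({}) dirty={} version={}",
        build_info.branch,
        build_info.commit,
        build_info.commit_short,
        build_info.dirty,
        build_info.version,
    );
}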


@@ -0,0 +1,399 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::datetime::DateTime;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
};
use futures::TryStreamExt;
use partition::manager::PartitionInfo;
use partition::partition::PartitionDef;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::{TableInfo, TableType};
use super::PARTITIONS;
use crate::error::{
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
const TABLE_CATALOG: &str = "table_catalog";
const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const PARTITION_NAME: &str = "partition_name";
const PARTITION_EXPRESSION: &str = "partition_expression";
/// The region id
const GREPTIME_PARTITION_ID: &str = "greptime_partition_id";
const INIT_CAPACITY: usize = 42;
/// The `PARTITIONS` table provides information about partitioned tables.
/// See https://dev.mysql.com/doc/refman/8.0/en/information-schema-partitions-table.html
/// We provide an extra column `greptime_partition_id` for the GreptimeDB region id.
pub(super) struct InformationSchemaPartitions {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaPartitions {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PARTITION_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
"subpartition_name",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"partition_ordinal_position",
ConcreteDataType::int64_datatype(),
true,
),
ColumnSchema::new(
"subpartition_ordinal_position",
ConcreteDataType::int64_datatype(),
true,
),
ColumnSchema::new(
"partition_method",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"subpartition_method",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
PARTITION_EXPRESSION,
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"subpartition_expression",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"partition_description",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new("table_rows", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("avg_row_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new(
"partition_comment",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new("nodegroup", ConcreteDataType::string_datatype(), true),
ColumnSchema::new("tablespace_name", ConcreteDataType::string_datatype(), true),
ColumnSchema::new(
GREPTIME_PARTITION_ID,
ConcreteDataType::uint64_datatype(),
true,
),
]))
}
fn builder(&self) -> InformationSchemaPartitionsBuilder {
InformationSchemaPartitionsBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaPartitions {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_PARTITIONS_TABLE_ID
}
fn table_name(&self) -> &'static str {
PARTITIONS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_partitions(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaPartitionsBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder,
table_names: StringVectorBuilder,
partition_names: StringVectorBuilder,
partition_ordinal_positions: Int64VectorBuilder,
partition_expressions: StringVectorBuilder,
create_times: DateTimeVectorBuilder,
partition_ids: UInt64VectorBuilder,
}
impl InformationSchemaPartitionsBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.partitions` virtual table
async fn make_partitions(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let partition_manager = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|catalog_manager| catalog_manager.partition_manager());
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();
if table_info.table_type == TableType::Temporary {
continue;
}
let table_id = table_info.ident.table_id;
let partitions = if let Some(partition_manager) = &partition_manager {
partition_manager
.find_table_partitions(table_id)
.await
.context(FindPartitionsSnafu {
table: &table_info.name,
})?
} else {
// The current node must be a standalone instance, so it contains only one partition by default.
// TODO(dennis): change it when we support multi-regions for standalone.
vec![PartitionInfo {
id: RegionId::new(table_id, 0),
partition: PartitionDef::new(vec![], vec![]),
}]
};
self.add_partitions(
&predicates,
&table_info,
&catalog_name,
&schema_name,
&table_info.name,
&partitions,
);
}
}
self.finish()
}
#[allow(clippy::too_many_arguments)]
fn add_partitions(
&mut self,
predicates: &Predicates,
table_info: &TableInfo,
catalog_name: &str,
schema_name: &str,
table_name: &str,
partitions: &[PartitionInfo],
) {
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
];
if !predicates.eval(&row) {
return;
}
for (index, partition) in partitions.iter().enumerate() {
let partition_name = format!("p{index}");
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.partition_names.push(Some(&partition_name));
self.partition_ordinal_positions
.push(Some((index + 1) as i64));
let expressions = if partition.partition.partition_columns().is_empty() {
None
} else {
Some(partition.partition.to_string())
};
self.partition_expressions.push(expressions.as_deref());
self.create_times.push(Some(DateTime::from(
table_info.meta.created_on.timestamp_millis(),
)));
self.partition_ids.push(Some(partition.id.as_u64()));
}
}
fn finish(&mut self) -> Result<RecordBatch> {
let rows_num = self.catalog_names.len();
let null_string_vector = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec![None as Option<&str>])),
rows_num,
));
let null_i64_vector = Arc::new(ConstantVector::new(
Arc::new(Int64Vector::from(vec![None])),
rows_num,
));
let null_datetime_vector = Arc::new(ConstantVector::new(
Arc::new(DateTimeVector::from(vec![None])),
rows_num,
));
let partition_methods = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec![Some("RANGE")])),
rows_num,
));
let columns: Vec<VectorRef> = vec![
Arc::new(self.catalog_names.finish()),
Arc::new(self.schema_names.finish()),
Arc::new(self.table_names.finish()),
Arc::new(self.partition_names.finish()),
null_string_vector.clone(),
Arc::new(self.partition_ordinal_positions.finish()),
null_i64_vector.clone(),
partition_methods,
null_string_vector.clone(),
Arc::new(self.partition_expressions.finish()),
null_string_vector.clone(),
null_string_vector.clone(),
// TODO(dennis): rows and index statistics info
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
Arc::new(self.create_times.finish()),
// TODO(dennis): supports update_time
null_datetime_vector.clone(),
null_datetime_vector,
null_i64_vector,
null_string_vector.clone(),
null_string_vector.clone(),
null_string_vector,
Arc::new(self.partition_ids.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaPartitions {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_partitions(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}


@@ -0,0 +1,279 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use core::pin::pin;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{Int64VectorBuilder, StringVectorBuilder, UInt64VectorBuilder};
use futures::{StreamExt, TryStreamExt};
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType;
use super::REGION_PEERS;
use crate::error::{
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const PEER_ID: &str = "peer_id";
const PEER_ADDR: &str = "peer_addr";
const IS_LEADER: &str = "is_leader";
const STATUS: &str = "status";
const DOWN_SECONDS: &str = "down_seconds";
const INIT_CAPACITY: usize = 42;
/// The `REGION_PEERS` table provides information about the region distribution and routes, including the following fields:
///
/// - `region_id`: the region id
/// - `peer_id`: the region storage datanode peer id
/// - `peer_addr`: the region storage datanode peer address
/// - `is_leader`: whether the peer is the leader
/// - `status`: the region status, `ALIVE` or `DOWNGRADED`.
/// - `down_seconds`: the duration of being offline, in seconds.
///
pub(super) struct InformationSchemaRegionPeers {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaRegionPeers {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(PEER_ID, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(IS_LEADER, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(DOWN_SECONDS, ConcreteDataType::int64_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaRegionPeersBuilder {
InformationSchemaRegionPeersBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaRegionPeers {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID
}
fn table_name(&self) -> &'static str {
REGION_PEERS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_peers(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaRegionPeersBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
region_ids: UInt64VectorBuilder,
peer_ids: UInt64VectorBuilder,
peer_addrs: StringVectorBuilder,
is_leaders: StringVectorBuilder,
statuses: StringVectorBuilder,
down_seconds: Int64VectorBuilder,
}
impl InformationSchemaRegionPeersBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
is_leaders: StringVectorBuilder::with_capacity(INIT_CAPACITY),
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
down_seconds: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.region_peers` virtual table
async fn make_region_peers(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let partition_manager = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|catalog_manager| catalog_manager.partition_manager());
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let table_id_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.await
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {
Ok(None)
} else {
Ok(Some(table_info.ident.table_id))
}
});
const BATCH_SIZE: usize = 128;
// Split table ids into chunks
let mut table_id_chunks = pin!(table_id_stream.ready_chunks(BATCH_SIZE));
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids.into_iter().collect::<Result<Vec<_>>>()?;
let table_routes = if let Some(partition_manager) = &partition_manager {
partition_manager
.find_region_routes_batch(&table_ids)
.await
.context(FindRegionRoutesSnafu)?
} else {
table_ids.into_iter().map(|id| (id, vec![])).collect()
};
for routes in table_routes.values() {
self.add_region_peers(&predicates, routes);
}
}
}
self.finish()
}
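`make_region_peers` avoids one route lookup per table by batching table ids with `ready_chunks(BATCH_SIZE)` and resolving each batch through `find_region_routes_batch`. The chunking idiom on its own looks roughly like this, a self-contained sketch with plain integers standing in for table ids:

use core::pin::pin;
use futures::{stream, StreamExt};

async fn chunked_sum() -> i64 {
    let ids = stream::iter(0..10i64);
    // Collect up to 4 already-available items per chunk, then handle each batch at once.
    let mut chunks = pin!(ids.ready_chunks(4));
    let mut total = 0;
    while let Some(batch) = chunks.next().await {
        total += batch.iter().sum::<i64>();
    }
    total
}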
fn add_region_peers(&mut self, predicates: &Predicates, routes: &[RegionRoute]) {
for route in routes {
let region_id = route.region.id.as_u64();
let peer_id = route.leader_peer.clone().map(|p| p.id);
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
let status = if let Some(status) = route.leader_status {
Some(status.as_ref().to_string())
} else {
// Alive by default
Some("ALIVE".to_string())
};
let row = [(REGION_ID, &Value::from(region_id))];
if !predicates.eval(&row) {
return;
}
// TODO(dennis): adds followers.
self.region_ids.push(Some(region_id));
self.peer_ids.push(peer_id);
self.peer_addrs.push(peer_addr.as_deref());
self.is_leaders.push(Some("Yes"));
self.statuses.push(status.as_deref());
self.down_seconds
.push(route.leader_down_millis().map(|m| m / 1000));
}
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.region_ids.finish()),
Arc::new(self.peer_ids.finish()),
Arc::new(self.peer_addrs.finish()),
Arc::new(self.is_leaders.finish()),
Arc::new(self.statuses.finish()),
Arc::new(self.down_seconds.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaRegionPeers {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_peers(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}


@@ -41,6 +41,7 @@ const CATALOG_NAME: &str = "catalog_name";
const SCHEMA_NAME: &str = "schema_name";
const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name";
const DEFAULT_COLLATION_NAME: &str = "default_collation_name";
+ const INIT_CAPACITY: usize = 42;
/// The `information_schema.schemata` table implementation.
pub(super) struct InformationSchemaSchemata {
@@ -144,11 +145,11 @@ impl InformationSchemaSchemataBuilder {
schema,
catalog_name,
catalog_manager,
- catalog_names: StringVectorBuilder::with_capacity(42),
- schema_names: StringVectorBuilder::with_capacity(42),
- charset_names: StringVectorBuilder::with_capacity(42),
- collation_names: StringVectorBuilder::with_capacity(42),
- sql_paths: StringVectorBuilder::with_capacity(42),
+ catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ charset_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ collation_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ sql_paths: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -162,13 +163,6 @@ impl InformationSchemaSchemataBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
- if !catalog_manager
-     .schema_exists(&catalog_name, &schema_name)
-     .await?
- {
-     continue;
- }
self.add_schema(&predicates, &catalog_name, &schema_name);
}


@@ -39,3 +39,5 @@ pub const TRIGGERS: &str = "triggers";
pub const GLOBAL_STATUS: &str = "global_status";
pub const SESSION_STATUS: &str = "session_status";
pub const RUNTIME_METRICS: &str = "runtime_metrics";
+ pub const PARTITIONS: &str = "partitions";
+ pub const REGION_PEERS: &str = "greptime_region_peers";


@@ -27,6 +27,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
+ use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType;
@@ -44,6 +45,7 @@ const TABLE_NAME: &str = "table_name";
const TABLE_TYPE: &str = "table_type";
const TABLE_ID: &str = "table_id";
const ENGINE: &str = "engine";
+ const INIT_CAPACITY: usize = 42;
pub(super) struct InformationSchemaTables {
schema: SchemaRef,
@@ -140,12 +142,12 @@ impl InformationSchemaTablesBuilder {
schema,
catalog_name,
catalog_manager,
- catalog_names: StringVectorBuilder::with_capacity(42),
- schema_names: StringVectorBuilder::with_capacity(42),
- table_names: StringVectorBuilder::with_capacity(42),
- table_types: StringVectorBuilder::with_capacity(42),
- table_ids: UInt32VectorBuilder::with_capacity(42),
- engines: StringVectorBuilder::with_capacity(42),
+ catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
+ table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
+ engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -159,34 +161,19 @@ impl InformationSchemaTablesBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
- if !catalog_manager
-     .schema_exists(&catalog_name, &schema_name)
-     .await?
- {
-     continue;
- }
- for table_name in catalog_manager
-     .table_names(&catalog_name, &schema_name)
-     .await?
- {
-     if let Some(table) = catalog_manager
-         .table(&catalog_name, &schema_name, &table_name)
-         .await?
-     {
+ let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
+ while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();
self.add_table(
&predicates,
&catalog_name,
&schema_name,
- &table_name,
+ &table_info.name,
table.table_type(),
Some(table_info.ident.table_id),
Some(&table_info.meta.engine),
);
- } else {
-     unreachable!();
- }
}
}


@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
- pub use client::{CachedMetaKvBackend, MetaKvBackend};
+ pub use client::{CachedMetaKvBackend, CachedMetaKvBackendBuilder, MetaKvBackend};
mod client;
mod manager;
- #[cfg(feature = "testing")]
- pub mod mock;
pub use manager::KvBackendCatalogManager;


@@ -14,8 +14,10 @@
use std::any::Any;
use std::fmt::Debug;
- use std::sync::Arc;
+ use std::sync::atomic::{AtomicUsize, Ordering};
+ use std::sync::{Arc, Mutex};
use std::time::Duration;
+ use std::usize;
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::KvCacheInvalidator;
@@ -33,18 +35,91 @@ use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt};
- use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
+ use crate::metrics::{
+     METRIC_CATALOG_KV_BATCH_GET, METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET,
+ };
- const CACHE_MAX_CAPACITY: u64 = 10000;
- const CACHE_TTL_SECOND: u64 = 10 * 60;
- const CACHE_TTI_SECOND: u64 = 5 * 60;
+ const DEFAULT_CACHE_MAX_CAPACITY: u64 = 10000;
+ const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
+ const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
pub struct CachedMetaKvBackendBuilder {
cache_max_capacity: Option<u64>,
cache_ttl: Option<Duration>,
cache_tti: Option<Duration>,
meta_client: Arc<MetaClient>,
}
impl CachedMetaKvBackendBuilder {
pub fn new(meta_client: Arc<MetaClient>) -> Self {
Self {
cache_max_capacity: None,
cache_ttl: None,
cache_tti: None,
meta_client,
}
}
pub fn cache_max_capacity(mut self, cache_max_capacity: u64) -> Self {
self.cache_max_capacity.replace(cache_max_capacity);
self
}
pub fn cache_ttl(mut self, cache_ttl: Duration) -> Self {
self.cache_ttl.replace(cache_ttl);
self
}
pub fn cache_tti(mut self, cache_tti: Duration) -> Self {
self.cache_tti.replace(cache_tti);
self
}
pub fn build(self) -> CachedMetaKvBackend {
let cache_max_capacity = self
.cache_max_capacity
.unwrap_or(DEFAULT_CACHE_MAX_CAPACITY);
let cache_ttl = self.cache_ttl.unwrap_or(DEFAULT_CACHE_TTL);
let cache_tti = self.cache_tti.unwrap_or(DEFAULT_CACHE_TTI);
let cache = Arc::new(
CacheBuilder::new(cache_max_capacity)
.time_to_live(cache_ttl)
.time_to_idle(cache_tti)
.build(),
);
let kv_backend = Arc::new(MetaKvBackend {
client: self.meta_client,
});
let name = format!("CachedKvBackend({})", kv_backend.name());
let version = AtomicUsize::new(0);
CachedMetaKvBackend {
kv_backend,
cache,
name,
version,
}
}
}
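A hedged example of how callers that previously used `CachedMetaKvBackend::new` are expected to construct the backend now; the `meta_client: Arc<MetaClient>` value is assumed to come from the caller's existing setup, and every setter is optional:

use std::sync::Arc;
use std::time::Duration;
use meta_client::client::MetaClient;

// Sketch only: falls back to the DEFAULT_CACHE_* constants for anything not set.
fn build_backend(meta_client: Arc<MetaClient>) -> CachedMetaKvBackend {
    CachedMetaKvBackendBuilder::new(meta_client)
        .cache_max_capacity(100_000)
        .cache_ttl(Duration::from_secs(15 * 60))
        .cache_tti(Duration::from_secs(5 * 60))
        .build()
}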
pub type CacheBackendRef = Arc<Cache<Vec<u8>, KeyValue>>;
/// A wrapper of `MetaKvBackend` with cache support.
///
/// CachedMetaKvBackend is mainly used to read metadata from Metasrv, and it provides a cache
/// for `get` and `batch_get`. One way its cache is invalidated: when metadata changes, Metasrv
/// broadcasts a metadata invalidation request.
///
/// Therefore, it is recommended to use CachedMetaKvBackend only for reading metadata-related
/// information. Note: if you read other information through it, you may get stale data, bounded
/// by the cache TTL and TTI.
pub struct CachedMetaKvBackend {
kv_backend: KvBackendRef,
cache: CacheBackendRef,
name: String,
+ version: AtomicUsize,
}
impl TxnService for CachedMetaKvBackend {
@@ -96,7 +171,38 @@ impl KvBackend for CachedMetaKvBackend {
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
- self.kv_backend.batch_get(req).await
+ let _timer = METRIC_CATALOG_KV_BATCH_GET.start_timer();
let mut kvs = Vec::with_capacity(req.keys.len());
let mut miss_keys = Vec::with_capacity(req.keys.len());
for key in req.keys {
if let Some(val) = self.cache.get(&key).await {
kvs.push(val);
} else {
miss_keys.push(key);
}
}
let batch_get_req = BatchGetRequest::new().with_keys(miss_keys.clone());
let pre_version = self.version();
let unhit_kvs = self.kv_backend.batch_get(batch_get_req).await?.kvs;
for kv in unhit_kvs.iter() {
self.cache.insert(kv.key().to_vec(), kv.clone()).await;
}
if !self.validate_version(pre_version) {
for key in miss_keys.iter() {
self.cache.invalidate(key).await;
}
}
kvs.extend(unhit_kvs);
Ok(BatchGetResponse { kvs })
}
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
@@ -154,8 +260,14 @@ impl KvBackend for CachedMetaKvBackend {
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
let _timer = METRIC_CATALOG_KV_GET.start_timer();
+ let pre_version = Arc::new(Mutex::new(None));
let init = async {
+     let version_clone = pre_version.clone();
let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer();
+     version_clone.lock().unwrap().replace(self.version());
self.kv_backend.get(key).await.map(|val| {
val.with_context(|| CacheNotGetSnafu {
key: String::from_utf8_lossy(key),
@@ -166,7 +278,7 @@ impl KvBackend for CachedMetaKvBackend {
// currently moka doesn't have `optionally_try_get_with_by_ref`
// TODO(fys): change to moka method when available
// https://github.com/moka-rs/moka/issues/254
- match self.cache.try_get_with_by_ref(key, init).await {
+ let ret = match self.cache.try_get_with_by_ref(key, init).await {
Ok(val) => Ok(Some(val)),
Err(e) => match e.as_ref() {
CacheNotGet { .. } => Ok(None),
@@ -175,29 +287,40 @@ impl KvBackend for CachedMetaKvBackend {
}
.map_err(|e| GetKvCache {
err_msg: e.to_string(),
- })
+ });
// "cache.invalidate_key" and "cache.try_get_with_by_ref" are not mutually exclusive. So we need
// to use the version mechanism to prevent expired data from being put into the cache.
if pre_version
.lock()
.unwrap()
.as_ref()
.map_or(false, |v| !self.validate_version(*v))
{
self.cache.invalidate(key).await;
}
ret
}
}
#[async_trait::async_trait]
impl KvCacheInvalidator for CachedMetaKvBackend {
async fn invalidate_key(&self, key: &[u8]) {
+ self.create_new_version();
self.cache.invalidate(key).await;
debug!("invalidated cache key: {}", String::from_utf8_lossy(key));
}
}
impl CachedMetaKvBackend {
- pub fn new(client: Arc<MetaClient>) -> Self {
-     let kv_backend = Arc::new(MetaKvBackend { client });
-     Self::wrap(kv_backend)
- }
- pub fn wrap(kv_backend: KvBackendRef) -> Self {
+ // only for test
+ #[cfg(test)]
+ fn wrap(kv_backend: KvBackendRef) -> Self {
let cache = Arc::new(
- CacheBuilder::new(CACHE_MAX_CAPACITY)
-     .time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
-     .time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
+ CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
+     .time_to_live(DEFAULT_CACHE_TTL)
+     .time_to_idle(DEFAULT_CACHE_TTI)
.build(),
);
@@ -206,12 +329,25 @@ impl CachedMetaKvBackend {
kv_backend,
cache,
name,
+ version: AtomicUsize::new(0),
}
}
pub fn cache(&self) -> &CacheBackendRef {
&self.cache
}
fn version(&self) -> usize {
self.version.load(Ordering::Relaxed)
}
fn validate_version(&self, pre_version: usize) -> bool {
self.version() == pre_version
}
fn create_new_version(&self) -> usize {
self.version.fetch_add(1, Ordering::Relaxed) + 1
}
}
#[derive(Debug)]
@@ -308,3 +444,162 @@ impl KvBackend for MetaKvBackend {
self
}
}
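The version counter added above closes a race between `invalidate_key` and an in-flight `get` or `batch_get` fill: readers snapshot the version before the remote fetch, and if it changed by the time the value landed in the cache, they immediately invalidate what they just inserted. A compact sketch of the same check-then-invalidate idea with standalone, hypothetical types:

use std::sync::atomic::{AtomicUsize, Ordering};

struct VersionGuard {
    version: AtomicUsize,
}

impl VersionGuard {
    // Readers snapshot the version before a remote load...
    fn snapshot(&self) -> usize {
        self.version.load(Ordering::Relaxed)
    }
    // ...every invalidation bumps it...
    fn bump(&self) -> usize {
        self.version.fetch_add(1, Ordering::Relaxed) + 1
    }
    // ...and a freshly cached entry is kept only if no invalidation
    // happened in between; otherwise the reader drops it again.
    fn still_valid(&self, snapshot: usize) -> bool {
        self.version.load(Ordering::Relaxed) == snapshot
    }
}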
#[cfg(test)]
mod tests {
use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use common_meta::kv_backend::{KvBackend, TxnService};
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse, RangeRequest,
RangeResponse,
};
use common_meta::rpc::KeyValue;
use dashmap::DashMap;
use super::CachedMetaKvBackend;
#[derive(Default)]
pub struct SimpleKvBackend {
inner_map: DashMap<Vec<u8>, Vec<u8>>,
get_execute_times: Arc<AtomicU32>,
}
impl TxnService for SimpleKvBackend {
type Error = common_meta::error::Error;
}
#[async_trait]
impl KvBackend for SimpleKvBackend {
fn name(&self) -> &str {
"SimpleKvBackend"
}
fn as_any(&self) -> &dyn Any {
self
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse, Self::Error> {
let mut kvs = Vec::with_capacity(req.keys.len());
for key in req.keys.iter() {
if let Some(kv) = self.get(key).await? {
kvs.push(kv);
}
}
Ok(BatchGetResponse { kvs })
}
async fn put(&self, req: PutRequest) -> Result<PutResponse, Self::Error> {
self.inner_map.insert(req.key, req.value);
// always return None as prev_kv, since we don't use it in this test.
Ok(PutResponse { prev_kv: None })
}
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>, Self::Error> {
self.get_execute_times
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
Ok(self.inner_map.get(key).map(|v| KeyValue {
key: key.to_vec(),
value: v.value().clone(),
}))
}
async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
todo!()
}
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
todo!()
}
async fn compare_and_put(
&self,
_req: CompareAndPutRequest,
) -> Result<CompareAndPutResponse, Self::Error> {
todo!()
}
async fn delete_range(
&self,
_req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse, Self::Error> {
todo!()
}
async fn batch_delete(
&self,
_req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse, Self::Error> {
todo!()
}
}
#[tokio::test]
async fn test_cached_kv_backend() {
let simple_kv = Arc::new(SimpleKvBackend::default());
let get_execute_times = simple_kv.get_execute_times.clone();
let cached_kv = CachedMetaKvBackend::wrap(simple_kv);
add_some_vals(&cached_kv).await;
let batch_get_req = BatchGetRequest {
keys: vec![b"k1".to_vec(), b"k2".to_vec()],
};
assert_eq!(get_execute_times.load(Ordering::SeqCst), 0);
for _ in 0..10 {
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 2);
}
let batch_get_req = BatchGetRequest {
keys: vec![b"k1".to_vec(), b"k2".to_vec(), b"k3".to_vec()],
};
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
for _ in 0..10 {
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
}
}
async fn add_some_vals(kv_backend: &impl KvBackend) {
kv_backend
.put(PutRequest {
key: b"k1".to_vec(),
value: b"v1".to_vec(),
prev_kv: false,
})
.await
.unwrap();
kv_backend
.put(PutRequest {
key: b"k2".to_vec(),
value: b"v2".to_vec(),
prev_kv: false,
})
.await
.unwrap();
kv_backend
.put(PutRequest {
key: b"k3".to_vec(),
value: b"v3".to_vec(),
prev_kv: false,
})
.await
.unwrap();
}
}


@@ -16,17 +16,21 @@ use std::any::Any;
use std::collections::BTreeSet;
use std::sync::{Arc, Weak};
+ use async_stream::try_stream;
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::error::Result as MetaResult;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::schema_name::SchemaNameKey;
+ use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::table_name::TableName;
- use futures_util::TryStreamExt;
+ use futures_util::stream::BoxStream;
+ use futures_util::{StreamExt, TryStreamExt};
+ use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use snafu::prelude::*;
use table::dist_table::DistTable;
@@ -35,8 +39,8 @@ use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::TableRef;
use crate::error::{
- self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, Result as CatalogResult,
- TableMetadataManagerSnafu,
+ self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu,
+ Result as CatalogResult, TableMetadataManagerSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::CatalogManager;
@@ -58,20 +62,30 @@ pub struct KvBackendCatalogManager {
system_catalog: SystemCatalog,
}
- #[async_trait::async_trait]
- impl CacheInvalidator for KvBackendCatalogManager {
-     async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
-         self.cache_invalidator
-             .invalidate_table_name(ctx, table_name)
-             .await
-     }
+ fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {
+     let table_info = table_info_value
+         .table_info
+         .try_into()
+         .context(catalog_err::InvalidTableInfoInCatalogSnafu)?;
+     Ok(DistTable::table(Arc::new(table_info)))
+ }
+ #[async_trait::async_trait]
+ impl CacheInvalidator for KvBackendCatalogManager {
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
self.cache_invalidator
.invalidate_table_id(ctx, table_id)
.await
}
+ async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
+     self.cache_invalidator
+         .invalidate_table_name(ctx, table_name)
+         .await
+ }
}
+ const DEFAULT_CACHED_CATALOG: u64 = 128;
impl KvBackendCatalogManager {
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
@@ -81,9 +95,10 @@ impl KvBackendCatalogManager {
cache_invalidator,
system_catalog: SystemCatalog {
catalog_manager: me.clone(),
+ catalog_cache: Cache::new(DEFAULT_CACHED_CATALOG),
information_schema_provider: Arc::new(InformationSchemaProvider::new(
// The catalog name is not used in system_catalog, so leave it empty
- "".to_string(),
+ String::default(),
me.clone(),
)),
},
@@ -101,6 +116,10 @@ impl KvBackendCatalogManager {
#[async_trait::async_trait]
impl CatalogManager for KvBackendCatalogManager {
+ fn as_any(&self) -> &dyn Any {
+     self
+ }
async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
let stream = self
.table_metadata_manager
@@ -135,18 +154,22 @@ impl CatalogManager for KvBackendCatalogManager {
}
async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
- let mut tables = self
+ let stream = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
+ .await;
+ let mut tables = stream
+ .try_collect::<Vec<_>>()
.await
- .context(TableMetadataManagerSnafu)?
+ .map_err(BoxedError::new)
+ .context(ListTablesSnafu { catalog, schema })?
.into_iter()
.map(|(k, _)| k)
- .collect::<Vec<String>>();
+ .collect::<Vec<_>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema));
- Ok(tables)
+ Ok(tables.into_iter().collect())
}
async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
@@ -215,17 +238,56 @@ impl CatalogManager for KvBackendCatalogManager {
else {
return Ok(None);
};
- let table_info = Arc::new(
-     table_info_value
-         .table_info
-         .try_into()
-         .context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
- );
- Ok(Some(DistTable::table(table_info)))
+ make_table(table_info_value).map(Some)
}
- fn as_any(&self) -> &dyn Any {
-     self
+ async fn tables<'a>(
+     &'a self,
+     catalog: &'a str,
+     schema: &'a str,
+ ) -> BoxStream<'a, CatalogResult<TableRef>> {
let sys_tables = try_stream!({
// System tables
let sys_table_names = self.system_catalog.table_names(schema);
for table_name in sys_table_names {
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
yield table;
}
}
});
let table_id_stream = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
.await
.map_ok(|(_, v)| v.table_id());
const BATCH_SIZE: usize = 128;
let user_tables = try_stream!({
// Split table ids into chunks
let mut table_id_chunks = table_id_stream.ready_chunks(BATCH_SIZE);
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids
.into_iter()
.collect::<Result<Vec<_>, _>>()
.map_err(BoxedError::new)
.context(ListTablesSnafu { catalog, schema })?;
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(&table_ids)
.await
.context(TableMetadataManagerSnafu)?;
for table_info_value in table_info_values.into_values() {
yield make_table(table_info_value)?;
}
}
});
Box::pin(sys_tables.chain(user_tables))
}
}
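`tables` stitches two `async_stream` blocks together: system tables are yielded first, then user tables fetched in batches of 128 table ids, and any metadata error ends the stream. The chaining behavior in isolation looks like the following sketch; it uses plain `stream::iter` in place of `try_stream!` purely to keep the example small:

use futures_util::{stream, StreamExt, TryStreamExt};

async fn chained() -> Result<Vec<i32>, std::io::Error> {
    let system = stream::iter(vec![Ok(1), Ok(2)]);
    let user = stream::iter(vec![Ok(3)]);
    // Items from `system` are produced before any item from `user`;
    // the first Err(..) would stop collection early.
    system.chain(user).try_collect().await
}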
@@ -238,6 +300,7 @@ impl CatalogManager for KvBackendCatalogManager {
#[derive(Clone)]
struct SystemCatalog {
catalog_manager: Weak<KvBackendCatalogManager>,
+ catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
information_schema_provider: Arc<InformationSchemaProvider>,
}
@@ -273,7 +336,12 @@ impl SystemCatalog {
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
if schema == INFORMATION_SCHEMA_NAME {
let information_schema_provider =
- InformationSchemaProvider::new(catalog.to_string(), self.catalog_manager.clone());
+ self.catalog_cache.get_with_by_ref(catalog, move || {
+     Arc::new(InformationSchemaProvider::new(
+         catalog.to_string(),
+         self.catalog_manager.clone(),
+     ))
+ });
information_schema_provider.table(table_name)
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
Some(NumbersTable::table(NUMBERS_TABLE_ID))
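`SystemCatalog::table` now memoizes one `InformationSchemaProvider` per catalog via `moka::sync::Cache::get_with_by_ref`, so repeated `information_schema` lookups reuse the provider instead of rebuilding it. A small sketch of that primitive on its own, with string keys standing in for catalog names and assuming the moka sync API used in the diff:

use std::sync::Arc;
use moka::sync::Cache;

fn main() {
    let cache: Cache<String, Arc<Vec<u8>>> = Cache::new(128);
    // The closure runs only on a cache miss; concurrent callers asking for the
    // same key wait for that single initialization instead of recomputing it.
    let value = cache.get_with_by_ref("greptime", || Arc::new(vec![1, 2, 3]));
    assert_eq!(value.len(), 3);
}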


@@ -1,128 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, RwLock as StdRwLock};
use common_recordbatch::RecordBatch;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use table::engine::{CloseTableResult, EngineContext, TableEngine};
use table::metadata::TableId;
use table::requests::{
AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
TruncateTableRequest,
};
use table::test_util::MemTable;
use table::TableRef;
#[derive(Default)]
pub struct MockTableEngine {
tables: StdRwLock<HashMap<TableId, TableRef>>,
}
#[async_trait::async_trait]
impl TableEngine for MockTableEngine {
fn name(&self) -> &str {
"MockTableEngine"
}
/// Create a table with only one column
async fn create_table(
&self,
_ctx: &EngineContext,
request: CreateTableRequest,
) -> table::Result<TableRef> {
let table_id = request.id;
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]));
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap();
let table = MemTable::new_with_catalog(
&request.table_name,
record_batch,
table_id,
request.catalog_name,
request.schema_name,
vec![0],
);
let mut tables = self.tables.write().unwrap();
let _ = tables.insert(table_id, table.clone() as TableRef);
Ok(table)
}
async fn open_table(
&self,
_ctx: &EngineContext,
request: OpenTableRequest,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&request.table_id).cloned())
}
async fn alter_table(
&self,
_ctx: &EngineContext,
_request: AlterTableRequest,
) -> table::Result<TableRef> {
unimplemented!()
}
fn get_table(
&self,
_ctx: &EngineContext,
table_id: TableId,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&table_id).cloned())
}
fn table_exists(&self, _ctx: &EngineContext, table_id: TableId) -> bool {
self.tables.read().unwrap().contains_key(&table_id)
}
async fn drop_table(
&self,
_ctx: &EngineContext,
_request: DropTableRequest,
) -> table::Result<bool> {
unimplemented!()
}
async fn close_table(
&self,
_ctx: &EngineContext,
request: CloseTableRequest,
) -> table::Result<CloseTableResult> {
let _ = self.tables.write().unwrap().remove(&request.table_id);
Ok(CloseTableResult::Released(vec![]))
}
async fn close(&self) -> table::Result<()> {
Ok(())
}
async fn truncate_table(
&self,
_ctx: &EngineContext,
_request: TruncateTableRequest,
) -> table::Result<bool> {
Ok(true)
}
}


@@ -20,6 +20,7 @@ use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use futures::future::BoxFuture;
+ use futures_util::stream::BoxStream;
use table::metadata::TableId;
use table::requests::CreateTableRequest;
use table::TableRef;
@@ -56,6 +57,13 @@ pub trait CatalogManager: Send + Sync {
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>>;
+ /// Returns all tables with a stream by catalog and schema.
+ async fn tables<'a>(
+     &'a self,
+     catalog: &'a str,
+     schema: &'a str,
+ ) -> BoxStream<'a, Result<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
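Every `CatalogManager` implementation now has to provide the streaming `tables` method, and callers can drain it with the usual `TryStreamExt` combinators. A hypothetical consumer, sketched under the assumption that the crate's `Result` alias and `futures_util::TryStreamExt` are in scope:

use futures_util::TryStreamExt;

// Hypothetical helper: collect the names of all tables in a schema.
async fn list_table_names(
    manager: &CatalogManagerRef,
    catalog: &str,
    schema: &str,
) -> Result<Vec<String>> {
    manager
        .tables(catalog, schema)
        .await
        .map_ok(|table| table.table_info().name.clone())
        .try_collect()
        .await
}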


@@ -17,10 +17,12 @@ use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::sync::{Arc, RwLock, Weak};
+use async_stream::{stream, try_stream};
 use common_catalog::build_db_string;
 use common_catalog::consts::{
     DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
 };
+use futures_util::stream::BoxStream;
 use snafu::OptionExt;
 use table::TableRef;
@@ -39,10 +41,64 @@ pub struct MemoryCatalogManager {
 #[async_trait::async_trait]
 impl CatalogManager for MemoryCatalogManager {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    async fn catalog_names(&self) -> Result<Vec<String>> {
+        Ok(self.catalogs.read().unwrap().keys().cloned().collect())
+    }
+
+    async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
+        Ok(self
+            .catalogs
+            .read()
+            .unwrap()
+            .get(catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: catalog,
+            })?
+            .keys()
+            .cloned()
+            .collect())
+    }
+
+    async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
+        Ok(self
+            .catalogs
+            .read()
+            .unwrap()
+            .get(catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: catalog,
+            })?
+            .get(schema)
+            .with_context(|| SchemaNotFoundSnafu { catalog, schema })?
+            .keys()
+            .cloned()
+            .collect())
+    }
+
+    async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
+        self.catalog_exist_sync(catalog)
+    }
+
     async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
         self.schema_exist_sync(catalog, schema)
     }
+
+    async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
+        let catalogs = self.catalogs.read().unwrap();
+        Ok(catalogs
+            .get(catalog)
+            .with_context(|| CatalogNotFoundSnafu {
+                catalog_name: catalog,
+            })?
+            .get(schema)
+            .with_context(|| SchemaNotFoundSnafu { catalog, schema })?
+            .contains_key(table))
+    }
async fn table( async fn table(
&self, &self,
catalog: &str, catalog: &str,
@@ -61,57 +117,35 @@ impl CatalogManager for MemoryCatalogManager {
         Ok(result)
     }

-    async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
-        self.catalog_exist_sync(catalog)
-    }
-
-    async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
-        let catalogs = self.catalogs.read().unwrap();
-        Ok(catalogs
-            .get(catalog)
-            .with_context(|| CatalogNotFoundSnafu {
-                catalog_name: catalog,
-            })?
-            .get(schema)
-            .with_context(|| SchemaNotFoundSnafu { catalog, schema })?
-            .contains_key(table))
-    }
-
-    async fn catalog_names(&self) -> Result<Vec<String>> {
-        Ok(self.catalogs.read().unwrap().keys().cloned().collect())
-    }
-
-    async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
-        Ok(self
-            .catalogs
-            .read()
-            .unwrap()
-            .get(catalog_name)
-            .with_context(|| CatalogNotFoundSnafu { catalog_name })?
-            .keys()
-            .cloned()
-            .collect())
-    }
-
-    async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
-        Ok(self
-            .catalogs
-            .read()
-            .unwrap()
-            .get(catalog_name)
-            .with_context(|| CatalogNotFoundSnafu { catalog_name })?
-            .get(schema_name)
-            .with_context(|| SchemaNotFoundSnafu {
-                catalog: catalog_name,
-                schema: schema_name,
-            })?
-            .keys()
-            .cloned()
-            .collect())
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
+    async fn tables<'a>(
+        &'a self,
+        catalog: &'a str,
+        schema: &'a str,
+    ) -> BoxStream<'a, Result<TableRef>> {
+        let catalogs = self.catalogs.read().unwrap();
+
+        let Some(schemas) = catalogs.get(catalog) else {
+            return Box::pin(stream!({
+                yield CatalogNotFoundSnafu {
+                    catalog_name: catalog,
+                }
+                .fail();
+            }));
+        };
+
+        let Some(tables) = schemas.get(schema) else {
+            return Box::pin(stream!({
+                yield SchemaNotFoundSnafu { catalog, schema }.fail();
+            }));
+        };
+
+        let tables = tables.values().cloned().collect::<Vec<_>>();
+        return Box::pin(try_stream!({
+            for table in tables {
+                yield table;
+            }
+        }));
+    }
 }
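The `tables` implementation above produces a `BoxStream` with the `async-stream` macros and is drained in the updated test below via `TryStreamExt::try_collect`. A minimal standalone sketch of that producer/consumer pattern, using a plain `String` error type in place of the crate's error enum (an assumption for illustration only):

use async_stream::try_stream;
use futures_util::stream::BoxStream;
use futures_util::TryStreamExt;

type Result<T> = std::result::Result<T, String>;

// Yield owned values one by one, mirroring how the memory catalog clones the
// `TableRef`s out of the lock before streaming them.
fn table_names(names: Vec<String>) -> BoxStream<'static, Result<String>> {
    Box::pin(try_stream!({
        for name in names {
            yield name;
        }
    }))
}

#[tokio::main]
async fn main() -> Result<()> {
    let stream = table_names(vec!["numbers".to_string(), "metrics".to_string()]);
    // `try_collect` stops at the first error, exactly like the test below.
    let tables = stream.try_collect::<Vec<_>>().await?;
    assert_eq!(tables.len(), 2);
    Ok(())
}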
@@ -307,6 +341,7 @@ pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
 #[cfg(test)]
 mod tests {
     use common_catalog::consts::*;
+    use futures_util::TryStreamExt;
     use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};

     use super::*;
@@ -331,8 +366,18 @@ mod tests {
             NUMBERS_TABLE_NAME,
         )
         .await
+        .unwrap()
         .unwrap();
-        let _ = table.unwrap();
+
+        let stream = catalog_list
+            .tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
+            .await;
+        let tables = stream.try_collect::<Vec<_>>().await.unwrap();
+        assert_eq!(tables.len(), 1);
+        assert_eq!(
+            table.table_info().table_id(),
+            tables[0].table_info().table_id()
+        );
+
         assert!(catalog_list
             .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
             .await

View File

@@ -32,4 +32,6 @@ lazy_static! {
         register_histogram!("greptime_catalog_kv_get_remote", "catalog kv get remote").unwrap();
     pub static ref METRIC_CATALOG_KV_GET: Histogram =
         register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
+    pub static ref METRIC_CATALOG_KV_BATCH_GET: Histogram =
+        register_histogram!("greptime_catalog_kv_batch_get", "catalog kv batch get").unwrap();
 }

View File

@@ -9,6 +9,7 @@ testing = []
 [dependencies]
 api.workspace = true
+arc-swap = "1.6"
 arrow-flight.workspace = true
 async-stream.workspace = true
 async-trait.workspace = true
@@ -35,8 +36,8 @@ prost.workspace = true
 rand.workspace = true
 session.workspace = true
 snafu.workspace = true
+tokio-stream = { workspace = true, features = ["net"] }
 tokio.workspace = true
-tokio-stream = { workspace = true, features = ["net"] }
 tonic.workspace = true

 [dev-dependencies]

View File

@@ -37,7 +37,7 @@ async fn run() {
         catalog_name: "greptime".to_string(),
         schema_name: "public".to_string(),
         table_name: "test_logical_dist_exec".to_string(),
-        desc: "".to_string(),
+        desc: String::default(),
         column_defs: vec![
             ColumnDef {
                 name: "timestamp".to_string(),

View File

@@ -122,7 +122,7 @@ impl Client {
         self.inner.set_peers(urls);
     }

-    fn find_channel(&self) -> Result<(String, Channel)> {
+    pub fn find_channel(&self) -> Result<(String, Channel)> {
         let addr = self
             .inner
             .get_peer()

View File

@@ -47,6 +47,9 @@ pub struct Database {
     // The dbname follows the same naming rule as our mysql, postgres and http
     // protocols. The server treats dbname with priority over catalog/schema.
     dbname: String,
+    // The time zone indicates the time zone where the user is located.
+    // Some queries need to be aware of the user's time zone to perform some specific actions.
+    timezone: String,

     client: Client,
     ctx: FlightContext,
@@ -58,7 +61,8 @@ impl Database {
         Self {
             catalog: catalog.into(),
             schema: schema.into(),
-            dbname: "".to_string(),
+            dbname: String::default(),
+            timezone: String::default(),
             client,
             ctx: FlightContext::default(),
         }
@@ -73,8 +77,9 @@ impl Database {
     /// environment
     pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
         Self {
-            catalog: "".to_string(),
-            schema: "".to_string(),
+            catalog: String::default(),
+            schema: String::default(),
+            timezone: String::default(),
             dbname: dbname.into(),
             client,
             ctx: FlightContext::default(),
@@ -105,6 +110,14 @@ impl Database {
         self.dbname = dbname.into();
     }

+    pub fn timezone(&self) -> &String {
+        &self.timezone
+    }
+
+    pub fn set_timezone(&mut self, timezone: impl Into<String>) {
+        self.timezone = timezone.into();
+    }
+
     pub fn set_auth(&mut self, auth: AuthScheme) {
         self.ctx.auth_header = Some(AuthHeader {
             auth_scheme: Some(auth),
@@ -161,6 +174,7 @@ impl Database {
                 schema: self.schema.clone(),
                 authorization: self.ctx.auth_header.clone(),
                 dbname: self.dbname.clone(),
+                timezone: self.timezone.clone(),
                 // TODO(Taylor-lagrange): add client grpc tracing
                 tracing_context: W3cTrace::new(),
             }),
@@ -295,30 +309,36 @@ impl Database {
                 );
                 Ok(Output::AffectedRows(rows))
             }
-            FlightMessage::Recordbatch(_) => IllegalFlightMessagesSnafu {
-                reason: "The first flight message cannot be a RecordBatch message",
+            FlightMessage::Recordbatch(_) | FlightMessage::Metrics(_) => {
+                IllegalFlightMessagesSnafu {
+                    reason: "The first flight message cannot be a RecordBatch or Metrics message",
+                }
+                .fail()
             }
-            .fail(),
             FlightMessage::Schema(schema) => {
                 let stream = Box::pin(stream!({
                     while let Some(flight_message) = flight_message_stream.next().await {
                         let flight_message = flight_message
                             .map_err(BoxedError::new)
                             .context(ExternalSnafu)?;
-                        let FlightMessage::Recordbatch(record_batch) = flight_message else {
-                            yield IllegalFlightMessagesSnafu {reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"}
-                                .fail()
-                                .map_err(BoxedError::new)
-                                .context(ExternalSnafu);
-                            break;
-                        };
-                        yield Ok(record_batch);
+                        match flight_message {
+                            FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
+                            FlightMessage::Metrics(_) => {}
+                            FlightMessage::AffectedRows(_) | FlightMessage::Schema(_) => {
+                                yield IllegalFlightMessagesSnafu {reason: format!("A Schema message must be succeeded exclusively by a set of RecordBatch messages, flight_message: {:?}", flight_message)}
+                                    .fail()
+                                    .map_err(BoxedError::new)
+                                    .context(ExternalSnafu);
+                                break;
+                            }
+                        }
                     }
                 }));
                 let record_batch_stream = RecordBatchStreamWrapper {
                     schema,
                     stream,
                     output_ordering: None,
+                    metrics: Default::default(),
                 };
                 Ok(Output::Stream(Box::pin(record_batch_stream)))
             }
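A small usage sketch for the timezone plumbing added to `Database` above. Only `new_with_dbname`, `set_timezone` and `timezone` come from this diff; the `Client::with_urls` constructor is an assumption for illustration:

use client::{Client, Database};

fn connect() -> Database {
    let client = Client::with_urls(["127.0.0.1:4001"]); // assumed constructor
    let mut db = Database::new_with_dbname("public", client);
    // Requests issued through `db` now carry the session time zone in the
    // request header alongside catalog/schema/dbname.
    db.set_timezone("Asia/Shanghai");
    assert_eq!(db.timezone(), "Asia/Shanghai");
    db
}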

View File

@@ -12,8 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::sync::Arc;
+
 use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
 use api::v1::ResponseHeader;
+use arc_swap::ArcSwapOption;
 use arrow_flight::Ticket;
 use async_stream::stream;
 use async_trait::async_trait;
@@ -25,6 +28,7 @@ use common_meta::error::{self as meta_error, Result as MetaResult};
 use common_recordbatch::error::ExternalSnafu;
 use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
 use common_telemetry::error;
+use common_telemetry::tracing_context::TracingContext;
 use prost::Message;
 use snafu::{location, Location, OptionExt, ResultExt};
 use tokio_stream::StreamExt;
@@ -119,12 +123,27 @@ impl RegionRequester {
             .fail();
         };

+        let metrics_str = Arc::new(ArcSwapOption::from(None));
+        let ref_str = metrics_str.clone();
+
+        let tracing_context = TracingContext::from_current_span();
+
         let stream = Box::pin(stream!({
+            let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
+                "poll_flight_data_stream"
+            ));
             while let Some(flight_message) = flight_message_stream.next().await {
                 let flight_message = flight_message
                     .map_err(BoxedError::new)
                     .context(ExternalSnafu)?;
-                let FlightMessage::Recordbatch(record_batch) = flight_message else {
+                match flight_message {
+                    FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
+                    FlightMessage::Metrics(s) => {
+                        ref_str.swap(Some(Arc::new(s)));
+                        break;
+                    }
+                    _ => {
                         yield IllegalFlightMessagesSnafu {
                             reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"
                         }
@@ -132,14 +151,15 @@ impl RegionRequester {
                         .map_err(BoxedError::new)
                         .context(ExternalSnafu);
                         break;
-                };
-                yield Ok(record_batch);
+                    }
+                }
             }
         }));

         let record_batch_stream = RecordBatchStreamWrapper {
             schema,
             stream,
             output_ordering: None,
+            metrics: metrics_str,
         };
         Ok(Box::pin(record_batch_stream))
     }
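The hunk above hands the final metrics string from inside the stream to the `RecordBatchStreamWrapper` through a shared `ArcSwapOption`. A standalone sketch of that handoff pattern (the sample JSON and names are illustrative only):

use std::sync::Arc;
use arc_swap::ArcSwapOption;

fn main() {
    let metrics: Arc<ArcSwapOption<String>> = Arc::new(ArcSwapOption::from(None));
    let producer = metrics.clone();

    // Inside the stream this happens when a `FlightMessage::Metrics(s)` arrives.
    producer.swap(Some(Arc::new("{\"elapsed_ms\":42}".to_string())));

    // Anyone holding the other handle (here, the stream wrapper) can read it later.
    assert_eq!(
        metrics.load_full().as_deref().map(String::as_str),
        Some("{\"elapsed_ms\":42}")
    );
}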
@@ -230,7 +250,7 @@ mod test {
         let result = check_response_header(Some(ResponseHeader {
             status: Some(PbStatus {
                 status_code: StatusCode::Success as u32,
-                err_msg: "".to_string(),
+                err_msg: String::default(),
             }),
         }));
         assert!(result.is_ok());
@@ -238,7 +258,7 @@ mod test {
         let result = check_response_header(Some(ResponseHeader {
             status: Some(PbStatus {
                 status_code: u32::MAX,
-                err_msg: "".to_string(),
+                err_msg: String::default(),
             }),
         }));
         assert!(matches!(

View File

@@ -18,7 +18,7 @@ async-trait.workspace = true
 auth.workspace = true
 catalog.workspace = true
 chrono.workspace = true
-clap = { version = "4.4", features = ["derive"] }
+clap.workspace = true
 client.workspace = true
 common-base.workspace = true
 common-catalog.workspace = true
@@ -29,10 +29,12 @@ common-meta.workspace = true
 common-procedure.workspace = true
 common-query.workspace = true
 common-recordbatch.workspace = true
+common-runtime.workspace = true
 common-telemetry = { workspace = true, features = [
     "deadlock_detection",
 ] }
 common-time.workspace = true
+common-wal.workspace = true
 config = "0.13"
 datanode.workspace = true
 datatypes.workspace = true

View File

@@ -13,5 +13,5 @@
 // limitations under the License.

 fn main() {
-    common_version::setup_git_versions();
+    common_version::setup_build_info();
 }

View File

@@ -156,6 +156,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
             }),
             follower_peers: vec![],
             leader_status: None,
+            leader_down_since: None,
         });
     }

View File

@@ -58,8 +58,8 @@ pub struct ExportCommand {
     #[clap(long)]
     output_dir: String,

-    /// The name of the catalog to export. Default to "greptime-*"".
-    #[clap(long, default_value = "")]
+    /// The name of the catalog to export.
+    #[clap(long, default_value = "greptime-*")]
     database: String,

     /// Parallelism of the export.

View File

@@ -16,7 +16,9 @@ use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Instant;

-use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
+use catalog::kvbackend::{
+    CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager,
+};
 use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use common_base::Plugins;
 use common_error::ext::ErrorExt;
@@ -157,19 +159,20 @@ impl Repl {
         let start = Instant::now();

         let output = if let Some(query_engine) = &self.query_engine {
-            let stmt = QueryLanguageParser::parse_sql(&sql)
-                .with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
-
             let query_ctx = QueryContext::with(self.database.catalog(), self.database.schema());

+            let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
+                .with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
+
             let plan = query_engine
                 .planner()
-                .plan(stmt, query_ctx)
+                .plan(stmt, query_ctx.clone())
                 .await
                 .context(PlanStatementSnafu)?;

-            let LogicalPlan::DfPlan(plan) =
-                query_engine.optimize(&plan).context(PlanStatementSnafu)?;
+            let LogicalPlan::DfPlan(plan) = query_engine
+                .optimize(&query_engine.engine_context(query_ctx), &plan)
+                .context(PlanStatementSnafu)?;

             let plan = DFLogicalSubstraitConvertor {}
                 .encode(&plan)
@@ -247,7 +250,8 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
         .context(StartMetaClientSnafu)?;
     let meta_client = Arc::new(meta_client);

-    let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
+    let cached_meta_backend =
+        Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());

     let catalog_list =
         KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
View File

@@ -18,10 +18,11 @@ use std::time::Duration;
 use async_trait::async_trait;
 use catalog::kvbackend::MetaKvBackend;
 use clap::Parser;
-use common_config::WalConfig;
 use common_telemetry::{info, logging};
+use common_wal::config::DatanodeWalConfig;
 use datanode::config::DatanodeOptions;
 use datanode::datanode::{Datanode, DatanodeBuilder};
+use datanode::service::DatanodeServiceBuilder;
 use meta_client::MetaClientOptions;
 use servers::Mode;
 use snafu::{OptionExt, ResultExt};
@@ -35,9 +36,13 @@ pub struct Instance {
 }

 impl Instance {
-    fn new(datanode: Datanode) -> Self {
+    pub fn new(datanode: Datanode) -> Self {
         Self { datanode }
     }
+
+    pub fn datanode_mut(&mut self) -> &mut Datanode {
+        &mut self.datanode
+    }
 }

 #[async_trait]
@@ -169,7 +174,7 @@ impl StartCommand {
         // `wal_dir` only affects raft-engine config.
         if let Some(wal_dir) = &self.wal_dir
-            && let WalConfig::RaftEngine(raft_engine_config) = &mut opts.wal
+            && let DatanodeWalConfig::RaftEngine(raft_engine_config) = &mut opts.wal
         {
             if raft_engine_config
                 .dir
@@ -219,15 +224,20 @@ impl StartCommand {
             client: Arc::new(meta_client.clone()),
         });

-        let datanode = DatanodeBuilder::new(opts, plugins)
+        let mut datanode = DatanodeBuilder::new(opts.clone(), plugins)
             .with_meta_client(meta_client)
             .with_kv_backend(meta_backend)
-            .enable_region_server_service()
-            .enable_http_service()
             .build()
             .await
             .context(StartDatanodeSnafu)?;

+        let services = DatanodeServiceBuilder::new(&opts)
+            .with_default_grpc_server(&datanode.region_server())
+            .enable_http_service()
+            .build()
+            .context(StartDatanodeSnafu)?;
+
+        datanode.setup_services(services);
         Ok(Instance::new(datanode))
     }
 }
@@ -306,7 +316,7 @@ mod tests {
         assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
         assert_eq!(Some(42), options.node_id);

-        let WalConfig::RaftEngine(raft_engine_config) = options.wal else {
+        let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
             unreachable!()
         };
         assert_eq!("/other/wal", raft_engine_config.dir.unwrap());
@@ -494,7 +504,7 @@ mod tests {
             };

             // Should be read from env, env > default values.
-            let WalConfig::RaftEngine(raft_engine_config) = opts.wal else {
+            let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
                 unreachable!()
             };
             assert_eq!(raft_engine_config.read_batch_size, 100);

View File

@@ -249,6 +249,12 @@ pub enum Error {
         source: BoxedError,
         location: Location,
     },
+
+    #[snafu(display("Failed to build runtime"))]
+    BuildRuntime {
+        location: Location,
+        source: common_runtime::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -298,6 +304,8 @@ impl ErrorExt for Error {
             Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected,

             Error::Other { source, .. } => source.status_code(),
+
+            Error::BuildRuntime { source, .. } => source.status_code(),
         }
     }

View File

@@ -16,7 +16,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use async_trait::async_trait;
-use catalog::kvbackend::CachedMetaKvBackend;
+use catalog::kvbackend::CachedMetaKvBackendBuilder;
 use clap::Parser;
 use client::client_manager::DatanodeClients;
 use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
@@ -46,6 +46,10 @@ impl Instance {
     fn new(frontend: FeInstance) -> Self {
         Self { frontend }
     }
+
+    pub fn mut_inner(&mut self) -> &mut FeInstance {
+        &mut self.frontend
+    }
 }
#[async_trait] #[async_trait]
@@ -224,15 +228,27 @@ impl StartCommand {
         let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
            msg: "'meta_client'",
         })?;

+        let cache_max_capacity = meta_client_options.metadata_cache_max_capacity;
+        let cache_ttl = meta_client_options.metadata_cache_ttl;
+        let cache_tti = meta_client_options.metadata_cache_tti;
+
         let meta_client = FeInstance::create_meta_client(meta_client_options)
             .await
             .context(StartFrontendSnafu)?;

-        let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
+        let cached_meta_backend = CachedMetaKvBackendBuilder::new(meta_client.clone())
+            .cache_max_capacity(cache_max_capacity)
+            .cache_ttl(cache_ttl)
+            .cache_tti(cache_tti)
+            .build();
+        let cached_meta_backend = Arc::new(cached_meta_backend);

         let executor = HandlerGroupExecutor::new(vec![
             Arc::new(ParseMailboxMessageHandler),
-            Arc::new(InvalidateTableCacheHandler::new(meta_backend.clone())),
+            Arc::new(InvalidateTableCacheHandler::new(
+                cached_meta_backend.clone(),
+            )),
         ]);

         let heartbeat_task = HeartbeatTask::new(
@@ -242,24 +258,22 @@ impl StartCommand {
         );

         let mut instance = FrontendBuilder::new(
-            meta_backend.clone(),
+            cached_meta_backend.clone(),
             Arc::new(DatanodeClients::default()),
             meta_client,
         )
-        .with_cache_invalidator(meta_backend)
+        .with_cache_invalidator(cached_meta_backend)
         .with_plugin(plugins.clone())
         .with_heartbeat_task(heartbeat_task)
         .try_build()
         .await
         .context(StartFrontendSnafu)?;

-        let servers = Services::new(plugins)
-            .build(opts.clone(), Arc::new(instance.clone()))
-            .await
+        let servers = Services::new(opts.clone(), Arc::new(instance.clone()), plugins)
+            .build()
             .context(StartFrontendSnafu)?;

         instance
             .build_servers(opts, servers)
-            .await
             .context(StartFrontendSnafu)?;

         Ok(Instance::new(instance))

View File

@@ -35,6 +35,11 @@ lazy_static::lazy_static! {
 pub trait App {
     fn name(&self) -> &str;

+    /// A hook for implementors to do something before the actual startup. Defaults to a no-op.
+    fn pre_start(&mut self) -> error::Result<()> {
+        Ok(())
+    }
+
     async fn start(&mut self) -> error::Result<()>;

     async fn stop(&self) -> error::Result<()>;
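The new `pre_start` hook gives each binary a place for synchronous setup before the async `start` future runs. A standalone analogue of the pattern, with a simplified trait and error type standing in for the crate-internal ones (an assumption for illustration):

use async_trait::async_trait;

type Result<T> = std::result::Result<T, String>;

#[async_trait]
trait App {
    fn name(&self) -> &str;

    /// Defaults to a no-op, like the hook added above.
    fn pre_start(&mut self) -> Result<()> {
        Ok(())
    }

    async fn start(&mut self) -> Result<()>;
}

struct Server;

#[async_trait]
impl App for Server {
    fn name(&self) -> &str {
        "server"
    }

    fn pre_start(&mut self) -> Result<()> {
        // Synchronous, fail-fast work (metrics registration, fd limits, ...) belongs here.
        println!("pre-start checks for {}", self.name());
        Ok(())
    }

    async fn start(&mut self) -> Result<()> {
        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let mut app = Server;
    app.pre_start()?;
    app.start().await
}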
@@ -43,6 +48,8 @@ pub trait App {
 pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
     let name = app.name().to_string();

+    app.pre_start()?;
+
     tokio::select! {
         result = app.start() => {
             if let Err(err) = result {

View File

@@ -14,8 +14,8 @@
 use clap::ArgMatches;
 use common_config::KvBackendConfig;
-use common_meta::wal::WalConfig as MetaSrvWalConfig;
 use common_telemetry::logging::{LoggingOptions, TracingOptions};
+use common_wal::config::MetaSrvWalConfig;
 use config::{Config, Environment, File, FileFormat};
 use datanode::config::{DatanodeOptions, ProcedureConfig};
 use frontend::error::{Result as FeResult, TomlFormatSnafu};
@@ -173,8 +173,8 @@ impl Options {
 mod tests {
     use std::io::Write;

-    use common_config::WalConfig;
     use common_test_util::temp_dir::create_named_temp_file;
+    use common_wal::config::DatanodeWalConfig;
     use datanode::config::{DatanodeOptions, ObjectStoreConfig};

     use super::*;
@@ -281,7 +281,7 @@ mod tests {
         );

         // Should be the values from config file, not environment variables.
-        let WalConfig::RaftEngine(raft_engine_config) = opts.wal else {
+        let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
             unreachable!()
         };
         assert_eq!(raft_engine_config.dir.unwrap(), "/tmp/greptimedb/wal");

View File

@@ -18,7 +18,6 @@ use std::{fs, path};
 use async_trait::async_trait;
 use clap::Parser;
 use common_catalog::consts::MIN_USER_TABLE_ID;
-use common_config::wal::StandaloneWalConfig;
 use common_config::{metadata_store_dir, KvBackendConfig};
 use common_meta::cache_invalidator::DummyCacheInvalidator;
 use common_meta::datanode_manager::DatanodeManagerRef;
@@ -29,11 +28,12 @@ use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
 use common_meta::kv_backend::KvBackendRef;
 use common_meta::region_keeper::MemoryRegionKeeper;
 use common_meta::sequence::SequenceBuilder;
-use common_meta::wal::{WalOptionsAllocator, WalOptionsAllocatorRef};
+use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
 use common_procedure::ProcedureManagerRef;
 use common_telemetry::info;
 use common_telemetry::logging::LoggingOptions;
 use common_time::timezone::set_default_timezone;
+use common_wal::config::StandaloneWalConfig;
 use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
 use datanode::datanode::{Datanode, DatanodeBuilder};
 use file_engine::config::EngineConfig as FileEngineConfig;
@@ -213,6 +213,10 @@ impl App for Instance {
             .await
             .context(StartWalOptionsAllocatorSnafu)?;

+        plugins::start_frontend_plugins(self.frontend.plugins().clone())
+            .await
+            .context(StartFrontendSnafu)?;
+
         self.frontend.start().await.context(StartFrontendSnafu)?;
         Ok(())
     }
@@ -368,20 +372,18 @@ impl StartCommand {
     #[allow(unused_variables)]
     #[allow(clippy::diverging_sub_expression)]
     async fn build(self, opts: MixOptions) -> Result<Instance> {
-        let mut fe_opts = opts.frontend.clone();
+        info!("Standalone start command: {:#?}", self);
+        info!("Building standalone instance with {opts:#?}");
+
+        let mut fe_opts = opts.frontend;
         #[allow(clippy::unnecessary_mut_passed)]
         let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts) // mut ref is MUST, DO NOT change it
             .await
             .context(StartFrontendSnafu)?;

-        let dn_opts = opts.datanode.clone();
-
-        info!("Standalone start command: {:#?}", self);
-        info!("Building standalone instance with {opts:#?}");
-
-        set_default_timezone(opts.frontend.default_timezone.as_deref())
-            .context(InitTimezoneSnafu)?;
+        let dn_opts = opts.datanode;
+
+        set_default_timezone(fe_opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;

         // Ensure the data_home directory exists.
         fs::create_dir_all(path::Path::new(&opts.data_home)).context(CreateDirSnafu {
@@ -437,13 +439,11 @@ impl StartCommand {
             .await
             .context(StartFrontendSnafu)?;

-        let servers = Services::new(fe_plugins)
-            .build(opts.clone(), Arc::new(frontend.clone()))
-            .await
+        let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
+            .build()
             .context(StartFrontendSnafu)?;

         frontend
-            .build_servers(opts, servers)
-            .await
+            .build_servers(fe_opts, servers)
             .context(StartFrontendSnafu)?;

         Ok(Instance {
@@ -497,8 +497,8 @@ mod tests {
     use auth::{Identity, Password, UserProviderRef};
     use common_base::readable_size::ReadableSize;
-    use common_config::WalConfig;
     use common_test_util::temp_dir::create_named_temp_file;
+    use common_wal::config::DatanodeWalConfig;
     use datanode::config::{FileConfig, GcsConfig};
     use servers::Mode;
@@ -605,7 +605,7 @@ mod tests {
         assert_eq!(None, fe_opts.mysql.reject_no_database);
         assert!(fe_opts.influxdb.enable);

-        let WalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
+        let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
             unreachable!()
         };
         assert_eq!("/tmp/greptimedb/test/wal", raft_engine_config.dir.unwrap());

View File

@@ -216,7 +216,7 @@ mod tests {
         let bytes = StringBytes::from(hello.clone());
         assert_eq!(bytes.len(), hello.len());

-        let zero = "".to_string();
+        let zero = String::default();
         let bytes = StringBytes::from(zero);
         assert!(bytes.is_empty());
     }

View File

@@ -33,7 +33,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
 pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
 pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;

-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
+#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
 pub struct ReadableSize(pub u64);

 impl ReadableSize {

View File

@@ -82,6 +82,10 @@ pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
 pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
 /// id for information_schema.RUNTIME_METRICS
 pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
+/// id for information_schema.PARTITIONS
+pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
+/// id for information_schema.REGION_PEERS
+pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
 /// ----- End of information_schema tables -----

 pub const MITO_ENGINE: &str = "mito";

View File

@@ -56,11 +56,22 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
 /// - if `[<catalog>-]` is provided, we split database name with `-` and use
 ///   `<catalog>` and `<schema>`.
 pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
+    match parse_optional_catalog_and_schema_from_db_string(db) {
+        (Some(catalog), schema) => (catalog, schema),
+        (None, schema) => (DEFAULT_CATALOG_NAME, schema),
+    }
+}
+
+/// Attempt to parse catalog and schema from given database name
+///
+/// Similar to [`parse_catalog_and_schema_from_db_string`] but returns an optional
+/// catalog if it's not provided in the database name.
+pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<&str>, &str) {
     let parts = db.splitn(2, '-').collect::<Vec<&str>>();
     if parts.len() == 2 {
-        (parts[0], parts[1])
+        (Some(parts[0]), parts[1])
     } else {
-        (DEFAULT_CATALOG_NAME, db)
+        (None, db)
     }
 }
@@ -90,5 +101,20 @@ mod tests {
             ("catalog", "schema1-schema2"),
             parse_catalog_and_schema_from_db_string("catalog-schema1-schema2")
         );
+
+        assert_eq!(
+            (None, "fullschema"),
+            parse_optional_catalog_and_schema_from_db_string("fullschema")
+        );
+
+        assert_eq!(
+            (Some("catalog"), "schema"),
+            parse_optional_catalog_and_schema_from_db_string("catalog-schema")
+        );
+
+        assert_eq!(
+            (Some("catalog"), "schema1-schema2"),
+            parse_optional_catalog_and_schema_from_db_string("catalog-schema1-schema2")
+        );
     }
 }

View File

@@ -7,8 +7,6 @@ license.workspace = true
 [dependencies]
 common-base.workspace = true
 humantime-serde.workspace = true
-rskafka.workspace = true
+num_cpus.workspace = true
 serde.workspace = true
-serde_json.workspace = true
-serde_with = "3"
-toml.workspace = true
+sysinfo.workspace = true

View File

@@ -12,13 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-pub mod wal;
+pub mod utils;

 use common_base::readable_size::ReadableSize;
 use serde::{Deserialize, Serialize};

-pub use crate::wal::{KafkaWalOptions, WalConfig, WalOptions, WAL_OPTIONS_KEY};
-
 pub fn metadata_store_dir(store_dir: &str) -> String {
     format!("{store_dir}/metadata")
 }

View File

@@ -0,0 +1,55 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_base::readable_size::ReadableSize;
use sysinfo::System;
/// Get the CPU core number of system, aware of cgroups.
pub fn get_cpus() -> usize {
// This function will check cgroups
num_cpus::get()
}
/// Get the total memory of the system.
/// If `cgroup_limits` is enabled, it will also check it.
pub fn get_sys_total_memory() -> Option<ReadableSize> {
if sysinfo::IS_SUPPORTED_SYSTEM {
let mut sys_info = System::new();
sys_info.refresh_memory();
let mut total_memory = sys_info.total_memory();
// Compare with cgroups memory limit, use smaller values
// This method is only implemented for Linux. It always returns None for all other systems.
if let Some(cgroup_limits) = sys_info.cgroup_limits() {
total_memory = total_memory.min(cgroup_limits.total_memory)
}
Some(ReadableSize(total_memory))
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_cpus() {
assert!(get_cpus() > 0);
}
#[test]
fn test_get_sys_total_memory() {
assert!(get_sys_total_memory().unwrap() > ReadableSize::mb(0));
}
}
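A sketch of how these helpers are typically consumed inside the workspace: derive a default worker count and a memory budget from the detected resources. The quarter-of-total-memory heuristic and the fallback value are illustrative assumptions, not GreptimeDB defaults:

use common_base::readable_size::ReadableSize;
use common_config::utils::{get_cpus, get_sys_total_memory};

// Size thread pools from the cgroup-aware CPU count.
fn default_runtime_size() -> usize {
    get_cpus().max(1)
}

// Reserve a fraction of the detected (possibly cgroup-limited) memory.
fn default_cache_budget() -> ReadableSize {
    let total = get_sys_total_memory().unwrap_or(ReadableSize::mb(1024)); // assumed fallback
    ReadableSize(total.0 / 4)
}

fn main() {
    println!("workers: {}", default_runtime_size());
    println!("cache budget (bytes): {}", default_cache_budget().0);
}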

View File

@@ -1,154 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod kafka;
pub mod raft_engine;
use serde::{Deserialize, Serialize};
use serde_with::with_prefix;
pub use crate::wal::kafka::{KafkaConfig, KafkaOptions as KafkaWalOptions, StandaloneKafkaConfig};
pub use crate::wal::raft_engine::RaftEngineConfig;
/// An encoded wal options will be wrapped into a (WAL_OPTIONS_KEY, encoded wal options) key-value pair
/// and inserted into the options of a `RegionCreateRequest`.
pub const WAL_OPTIONS_KEY: &str = "wal_options";
/// Wal config for datanode.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "provider", rename_all = "snake_case")]
pub enum WalConfig {
RaftEngine(RaftEngineConfig),
Kafka(KafkaConfig),
}
impl From<StandaloneWalConfig> for WalConfig {
fn from(value: StandaloneWalConfig) -> Self {
match value {
StandaloneWalConfig::RaftEngine(config) => WalConfig::RaftEngine(config),
StandaloneWalConfig::Kafka(config) => WalConfig::Kafka(config.base),
}
}
}
impl Default for WalConfig {
fn default() -> Self {
WalConfig::RaftEngine(RaftEngineConfig::default())
}
}
/// Wal config for datanode.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "provider", rename_all = "snake_case")]
pub enum StandaloneWalConfig {
RaftEngine(RaftEngineConfig),
Kafka(StandaloneKafkaConfig),
}
impl Default for StandaloneWalConfig {
fn default() -> Self {
StandaloneWalConfig::RaftEngine(RaftEngineConfig::default())
}
}
/// Wal options allocated to a region.
/// A wal options is encoded by metasrv with `serde_json::to_string`, and then decoded
/// by datanode with `serde_json::from_str`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(tag = "wal.provider", rename_all = "snake_case")]
pub enum WalOptions {
#[default]
RaftEngine,
#[serde(with = "prefix_wal_kafka")]
Kafka(KafkaWalOptions),
}
with_prefix!(prefix_wal_kafka "wal.kafka.");
#[cfg(test)]
mod tests {
use std::time::Duration;
use common_base::readable_size::ReadableSize;
use rskafka::client::partition::Compression as RsKafkaCompression;
use crate::wal::kafka::KafkaBackoffConfig;
use crate::wal::{KafkaConfig, KafkaWalOptions, WalOptions};
#[test]
fn test_serde_kafka_config() {
// With all fields.
let toml_str = r#"
broker_endpoints = ["127.0.0.1:9092"]
max_batch_size = "1MB"
linger = "200ms"
consumer_wait_timeout = "100ms"
backoff_init = "500ms"
backoff_max = "10s"
backoff_base = 2
backoff_deadline = "5mins"
"#;
let decoded: KafkaConfig = toml::from_str(toml_str).unwrap();
let expected = KafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
compression: RsKafkaCompression::default(),
max_batch_size: ReadableSize::mb(1),
linger: Duration::from_millis(200),
consumer_wait_timeout: Duration::from_millis(100),
backoff: KafkaBackoffConfig {
init: Duration::from_millis(500),
max: Duration::from_secs(10),
base: 2,
deadline: Some(Duration::from_secs(60 * 5)),
},
};
assert_eq!(decoded, expected);
// With some fields missing.
let toml_str = r#"
broker_endpoints = ["127.0.0.1:9092"]
linger = "200ms"
"#;
let decoded: KafkaConfig = toml::from_str(toml_str).unwrap();
let expected = KafkaConfig {
broker_endpoints: vec!["127.0.0.1:9092".to_string()],
linger: Duration::from_millis(200),
..Default::default()
};
assert_eq!(decoded, expected);
}
#[test]
fn test_serde_wal_options() {
// Test serde raft-engine wal options.
let wal_options = WalOptions::RaftEngine;
let encoded = serde_json::to_string(&wal_options).unwrap();
let expected = r#"{"wal.provider":"raft_engine"}"#;
assert_eq!(&encoded, expected);
let decoded: WalOptions = serde_json::from_str(&encoded).unwrap();
assert_eq!(decoded, wal_options);
// Test serde kafka wal options.
let wal_options = WalOptions::Kafka(KafkaWalOptions {
topic: "test_topic".to_string(),
});
let encoded = serde_json::to_string(&wal_options).unwrap();
let expected = r#"{"wal.provider":"kafka","wal.kafka.topic":"test_topic"}"#;
assert_eq!(&encoded, expected);
let decoded: WalOptions = serde_json::from_str(&encoded).unwrap();
assert_eq!(decoded, wal_options);
}
}
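The options encoding exercised by the deleted tests above now lives in the `common-wal` crate (see the import changes elsewhere in this set); it relies on an internally tagged enum plus `serde_with::with_prefix`. A standalone reproduction of that encoding, using the same field and key names as the removed code:

use serde::{Deserialize, Serialize};
use serde_with::with_prefix;

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
struct KafkaWalOptions {
    topic: String,
}

with_prefix!(prefix_wal_kafka "wal.kafka.");

// Internally tagged on "wal.provider"; the Kafka payload is flattened with a
// "wal.kafka." prefix so the whole thing serializes to a flat JSON object.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(tag = "wal.provider", rename_all = "snake_case")]
enum WalOptions {
    #[default]
    RaftEngine,
    #[serde(with = "prefix_wal_kafka")]
    Kafka(KafkaWalOptions),
}

fn main() {
    let options = WalOptions::Kafka(KafkaWalOptions {
        topic: "test_topic".to_string(),
    });
    let encoded = serde_json::to_string(&options).unwrap();
    assert_eq!(
        encoded,
        r#"{"wal.provider":"kafka","wal.kafka.topic":"test_topic"}"#
    );
    let decoded: WalOptions = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded, options);
}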

View File

@@ -5,8 +5,8 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+arrow-schema.workspace = true
 arrow.workspace = true
-arrow-schema.workspace = true
 async-compression = { version = "0.3", features = [
     "bzip2",
     "gzip",
@@ -19,6 +19,7 @@ async-trait.workspace = true
 bytes.workspace = true
 common-error.workspace = true
 common-macro.workspace = true
+common-recordbatch.workspace = true
 common-runtime.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
@@ -33,8 +34,8 @@ regex = "1.7"
 serde.workspace = true
 snafu.workspace = true
 strum.workspace = true
+tokio-util.workspace = true
 tokio.workspace = true
-tokio-util.workspace = true
 url = "2.3"

 [dev-dependencies]

View File

@@ -47,7 +47,7 @@ pub trait ArrowWriterCloser {
 impl<
         T: AsyncWrite + Send + Unpin,
         U: DfRecordBatchEncoder + ArrowWriterCloser,
-        F: FnMut(String) -> Fut,
+        F: Fn(String) -> Fut,
         Fut: Future<Output = Result<T>>,
     > LazyBufferedWriter<T, U, F>
 {
@@ -75,7 +75,7 @@ impl<
 impl<
         T: AsyncWrite + Send + Unpin,
         U: DfRecordBatchEncoder,
-        F: FnMut(String) -> Fut,
+        F: Fn(String) -> Fut,
         Fut: Future<Output = Result<T>>,
     > LazyBufferedWriter<T, U, F>
 {
@@ -149,7 +149,7 @@ impl<
         if let Some(ref mut writer) = self.writer {
             Ok(writer)
         } else {
-            let writer = (self.writer_factory)(self.path.clone()).await?;
+            let writer = (self.writer_factory)(self.path.to_string()).await?;
             Ok(self.writer.insert(writer))
         }
     }

View File

@@ -193,13 +193,15 @@ pub async fn stream_to_file<T: DfRecordBatchEncoder, U: Fn(SharedBuffer) -> T>(
     store: ObjectStore,
     path: &str,
     threshold: usize,
+    concurrency: usize,
     encoder_factory: U,
 ) -> Result<usize> {
     let buffer = SharedBuffer::with_capacity(threshold);
     let encoder = encoder_factory(buffer.clone());
     let mut writer = LazyBufferedWriter::new(threshold, buffer, encoder, path, |path| async {
         store
-            .writer(&path)
+            .writer_with(&path)
+            .concurrent(concurrency)
             .await
             .context(error::WriteObjectSnafu { path })
     });

View File

@@ -193,8 +193,9 @@ pub async fn stream_to_csv(
     store: ObjectStore,
     path: &str,
     threshold: usize,
+    concurrency: usize,
 ) -> Result<usize> {
-    stream_to_file(stream, store, path, threshold, |buffer| {
+    stream_to_file(stream, store, path, threshold, concurrency, |buffer| {
         csv::Writer::new(buffer)
     })
     .await

View File

@@ -152,8 +152,9 @@ pub async fn stream_to_json(
     store: ObjectStore,
     path: &str,
     threshold: usize,
+    concurrency: usize,
 ) -> Result<usize> {
-    stream_to_file(stream, store, path, threshold, |buffer| {
+    stream_to_file(stream, store, path, threshold, concurrency, |buffer| {
         json::LineDelimitedWriter::new(buffer)
     })
     .await

View File

@@ -12,18 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::pin::Pin;
 use std::sync::Arc;
-use std::task::{Context, Poll};

-use arrow::compute::cast;
 use arrow_schema::{ArrowError, Schema, SchemaRef};
 use async_trait::async_trait;
-use datafusion::arrow::record_batch::RecordBatch as DfRecordBatch;
+use common_recordbatch::adapter::RecordBatchStreamTypeAdapter;
 use datafusion::datasource::physical_plan::{FileMeta, FileOpenFuture, FileOpener};
 use datafusion::error::{DataFusionError, Result as DfResult};
-use datafusion::physical_plan::RecordBatchStream;
-use futures::{Stream, StreamExt, TryStreamExt};
+use futures::{StreamExt, TryStreamExt};
 use object_store::ObjectStore;
 use orc_rust::arrow_reader::{create_arrow_schema, Cursor};
 use orc_rust::async_arrow_reader::ArrowStreamReader;
@@ -61,73 +57,6 @@ pub async fn infer_orc_schema<R: AsyncRead + AsyncSeek + Unpin + Send + 'static>
Ok(create_arrow_schema(&cursor)) Ok(create_arrow_schema(&cursor))
} }
pub struct OrcArrowStreamReaderAdapter<T: AsyncRead + AsyncSeek + Unpin + Send + 'static> {
output_schema: SchemaRef,
projection: Vec<usize>,
stream: ArrowStreamReader<T>,
}
impl<T: AsyncRead + AsyncSeek + Unpin + Send + 'static> OrcArrowStreamReaderAdapter<T> {
pub fn new(
output_schema: SchemaRef,
stream: ArrowStreamReader<T>,
projection: Option<Vec<usize>>,
) -> Self {
let projection = if let Some(projection) = projection {
projection
} else {
(0..output_schema.fields().len()).collect()
};
Self {
output_schema,
projection,
stream,
}
}
}
impl<T: AsyncRead + AsyncSeek + Unpin + Send + 'static> RecordBatchStream
for OrcArrowStreamReaderAdapter<T>
{
fn schema(&self) -> SchemaRef {
self.output_schema.clone()
}
}
impl<T: AsyncRead + AsyncSeek + Unpin + Send + 'static> Stream for OrcArrowStreamReaderAdapter<T> {
type Item = DfResult<DfRecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let batch = futures::ready!(Pin::new(&mut self.stream).poll_next(cx))
.map(|r| r.map_err(|e| DataFusionError::External(Box::new(e))));
let projected_schema = self.output_schema.project(&self.projection)?;
let batch = batch.map(|b| {
b.and_then(|b| {
let mut columns = Vec::with_capacity(self.projection.len());
for idx in self.projection.iter() {
let column = b.column(*idx);
let field = self.output_schema.field(*idx);
if column.data_type() != field.data_type() {
let output = cast(&column, field.data_type())?;
columns.push(output)
} else {
columns.push(column.clone())
}
}
let record_batch = DfRecordBatch::try_new(projected_schema.into(), columns)?;
Ok(record_batch)
})
});
Poll::Ready(batch)
}
}
#[async_trait] #[async_trait]
impl FileFormat for OrcFormat { impl FileFormat for OrcFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> { async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
@@ -166,7 +95,15 @@ impl OrcOpener {
 impl FileOpener for OrcOpener {
     fn open(&self, meta: FileMeta) -> DfResult<FileOpenFuture> {
         let object_store = self.object_store.clone();
-        let output_schema = self.output_schema.clone();
+        let projected_schema = if let Some(projection) = &self.projection {
+            let projected_schema = self
+                .output_schema
+                .project(projection)
+                .map_err(|e| DataFusionError::External(Box::new(e)))?;
+            Arc::new(projected_schema)
+        } else {
+            self.output_schema.clone()
+        };
         let projection = self.projection.clone();
         Ok(Box::pin(async move {
             let reader = object_store
@@ -178,7 +115,8 @@ impl FileOpener for OrcOpener {
                 .await
                 .map_err(|e| DataFusionError::External(Box::new(e)))?;

-            let stream = OrcArrowStreamReaderAdapter::new(output_schema, stream_reader, projection);
+            let stream =
+                RecordBatchStreamTypeAdapter::new(projected_schema, stream_reader, projection);

             let adopted = stream.map_err(|e| ArrowError::ExternalError(Box::new(e)));
             Ok(adopted.boxed())
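The new `OrcOpener::open` projects the output schema up front instead of casting columns by hand. A minimal sketch of the underlying `arrow-schema` call on a toy schema (field names are illustrative):

use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef};

// Return the projected schema when a projection is given, otherwise the full schema.
fn projected(output_schema: &SchemaRef, projection: Option<&[usize]>) -> SchemaRef {
    match projection {
        Some(indices) => Arc::new(output_schema.project(indices).expect("valid indices")),
        None => output_schema.clone(),
    }
}

fn main() {
    let schema: SchemaRef = Arc::new(Schema::new(vec![
        Field::new("ts", DataType::Int64, false),
        Field::new("host", DataType::Utf8, true),
        Field::new("value", DataType::Float64, true),
    ]));
    let projected = projected(&schema, Some(&[0, 2]));
    assert_eq!(projected.fields().len(), 2);
}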

View File

@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-use std::future::Future;
-use std::pin::Pin;
 use std::result;
 use std::sync::Arc;
@@ -31,7 +29,7 @@ use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet;
 use datafusion::physical_plan::SendableRecordBatchStream;
 use futures::future::BoxFuture;
 use futures::StreamExt;
-use object_store::{ObjectStore, Reader};
+use object_store::{ObjectStore, Reader, Writer};
 use parquet::basic::{Compression, ZstdLevel};
 use parquet::file::properties::WriterProperties;
 use snafu::ResultExt;
@@ -171,22 +169,33 @@ pub struct BufferedWriter {
 type InnerBufferedWriter = LazyBufferedWriter<
     object_store::Writer,
     ArrowWriter<SharedBuffer>,
-    Box<
-        dyn FnMut(
-                String,
-            )
-                -> Pin<Box<dyn Future<Output = error::Result<object_store::Writer>> + Send>>
-            + Send,
-    >,
+    impl Fn(String) -> BoxFuture<'static, Result<Writer>>,
 >;

 impl BufferedWriter {
+    fn make_write_factory(
+        store: ObjectStore,
+        concurrency: usize,
+    ) -> impl Fn(String) -> BoxFuture<'static, Result<Writer>> {
+        move |path| {
+            let store = store.clone();
+            Box::pin(async move {
+                store
+                    .writer_with(&path)
+                    .concurrent(concurrency)
+                    .await
+                    .context(error::WriteObjectSnafu { path })
+            })
+        }
+    }
+
     pub async fn try_new(
         path: String,
         store: ObjectStore,
         arrow_schema: SchemaRef,
         props: Option<WriterProperties>,
         buffer_threshold: usize,
+        concurrency: usize,
     ) -> error::Result<Self> {
         let buffer = SharedBuffer::with_capacity(buffer_threshold);
@@ -199,15 +208,7 @@ impl BufferedWriter {
                 buffer,
                 arrow_writer,
                 &path,
-                Box::new(move |path| {
-                    let store = store.clone();
-                    Box::pin(async move {
-                        store
-                            .writer(&path)
-                            .await
-                            .context(error::WriteObjectSnafu { path })
-                    })
-                }),
+                Self::make_write_factory(store, concurrency),
             ),
         })
     }
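The boxed `FnMut` factory type is replaced above by `impl Fn(String) -> BoxFuture<'static, Result<Writer>>` built in `make_write_factory`. A standalone sketch of a factory with the same shape, returning an in-memory buffer instead of an object-store writer (the prefix handling is illustrative only):

use futures::future::BoxFuture;

fn make_write_factory(
    prefix: String,
) -> impl Fn(String) -> BoxFuture<'static, std::io::Result<Vec<u8>>> {
    move |path| {
        let prefix = prefix.clone();
        Box::pin(async move {
            // A real factory would open `{prefix}/{path}` on the object store here.
            println!("opening writer for {prefix}/{path}");
            Ok(Vec::new())
        })
    }
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let factory = make_write_factory("data".to_string());
    // The writer is only created lazily, when the buffered writer first flushes.
    let _writer = factory("part-0.parquet".to_string()).await?;
    Ok(())
}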
@@ -236,6 +237,7 @@ pub async fn stream_to_parquet(
store: ObjectStore, store: ObjectStore,
path: &str, path: &str,
threshold: usize, threshold: usize,
concurrency: usize,
) -> Result<usize> { ) -> Result<usize> {
let write_props = WriterProperties::builder() let write_props = WriterProperties::builder()
.set_compression(Compression::ZSTD(ZstdLevel::default())) .set_compression(Compression::ZSTD(ZstdLevel::default()))
@@ -247,6 +249,7 @@ pub async fn stream_to_parquet(
schema, schema,
Some(write_props), Some(write_props),
threshold, threshold,
concurrency,
) )
.await?; .await?;
let mut rows_written = 0; let mut rows_written = 0;
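Note on the hunk above, not part of the diff: the writer factory is no longer a boxed `dyn FnMut`; the type alias now embeds an `impl Fn` type, which relies on the nightly `type_alias_impl_trait` feature enabled in the lib.rs hunk below. A minimal, self-contained sketch of the same shape, with illustrative names only (not GreptimeDB APIs):

#![feature(type_alias_impl_trait)] // nightly, matching the flag added below

use futures::future::BoxFuture;

struct LazyWriter<F> {
    factory: F,
}

// The alias can name the otherwise-unnameable closure type directly instead of
// boxing it as `Box<dyn FnMut(String) -> Pin<Box<dyn Future<..>>> + Send>`.
type InnerWriter = LazyWriter<impl Fn(String) -> BoxFuture<'static, std::io::Result<()>>>;

fn make_factory(concurrency: usize) -> impl Fn(String) -> BoxFuture<'static, std::io::Result<()>> {
    move |path| {
        Box::pin(async move {
            // A real factory would open an object-store writer for `path` here,
            // configured with `concurrency`.
            let _ = (path, concurrency);
            Ok(())
        })
    }
}

fn build_writer() -> InnerWriter {
    LazyWriter {
        factory: make_factory(8),
    }
}

fn main() -> std::io::Result<()> {
    let writer = build_writer();
    futures::executor::block_on((writer.factory)("part-0.parquet".to_string()))
}

The factory captures `concurrency` by value, so every call can open a fresh writer with the same settings, which is what `make_write_factory` does above with the object store.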

View File

@@ -13,6 +13,7 @@
 // limitations under the License.
 #![feature(assert_matches)]
+#![feature(type_alias_impl_trait)]
 pub mod buffered_writer;
 pub mod compression;

View File

@@ -113,6 +113,7 @@ pub async fn setup_stream_to_json_test(origin_path: &str, threshold: impl Fn(usi
             tmp_store.clone(),
             &output_path,
             threshold(size),
+            8
         )
         .await
         .is_ok());
@@ -150,6 +151,7 @@ pub async fn setup_stream_to_csv_test(origin_path: &str, threshold: impl Fn(usiz
             tmp_store.clone(),
             &output_path,
             threshold(size),
+            8
         )
         .await
         .is_ok());

View File

@@ -14,10 +14,10 @@
 use std::fmt;
-use strum::{AsRefStr, EnumString};
+use strum::{AsRefStr, EnumIter, EnumString, FromRepr};
 /// Common status code for public API.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumString, AsRefStr, EnumIter, FromRepr)]
 pub enum StatusCode {
     // ====== Begin of common status code ==============
     /// Success.
@@ -68,6 +68,8 @@ pub enum StatusCode {
     // ====== Begin of storage related status code =====
     /// Storage is temporarily unable to handle the request
     StorageUnavailable = 5000,
+    /// Request is outdated, e.g., version mismatch
+    RequestOutdated = 5001,
     // ====== End of storage related status code =======
     // ====== Begin of server related status code =====
@@ -135,7 +137,8 @@ impl StatusCode {
             | StatusCode::AuthHeaderNotFound
             | StatusCode::InvalidAuthHeader
             | StatusCode::AccessDenied
-            | StatusCode::PermissionDenied => false,
+            | StatusCode::PermissionDenied
+            | StatusCode::RequestOutdated => false,
         }
     }
@@ -172,53 +175,13 @@ impl StatusCode {
             | StatusCode::AuthHeaderNotFound
             | StatusCode::InvalidAuthHeader
             | StatusCode::AccessDenied
-            | StatusCode::PermissionDenied => false,
+            | StatusCode::PermissionDenied
+            | StatusCode::RequestOutdated => false,
         }
     }
     pub fn from_u32(value: u32) -> Option<Self> {
-        match value {
-            v if v == StatusCode::Success as u32 => Some(StatusCode::Success),
-            v if v == StatusCode::Unknown as u32 => Some(StatusCode::Unknown),
-            v if v == StatusCode::Unsupported as u32 => Some(StatusCode::Unsupported),
-            v if v == StatusCode::Unexpected as u32 => Some(StatusCode::Unexpected),
-            v if v == StatusCode::Internal as u32 => Some(StatusCode::Internal),
-            v if v == StatusCode::InvalidArguments as u32 => Some(StatusCode::InvalidArguments),
-            v if v == StatusCode::Cancelled as u32 => Some(StatusCode::Cancelled),
-            v if v == StatusCode::InvalidSyntax as u32 => Some(StatusCode::InvalidSyntax),
-            v if v == StatusCode::PlanQuery as u32 => Some(StatusCode::PlanQuery),
-            v if v == StatusCode::EngineExecuteQuery as u32 => Some(StatusCode::EngineExecuteQuery),
-            v if v == StatusCode::TableAlreadyExists as u32 => Some(StatusCode::TableAlreadyExists),
-            v if v == StatusCode::TableNotFound as u32 => Some(StatusCode::TableNotFound),
-            v if v == StatusCode::RegionNotFound as u32 => Some(StatusCode::RegionNotFound),
-            v if v == StatusCode::RegionNotReady as u32 => Some(StatusCode::RegionNotReady),
-            v if v == StatusCode::RegionBusy as u32 => Some(StatusCode::RegionBusy),
-            v if v == StatusCode::RegionAlreadyExists as u32 => {
-                Some(StatusCode::RegionAlreadyExists)
-            }
-            v if v == StatusCode::RegionReadonly as u32 => Some(StatusCode::RegionReadonly),
-            v if v == StatusCode::TableColumnNotFound as u32 => {
-                Some(StatusCode::TableColumnNotFound)
-            }
-            v if v == StatusCode::TableColumnExists as u32 => Some(StatusCode::TableColumnExists),
-            v if v == StatusCode::DatabaseNotFound as u32 => Some(StatusCode::DatabaseNotFound),
-            v if v == StatusCode::StorageUnavailable as u32 => Some(StatusCode::StorageUnavailable),
-            v if v == StatusCode::RuntimeResourcesExhausted as u32 => {
-                Some(StatusCode::RuntimeResourcesExhausted)
-            }
-            v if v == StatusCode::RateLimited as u32 => Some(StatusCode::RateLimited),
-            v if v == StatusCode::UserNotFound as u32 => Some(StatusCode::UserNotFound),
-            v if v == StatusCode::UnsupportedPasswordType as u32 => {
-                Some(StatusCode::UnsupportedPasswordType)
-            }
-            v if v == StatusCode::UserPasswordMismatch as u32 => {
-                Some(StatusCode::UserPasswordMismatch)
-            }
-            v if v == StatusCode::AuthHeaderNotFound as u32 => Some(StatusCode::AuthHeaderNotFound),
-            v if v == StatusCode::InvalidAuthHeader as u32 => Some(StatusCode::InvalidAuthHeader),
-            v if v == StatusCode::AccessDenied as u32 => Some(StatusCode::AccessDenied),
-            _ => None,
-        }
+        StatusCode::from_repr(value as usize)
     }
 }
@@ -231,6 +194,8 @@ impl fmt::Display for StatusCode {
 #[cfg(test)]
 mod tests {
+    use strum::IntoEnumIterator;
     use super::*;
     fn assert_status_code_display(code: StatusCode, msg: &str) {
@@ -244,6 +209,16 @@ mod tests {
         assert_status_code_display(StatusCode::TableAlreadyExists, "TableAlreadyExists");
     }
+    #[test]
+    fn test_from_u32() {
+        for code in StatusCode::iter() {
+            let num = code as u32;
+            assert_eq!(StatusCode::from_u32(num), Some(code));
+        }
+        assert_eq!(StatusCode::from_u32(10000), None);
+    }
     #[test]
     fn test_is_success() {
         assert!(StatusCode::is_success(0));
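Aside, not part of the diff: the hand-written `from_u32` match above is replaced by strum's `FromRepr` derive, and the new `test_from_u32` leans on `EnumIter`. A small standalone sketch of how those two derives behave (illustrative `Code` enum, not GreptimeDB's `StatusCode`):

use strum::{EnumIter, FromRepr, IntoEnumIterator};

// Illustrative enum; discriminants mimic the status-code style above.
#[derive(Debug, Clone, Copy, PartialEq, Eq, EnumIter, FromRepr)]
enum Code {
    Success = 0,
    Unknown = 1000,
    StorageUnavailable = 5000,
}

fn main() {
    // FromRepr generates `from_repr(discriminant) -> Option<Self>`; the
    // discriminant type defaults to usize when no #[repr] is given, which is
    // why the new from_u32 casts `value as usize`.
    assert_eq!(Code::from_repr(5000), Some(Code::StorageUnavailable));
    assert_eq!(Code::from_repr(42), None);

    // EnumIter provides `Code::iter()` through the IntoEnumIterator trait,
    // mirroring the round-trip check in test_from_u32 above.
    for code in Code::iter() {
        assert_eq!(Code::from_repr(code as usize), Some(code));
    }
}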

View File

@@ -5,13 +5,17 @@ version.workspace = true
 license.workspace = true
 [dependencies]
+api.workspace = true
 arc-swap = "1.0"
-build-data = "0.1"
+async-trait.workspace = true
 chrono-tz = "0.6"
 common-error.workspace = true
 common-macro.workspace = true
 common-query.workspace = true
+common-runtime.workspace = true
+common-telemetry.workspace = true
 common-time.workspace = true
+common-version.workspace = true
 datafusion.workspace = true
 datatypes.workspace = true
 libc = "0.2"
@@ -19,8 +23,10 @@ num = "0.4"
 num-traits = "0.2"
 once_cell.workspace = true
 paste = "1.0"
+session.workspace = true
 snafu.workspace = true
 statrs = "0.16"
+table.workspace = true
 [dev-dependencies]
 ron = "0.7"

View File

@@ -15,21 +15,26 @@
 use std::fmt;
 use std::sync::Arc;
-use chrono_tz::Tz;
 use common_query::error::Result;
 use common_query::prelude::Signature;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::vectors::VectorRef;
+use session::context::{QueryContextBuilder, QueryContextRef};
+use crate::state::FunctionState;
+/// The function execution context
 #[derive(Clone)]
 pub struct FunctionContext {
-    pub tz: Tz,
+    pub query_ctx: QueryContextRef,
+    pub state: Arc<FunctionState>,
 }
 impl Default for FunctionContext {
     fn default() -> Self {
         Self {
-            tz: "UTC".parse::<Tz>().unwrap(),
+            query_ctx: QueryContextBuilder::default().build(),
+            state: Arc::new(FunctionState::default()),
         }
     }
 }
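The practical effect of this change is that functions now read the session's timezone and shared state from the query context instead of a hard-coded `Tz`. A short sketch of building the context explicitly, e.g. in a test; the module paths are assumed for illustration:

use std::sync::Arc;

use common_function::function::FunctionContext; // path assumed
use common_function::state::FunctionState;      // path assumed
use session::context::QueryContextBuilder;

fn test_ctx() -> FunctionContext {
    // Same shape as the new Default impl, but the caller controls the query
    // context (and therefore the timezone seen by date/time functions).
    FunctionContext {
        query_ctx: QueryContextBuilder::default().build(),
        state: Arc::new(FunctionState::default()),
    }
}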

View File

@@ -25,6 +25,7 @@ use crate::scalars::math::MathFunction;
 use crate::scalars::numpy::NumpyFunction;
 use crate::scalars::timestamp::TimestampFunction;
 use crate::system::SystemFunction;
+use crate::table::TableFunction;
 #[derive(Default)]
 pub struct FunctionRegistry {
@@ -74,13 +75,19 @@ impl FunctionRegistry {
 pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
     let function_registry = FunctionRegistry::default();
+    // Utility functions
     MathFunction::register(&function_registry);
     NumpyFunction::register(&function_registry);
     TimestampFunction::register(&function_registry);
     DateFunction::register(&function_registry);
+    // Aggregate functions
     AggregateFunctions::register(&function_registry);
+    // System and administration functions
     SystemFunction::register(&function_registry);
+    TableFunction::register(&function_registry);
     Arc::new(function_registry)
 });

View File

@@ -13,13 +13,14 @@
 // limitations under the License.
 use std::sync::Arc;
+use std::time::Duration;
+use api::v1::meta::ProcedureStateResponse;
 use async_trait::async_trait;
-use common_query::error::Result;
 use session::context::QueryContextRef;
 use table::requests::{DeleteRequest, InsertRequest};
+use crate::error::Result;
 pub type AffectedRows = usize;
 /// A trait for handling table mutations in `QueryEngine`.
@@ -30,6 +31,24 @@ pub trait TableMutationHandler: Send + Sync {
     /// Delete rows from the table.
     async fn delete(&self, request: DeleteRequest, ctx: QueryContextRef) -> Result<AffectedRows>;
+    /// Migrate a region from source peer to target peer, returns the procedure id if success.
+    async fn migrate_region(
+        &self,
+        region_id: u64,
+        from_peer: u64,
+        to_peer: u64,
+        replay_timeout: Duration,
+    ) -> Result<String>;
+}
+/// A trait for handling meta service requests in `QueryEngine`.
+#[async_trait]
+pub trait MetaServiceHandler: Send + Sync {
+    /// Query the procedure' state by its id
+    async fn query_procedure_state(&self, pid: &str) -> Result<ProcedureStateResponse>;
 }
 pub type TableMutationHandlerRef = Arc<dyn TableMutationHandler>;
+pub type MetaServiceHandlerRef = Arc<dyn MetaServiceHandler>;

View File

@@ -13,8 +13,11 @@
 // limitations under the License.
 pub mod scalars;
-pub mod system;
+mod system;
+mod table;
 pub mod function;
 pub mod function_registry;
+pub mod handlers;
 pub mod helper;
+pub mod state;

View File

@@ -14,9 +14,11 @@
 use std::sync::Arc;
 mod date_add;
+mod date_format;
 mod date_sub;
 use date_add::DateAddFunction;
+use date_format::DateFormatFunction;
 use date_sub::DateSubFunction;
 use crate::function_registry::FunctionRegistry;
@@ -27,5 +29,6 @@ impl DateFunction {
     pub fn register(registry: &FunctionRegistry) {
         registry.register(Arc::new(DateAddFunction));
         registry.register(Arc::new(DateSubFunction));
+        registry.register(Arc::new(DateFormatFunction));
     }
 }

View File

@@ -0,0 +1,306 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use common_error::ext::BoxedError;
use common_query::error::{self, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder};
use datatypes::vectors::{StringVectorBuilder, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// A function that formats timestamp/date/datetime into string by the format
#[derive(Clone, Debug, Default)]
pub struct DateFormatFunction;
const NAME: &str = "date_format";
impl Function for DateFormatFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
],
vec![ConcreteDataType::string_datatype()],
)
}
fn eval(&self, func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect 2, have: {}",
columns.len()
),
}
);
let left = &columns[0];
let formats = &columns[1];
let size = left.len();
let left_datatype = columns[0].data_type();
let mut results = StringVectorBuilder::with_capacity(size);
match left_datatype {
ConcreteDataType::Timestamp(_) => {
for i in 0..size {
let ts = left.get(i).as_timestamp();
let format = formats.get(i).as_string();
let result = match (ts, format) {
(Some(ts), Some(fmt)) => Some(
ts.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
),
_ => None,
};
results.push(result.as_deref());
}
}
ConcreteDataType::Date(_) => {
for i in 0..size {
let date = left.get(i).as_date();
let format = formats.get(i).as_string();
let result = match (date, format) {
(Some(date), Some(fmt)) => date
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
_ => None,
};
results.push(result.as_deref());
}
}
ConcreteDataType::DateTime(_) => {
for i in 0..size {
let datetime = left.get(i).as_datetime();
let format = formats.get(i).as_string();
let result = match (datetime, format) {
(Some(datetime), Some(fmt)) => datetime
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl fmt::Display for DateFormatFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DATE_FORMAT")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::{TypeSignature, Volatility};
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::value::Value;
use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};
use super::{DateFormatFunction, *};
#[test]
fn test_date_format_misc() {
let f = DateFormatFunction;
assert_eq!("date_format", f.name());
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::timestamp_microsecond_datatype()])
.unwrap()
);
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::timestamp_second_datatype()])
.unwrap()
);
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 6));
}
#[test]
fn test_timestamp_date_format() {
let f = DateFormatFunction;
let times = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:02:03.000"),
None,
Some("1970-01-01 00:00:42.000"),
None,
];
let time_vector = TimestampSecondVector::from(times.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(time_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
#[test]
fn test_date_date_format() {
let f = DateFormatFunction;
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-05-04 00:00:00.000"),
None,
Some("1970-02-12 00:00:00.000"),
None,
];
let date_vector = DateVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
#[test]
fn test_datetime_date_format() {
let f = DateFormatFunction;
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:00:00.123"),
None,
Some("1970-01-01 00:00:00.042"),
None,
];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}
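Side note, not part of the diff: the `%Y-%m-%d %T.%3f` patterns in these tests are chrono strftime specifiers (`%T` is `%H:%M:%S`, `%3f` is millisecond precision without the dot). A tiny standalone check of the first expected value, using plain chrono (0.4.31+ assumed for `DateTime::from_timestamp`) rather than GreptimeDB's wrappers:

use chrono::DateTime;

fn main() {
    // 123 seconds after the Unix epoch, formatted in UTC with the same pattern
    // as test_timestamp_date_format.
    let dt = DateTime::from_timestamp(123, 0).expect("valid timestamp");
    assert_eq!(
        dt.format("%Y-%m-%d %T.%3f").to_string(),
        "1970-01-01 00:02:03.000"
    );
}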

View File

@@ -12,20 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use chrono_tz::Tz;
 use common_query::error::Error;
+use common_time::timezone::get_timezone;
+use common_time::Timezone;
 pub struct EvalContext {
-    _tz: Tz,
+    pub timezone: Timezone,
     pub error: Option<Error>,
 }
 impl Default for EvalContext {
     fn default() -> Self {
-        let tz = "UTC".parse::<Tz>().unwrap();
         Self {
             error: None,
-            _tz: tz,
+            timezone: get_timezone(None).clone(),
         }
     }
 }

View File

@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+mod modulo;
 mod pow;
 mod rate;
@@ -30,11 +31,13 @@ use snafu::ResultExt;
 use crate::function::{Function, FunctionContext};
 use crate::function_registry::FunctionRegistry;
+use crate::scalars::math::modulo::ModuloFunction;
 pub(crate) struct MathFunction;
 impl MathFunction {
     pub fn register(registry: &FunctionRegistry) {
+        registry.register(Arc::new(ModuloFunction));
         registry.register(Arc::new(PowFunction));
         registry.register(Arc::new(RateFunction));
         registry.register(Arc::new(RangeFunction))

View File

@@ -0,0 +1,241 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::fmt::Display;
use common_query::error;
use common_query::error::{ArrowComputeSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute;
use datatypes::arrow::compute::kernels::numeric;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
const NAME: &str = "mod";
/// The function to find remainders
#[derive(Clone, Debug, Default)]
pub struct ModuloFunction;
impl Display for ModuloFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for ModuloFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
if input_types.iter().all(ConcreteDataType::is_signed) {
Ok(ConcreteDataType::int64_datatype())
} else if input_types.iter().all(ConcreteDataType::is_unsigned) {
Ok(ConcreteDataType::uint64_datatype())
} else {
Ok(ConcreteDataType::float64_datatype())
}
}
fn signature(&self) -> Signature {
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let nums = &columns[0];
let divs = &columns[1];
let nums_arrow_array = &nums.to_arrow_array();
let divs_arrow_array = &divs.to_arrow_array();
let array = numeric::rem(nums_arrow_array, divs_arrow_array).context(ArrowComputeSnafu)?;
let result = match nums.data_type() {
ConcreteDataType::Int8(_)
| ConcreteDataType::Int16(_)
| ConcreteDataType::Int32(_)
| ConcreteDataType::Int64(_) => compute::cast(&array, &ArrowDataType::Int64),
ConcreteDataType::UInt8(_)
| ConcreteDataType::UInt16(_)
| ConcreteDataType::UInt32(_)
| ConcreteDataType::UInt64(_) => compute::cast(&array, &ArrowDataType::UInt64),
ConcreteDataType::Float32(_) | ConcreteDataType::Float64(_) => {
compute::cast(&array, &ArrowDataType::Float64)
}
_ => unreachable!("unexpected datatype: {:?}", nums.data_type()),
}
.context(ArrowComputeSnafu)?;
Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_error::ext::ErrorExt;
use datatypes::value::Value;
use datatypes::vectors::{Float64Vector, Int32Vector, StringVector, UInt32Vector};
use super::*;
#[test]
fn test_mod_function_signed() {
let function = ModuloFunction;
assert_eq!("mod", function.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
function
.return_type(&[ConcreteDataType::int64_datatype()])
.unwrap()
);
assert_eq!(
ConcreteDataType::int64_datatype(),
function
.return_type(&[ConcreteDataType::int32_datatype()])
.unwrap()
);
let nums = vec![18, -17, 5, -6];
let divs = vec![4, 8, -5, -5];
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_vec(nums.clone())),
Arc::new(Int32Vector::from_vec(divs.clone())),
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 4);
for i in 0..3 {
let p: i64 = (nums[i] % divs[i]) as i64;
assert!(matches!(result.get(i), Value::Int64(v) if v == p));
}
}
#[test]
fn test_mod_function_unsigned() {
let function = ModuloFunction;
assert_eq!("mod", function.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
function
.return_type(&[ConcreteDataType::uint64_datatype()])
.unwrap()
);
assert_eq!(
ConcreteDataType::uint64_datatype(),
function
.return_type(&[ConcreteDataType::uint32_datatype()])
.unwrap()
);
let nums: Vec<u32> = vec![18, 17, 5, 6];
let divs: Vec<u32> = vec![4, 8, 5, 5];
let args: Vec<VectorRef> = vec![
Arc::new(UInt32Vector::from_vec(nums.clone())),
Arc::new(UInt32Vector::from_vec(divs.clone())),
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 4);
for i in 0..3 {
let p: u64 = (nums[i] % divs[i]) as u64;
assert!(matches!(result.get(i), Value::UInt64(v) if v == p));
}
}
#[test]
fn test_mod_function_float() {
let function = ModuloFunction;
assert_eq!("mod", function.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
function
.return_type(&[ConcreteDataType::float64_datatype()])
.unwrap()
);
assert_eq!(
ConcreteDataType::float64_datatype(),
function
.return_type(&[ConcreteDataType::float32_datatype()])
.unwrap()
);
let nums = vec![18.0, 17.0, 5.0, 6.0];
let divs = vec![4.0, 8.0, 5.0, 5.0];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(nums.clone())),
Arc::new(Float64Vector::from_vec(divs.clone())),
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 4);
for i in 0..3 {
let p: f64 = nums[i] % divs[i];
assert!(matches!(result.get(i), Value::Float64(v) if v == p));
}
}
#[test]
fn test_mod_function_errors() {
let function = ModuloFunction;
assert_eq!("mod", function.name());
let nums = vec![27];
let divs = vec![0];
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_vec(nums.clone())),
Arc::new(Int32Vector::from_vec(divs.clone())),
];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
let err_msg = result.unwrap_err().output_msg();
assert_eq!(
err_msg,
"Failed to perform compute operation on arrow arrays: Divide by zero error"
);
let nums = vec![27];
let args: Vec<VectorRef> = vec![Arc::new(Int32Vector::from_vec(nums.clone()))];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
let err_msg = result.unwrap_err().output_msg();
assert!(
err_msg.contains("The length of the args is not correct, expect exactly two, have: 1")
);
let nums = vec!["27"];
let divs = vec!["4"];
let args: Vec<VectorRef> = vec![
Arc::new(StringVector::from(nums.clone())),
Arc::new(StringVector::from(divs.clone())),
];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
let err_msg = result.unwrap_err().output_msg();
assert!(err_msg.contains("Invalid arithmetic operation"));
}
}
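Side note, not part of the diff: `ModuloFunction` delegates the arithmetic to arrow's `numeric::rem` kernel and only casts the result type afterwards. A standalone sketch of that kernel's behaviour on the same inputs as `test_mod_function_signed` (plain `arrow` crate assumed; GreptimeDB reaches it through the `datatypes::arrow` re-export):

use arrow::array::{Array, Int32Array};
use arrow::compute::kernels::numeric;
use arrow::error::ArrowError;

fn main() -> Result<(), ArrowError> {
    // Same inputs as test_mod_function_signed.
    let nums = Int32Array::from(vec![18, -17, 5, -6]);
    let divs = Int32Array::from(vec![4, 8, -5, -5]);

    // Element-wise remainder; the sign follows the dividend, like Rust's `%`.
    let rem = numeric::rem(&nums, &divs)?;
    let rem = rem
        .as_any()
        .downcast_ref::<Int32Array>()
        .expect("Int32 remainder");
    assert_eq!(rem.values().to_vec(), vec![2, -1, 0, -1]);

    // Integer division by zero surfaces as an error, which is what
    // test_mod_function_errors observes through the GreptimeDB wrapper.
    let zero = Int32Array::from(vec![0]);
    assert!(numeric::rem(&Int32Array::from(vec![27]), &zero).is_err());
    Ok(())
}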

View File

@@ -104,7 +104,6 @@ impl fmt::Display for GreatestFunction {
 #[cfg(test)]
 mod tests {
-    use std::str::FromStr;
     use std::sync::Arc;
     use common_time::Date;
@@ -137,11 +136,11 @@ mod tests {
         assert_eq!(result.len(), 2);
         assert_eq!(
             result.get(0),
-            Value::Date(Date::from_str("2001-02-01").unwrap())
+            Value::Date(Date::from_str_utc("2001-02-01").unwrap())
         );
         assert_eq!(
             result.get(1),
-            Value::Date(Date::from_str("2012-12-23").unwrap())
+            Value::Date(Date::from_str_utc("2012-12-23").unwrap())
         );
     }
@@ -162,11 +161,11 @@ mod tests {
         assert_eq!(result.len(), 2);
         assert_eq!(
             result.get(0),
-            Value::Date(Date::from_str("1970-01-01").unwrap())
+            Value::Date(Date::from_str_utc("1970-01-01").unwrap())
         );
         assert_eq!(
             result.get(1),
-            Value::Date(Date::from_str("1970-01-03").unwrap())
+            Value::Date(Date::from_str_utc("1970-01-03").unwrap())
         );
     }
 }

View File

@@ -13,7 +13,6 @@
 // limitations under the License.
 use std::fmt;
-use std::str::FromStr;
 use std::sync::Arc;
 use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
@@ -31,16 +30,17 @@ pub struct ToUnixtimeFunction;
 const NAME: &str = "to_unixtime";
-fn convert_to_seconds(arg: &str) -> Option<i64> {
-    if let Ok(dt) = DateTime::from_str(arg) {
+fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
+    let timezone = &func_ctx.query_ctx.timezone();
+    if let Ok(dt) = DateTime::from_str(arg, Some(timezone)) {
         return Some(dt.val() / 1000);
     }
-    if let Ok(ts) = Timestamp::from_str(arg) {
+    if let Ok(ts) = Timestamp::from_str(arg, Some(timezone)) {
         return Some(ts.split().0);
     }
-    if let Ok(date) = Date::from_str(arg) {
+    if let Ok(date) = Date::from_str(arg, Some(timezone)) {
         return Some(date.to_secs());
     }
@@ -92,7 +92,7 @@ impl Function for ToUnixtimeFunction {
         )
     }
-    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+    fn eval(&self, func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
         ensure!(
             columns.len() == 1,
             InvalidFuncArgsSnafu {
@@ -108,7 +108,7 @@ impl Function for ToUnixtimeFunction {
         match columns[0].data_type() {
             ConcreteDataType::String(_) => Ok(Arc::new(Int64Vector::from(
                 (0..vector.len())
-                    .map(|i| convert_to_seconds(&vector.get(i).to_string()))
+                    .map(|i| convert_to_seconds(&vector.get(i).to_string(), &func_ctx))
                     .collect::<Vec<_>>(),
             ))),
             ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {

View File

@@ -21,20 +21,30 @@ use common_query::prelude::{
 use datatypes::error::Error as DataTypeError;
 use datatypes::prelude::*;
 use datatypes::vectors::Helper;
+use session::context::QueryContextRef;
 use snafu::ResultExt;
 use crate::function::{FunctionContext, FunctionRef};
+use crate::state::FunctionState;
-/// Create a ScalarUdf from function.
-pub fn create_udf(func: FunctionRef) -> ScalarUdf {
+/// Create a ScalarUdf from function, query context and state.
+pub fn create_udf(
+    func: FunctionRef,
+    query_ctx: QueryContextRef,
+    state: Arc<FunctionState>,
+) -> ScalarUdf {
     let func_cloned = func.clone();
     let return_type: ReturnTypeFunction = Arc::new(move |input_types: &[ConcreteDataType]| {
         Ok(Arc::new(func_cloned.return_type(input_types)?))
     });
     let func_cloned = func.clone();
     let fun: ScalarFunctionImplementation = Arc::new(move |args: &[ColumnarValue]| {
-        let func_ctx = FunctionContext::default();
+        let func_ctx = FunctionContext {
+            query_ctx: query_ctx.clone(),
+            state: state.clone(),
+        };
         let len = args
             .iter()
@@ -70,6 +80,7 @@ mod tests {
     use datatypes::prelude::{ScalarVector, Vector, VectorRef};
     use datatypes::value::Value;
     use datatypes::vectors::{BooleanVector, ConstantVector};
+    use session::context::QueryContextBuilder;
     use super::*;
     use crate::function::Function;
@@ -78,6 +89,7 @@ mod tests {
     #[test]
     fn test_create_udf() {
         let f = Arc::new(TestAndFunction);
+        let query_ctx = QueryContextBuilder::default().build();
         let args: Vec<VectorRef> = vec![
             Arc::new(ConstantVector::new(
@@ -95,7 +107,7 @@ mod tests {
         }
         // create a udf and test it again
-        let udf = create_udf(f.clone());
+        let udf = create_udf(f.clone(), query_ctx, Arc::new(FunctionState::default()));
         assert_eq!("test_and", udf.name);
         assert_eq!(f.signature(), udf.signature);

Some files were not shown because too many files have changed in this diff.