Compare commits

...

287 Commits

Author SHA1 Message Date
Yingwen
5d644c0b7f chore: bump version to v0.7.0 (#3433) 2024-03-05 12:07:37 +00:00
Ruihang Xia
020635063c feat: implement multi-dim partition rule (#3409)
* generate expr rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement show create for new partition rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement row splitter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: fix failed tests

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: fix lint issues

Signed-off-by: WenyXu <wenymedia@gmail.com>

* chore: ignore tests for deprecated partition rule

* chore: remove unused partition rule tests setup

* test(sqlness): add basic partition tests

* test(multi_dim): add basic find region test

* address CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Signed-off-by: WenyXu <wenymedia@gmail.com>
Co-authored-by: WenyXu <wenymedia@gmail.com>
2024-03-05 11:39:15 +00:00
dependabot[bot]
97cbfcfe23 build(deps): bump mio from 0.8.10 to 0.8.11 (#3434)
Bumps [mio](https://github.com/tokio-rs/mio) from 0.8.10 to 0.8.11.
- [Release notes](https://github.com/tokio-rs/mio/releases)
- [Changelog](https://github.com/tokio-rs/mio/blob/master/CHANGELOG.md)
- [Commits](https://github.com/tokio-rs/mio/compare/v0.8.10...v0.8.11)

---
updated-dependencies:
- dependency-name: mio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-03-05 11:04:14 +00:00
Lei, HUANG
7183fa198c refactor: make MergeTreeMemtable the default choice (#3430)
* refactor: make MergeTreeMemtable the default choice

* refactor: reformat

* chore: add doc to config
2024-03-05 10:00:08 +00:00
Lei, HUANG
02b18fbca1 feat: decode prom requests to grpc (#3425)
* hack: inline decode

* move to servers

* fix: samples lost

* add bench

* remove useless functions

* wip

* feat: remove object pools

* fix: minor issues

* fix: remove useless dep

* chore: rebase main

* format

* finish

* fix: format

* feat: introduce request pool

* try to fix license issue

* fix: clippy

* resolve comments

* fix: typo

* remove useless comments
2024-03-05 09:47:32 +00:00
shuiyisong
7b1c3503d0 fix: complete interceptors for all frontend entry (#3428) 2024-03-05 09:38:47 +00:00
liyang
6fd2ff49d5 ci: refine windows output env (#3431) 2024-03-05 08:38:28 +00:00
WU Jingdi
53f2a5846c feat: support tracing rule sampler (#3405)
* feat: support tracing rule sampler

* chore: simplify code
2024-03-05 15:40:02 +08:00
Yingwen
49157868f9 feat: Correct server metrics and add more metrics for scan (#3426)
* feat: drop timer on stream terminated

* refactor: combine metrics into a histogram vec

* refactor: frontend grpc metrics

* feat: add metrics middleware layer to grpc server

* refactor: move http metrics layer to metrics mod

* feat: bucket for grpc/http elapsed

* feat: remove duplicate metrics

* style: fix clippy

* fix: incorrect bucket of promql series

* feat: more metrics for mito

* feat: convert cost

* test: fix metrics test
2024-03-04 10:15:10 +00:00
Ruihang Xia
ae2c18e1cf docs(rfcs): multi-dimension partition rule (#3350)
* docs(rfcs): multi-dimension partition rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change math block type

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update tracking issue

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update discussion

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-03-04 08:10:54 +00:00
dennis zhuang
e6819412c5 refactor: show tables and show databases (#3423)
* refactor: show tables and show databases

* chore: clean code
2024-03-04 06:15:17 +00:00
tison
2a675e0794 docs: update pull_request_template.md (#3421) 2024-03-03 09:51:44 +00:00
JeremyHi
0edf1bbacc feat: reduce a clone of string (#3422) 2024-03-03 08:09:17 +00:00
Eugene Tolbakov
8609977b52 feat: add verbose support for tql explain/analyze (#3390)
* feat: add verbose support for tql explain/analyze

* chore: apply clippy suggestions

* feat: add sqlness tests

* fix: adjust sqlness replace rules

* fix: address CR (move tql explain/analyze inside common folder)

* fix: address CR(improve comments to indicate that verbose is optional)
2024-03-02 11:18:22 +00:00
JeremyHi
2d975e4f22 feat: tableref cache (#3420)
* feat: tableref cache

* chore: minor refactor

* chore: avoid to string

* chore: change log level

* feat: add metrics for prometheus remote write decode
2024-03-02 07:37:31 +00:00
Kould
00cbbc97ae feat: support Create Table ... Like (#3372)
* feat: support `Create Table ... Like`

* fix: `check_permission` for `Create Table ... Like`

* style: renaming `name` -> `table_name` & `target` -> `source_name` and make `Create Table ... Like` testcase more complicated

* rebase

* avoid _ fn

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
Co-authored-by: tison <wander4096@gmail.com>
2024-03-02 06:34:13 +00:00
niebayes
7d30c2484b fix: mitigate memory spike during startup (#3418)
* fix: fix memory spike during startup

* fix: allocate a region write ctx for each wal entry
2024-03-01 07:46:05 +00:00
Lei, HUANG
376409b857 feat: employ sparse key encoding for shard lookup (#3410)
* feat: employ short key encoding for shard lookup

* fix: license

* chore: simplify code

* refactor: only enable sparse encoding to speed up lookup on metric engine

* fix: names
2024-03-01 06:22:15 +00:00
Ning Sun
d4a54a085b feat: add configuration for tls watch option (#3395)
* feat: add configuration for tls watch option

* test: sleep longer to ensure async task run

* test: update config api integration test

* refactor: rename function
2024-03-01 03:49:54 +00:00
dennis zhuang
c1a370649e fix: show table names not complete from information_schema (#3417) 2024-03-01 02:51:46 +00:00
JeremyHi
3cad9d989d fix: partition region id (#3414) 2024-02-29 09:09:59 +00:00
JohnsonLee
a50025269f feat: Support automatic DNS lookup for kafka bootstrap servers (#3379)
* feat: Support automatic DNS lookup for kafka bootstrap servers

* Revert "feat: Support automatic DNS lookup for kafka bootstrap servers"

This reverts commit 5baed7b01d.

* feat: Support automatic DNS lookup for Kafka broker

* fix: resolve broker endpoint in client manager

* fix: apply clippy lints

* refactor: simplify the code with clippy hint

* refactor: move resolve_broker_endpoint to common/wal/src/lib.rs

* test: add mock test for resolver_broker_endpoint

* refactor: accept niebayes's advice

* refactor: rename EndpointIpNotFound to EndpointIPV4NotFound

* refactor: remove mock test and simplify the implementation

* docs: add comments about test_vallid_host_ipv6

* Apply suggestions from code review

Co-authored-by: niebayes <niebayes@gmail.com>

* move more common code

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
Co-authored-by: tison <wander4096@gmail.com>
Co-authored-by: niebayes <niebayes@gmail.com>
2024-02-29 07:29:20 +00:00
JeremyHi
a3533c4ea0 feat: zero copy on split rows (#3407) 2024-02-28 13:27:52 +00:00
Lei, HUANG
3413fc0781 refactor: move some costly methods in DataBuffer::read out of read lock (#3406)
* refactor: move some costly methods in DataBuffer::read out of read lock

* refactor: also replace ShardReader with ShardReaderBuilder
2024-02-28 12:22:44 +00:00
tison
dc205a2c5d feat: enable ArrowFlight compression (#3403)
* feat: enable ArrowFlight compression

Signed-off-by: tison <wander4096@gmail.com>

* turn on features

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-28 08:55:44 +00:00
Lei, HUANG
a0a8e8c587 fix: some read metrics (#3404)
* fix: some read metrics

* chore: fix some metrics

* fix
2024-02-28 08:47:49 +00:00
Zhenchi
c3c80b92c8 feat(index): measure memory usage in global instead of single-column and add metrics (#3383)
* feat(index): measure memory usage in global instead of single-column and add metrics

* feat: add leading zeros to streamline memory usage

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: remove println

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-28 06:49:24 +00:00
Weny Xu
a8cbec824c refactor: refactor TableRouteManager (#3392)
* feat: introduce TableRouteStorage

* refactor: remove get & batch_get in TableRouteManager

* refactor: move txn related fn to TableRouteStorage

* chore: apply suggestions from CR

* chore(codecov): ignore tests-integration dir
2024-02-28 06:18:09 +00:00
tison
33d894c1f0 build: do not retry for connrefused (#3402)
* build: do not retry for connrefused

Signed-off-by: tison <wander4096@gmail.com>

* simplify layout

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-28 06:15:23 +00:00
Lei, HUANG
7942b8fae9 chore: add metrics for memtable read path (#3397)
* chore: add metrics for read path

* chore: add more metrics
2024-02-28 03:37:19 +00:00
Yingwen
b97f957489 feat: Use a partition level map to look up pk index (#3400)
* feat: partition level map

* test: test shard and builder

* fix: do not use pk index from shard builder

* feat: add multi key test

* fix: freeze shard before finding pk in shards
2024-02-28 03:17:09 +00:00
tison
f3d69e9563 chore: retry fetch dashboard assets (#3394)
Signed-off-by: tison <wander4096@gmail.com>
2024-02-27 10:07:21 +00:00
dennis zhuang
4b36c285f1 feat: flush or compact table and region functions (#3363)
* feat: adds Requester to process table flush and compaction request

* feat: admin_fn macros for administration functions

* test: add query result

* feat: impl flush_region, flush_table, compact_region, and flush_region functions

* docs: add Arguments to admin_fn macro

* chore: apply suggestion

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: apply suggestion

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: group_requests_by_peer and adds log

* Update src/common/macro/src/admin_fn.rs

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* feat: adds todo for spawn thread

* feat: rebase with main

---------

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-27 08:57:38 +00:00
discord9
dbb1ce1a9b feat(flow): impl for MapFilterProject (#3359)
* feat: mfp impls

* fix: after rebase

* test: temporal filter mfp

* refactor: more comments&test

* test: permute

* fix: check input len when eval

* refactor: err handle&docs: more explain graph

* docs: better flowchart map,filter,project

* refactor: visit_* fallible

* chore: better temp lint allow

* fix: permute partially

* chore: remove duplicated checks

* docs: more explain&tests for clarity

* refactor: use ensure! instead
2024-02-27 08:13:55 +00:00
Ruihang Xia
3544c9334c feat!: new partition grammar - parser part (#3347)
* parser part

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix test in sql

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* comment out and ignore some logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update region migration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* temporary disable region migration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* allow dead code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update integration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-27 07:20:16 +00:00
Lei, HUANG
492a00969d feat: enable zstd compression and encodings in merge tree data part (#3380)
* feat: enable zstd compression in merge tree data part to save memory

* feat: also enable customized column encoding in DataPartEncoder
2024-02-27 06:54:56 +00:00
Yingwen
206666bff6 feat: Implement partition eviction and only add value size to write buffer size (#3393)
* feat: track key bytes in dict

* chore: done allocating on finish

* feat: evict keys

* chore: do not add to write buffer

* chore: only count value bytes

* fix: reset key bytes

* feat: remove write buffer manager from shards

* feat: change dict size compute method

* chore: adjust dictionary size by os memory
2024-02-27 06:28:57 +00:00
Weny Xu
7453d9779d fix: throw errors instead of panic (#3391)
* fix: throw errors instead of panic

* chore: apply suggestions from CR
2024-02-27 03:46:12 +00:00
liyang
8e3e0fd528 ci: add builder result outputs in release action (#3381) 2024-02-27 03:43:16 +00:00
dimbtp
b1e290f959 fix: range fix in modulo function tests (#3389)
fix: range fix for modulo tests
2024-02-26 15:50:23 +00:00
Ruihang Xia
d8dc93fccc feat(grafana): enable shared tooltip, add raft engine throughput (#3387)
feat: enable shared tooltip, add raft engine throughput

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-26 11:31:15 +00:00
Ning Sun
3887d207b6 feat: make tls certificates/keys reloadable (part 1) (#3335)
* feat: make tls certificates/keys reloadable (part 1)

* feat: add notify watcher for cert/key files

* test: add unit test for watcher

* fix: correct usage of watcher

* fix: skip watch when tls disabled
2024-02-26 09:37:54 +00:00
Ruihang Xia
e859f0e67d chore: skip reorder workspace tables in taplo (#3388)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-26 08:57:49 +00:00
Ruihang Xia
ce397ebcc6 feat: change how region id maps to region worker (#3384)
* feat: change how region id maps to region worker

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add overflow test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-26 08:42:29 +00:00
Yingwen
26011ed0b6 fix: resets dict builder keys counter and avoid unnecessary pruning (#3386)
* fix: dict builder resets num_keys on finish

* feat: skip empty shard and builder

* feat: avoid pruning if possible

Implementation:
- Apply all filters on the partition column
- If no filter to prune, skip decoding keys
2024-02-26 08:24:46 +00:00
Lei, HUANG
8087822ab2 refactor: change the receivers of merge tree components (#3378)
* refactor: change the receivers of Shard::read/DataBuffer::read/DataParts::read to &self instead of &mut self

* refactor: remove allow(dead_code) in merge tree
2024-02-26 06:50:55 +00:00
Yingwen
e481f073f5 feat: Implement dedup for the new memtable and expose the config (#3377)
* fix: KeyValues num_fields() is incorrect

* chore: fix warnings

* feat: support dedup

* feat: allow using the new memtable

* feat: serde default for config

* fix: resets pk index after finishing a dict
2024-02-25 13:06:01 +00:00
Lei, HUANG
606309f49a fix: remove unused imports in memtable_util.rs (#3376) 2024-02-25 09:23:28 +00:00
Yingwen
8059b95e37 feat: Implement iter for the new memtable (#3373)
* chore: read shard builder

* chore: reuse pk weights

* chore: prune key

* chore: shard reader wip

* refactor: shard builder DataBatch

* feat: merge shard readers

* feat: return shard id in shard readers

* feat: impl partition reader

* chore: impl partition read

* feat: impl iter tree

* chore: save last yield pk id

* style: fix clippy

* refactor: rename ShardReaderImpl to ShardReader

* chore: address CR comment
2024-02-25 07:42:16 +00:00
Lei, HUANG
afe4633320 feat: merge tree dedup reader (#3375)
* feat: add dedup option to merge tree component

* feat: impl dedup reader for shard reader

* refactor: DedupReader::new to DedupReader::try_new

* refactor: remove DedupReader::current_key field

* fix: some cr comments

* fix: fmt

* fix: remove shard_id method from DedupSource
2024-02-24 13:50:49 +00:00
Yingwen
abbfd23d4b feat: Add freeze and fork method to the memtable (#3374)
* feat: add fork method to the memtable

* feat: allow mark immutable returns result

* feat: use fork to create the mutable memtable

* feat: remove memtable builder from freeze

* chore: warnings

* fix: inspect error

* feat: iter returns result

* chore: maintains memtable id in region

* chore: update comment

* fix: remove region status if failed to freeze a memtable

* chore: update comment

* chore: iter should not require sync

* chore: implement freeze and fork for the new memtable
2024-02-24 12:11:16 +00:00
Yingwen
1df64f294b refactor: Remove Item from merger's Node trait (#3371)
* refactor: data reader returns reference to data batch

* refactor: use range to create merger

* chore: Reference RecordBatch in DataBatch

* fix: top node not read if no next node

* refactor: move timestamp_array_to_i64_slice to data mod

* style: fix clippy

* chore: derive copy for DataBatch

* chore: address CR comments
2024-02-24 07:19:48 +00:00
LFC
a6564e72b4 fix: treat "0" and "1" as valid boolean values. (#3370)
* Treat "0" and "1" as valid boolean values.

* Update src/sql/src/statements.rs

Co-authored-by: tison <wander4096@gmail.com>

* Fix tests.

---------

Co-authored-by: tison <wander4096@gmail.com>
2024-02-23 14:34:27 +00:00
Lei, HUANG
1f1d1b4f57 feat: distinguish between different read paths (#3369)
* feat: distinguish between different read paths

* fix: reformat code
2024-02-23 12:40:39 +00:00
Yingwen
b144836935 feat: Implement write and fork for the new memtable (#3357)
* feat: write to a shard or a shard builder

* feat: freeze and fork for partition and shards

* chore: shard builder

* chore: change dict reader to support random access

* test: test write shard

* test: test write

* test: test memtable

* feat: add new and write_row to DataParts

* refactor: partition freeze shards

* refactor: write_with_pk_id

* style: fix clippy

* chore: add methods to get pk weights

* chore: fix compiler errors
2024-02-23 07:20:55 +00:00
dependabot[bot]
93d9f48dd7 build(deps): bump libgit2-sys from 0.16.1+1.7.1 to 0.16.2+1.7.2 (#3367)
Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.16.1+1.7.1 to 0.16.2+1.7.2.
- [Changelog](https://github.com/rust-lang/git2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/git2-rs/commits)

---
updated-dependencies:
- dependency-name: libgit2-sys
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-23 14:30:09 +08:00
Lei, HUANG
90e9b69035 feat: impl merge reader for DataParts (#3361)
* feat: impl merge reader for DataParts

* fix: fmt

* fix: sort rows with pk and ts according to sequence desc

* fix: remove pk weight as pk indexes are already replaced by weights

* fix: format

* fix: some cr comments

* fix: some cr comments

* refactor: simplify trait's associated types

* fix: some cr comments
2024-02-23 06:07:55 +00:00
LFC
2035e7bf4c refactor: set the actual bound port in server handler (#3353)
* refactor: set the actual bound port so we can use port 0 in testing

* Update src/servers/src/server.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fmt

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-02-23 02:49:11 +00:00
Ruihang Xia
7341f23019 feat: skip filling NULL for put and delete requests (#3364)
* feat: optimize for sparse data

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove old structures

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-22 14:30:43 +00:00
tison
41ee0cdd5a build(deps): Upgrade opensrv to 0.7.0 (#3362)
* build(deps): Upgrade opensrv to 0.7.0

Signed-off-by: tison <wander4096@gmail.com>

* workaround X is not X by casting

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-22 13:11:26 +00:00
Kould
578dd8f87a feat: add isnull function (#3360)
* code fmt

* feat: add isnull function

* feat: add isnull function
2024-02-22 12:41:25 +00:00
Weny Xu
1dc4fec662 refactor: allocate table ids in the procedure (#3293)
* refactor: refactor the create logical tables

* test(create_logical_tables): add tests for on_prepare

* test(create_logical_tables): add tests for on_create_metadata

* refactor: rename to create_logical_tables_metadata

* chore: fmt toml

* chore: apply suggestions from CR
2024-02-22 10:53:28 +00:00
Ruihang Xia
f26505b625 fix: typo in lint config (#3358)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-22 08:56:33 +00:00
Ruihang Xia
8289b0dec2 ci: align docs workflow jobs with develop.yml (#3356)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-22 07:01:15 +00:00
Yingwen
53105b99e7 test: fix list_files_and_parse_table_name path issue on windows (#3349)
* fix: always converts path to slash

* chore: print

* chore: normalize dir

* chore: compile

* chore: rm print
2024-02-22 06:16:41 +00:00
dennis zhuang
564fe3beca feat: impl migrate_region and procedure_state SQL function (#3325)
* fix: logical region can't find region routes

* feat: fetch partitions info in batch

* refactor: rename batch functions

* refactor: rename DdlTaskExecutor to ProcedureExecutor

* feat: impl migrate_region and query_procedure_state for ProcedureExecutor

* feat: adds SQL function procedure_state and finish migrate_region impl

* fix: constant vector

* feat: unit tests for migrate_region and procedure_state

* test: test region migration by SQL

* fix: compile error after rebasing

* fix: clippy warnings

* feat: ensure procedure_state and migrate_region can be only called under greptime catalog

* fix: license header
2024-02-22 02:37:11 +00:00
SteveLauC
e9a2b0a9ee chore: use workspace-wide lints (#3352)
* chore: use workspace-wide lints

* respond to review
2024-02-22 01:01:10 +00:00
discord9
860b1e9d9e feat(flow): impl ScalarExpr&Scalar Function (#3283)
* feat: impl for ScalarExpr

* feat: plain functions

* refactor: simpler trait bound&tests

* chore: remove unused imports

* chore: fmt

* refactor: early ret on first error

* refactor: remove redundant match arm

* chore: per review

* doc: `support` fn

* chore: per review more

* chore: more per review

* fix: extract_bound

* chore: per review

* refactor: reduce nest
2024-02-21 12:53:16 +00:00
Yingwen
7c88d721c2 Merge pull request #3348
* feat: define functions for partitions

* feat: write partitions

* feat: fork and freeze partition

* feat: create iter by partition

* style: fix clippy

* chore: typos

* feat: add scan method to builder

* feat: check whether the builder should freeze first
2024-02-21 20:50:34 +08:00
Lei, HUANG
90169c868d feat: merge tree data parts (#3346)
* feat: add iter method for DataPart

* chore: rename iter to reader

* chore: some doc

* fix: resolve some comments

* fix: remove metadata in DataPart
2024-02-21 11:37:29 +00:00
tison
4c07606da6 refactor: put together HTTP headers (#3337)
* refactor: put together HTTP headers

Signed-off-by: tison <wander4096@gmail.com>

* do refactor

Signed-off-by: tison <wander4096@gmail.com>

* drop dirty commit

Signed-off-by: tison <wander4096@gmail.com>

* reduce changeset

Signed-off-by: tison <wander4096@gmail.com>

* fixup compilations

Signed-off-by: tison <wander4096@gmail.com>

* tidy files

Signed-off-by: tison <wander4096@gmail.com>

* drop common-api

Signed-off-by: tison <wander4096@gmail.com>

* fmt

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-21 09:51:10 +00:00
tison
a7bf458a37 chore: remove unused deprecated table_dir_with_catalog_and_schema (#3341) 2024-02-21 08:46:36 +00:00
tison
fa08085119 ci: upgrade actions to node20-based version (#3345)
* ci: upgrade actions to node20-based version

Signed-off-by: tison <wander4096@gmail.com>

* distinguish artifact name

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-21 08:09:09 +00:00
Lei, HUANG
86a98c80f5 feat: replace pk index with pk_weight during freeze (#3343)
* feat: replace pk index with pk_weight during freeze

* chore: add parameter to control pk_index replacement

* fix: dedup pk weights also

* fix: generate pk array before dedup
2024-02-21 08:05:25 +00:00
tison
085a380019 build(deps): axum-test-helper has included patch-1 (#3333)
Signed-off-by: tison <wander4096@gmail.com>
2024-02-21 07:49:42 +00:00
tison
d9a96344ee ci: try fix log location (#3342)
Signed-off-by: tison <wander4096@gmail.com>
2024-02-21 07:01:51 +00:00
Weny Xu
41656c8635 refactor: allocate table id in the procedure (#3271)
* refactor: replace TableMetadataManager with TableNameManager

* refactor: allocate table id in the procedure

* refactor: refactor client logic of handling retries

* feat(test_util): add TestCreateTableExprBuilder

* feat(test_util): add MockDatanodeManager

* feat(test_util): add new_ddl_context

* feat(test_util): add build_raw_table_info_from_expr

* feat(test_util): add MockDatanodeManager::new

* feat(procedure): add downcast_output_ref to Status

* test(create_table): add tests for CreateTableProcedure on_prepare

* refactor(ddl): rename handle_operate_region_error to add_peer_context_if_need

* test(create_table): add tests for CreateTableProcedure on_datanode_create_regions

* test(create_table): add tests for CreateTableProcedure on_create_metadata

* refactor(meta): use CreateTableExprBuilder

* feat(create_table): ensure number of partitions is greater than 0

* refactor: rename to add_peer_context_if_needed

* feat: add context for panic

* refactor: simplify the should_retry

* refactor: use Option<&T> instead of &Option<T>

* refactor: move downcast_output_ref under cfg(test)

* chore: fmt toml
2024-02-21 04:38:46 +00:00
tison
cf08a3de6b chore: support configure GITHUB_PROXY_URL when fetch dashboard assets (#3340)
Signed-off-by: tison <wander4096@gmail.com>
2024-02-21 02:38:14 +00:00
Yingwen
f087a843bb feat: Implement KeyDictBuilder for the merge tree memtable (#3334)
* feat: dict builder

* feat: write and scan dict builder

* chore: address CR comments
2024-02-20 15:39:17 +00:00
Lei, HUANG
450dfe324d feat: data buffer and related structs (#3329)
* feat: data buffer and related structs

* fix: some cr comments

* chore: remove freeze_threshold in DataBuffer

* fix: use LazyMutableVectorBuilder instead of two vectors; add option to control dedup

* fix: dedup rows according to both pk weights and timestamps

* fix: assemble DataBatch on demand
2024-02-20 09:22:45 +00:00
tison
3dfe4a2e5a chore: check dirs before create RaftEngine store (#3327)
* chore: check dirs before create RaftEngine store

Signed-off-by: tison <wander4096@gmail.com>

* fix impl

Signed-off-by: tison <wander4096@gmail.com>

* improve naming

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-20 07:48:15 +00:00
LFC
eded08897d test: add data compatibility test (#3109)
* test: data files compatibility test

* rework compatibility test

* revert unneeded changes

* revert unneeded changes

* debug CI

* Update .github/workflows/develop.yml

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-20 07:44:04 +00:00
Ruihang Xia
b1f54d8a03 fix: disable ansi control char when stdout is redirected (#3332)
* fix: disable ansi control char when stdout is redirected

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* don't touch file logging layer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update comment

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* disable ansi for two file layers

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Co-authored-by: LFC <bayinamine@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: LFC <bayinamine@gmail.com>
2024-02-20 06:42:56 +00:00
shuiyisong
bf5e1905cd refactor: bring metrics to http output (#3247)
* refactor: bring metrics to http output

* chore: remove unwrap

* chore: make walk plan accumulate

* chore: change field name and comment

* chore: add metrics to http resp header

* chore: move PrometheusJsonResponse to a separate file and impl IntoResponse

* chore: put metrics in prometheus resp header too
2024-02-20 03:25:18 +00:00
Zhenchi
6628c41c36 feat(metric-engine): set index options for data region (#3330)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-20 02:38:35 +00:00
Yingwen
43fd87e051 feat: Defines structs in the merge tree memtable (#3326)
* chore: define mods

* feat: memtable struct

* feat: define structs inside the tree
2024-02-19 11:43:19 +00:00
Zhenchi
40f43de27d fix(index): encode string type to original data to enable fst regex to work (#3324)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-19 10:52:19 +00:00
Zhenchi
4810c91a64 refactor(index): move option segment_row_count from WriteOptions to IndexOptions (#3307)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-19 08:03:41 +00:00
JeremyHi
6668d6b042 fix: split write metadata request (#3311)
* feat: add txn_helper

* fix: split the create metadata requests to avoid exceeding the txn limit

* fix: add license header

* chore: some improve
2024-02-19 07:33:09 +00:00
JeremyHi
aa569f7d6b feat: batch get physical table routes (#3319)
* feat: batch get physical table routes

* chore: by comment
2024-02-19 06:51:34 +00:00
dennis zhuang
8b73067815 feat: impl partitions and region_peers information schema (#3278)
* feat: impl partitions table

* fix: typo

* feat: impl region_peers information schema

* chore: rename region_peers to greptime_region_peers

* chore: rename statuses to upper case

* fix: comments

* chore: update partition result

* chore: remove redundant checking

* refactor: replace 42 with constant

* feat: fetch region routes in batch
2024-02-19 06:47:14 +00:00
liyang
1851c20c13 ci: add build artifacts needs in notification (#3320) 2024-02-19 06:42:09 +00:00
tison
29f11d7b7e ci: upgrade actions to avoid node16 deprecation warning (#3317)
* ci: upgrade actions to avoid node16 deprecation warning

Signed-off-by: tison <wander4096@gmail.com>

* try delete size label action

Signed-off-by: tison <wander4096@gmail.com>

* one more action

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 15:20:26 +00:00
Ruihang Xia
72cd443ba3 feat: organize tracing on query path (#3310)
* feat: organize tracing on query path

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* wrap json conversion to TracingContext's methods

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unnecessary .trace()

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/query/src/dist_plan/merge_scan.rs

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-18 15:04:57 +00:00
dependabot[bot]
df6260d525 fix: bump libgit2-sys from 0.16.1+1.7.1 to 0.16.2+1.7.2 (#3316)
build(deps): bump libgit2-sys from 0.16.1+1.7.1 to 0.16.2+1.7.2

Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.16.1+1.7.1 to 0.16.2+1.7.2.
- [Changelog](https://github.com/rust-lang/git2-rs/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/git2-rs/commits)

---
updated-dependencies:
- dependency-name: libgit2-sys
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-02-18 13:25:00 +00:00
Ruihang Xia
94fd51c263 ci: run CI jobs in draft PR (#3314)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: tison <wander4096@gmail.com>
2024-02-18 13:14:57 +00:00
liyang
3bc0c4feda ci: send release result to Slack (#3312)
* feat: add notification of release result to Slack

* chore: change build image output result name

---------

Co-authored-by: tison <wander4096@gmail.com>
2024-02-18 13:13:04 +00:00
tison
2a26c01412 fix: commit_short sqlness test case (#3313)
* ci: debug sqlness jobs

Signed-off-by: tison <wander4096@gmail.com>

* fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* fixup! fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* fixup! fixup! fix: commit_short test case

Signed-off-by: tison <wander4096@gmail.com>

* revert uploading

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 11:51:24 +00:00
tison
4e04a4e48f build: support build without git (#3309)
* build: support build without git

Signed-off-by: tison <wander4096@gmail.com>

* chore

Signed-off-by: tison <wander4096@gmail.com>

* address comment

Signed-off-by: tison <wander4096@gmail.com>

* fix syntax

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 10:30:01 +00:00
tison
b889d57b32 build(deps): Upgrade raft-engine dependency cascading (#3305)
* build(deps): Upgrade raft-engine dependency

Signed-off-by: tison <wander4096@gmail.com>

* genlock and upgrade toolchain

Signed-off-by: tison <wander4096@gmail.com>

* revert toolchain upgrade

Signed-off-by: tison <wander4096@gmail.com>

* Revert "revert toolchain upgrade"

This reverts commit 1c6dd9d7ba.

* toolchain backward and correct dep attr

Signed-off-by: tison <wander4096@gmail.com>

* clippy

Signed-off-by: tison <wander4096@gmail.com>

* revert all

Signed-off-by: tison <wander4096@gmail.com>

* redo

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-02-18 02:31:59 +00:00
Zhenchi
f9ce2708d3 feat(mito): add options to ignore building index for specific column ids (#3295)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-16 08:50:41 +00:00
Zhenchi
34050ea8b5 fix(index): sanitize S3 upload buffer size (#3300)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-16 06:45:31 +00:00
liyang
31ace9dd5c fix: $TARGET_BIN not found when docker runs the image (#3297)
* fix: TARGET_BIN path

* chore: return available versions

* refactor: Use ENV instead of ARG in ci dockerfile

* chore: add TARGET_BIN ENV pass to ENTRYPOINT

* chore: add TARGET_BIN ENV pass to ENTRYPOINT

* chore: update entrypoint

* chore: update entrypoint
2024-02-15 11:33:05 +00:00
Cancai Cai
2a971b0fff chore: update link to official website link (#3299) 2024-02-13 13:32:46 +00:00
Ruihang Xia
2f98fa0d97 fix: correct the case sensitivity behavior for PromQL (#3296)
* fix: correct the case sensitivity behavior for PromQL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove debug code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* consolidate sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* drop table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-08 03:26:32 +00:00
Hudson C. Dalprá
6b4be3a1cc fix(util): join_path function should not trim leading / (#3280)
* fix(util): join_path function should not trim leading `/`

Signed-off-by: Hudson C. Dalpra <dalpra.hcd@gmail.com>

* fix(util): making required changes at join_path function

* fix(util): added unit tests to match function comments

---------

Signed-off-by: Hudson C. Dalpra <dalpra.hcd@gmail.com>
2024-02-07 10:05:04 +00:00
Zhenchi
141ed51dcc feat(mito): adjust seg size of inverted index to finer granularity instead of row group level (#3289)
* feat(mito): adjust seg size of inverted index to finer granularity instead of row group level

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: wrong metric

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: more suitable name

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: BitVec instead

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-07 08:20:00 +00:00
dennis zhuang
e5ec65988b feat: administration functions (#3236)
* feat: adds database() function to return current db

* refactor: refactor meta src and client with new protos

* feat: impl migrate_region and query_procedure_state for procedure service/client

* fix: format

* temp commit

* feat: impl migrate_region SQL function

* chore: clean code for review

* fix: license header

* fix: toml format

* chore: update proto dependency

* chore: apply suggestion

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: apply suggestion

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: apply suggestion

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* chore: apply suggestion

Co-authored-by: fys <40801205+fengys1996@users.noreply.github.com>

* chore: print key when parsing procedure id fails

* chore: comment

* chore: comment for MigrateRegionFunction

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: JeremyHi <jiachun_feng@proton.me>
Co-authored-by: fys <40801205+fengys1996@users.noreply.github.com>
2024-02-07 01:12:32 +00:00
Zhenchi
dbf62f3273 chore(index): add BiError to fulfil the requirement of returning two errors (#3291)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-02-06 16:03:03 +00:00
Ruihang Xia
e4cd294ac0 feat: put all filter exprs in a filter plan separately (#3288)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-06 09:32:26 +00:00
fys
74bfb09195 feat: support cache for batch_get in CachedMetaKvBackend (#3277)
* feat: support cache for batch_get in CachedMetaKvBackend.

* add doc of CachedMetaKvBackend

* fix: cr

* fix: correct some words

* fix cr
2024-02-06 03:15:03 +00:00
shuiyisong
4cbdf64d52 chore: start plugins during standalone startup & comply with current catalog while changing database (#3282)
* chore: start plugins in standalone

* chore: respect current catalog in use statement for mysql

* chore: reduce unnecessary conversion to string

* chore: reduce duplicate code
2024-02-06 02:41:37 +00:00
Weny Xu
96f32a166a chore: share cache across jobs (#3284) 2024-02-05 09:30:22 +00:00
Weny Xu
770da02810 fix: fix incorrect StatusCode parsing (#3281)
* fix: fix incorrect StatusCode parsing

* chore: apply suggestions from CR
2024-02-05 08:06:43 +00:00
discord9
c62c67cf18 feat: Basic Definitions for Expression&Functions for Dataflow (#3267)
* added expression&func

* fix: EvalError derive&imports

* chore: add header

* feat: variadic func

* chore: minor adjust

* feat: accum

* feat: use accum for eval func

* feat: monotonic min/max as accumulative

* feat: support min/max Date&DateTime

* chore: fix compile error&add test(WIP)

* test: sum, count, min, max

* feat: remove trait impl for EvalError

* chore: remove all impls, retain only type definitions

* refactor: nest datatypes errors

* fix: remove `.build()`

* fix: not derive Clone

* docs: add comment for types

* feat: more func&remove `CurrentDatabase`
2024-02-05 07:49:34 +00:00
Ruihang Xia
51feec2579 feat: use simple filter to prune memtable (#3269)
* switch on clippy warnings

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: use simple filter to prune memtable

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deadcode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refine util function

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-02-04 11:35:55 +00:00
LFC
902570abf6 ci: fix nightly build (#3276)
* ci: fix nightly build

* ci: fix nightly build
2024-02-03 03:20:47 +00:00
shuiyisong
6ab3a88042 fix: use fe_opts after setup_frontend_plugins in standalone (#3275)
* chore: modify standalone startup opts

* chore: move frontend and datanode options
2024-02-02 10:36:08 +00:00
discord9
e89f5dc908 feat: support fraction part in timestamp (#3272)
* feat: support fraction part in timestamp

* test: with timezone
2024-02-01 08:51:26 +00:00
LFC
e375060b73 refactor: add same SST files (#3270)
* Make adding the same SST file multiple times possible, instead of panicking there.

* Update src/mito2/src/sst/version.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-31 07:21:30 +00:00
LFC
50d16d6330 ci: build GreptimeDB binary for later use (#3244)
* ci: build GreptimeDB binaries for later use

* debug CI

* try larger runner host

* Revert "try larger runner host"

This reverts commit 03c18c0f51.

* fix: resolve PR comments

* revert some unrelated action yamls

* fix CI

* use artifact upload v4 for faster upload and download speed
2024-01-31 03:02:27 +00:00
tison
60e760b168 fix: cli export database default value (#3259)
Signed-off-by: tison <wander4096@gmail.com>
2024-01-30 09:56:05 +00:00
dennis zhuang
43ef0820c0 fix: SQL insertion and column default constraint aware of timezone (#3266)
* fix: missing timezone when parsing sql value to greptimedb value

* test: sql_value_to_value

* fix: column default constraint missing timezone

* test: column def default constraint  with timezone

* test: adds sqlness test for default constraint aware of timezone

* fix: typo

* chore: comment
2024-01-30 09:15:38 +00:00
Ruihang Xia
e0e635105e feat: initial configuration for grafana dashboard (#3263)
* feat: initial configuration for grafana dashboard

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* lift up dir

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update readme

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add paths to CI config

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak config details

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add OpenDAL traffic panel

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove sync count

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update grafana/README.md

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-30 09:11:50 +00:00
discord9
0a9361a63c feat: basic types for Dataflow Framework (#3186)
* feat: basic types for Dataflow Framework

* docs: license header

* chore: add name in todo, remove deprecated code

* chore: typo

* test: linear&repr unit test

* refactor: avoid mod.rs

* feat: Relation Type

* feat: unmat funcs

* feat: simple temporal filter(sliding window)

* refactor: impl Snafu for EvalError

* feat: cast as type

* feat: temporal filter

* feat: time index in RelationType

* refactor: move EvalError to expr

* refactor: error handling for func

* chore: fmt&comment

* make EvalError pub again

* refactor: move ScalarExpr to scalar.rs

* refactor: remove mod.rs for relation

* chore: slim down PR size

* chore: license header

* chore: per review

* chore: more fix per review

* chore: even more fix per review

* chore: fmt

* chore: fmt

* feat: impl From/Into ProtoRow instead

* chore: use cast not cast_with_opt&`Key` struct

* chore: new_unchecked

* feat: `Key::subset_of` method

* chore: toml in order
2024-01-30 07:48:22 +00:00
Weny Xu
ddbd0abe3b fix(Copy From): fix incorrect type casts (#3264)
* refactor: refactor RecordBatchStreamTypeAdapter

* fix(Copy From): fix incorrect type casts

* fix: unit tests

* chore: add comment
2024-01-30 07:16:36 +00:00
Ruihang Xia
a079955d38 chore: adjust storage engine related metrics (#3261)
* chore: adjust metrics to metric engine and mito engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust more mito bucket

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-30 06:43:03 +00:00
JeremyHi
e5a2b0463a feat: Only allow insert and delete operations to be executed in parallel (#3257)
* feat: Only allow insert and delete operations to be executed in parallel.

* feat: add comment
2024-01-29 11:27:06 +00:00
Weny Xu
691b649f67 chore: switch to free machine (#3256)
* chore: switch to free machine

* chore: switch sqlness runner to 4core
2024-01-29 08:35:12 +00:00
Ruihang Xia
9a28a1eb5e fix: decouple columns in projection and prune (#3253)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-29 08:29:21 +00:00
LFC
fc25b7c4ff build: make binary name a Dockerfile "ARG" (#3254)
* build: make binary name a Dockerfile "ARG"

* Update Dockerfile
2024-01-29 08:00:40 +00:00
Ning Sun
d43c638515 ci: merge doc label actor and checker tasks (#3252) 2024-01-29 07:09:11 +00:00
shuiyisong
91c8c62d6f chore: adjust MySQL connection log (#3251)
* chore: adjust mysql connection log level

* chore: adjust import
2024-01-29 06:53:50 +00:00
JeremyHi
3201aea360 feat: create tables in batch on prom write (#3246)
* feat: create tables in batch on prom write

* feat: add logic table ids to log

* fix: missing table ids in response
2024-01-26 12:47:24 +00:00
Ruihang Xia
7da8f22cda fix: IntermediateWriter closes underlying writer twice (#3248)
* fix: IntermediateWriter closes underlying writer twice

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* close writer manually on error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 10:03:50 +00:00
Ning Sun
14b233c486 test: align chrono and some forked test deps to upstream (#3245)
* test: update chrono and its tests

* chore: switch some deps to upstream version

* test: update timestamp range in sqlness tests
2024-01-26 07:39:51 +00:00
discord9
d5648c18c1 docs: RFC of Dataflow Framework (#3185)
* docs: RFC of Dataflow Framework

* docs: middle layer&metadata store

* chore: fix typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* docs: add figure

* chore: use mermaid instead

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 07:13:28 +00:00
Ruihang Xia
b0c3be35fb feat: don't map semantic type in metric engine (#3243)
* feat: don't map semantic type in metric engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove duplicate set_null

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-26 03:50:05 +00:00
Ruihang Xia
5617b284c5 feat: return request outdated error on handling alter (#3239)
* feat: return request outdated error on handling alter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tonic code mapping

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy, add comment

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix deadloop

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update UT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Update log message

* Update src/common/meta/src/ddl/alter_table.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-01-26 03:37:46 +00:00
Weny Xu
f99b08796d feat: add insert/select generator & translator (#3240)
* feat: add insert into expr generator & translator

* feat: add select expr generator & translator

* chore: apply suggestions from CR

* fix: fix unit tests
2024-01-26 02:59:17 +00:00
JeremyHi
1fab7ab75a feat: batch create ddl (#3194)
* feat: batch ddl to region request

* feat: return table ids

chore: by comment

chore: remove wal_options

chore: create logical tables lock key

feat: get metadata in procedure

* chore: by comment
2024-01-26 02:43:57 +00:00
Yingwen
3fa070a0cc fix: init parquet reader metrics twice (#3242) 2024-01-26 01:54:51 +00:00
Weny Xu
8bade8f8e4 fix: fix create table ddl return incorrect table id (#3232)
* fix: fix create table ddl return incorrect table id

* refactor: refactor param of Status::done_with_output
2024-01-25 13:58:43 +00:00
Wei
6c2f0c9f53 feat: read metadata from write cache (#3224)
* feat: read meta from write cache

* test: add case

* chore: cr comment

* chore: clippy

* chore: code style

* feat: put metadata to sst cache
2024-01-25 11:39:41 +00:00
LFC
2d57bf0d2a ci: adding DOCKER_BUILD_ROOT docker arg for dev build (#3241)
Update Dockerfile
2024-01-25 09:05:27 +00:00
Weny Xu
673a4bd4ef feat: add pg create alter table expr translator (#3206)
* feat: add pg create table expr translator

* feat: add pg alter table expr translator

* refactor: refactor MappedGenerator

* chore: apply suggestions from CR
2024-01-25 08:00:42 +00:00
LFC
fca44098dc ci: make git's "safe.directory" accept all (#3234)
* Update Dockerfile

* Update Dockerfile

* Update Dockerfile
2024-01-25 07:32:03 +00:00
Ning Sun
1bc4f25de2 feat: http sql api return schema on empty resultset (#3237)
* feat: return schema on empty resultset

* refactor: make schema a required field in http output

* test: update integration test and add schema output
2024-01-25 06:44:28 +00:00
Ning Sun
814924f0b6 fix: security update for shlex and h2 (#3227) 2024-01-24 13:31:34 +00:00
ZonaHe
b0a8046179 feat: update dashboard to v0.4.7 (#3229) 2024-01-24 08:50:21 +00:00
dennis zhuang
7323e9b36f feat: change Range Query’s default align behavior aware of timezone (#3219)
* feat: change Range Query’s default align behavior to 1970-01-01 00:00:00 aware of timezone

* test: test with +23:00 timezone
2024-01-24 08:17:57 +00:00
Weny Xu
f82ddc9491 fix: fix MockInstance rebuild issue (#3218)
* fix: fix MockInstance rebuild issue

* chore: apply suggestions from CR
2024-01-24 07:52:47 +00:00
Ning Sun
1711ad4631 feat: add Arrow IPC output format for http rest api (#3177)
* feat: add arrow format output for sql api

* refactor: remove unwraps

* test: add test for arrow format

* chore: update cargo toml format

* fix: resolve lint warrnings

* fix: ensure outputs size is one
2024-01-24 06:10:05 +00:00
LFC
f81e37f508 refactor: make http server built flexibly (#3225)
* refactor: make http server built flexibly

* Apply suggestions from code review

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix: resolve PR comments

* Fix CI.

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-24 03:45:08 +00:00
Weny Xu
d75cf86467 fix: only register region keeper while creating physical table (#3223)
fix: only register region keeper during create physical table
2024-01-23 09:42:32 +00:00
Weny Xu
26535f577d feat: enable concurrent write (#3214)
* feat: enable concurrent write

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-23 09:20:12 +00:00
Ruihang Xia
8485c9af33 feat: read column and region info from state cache (#3222)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-23 09:10:27 +00:00
Weny Xu
19413eb345 refactor!: rename initialize_region_in_background to init_regions_in_background (#3216)
refactor!: change initialize_region_in_background to init_regions_in_background
2024-01-23 04:33:48 +00:00
Weny Xu
007b63dd9d fix: fix default value cannot accept negative number (#3217)
* fix: fix default value cannot accept negative number

* chore: apply suggestions from CR
2024-01-23 03:33:13 +00:00
Weny Xu
364754afa2 feat: add create alter table expr translator (#3203)
* feat: add create table expr translator

* feat: add alter table expr translator

* refactor: expose mod

* refactor: expr generator

* chore: ignore typos check for lorem_words

* feat: add string map helper functions

* chore: remove unit tests
2024-01-23 02:53:42 +00:00
Ruihang Xia
31787f4bfd feat!: switch prom remote write to metric engine (#3198)
* feat: switch prom remote write to metric engine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* read physical table name from url

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove physical table from header

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix merge error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add with_metric_engine option to config remote write behavior

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* check parameter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add specific config param

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* default with_metric_engine to true

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update UT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-22 14:48:28 +00:00
dennis zhuang
6a12c27e78 feat: make query be aware of timezone setting (#3175)
* feat: let TypeConversionRule be aware of query context timezone setting

* chore: don't optimize explain command

* feat: parse string into timestamp with timezone

* fix: compile error

* chore: check the scalar value type in predicate

* chore: remove mut for engine context

* chore: return none if the scalar value is utf8 in time range predicate

* fix: some fixme

* feat: let Date and DateTime parsing from string value be aware of timezone

* chore: tweak

* test: add datetime from_str test with timezone

* feat: construct function context from query context

* test: add timezone test for to_unixtime and date_format function

* fix: typo

* chore: apply suggestion

* test: adds string with timezone

* chore: apply CR suggestion

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* chore: apply suggestion

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-01-22 14:14:03 +00:00
shuiyisong
2bf4b08a6b chore: change default factor to compute memory size (#3211)
* chore: change default factor to compute memory size

* chore: update doc

* chore: update comment in example config

* chore: extract factor to const and update comments

* chore: update comment by cr suggestion

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-22 09:03:29 +00:00
Ruihang Xia
8cc7129397 fix: remove __name__ matcher from processed matcher list (#3213)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-22 08:50:28 +00:00
Ruihang Xia
278e4c8c30 feat: lazy initialize vector builder on write (#3210)
* feat: lazy initialize vector builder on write

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* avoid using ConstantVector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* simplify expression

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/metric-engine/src/engine/create.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-01-22 07:00:04 +00:00
Weny Xu
de13de1454 feat: introduce information schema provider cache (#3208)
fix: introduce information schema provider cache
2024-01-22 06:41:39 +00:00
Lei, HUANG
3834ea7422 feat: copy database from (#3164)
* wip: impl COPY DATABASE FROM parser

* wip: impl copy database from

* wip: add some ut

* wip: add continue_on_error option

* test: add sqlness cases for copy database

* fix: trailing newline

* fix: typo

* fix: some cr comments

* chore: resolve conflicts

* fix: some cr comments
2024-01-22 06:33:54 +00:00
Weny Xu
966875ee11 chore: bump opendal to v0.44.2 (#3209) 2024-01-21 11:47:18 +00:00
Wei
e5a8831fa0 refactor: read parquet metadata (#3199)
* feat: MetadataLoader

* refactor code

* chore: clippy

* chore: cr comment

* chore: add TODO

* chore: cr comment

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: clippy

---------

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-21 07:21:29 +00:00
Weny Xu
4278c858f3 feat: make procedure able to return output (#3201)
* feat: make procedure able to return output

* refactor: change Output to Any
2024-01-21 06:56:45 +00:00
Weny Xu
986f3bb07d refactor: reduce number of parquet metadata reads and enable reader buffer (#3197)
refactor: reduce the number of parquet metadata reads and enable read buffer
2024-01-19 12:26:38 +00:00
Weny Xu
440cd00ad0 feat(tests-fuzz): add CreateTableExprGenerator & AlterTableExprGenerator (#3182)
* feat(tests-fuzz): add CreateTableExprGenerator

* refactor: move Column to root of ir mod

* feat: add AlterTableExprGenerator

* feat: add Serialize and Deserialize derive

* chore: refactor the AlterExprGenerator
2024-01-19 09:39:28 +00:00
dennis zhuang
5e89472b2e feat: adds parse options for SQL parser (#3193)
* feat: adds parse options for parser and timezone to scan request

* chore: remove timezone in ScanRequest

* feat: remove timezone in parse options and add type checking to partition columns

* fix: comment

* chore: apply suggestions

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: format

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-19 09:16:36 +00:00
Yiran
632edd05e5 docs: update SDK links in README.md (#3156)
* docs: update SDK links in README.md

* chore: typo
2024-01-19 08:54:43 +00:00
Zhenchi
2e4c48ae7a fix(index): S3 EntityTooSmall error (#3192)
* fix(index): S3 `EntityTooSmall` error

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: config api

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-19 02:57:07 +00:00
Ruihang Xia
cde5a36f5e feat: precise filter for mito parquet reader (#3178)
* impl SimpleFilterEvaluator

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* time index and field filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finish parquet filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove empty Batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix fmt

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update metric

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use projected schema from batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* correct naming

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unnecessary error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-18 06:59:48 +00:00
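The precise filter above evaluates simple `column <op> literal` predicates against decoded values, so rows that row-group pruning could not exclude are still dropped before a batch is returned. A simplified sketch of that evaluator (plain `i64` values stand in for the real vector types; names are illustrative):

```rust
#[derive(Clone, Copy)]
enum Op {
    Eq,
    Gt,
}

/// A pre-compiled `column <op> literal` predicate evaluated against decoded
/// values, producing a keep/drop mask for the rows of a batch.
struct SimpleFilter {
    op: Op,
    literal: i64,
}

impl SimpleFilter {
    fn evaluate(&self, values: &[i64]) -> Vec<bool> {
        values
            .iter()
            .map(|v| match self.op {
                Op::Eq => *v == self.literal,
                Op::Gt => *v > self.literal,
            })
            .collect()
    }
}

/// Keep only rows whose mask bit is true; a fully filtered-out batch becomes
/// empty and can be skipped by the caller.
fn filter_rows<T: Copy>(rows: &[T], mask: &[bool]) -> Vec<T> {
    rows.iter()
        .zip(mask)
        .filter(|(_, keep)| **keep)
        .map(|(row, _)| *row)
        .collect()
}

fn main() {
    let newer_than = SimpleFilter { op: Op::Gt, literal: 10 };
    let timestamps = vec![5_i64, 11, 42];
    let mask = newer_than.evaluate(&timestamps);
    assert_eq!(filter_rows(&timestamps, &mask), vec![11, 42]);

    let exact = SimpleFilter { op: Op::Eq, literal: 42 };
    assert_eq!(exact.evaluate(&timestamps), vec![false, false, true]);
}
```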
niebayes
63205907fb refactor: introduce common-wal to aggregate wal stuff (#3171)
* refactor: aggregate wal configs

* refactor: move wal options to common-wal

* chore: slim Cargo.toml

* fix: add missing crates

* fix: format

* chore: update comments

* chore: add testing feature gate for test_util

* fix: apply suggestions from code review

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix: apply suggestions from code review

* fix: compiling

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-18 03:49:37 +00:00
Wei
3d7d2fdb4a feat: auto config cache size according to memory size (#3165)
* feat: auto config cache and buffer size according to mem size

* feat: utils

* refactor: add util function to common config

* refactor: check cgroups

* refactor: code

* fix: test

* fix: test

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Dennis Zhuang <killme2008@gmail.com>

* chore: remove default comment

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
Co-authored-by: Dennis Zhuang <killme2008@gmail.com>
2024-01-17 14:35:35 +00:00
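The auto-configuration above sizes caches and buffers from the detected system (or cgroup) memory. A sketch of the general shape only; the factor and bounds here are placeholders, not the shipped defaults:

```rust
const CACHE_MEM_FACTOR: u64 = 16; // take 1/16 of detected memory (placeholder value)
const MIN_CACHE_BYTES: u64 = 32 << 20; // 32 MiB floor (placeholder)
const MAX_CACHE_BYTES: u64 = 4 << 30; // 4 GiB ceiling (placeholder)

/// Derive a cache size from the system or cgroup memory limit.
fn auto_cache_size(sys_total_memory_bytes: u64) -> u64 {
    (sys_total_memory_bytes / CACHE_MEM_FACTOR).clamp(MIN_CACHE_BYTES, MAX_CACHE_BYTES)
}

fn main() {
    // A 16 GiB host yields a 1 GiB cache under these placeholder numbers.
    assert_eq!(auto_cache_size(16 << 30), 1 << 30);
}
```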
LFC
3cfd60e139 refactor: expose region edit in mito engine (#3179)
* refactor: expose region edit in mito engine

* feat: add a method for editing region directly

* fix: resolve PR comments

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: resolve PR comments

* fix: resolve PR comments

* fix: resolve PR comments

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-17 14:25:08 +00:00
shuiyisong
a29b9f71be chore: carry metrics in flight metadata from datanode to frontend (#3113)
* chore: carry metrics in flight metadata from datanode to frontend

* chore: fix typo

* fix: ignore metric flight message on client

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: add cr comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: add cr comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: update proto

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-17 11:38:03 +00:00
shuiyisong
ae160c2def fix: change back GREPTIME_DB_HEADER_NAME header key name (#3184)
fix: change back dbname header key
2024-01-17 09:30:32 +00:00
Ruihang Xia
fbd0197794 refactor: remove TableEngine (#3181)
* refactor: remove TableEngine

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/table/src/table_reference.rs

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-01-17 04:18:37 +00:00
dennis zhuang
204b9433b8 feat: adds date_format function (#3167)
* feat: adds date_format function

* fix: compile error

* chore: use system timezone for FunctionContext and EvalContext

* test: as_formatted_string

* test: sqlness test

* chore: rename function
2024-01-17 03:24:40 +00:00
niebayes
d020a3db23 chore: expose promql test to distributed instance (#3176) 2024-01-17 02:44:55 +00:00
JeremyHi
c6c4ea5e64 feat: tables stream with CatalogManager (#3180)
* feat: add tables for CatalogManager

* feat: replace table with tables

* Update src/catalog/src/information_schema/columns.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/columns.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/tables.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* Update src/catalog/src/information_schema/tables.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* feat: tables for MemoryCatalogManager

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-01-17 02:31:15 +00:00
Weny Xu
7a1b856dfb feat: add tests-fuzz crate (#3173) 2024-01-16 09:02:09 +00:00
JeremyHi
c2edaffa5c feat: let tables API return a stream (#3170) 2024-01-15 12:36:39 +00:00
tison
189df91882 docs: Update README.md (#3168)
* docs: Update README.md

Complying with ASF policy we should refer to Apache projects in their full form in the first and most prominent usage.

* Update README.md

* Update README.md
2024-01-15 10:54:53 +00:00
tison
3ef86aac97 docs: add tracking issue for inverted-index RFC (#3169) 2024-01-15 10:54:35 +00:00
Wei
07de65d2ac test: engine with write cache (#3163)
* feat: write cache test for engine

* chore: unused

* chore: comment

* refactor: super to crate

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: clippy

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-15 10:02:53 +00:00
Ning Sun
1294d6f6e1 feat: upgrade pgwire to 0.19 (#3157)
* feat: upgrade pgwire to 0.19

* fix: update pgwire to 0.19.1
2024-01-15 09:14:08 +00:00
Zhenchi
6f07d69155 feat(mito): enable inverted index (#3158)
* feat(mito): enable inverted index

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* accidentally resolved the incorrect filtering issue within the Metric Engine

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/access_layer.rs

* Update src/mito2/src/test_util/scheduler_util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: format -> join_dir

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: move intermediate_manager from arg of write_and_upload_sst to field of WriteCache

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: add IndexerBuilder

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix clippy

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-15 09:08:07 +00:00
WU Jingdi
816d94892c feat: support HTTP&gRPC&pg set timezone (#3125)
* feat: support HTTP&gRPC&pg set timezone

* chore: fix code advice

* chore: fix code advice
2024-01-15 06:29:31 +00:00
LFC
93f28c2a37 refactor: make grpc service able to be added dynamically (#3160) 2024-01-15 04:33:27 +00:00
Eugene Tolbakov
ca4d690424 feat: add modulo function (#3147)
* feat: add modulo function

* fix: address CR feedback
2024-01-13 00:24:25 +00:00
Weny Xu
75975adcb6 fix: fix tests failed on windows (#3155)
* fix: fix tests failed on windows

* feat: add comments

* Update src/object-store/src/util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-12 09:12:11 +00:00
Ruihang Xia
527e523a38 fix: handle non-identical time index and field column in PromQL set operation (#3145)
* handle different field columns

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix and/unless on different time index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-12 06:27:03 +00:00
Weny Xu
aad2afd3f2 chore: bump version to 0.6.0 (#3154) 2024-01-12 06:25:14 +00:00
Weny Xu
bf88b3b4a0 fix: fix store all wal options (#3149)
* fix: fix store all wal options

* fix: incorrect updating DatanodeTable value
2024-01-12 04:48:14 +00:00
Weny Xu
bf96ce3049 fix: print detailed error (#3146) 2024-01-12 04:02:32 +00:00
Weny Xu
430ffe0e28 fix(kafka): overwrite the EntryId with Offset while consuming records (#3148)
* fix(kafka): overwrite the EntryId with Offset while consuming the KafkaRecords

* fix: temporary workaround for incorrect entry Id
2024-01-12 03:46:17 +00:00
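The EntryId fix above treats the broker-assigned offset, not the id encoded in the payload, as the source of truth on replay. A minimal sketch of that overwrite (types are illustrative, not the actual log-store code):

```rust
/// Shape of a consumed record in this sketch (not the real log-store type).
struct KafkaRecord {
    /// Broker-assigned partition offset: the authoritative entry id on replay.
    offset: i64,
    payload: Vec<u8>,
}

struct WalEntry {
    entry_id: u64,
    data: Vec<u8>,
}

/// Whatever entry id was encoded into the payload at write time is ignored;
/// the offset the broker assigned to the record wins.
fn decode(record: KafkaRecord) -> WalEntry {
    WalEntry {
        entry_id: record.offset as u64,
        data: record.payload,
    }
}

fn main() {
    let entry = decode(KafkaRecord { offset: 42, payload: vec![1, 2, 3] });
    assert_eq!(entry.entry_id, 42);
    assert_eq!(entry.data, vec![1, 2, 3]);
}
```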
Zhenchi
c1190bae7b feat(mito): support write cache for index file (#3144)
* feat(mito): support write cache for index file

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: merge main

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-12 02:40:56 +00:00
Ruihang Xia
0882da4d01 feat: support PromQL operations over the same metric (#3124)
* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update ut cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deadcode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-11 23:07:17 +00:00
Wei
8ec1e42754 feat: read data from write cache (#3128)
* feat: read from write cache

* chore: add read ranges test

* fix: use get instead of contains_key

* chore: clippy

* chore: cr comment

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: with_label_values

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-11 12:06:28 +00:00
Ruihang Xia
b00b49284e feat: manage kafka cluster in sqlness runner (#3143)
* feat: manage kafka cluster in sqlness runner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* pull up clippy config

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: niebayes <niebayes@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: niebayes <niebayes@gmail.com>
2024-01-11 09:47:19 +00:00
Ruihang Xia
09b3c7029b feat: handle drop request for metric table (#3136)
* handle drop request

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust procedure manager

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add create table sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* insert/query metric table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* address CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/meta/src/kv_backend.rs

Co-authored-by: JeremyHi <jiachun_feng@proton.me>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* reuse region option for metadata region

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tweak variable name

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-11 09:38:43 +00:00
niebayes
f5798e2833 fix: remove incorrect wal comments in config file (#3142)
fix: kafka config comments
2024-01-11 09:34:24 +00:00
Zhenchi
fd8fb641fd feat(parquet): introduce inverted index applier to reader (#3130)
* feat(parquet): introduce inverted index applier to reader

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: purger removes index file

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix test

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/access_layer.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/mito2/src/sst/parquet/reader.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* feat: min-max index to prune row groups filtered by inverted index

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: file_meta.inverted_index_available -> file_meta.available_indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add TODO for leveraging WriteCache

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: misset available indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: add index file size

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: use smallvec to reduce heap allocation

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: add index size to disk usage

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2024-01-11 08:04:59 +00:00
Weny Xu
312e8e824e fix: save code in debug_assert! (#3137)
fix: save code in debug_assert!
2024-01-11 06:07:08 +00:00
Wei
29a7f301df feat: write and upload sst (#3106)
* feat: write and upload sst file

* refactor: unit test

* cr comment

* chore: typos

* chore: cr comment

* chore: conflict

* Apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* chore: fmt

* chore: style

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-11 02:34:16 +00:00
LFC
51a3fbc7bf refactor: change how frontend grpc services are orchestrated (#3134) 2024-01-11 02:26:44 +00:00
Lanqing Yang
d521bc9dc5 chore: impl KvBackend for MetaPeerClient (#3076) 2024-01-10 14:16:03 +00:00
Weny Xu
7fad4e8356 fix: incorrect parsing broker_endpoints env variable (#3135) 2024-01-10 13:59:49 +00:00
Ning Sun
b6033f62cd refactor: implement version as built-in function and use fixed mysql version (#3133)
* refactor:  implement version as built-in function

* test: add sqlness test for version()
2024-01-10 11:04:18 +00:00
dennis zhuang
fd3f23ea15 feat: adds runtime_metrics (#3127)
* feat: adds runtime_metrics

* fix: comment

* feat: refactor metrics table

* chore: ensure build_info and runtime_metrics are only available in greptime catalog

* feat: adds timestamp column
2024-01-10 10:51:30 +00:00
niebayes
1b0e39a7f2 chore: stop exposing num_partitions (#3132) 2024-01-10 10:45:18 +00:00
Weny Xu
3ab370265a feat: expose the region migration replay_timeout argument (#3129)
* feat: expose region migration args

* fix: fix ci
2024-01-10 09:47:59 +00:00
Weny Xu
ec8266b969 refactor: refactor the locks in the procedure (#3126)
* feat: add lock key

* refactor: procedure lock keys

* chore: apply suggestions from CR
2024-01-10 09:46:39 +00:00
Zhenchi
490312bf57 fix: unstable time record test (#3131)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-10 09:41:52 +00:00
Ning Sun
1fc168bf6a feat: update our cross schema check to cross catalog (#3123) 2024-01-09 09:38:48 +00:00
Zhenchi
db98484796 feat(inverted_index): introduce SstIndexCreator (#3107)
* feat(inverted_index): introduce SstIndexCreator

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: tiny polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: distinguish intermediate store and index store

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: move comment as doc comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: column id as index name

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-09 09:24:16 +00:00
Ruihang Xia
7d0d2163d2 fix: expose unsupported datatype error on mysql protocol (#3121)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 09:13:53 +00:00
Ruihang Xia
c4582c05cc chore: change the default doc checkbox to no need (#3122)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 17:12:54 +08:00
niebayes
a0a31c8acc chore(remote_wal): remove topic alias (#3120)
chore: remove topic alias
2024-01-09 07:35:02 +00:00
tison
0db1861452 chore(python): Print Python interpreter version (#3118)
* chore(pyo3_backend): Print bundle Python interpreter version

Signed-off-by: tison <wander4096@gmail.com>

* print RustPython interpreter version on init

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-01-09 07:04:23 +00:00
Wei
225ae953d1 feat: add parquet metadata to cache (#3097)
* feat: parquet metadata to sst meta cache

* chore: clippy

* refactor: move code to access_layer

* chore: clone()
2024-01-09 07:00:42 +00:00
Lei, HUANG
2c1b1cecc8 chore: add bound check for raft-engine logstore (#3073)
* chore: add bound check for raft-engine logstore

* feat: add bound check to append_batch API

* chore: check entry id during replay

* chore: resolve conflicts

* feat: add allow_stale_entries options to force obsolete wal entries

* chore: resolve some comments
2024-01-09 06:42:46 +00:00
Lei, HUANG
62db28b465 feat: add options to enable log recycle and periodical fsync (#3114)
* feat: add options to enable log recycle and periodical fsync

* fix: resolve review comments

* fix: conflicts
2024-01-09 06:41:23 +00:00
fys
6e860bc0fd feat: support grpc for otlp trace and metrics (#3105)
* feat: add grpc support for otlp trace and metrics

* cr: add some comment

* fix: ut

* fix: cr
2024-01-09 05:01:48 +00:00
Yingwen
8bd4a36136 feat(mito): Init the write cache in datanode (#3100)
* feat: add builder to build cache manager

* refactor: make MitoEngine::new async

* refactor: refactor object store creation

* refactor: add helper fn to attach layers

* feat: fn to build fs store

* feat: add write cache to engine

* feat: config write cache

* style: fix clippy

* test: fix test

* feat: add warning

* chore: add experimental prefix to configs

* test: fix config test

* test: test weighted size

* feat: add switch to enable write cache

* fix: update cache stats by using get

* style: use then
2024-01-09 04:40:22 +00:00
Ruihang Xia
af0c4c068a feat: support PromQL function vector (#3036)
* produce vector plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* work with OR

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* apply review sugg

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* move common const strings to common_query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add comment for GREPTIME_COUNT

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-09 03:44:00 +00:00
dennis zhuang
26cbcb8b3a docs: update issue template (#3119) 2024-01-09 02:45:55 +00:00
niebayes
122b47210e chore: bump version to 0.5.1 (#3116) 2024-01-08 11:32:56 +00:00
tison
316d843482 feat: support CSV format in sql HTTP API (#3062)
* chore: fix typo

Signed-off-by: tison <wander4096@gmail.com>

* add csv format

Signed-off-by: tison <wander4096@gmail.com>

* flatten response

Signed-off-by: tison <wander4096@gmail.com>

* more flatten response

Signed-off-by: tison <wander4096@gmail.com>

* add CSV format

Signed-off-by: tison <wander4096@gmail.com>

* format InfluxdbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format ErrorResponse

Signed-off-by: tison <wander4096@gmail.com>

* propagate ErrorResponse to InfluxdbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format GreptimedbV1Response

Signed-off-by: tison <wander4096@gmail.com>

* format CsvResponse

Signed-off-by: tison <wander4096@gmail.com>

* impl IntoResponse for QueryResponse

Signed-off-by: tison <wander4096@gmail.com>

* promql

Signed-off-by: tison <wander4096@gmail.com>

* sql

Signed-off-by: tison <wander4096@gmail.com>

* compile

Signed-off-by: tison <wander4096@gmail.com>

* fixup aide

Signed-off-by: tison <wander4096@gmail.com>

* clear debt

Signed-off-by: tison <wander4096@gmail.com>

* fixup UT test_recordbatches_conversion

Signed-off-by: tison <wander4096@gmail.com>

* fixup IT cases

Signed-off-by: tison <wander4096@gmail.com>

* fixup more IT cases

Signed-off-by: tison <wander4096@gmail.com>

* fixup test-integration cases

Signed-off-by: tison <wander4096@gmail.com>

* update comment

Signed-off-by: tison <wander4096@gmail.com>

* fixup deserialize and most query < 1ms

Signed-off-by: tison <wander4096@gmail.com>

* fixup auth tests

Signed-off-by: tison <wander4096@gmail.com>

* fixup tests

Signed-off-by: tison <wander4096@gmail.com>

* fixup and align X-GreptimeDB headers

Signed-off-by: tison <wander4096@gmail.com>

* fixup compile

Signed-off-by: tison <wander4096@gmail.com>

---------

Signed-off-by: tison <wander4096@gmail.com>
2024-01-08 10:54:27 +00:00
niebayes
8c58d3f85b test(remote_wal): add unit tests for kafka remote wal (#2993)
* test: add unit tests

* feat: introduce kafka runtime backed by testcontainers

* test: add test for kafka runtime

* fix: format

* chore: make kafka image ready to be used

* feat: add entry builder

* tmp

* test: add unit tests for client manager

* test: add some unit tests for kafka log store

* chore: resolve some todos

* chore: resolve some todos

* test: add unit tests for kafka log store

* chore: add deprecate develop branch warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tmp: ready to move unit tests to an indie dir

* test: update unit tests for client manager

* test: add unit tests for meta srv remote wal

* fix: license

* fix: test

* refactor: kafka image

* doc: add doc example for kafka image

* chore: migrate kafka image to an indie PR

* fix: CR

* fix: CR

* fix: test

* fix: CR

* fix: update Cargo.toml

* fix: CR

* feat: skip test if no endpoints env

* fix: format

* test: rewrite parallel test with barrier

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-01-08 10:48:11 +00:00
LFC
fcacb100a2 chore: expose some codes to let other projects use them (#3115) 2024-01-08 06:32:01 +00:00
Weny Xu
58ada1dfef fix: check env before running kafka test (#3110)
* fix: check env before running kafka test

* Apply suggestions from code review

Co-authored-by: niebayes <niebayes@gmail.com>

---------

Co-authored-by: niebayes <niebayes@gmail.com>
2024-01-08 06:30:43 +00:00
Weny Xu
f78c467a86 chore: bump opendal to 0.44.1 (#3111) 2024-01-08 03:55:58 +00:00
niebayes
78303639db feat(remote_wal): split an entry if it's too large (#3092)
* feat: split an entry if it's too large

* chore: rewrite check records

* test: add some unit tests for record

* chore: rewrite entry splitting

* chore: add unit tests for build records

* chore: add more unit tests for record

* chore: rewrite encdec of record

* revert: ignored test

* fix: set limit for max_batch_size

* fix: clippy

* chore: remove heavy logging

* fix: CR

* fix: properly terminate

* fix: CR

* fix: compiling

* fix: sqlness

* fix: CR

* fix: license

* fix: license
2024-01-05 12:41:43 +00:00
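The entry-splitting change above caps each Kafka record at the producer's maximum batch size and lets the reader stitch the pieces back together. A sketch of the chunking and reassembly, with an assumed (seq, total) record header rather than the real on-wire format:

```rust
/// One Kafka record carrying a piece of a (possibly split) WAL entry. The
/// (seq, total) header is an assumption of this sketch, not the wire format.
struct Record {
    seq: usize,
    total: usize,
    data: Vec<u8>,
}

/// Cut an entry into records no larger than `max_record_size`.
fn split_entry(entry: &[u8], max_record_size: usize) -> Vec<Record> {
    let chunks: Vec<&[u8]> = entry.chunks(max_record_size.max(1)).collect();
    let total = chunks.len();
    chunks
        .into_iter()
        .enumerate()
        .map(|(seq, data)| Record { seq, total, data: data.to_vec() })
        .collect()
}

/// Reassemble an entry from its records, in sequence order.
fn reassemble(mut records: Vec<Record>) -> Vec<u8> {
    records.sort_by_key(|r| r.seq);
    assert!(records.iter().all(|r| r.total == records.len()), "incomplete entry");
    records.into_iter().flat_map(|r| r.data).collect()
}

fn main() {
    let entry = vec![7u8; 10_000];
    let records = split_entry(&entry, 4 * 1024);
    assert_eq!(records.len(), 3); // 4096 + 4096 + 1808 bytes
    assert_eq!(reassemble(records), entry);
}
```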
JeremyHi
bd1a5dc265 feat: metric engine support alter (#3098)
* feat: metric engine support alter

* chore: by comment

* feat: get physical table route for frontend
2024-01-05 09:46:39 +00:00
Weny Xu
e0a43f37d7 chore: bump opendal to 0.44 (#3058)
* chore: bump opendal to 0.44

* fix: fix test_object_store_cache_policy

* Revert "fix: fix test_object_store_cache_policy"

This reverts commit 46c37c343f66114e0f6ee7a0a3b9ee2b79c810af.

* fix: fix test_object_store_cache_policy

* fix: fix test_file_backend_with_lru_cache

* chore: apply suggestions from CR

* fix(mito): fix mito2 cache

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-05 09:05:41 +00:00
zyy17
a89840f5f9 refactor(metrics): add 'greptime_' prefix for every metric (#3093)
* refactor(metrics): add 'greptimedb_' prefix for every metric

* chore: use 'greptime_' as prefix

* chore: add some prefix for new metrics

* chore: fix format error
2024-01-05 08:12:23 +00:00
dennis zhuang
c2db970687 feat: pushdown filters for some information_schema tables (#3091)
* feat: pushdown scan request to information_schema tables stream

* feat: supports filter pushdown for columns

* feat: supports filter pushdown for some information_schema tables

* fix: typo

* fix: predicate evaluate

* fix: typo

* test: predicates

* fix: comment

* fix: pub mod

* docs: improve comments

* fix: cr comments and supports like predicate

* chore: typo

* fix: cargo toml format

* chore: apply suggestion
2024-01-05 07:18:22 +00:00
LFC
e0525dbfeb chore: expose some codes to let other projects use them (#3102) 2024-01-05 06:54:01 +00:00
Weny Xu
cdc9021160 feat(metric): implement role and region_disk_usage (#3095)
* feat(metric): implement `role` and `region_disk_usage`

* Update src/datanode/src/region_server.rs

* Update src/datanode/src/heartbeat.rs

---------

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-01-05 06:53:52 +00:00
dennis zhuang
702ea32538 docs: update the description of greptimedb project (#3099)
* docs: update the info of greptimedb project

* chore: move up SQL/PromQL
2024-01-05 03:06:02 +00:00
Weny Xu
342faa4e07 test: add tests for lease keeper with logical table (#3096) 2024-01-05 02:29:48 +00:00
tison
44ba131987 fix: improve redact sql regexp (#3080)
Signed-off-by: tison <wander4096@gmail.com>
2024-01-04 14:53:20 +00:00
Yingwen
96b6235f25 feat(mito): Add WriteCache struct and write SSTs to write cache (#2999)
* docs: remove todo

* feat: add upload cache

* feat: add cache to sst write path

* feat: add storage to part

* feat: add dir to part

* feat: revert storage name

* feat: flush use upload part writer

* feat: use upload part writer in compaction task

* refactor: upload part writer builds parquet writer

* chore: suppress warnings

* refactor: rename UploadCache to WriteCache

* refactor: move source to write_all()

* chore: typos

* chore: remove output mod

* feat: changes upload to async method

* docs: update cache

* chore: fix compiler errors

* docs: remove comment

* chore: simplify upload part

* refactor: remove option from cache manager param to access layer

* feat: remove cache home from file cache

* feat: write cache holds file cache

* feat: add recover and pub some methods

* feat: remove usages of UploadPartWriter

* refactor: move sst_file_path to sst mod

* refactor: use write cache in access layer

* refactor: remove upload

* style: fix clippy

* refactor: pub write cache method/structs
2024-01-04 10:53:43 +00:00
Weny Xu
f1a4750576 feat(tests-integration): add more region migration integration tests (#3094) 2024-01-04 08:18:46 +00:00
Zhenchi
d973cf81f0 feat(inverted_index): implement apply for SstIndexApplier (#3088)
* feat(inverted_index): implement apply for SstIndexApplier

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: rename metrics

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-04 07:33:03 +00:00
Weny Xu
284a496f54 feat: add logs for upgrading candidate region and updating metadata (#3077)
* feat: add logs for upgrading candidate region

* feat: add logs for update metadata

* chore: apply suggestions from CR
2024-01-04 06:57:07 +00:00
WU Jingdi
4d250ed054 fix: Optimize export metric behavior (#3047)
* fix: optimize export metric behavior

* chore: fix ci

* chore: update config format

* chore: fix format
2024-01-04 06:40:50 +00:00
LFC
ec43b9183d feat: table route for metric engine (#3053)
* feat: table route for metric engine

* feat: register logical regions

* fix: open logical region (#96)

---------

Co-authored-by: JeremyHi <jiachun_feng@proton.me>
2024-01-04 06:30:17 +00:00
ZonaHe
b025bed45c feat: update dashboard to v0.4.6 (#3089)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2024-01-04 02:56:41 +00:00
Weny Xu
21694c2a1d feat: abort region migration if leader region peer is unexpected (#3086) 2024-01-03 11:46:51 +00:00
ClSlaid
5c66ce6e88 chore: remove unnecessary result wrappings (#3084)
patch: remove unnecessary result wrappings

Signed-off-by: 蔡略 <cailue@bupt.edu.cn>
2024-01-03 10:20:33 +00:00
Weny Xu
b2b752337b fix: fix non-physical error msg (#3087) 2024-01-03 09:40:03 +00:00
Weny Xu
aa22f9c94a refactor: allow procedure to acquire share lock (#3061)
* feat: implement `KeyRwLock`

* refactor: use KeyRwLock instead of LockMap

* refactor: use StringKey instead of String

* chore: remove redundant code

* refactor: cleanup KeyRwLock staled locks before granting new lock

* feat: clean staled locks manually

* feat: sort lock key in lexicographically order

* feat: ensure the ref count before dropping the rwlock

* feat: add more tests for rwlock

* feat: drop the key guards first

* feat: drops the key guards in the reverse order

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-01-03 08:05:45 +00:00
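The refactor above replaces the procedure LockMap with a KeyRwLock so procedures can take shared locks per key. A minimal, std-only sketch of the idea (stale-lock cleanup and lexicographic key ordering are omitted; names are illustrative):

```rust
use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};

/// Each lock key maps to its own shared RwLock, so unrelated keys never
/// contend and one key can be share-locked by several procedures at once.
#[derive(Default)]
struct KeyRwLock {
    locks: Mutex<HashMap<String, Arc<RwLock<()>>>>,
}

impl KeyRwLock {
    /// Get (or create) the lock for `key`; callers then take `.read()` or `.write()`.
    fn lock_for(&self, key: &str) -> Arc<RwLock<()>> {
        self.locks
            .lock()
            .unwrap()
            .entry(key.to_string())
            .or_insert_with(|| Arc::new(RwLock::new(())))
            .clone()
    }
}

fn main() {
    let registry = KeyRwLock::default();

    // Procedures touching the same table share one lock; many readers may
    // hold it concurrently, while a writer gets it exclusively.
    let table_lock = registry.lock_for("catalog/schema/table");
    let _shared = table_lock.read().unwrap();

    // An unrelated key gets its own, independent lock.
    let other_lock = registry.lock_for("catalog/schema/other");
    let _exclusive = other_lock.write().unwrap();
}
```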
Weny Xu
611a8aa2fe feat(tests-integration): add a naive region migration integration test (#3078)
* fix: fix heartbeat handler ignore upgrade candidate instruction

* fix: fix handler did not inject wal options

* feat: expose `RegionMigrationProcedureTask`

* feat(tests-integration): add a naive region migration test

* chore: apply suggestions from CR

* feat: add test if the target region has migrated

* chore: apply suggestions from CR
2024-01-03 07:12:59 +00:00
Zhenchi
e4c71843e6 feat(inverted_index): get memory usage of appliers (#3081)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 06:56:56 +00:00
Zhenchi
e1ad7af10c feat(puffin): finish return written bytes (#3082)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 06:55:09 +00:00
Zhenchi
b9302e4f0d feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 2) (#3068)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 2)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add comparison unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add eq_list unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add in_list unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: add and unit tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: strip tests

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-01-03 05:14:40 +00:00
Yingwen
2e686fe053 feat(mito): Implement file cache (#3022)
* feat: recover cache

* feat: moka features

* test: tests for file cache

* chore: suppress warning

* fix: parse_inde_key consider suffix

* feat: update cache

* feat: expose cache file path

* feat: use cache_path in test
2024-01-03 02:05:06 +00:00
Weny Xu
128d3717fa test(tests-integration): add a naive test with kafka wal (#3071)
* chore(tests-integration): add setup tests with kafka wal to README.md

* feat(tests-integration): add meta wal config

* fix(tests-integration): fix sign of both_instances_cases_with_kafka_wal

* chore(tests-integration): set num_topic to 3 for tests

* test(tests-integration): add a naive test with kafka wal

* chore: apply suggestions from CR
2024-01-02 09:05:20 +00:00
Weny Xu
2b181e91e0 refactor: unify the injection of WAL option (#3066)
* feat: add prepare_wal_option

* refactor: use integer hashmap

* feat: unify the injection of WAL option

* fix: fix procedure_flow_upgrade_candidate_with_retry

* chore: apply suggestions from CR
2024-01-02 07:40:02 +00:00
Weny Xu
d87ab06b28 feat: add kafka wal integration test utils (#3069)
* feat(tests-integration): add wal_config

* feat: add kafka wal integration test utils
2024-01-02 07:38:43 +00:00
Weny Xu
5653389063 feat!: correct the kafka config option (#3065)
* feat: correct the kafka config option

* refactor: rewrite the verbose comments
2024-01-02 07:31:37 +00:00
dimbtp
c4d7b0d91d feat: add some tables for information_schema (#3060)
* feat: add information_schema.optimizer_trace

* feat: add information_schema.parameters

* feat: add information_schema.profiling

* feat: add information_schema.referential_constraints

* feat: add information_schema.routines

* feat: add information_schema.schema_privileges

* feat: add information_schema.table_privileges

* feat: add information_schema.triggers

* fix: update sql test result

* feat: add information_schema.global_status

* feat: add information_schema.session_status

* fix: update sql test result

* fix: add TODO for some tables

* Update src/catalog/src/information_schema/memory_table/tables.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-01-02 04:10:59 +00:00
dimbtp
f735f739e5 feat: add information_schema.key_column_usage (#3057)
* feat: add information_schema.key_column_usage

* fix: follow #3057 review comments

* fix: add sql test for `key_column_usage` table

* fix: fix spell typo

* fix: resolve conflict in sql test result
2023-12-31 12:29:06 +00:00
dimbtp
6070e88077 feat: add information_schema.files (#3054)
* feat: add information_schema.files

* fix: update information_schema.result

* fix: change `EXTRA` field type to string
2023-12-31 02:08:16 +00:00
niebayes
9db168875c fix(remote_wal): some known issues (#3052)
* fix: some known issues

* fix: CR

* fix: CR

* chore: replace Mutex with RwLock
2023-12-30 15:28:10 +00:00
AntiTopQuark
4460af800f feat(TableRouteValue): add panic notes and type checks (#3031)
* refactor(TableRouteValue): add panic notes and type checks

* chore: add deprecate develop branch warning

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add error defines and checks

* Update README.md

* update code format and fix tests

* update name of error

* delete unused note

* fix unsafe .expect() for region_route()

* update error name

* update unwrap

* update code format

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-30 13:02:26 +00:00
Zhenchi
69a53130c2 feat(inverted_index): Add applier builder to convert Expr to Predicates (Part 1) (#3034)
* feat(inverted_index.integration): Add applier builder to convert Expr to Predicates (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: add docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/mito2/src/sst/index/applier/builder.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: remove unwrap

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: error source

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-12-30 07:32:32 +00:00
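The applier builder above walks query expressions and keeps only the shapes an inverted index can answer. A toy sketch of that shape-matching, with a hypothetical mini-AST standing in for DataFusion's `Expr`:

```rust
enum Expr {
    Eq { column: String, value: String },
    InList { column: String, values: Vec<String> },
    And(Box<Expr>, Box<Expr>),
    /// Anything the index cannot answer (ranges on fields, functions, ...).
    Unsupported,
}

struct Predicate {
    column: String,
    values: Vec<String>,
}

/// Walk the expression and keep only the shapes an inverted index can answer;
/// unsupported branches are simply skipped, never rejected.
fn collect_predicates(expr: &Expr, out: &mut Vec<Predicate>) {
    match expr {
        Expr::Eq { column, value } => out.push(Predicate {
            column: column.clone(),
            values: vec![value.clone()],
        }),
        Expr::InList { column, values } => out.push(Predicate {
            column: column.clone(),
            values: values.clone(),
        }),
        // AND lets both sides contribute predicates independently.
        Expr::And(lhs, rhs) => {
            collect_predicates(lhs, out);
            collect_predicates(rhs, out);
        }
        Expr::Unsupported => {}
    }
}

fn main() {
    // WHERE host = 'a' AND idc IN ('sh', 'hz')
    let expr = Expr::And(
        Box::new(Expr::Eq { column: "host".into(), value: "a".into() }),
        Box::new(Expr::InList {
            column: "idc".into(),
            values: vec!["sh".into(), "hz".into()],
        }),
    );
    let mut predicates = Vec::new();
    collect_predicates(&expr, &mut predicates);
    assert_eq!(predicates.len(), 2);
}
```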
Ning Sun
1c94d4c506 ci: fix duplicated doc issue (#3056) 2023-12-30 13:36:14 +08:00
Ning Sun
41e51d4ab3 chore: attempt to add doc issue in label task (#3021)
* chore: attempt to add doc issue in label task

* ci: check pr body for doc issue creation
2023-12-29 20:17:34 +08:00
dennis zhuang
11ae85b1cd feat: adds information_schema.schemata (#3051)
* feat: improve information_schema.columns

* feat: adds information_schema.schemata

* fix: instance test

* fix: comment
2023-12-29 09:22:31 +00:00
LFC
7551432cff refactor: merge standalone and metasrv table metadata allocators (#3035)
* refactor: merge standalone and metasrv table metadata allocators

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: niebayes <niebayes@gmail.com>

* Update src/common/meta/src/ddl/table_meta.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

---------

Co-authored-by: niebayes <niebayes@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2023-12-29 08:50:59 +00:00
Weny Xu
e16f093282 test(remote_wal): add sqlness with kafka wal (#3027)
* feat(sqlness): add kafka wal config

* chore: add sqlness with kafka wal ci config

* fix: fix config

* chore: apply suggestions from CR

* fix: add metasrv config to sqlness with kafka

* fix: replay memtable should start from flushed_entry_id + 1

* fix: should set append flag to fopen

* feat: start wal allocator in standalone meta mode

* feat: append a noop record after kafka topic initialization

* test: ignore tests temporally

* test: change sqlness kafka wal config
2023-12-29 08:17:22 +00:00
Weny Xu
301ffc1d91 feat(remote_wal): append a noop record after kafka topic initialization (#3040)
* feat: append a noop record after kafka topic initialization

* chore: apply suggestions from CR

* feat: ignore the noop record during the read
2023-12-29 07:46:48 +00:00
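The noop record above gives a freshly created topic an observable start offset; readers then skip it during replay. A small sketch of the skip-on-read side (the record shape is an assumption of this sketch):

```rust
#[derive(PartialEq)]
enum RecordKind {
    /// Placeholder written right after topic creation; carries no data.
    Noop,
    /// A record holding an actual WAL entry (entry splitting is ignored here).
    Full,
}

struct Record {
    kind: RecordKind,
    payload: Vec<u8>,
}

/// Replay skips noop records so they never surface as WAL entries.
fn replay(records: Vec<Record>) -> Vec<Vec<u8>> {
    records
        .into_iter()
        .filter(|r| r.kind != RecordKind::Noop)
        .map(|r| r.payload)
        .collect()
}

fn main() {
    let records = vec![
        Record { kind: RecordKind::Noop, payload: vec![] },
        Record { kind: RecordKind::Full, payload: vec![1, 2] },
    ];
    assert_eq!(replay(records), vec![vec![1, 2]]);
}
```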
Weny Xu
d22072f68b feat: expose region migration http endpoint (#3032)
* feat: add region migration endpoint

* feat: implement naive peer registry

* chore: apply suggestions from CR

* chore: rename `ContextFactoryImpl` to `DefaultContextFactory`

* chore: rename unregister to deregister

* refactor: use lease-based alive datanode checking
2023-12-29 06:57:00 +00:00
Weny Xu
b526d159c3 fix: replay memtable should start from flushed_entry_id + 1 (#3038)
* fix: replay memtable should start from flushed_entry_id + 1

* chore: apply suggestions from CR
2023-12-28 16:12:07 +00:00
ZonaHe
7152407428 feat: update dashboard to v0.4.5 (#3033)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2023-12-28 11:51:43 +00:00
Ruihang Xia
b58296de22 feat: Implement OR for PromQL (#3024)
* with anit-join

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl UnionDistinctOn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* unify schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add UTs

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/promql/src/planner.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
2023-12-28 06:56:17 +00:00
Yingwen
1d80a0f2d6 chore: Update CI badge in README.md (#3028)
chore: Update README.md

Fix CI badge
2023-12-28 05:59:27 +00:00
Ruihang Xia
286b9af661 chore: change all reference from develop to main (#3026)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-12-28 04:11:00 +00:00
dennis zhuang
af13eeaad3 feat: adds character_sets, collations and events etc. (#3017)
feat: adds character_sets, collations and events etc. to information_schema
2023-12-28 04:01:42 +00:00
864 changed files with 57797 additions and 11095 deletions

View File

@@ -3,13 +3,3 @@ linker = "aarch64-linux-gnu-gcc"
[alias]
sqlness = "run --bin sqlness-runner --"
[build]
rustflags = [
# lints
# TODO: use lint configuration in cargo https://github.com/rust-lang/cargo/issues/5034
"-Wclippy::print_stdout",
"-Wclippy::print_stderr",
"-Wclippy::implicit_clone",
]

View File

@@ -19,3 +19,5 @@ GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_ENDPOINT = GCS end point
# Settings for kafka wal test
GT_KAFKA_ENDPOINTS = localhost:9092

View File

@@ -21,6 +21,7 @@ body:
- Locking issue
- Performance issue
- Unexpected error
- User Experience
- Other
validations:
required: true
@@ -33,9 +34,14 @@ body:
multiple: true
options:
- Standalone mode
- Distributed Cluster
- Storage Engine
- Query Engine
- Table Engine
- Write Protocols
- MetaSrv
- Frontend
- Datanode
- Meta
- Other
validations:
required: true
@@ -77,6 +83,17 @@ body:
validations:
required: true
- type: input
id: greptimedb
attributes:
label: What version of GreptimeDB did you use?
description: |
Please provide the version of GreptimeDB. For example:
0.5.1 etc. You can get it by executing command line `greptime --version`.
placeholder: "0.5.1"
validations:
required: true
- type: textarea
id: logs
attributes:

View File

@@ -53,7 +53,7 @@ runs:
uses: docker/setup-buildx-action@v2
- name: Download amd64 artifacts
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: ${{ inputs.amd64-artifact-name }}
@@ -66,7 +66,7 @@ runs:
mv ${{ inputs.amd64-artifact-name }} amd64
- name: Download arm64 artifacts
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
if: ${{ inputs.arm64-artifact-name }}
with:
name: ${{ inputs.arm64-artifact-name }}

View File

@@ -34,7 +34,7 @@ runs:
- name: Upload sqlness logs
if: ${{ failure() && inputs.disable-run-tests == 'false' }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: /tmp/greptime-*.log

View File

@@ -67,7 +67,7 @@ runs:
- name: Upload sqlness logs
if: ${{ failure() }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: /tmp/greptime-*.log

View File

@@ -25,7 +25,7 @@ inputs:
runs:
using: composite
steps:
- uses: arduino/setup-protoc@v1
- uses: arduino/setup-protoc@v3
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
@@ -38,7 +38,7 @@ runs:
uses: Swatinem/rust-cache@v2
- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.10'
@@ -62,10 +62,10 @@ runs:
- name: Upload sqlness logs
if: ${{ failure() }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
path: /tmp/greptime-*.log
retention-days: 3
- name: Build greptime binary

View File

@@ -15,7 +15,7 @@ runs:
# |- greptime-darwin-amd64-v0.5.0.sha256sum/greptime-darwin-amd64-v0.5.0.sha256sum
# ...
- name: Download artifacts
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
- name: Create git tag for release
if: ${{ github.event_name != 'push' }} # Meaning this is a scheduled or manual workflow.

View File

@@ -73,7 +73,7 @@ runs:
using: composite
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
path: ${{ inputs.artifacts-dir }}

View File

@@ -6,7 +6,7 @@ inputs:
required: true
target-file:
description: The path of the target artifact
required: true
required: false
version:
description: Version of the artifact
required: true
@@ -18,6 +18,7 @@ runs:
using: composite
steps:
- name: Create artifacts directory
if: ${{ inputs.target-file != '' }}
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
@@ -49,15 +50,15 @@ runs:
run: Get-FileHash ${{ inputs.artifacts-dir }}.tar.gz -Algorithm SHA256 | select -ExpandProperty Hash > ${{ inputs.artifacts-dir }}.sha256sum
# Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39).
# However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
# However, when we use 'actions/download-artifact' to download the artifacts, it will be automatically unzipped.
- name: Upload artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.artifacts-dir }}
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.tar.gz
- name: Upload checksum
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: ${{ inputs.artifacts-dir }}.sha256sum
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum

View File

@@ -1,8 +1,10 @@
I hereby agree to the terms of the [GreptimeDB CLA](https://gist.github.com/xtang/6378857777706e568c1949c7578592cc)
I hereby agree to the terms of the [GreptimeDB CLA](https://github.com/GreptimeTeam/.github/blob/main/CLA.md).
## Refer to a related PR or issue link (optional)
## What's changed and what's your intention?
_PLEASE DO NOT LEAVE THIS EMPTY !!!_
__!!! DO NOT LEAVE THIS BLOCK EMPTY !!!__
Please explain IN DETAIL what the changes are in this PR and why they are needed:
@@ -15,6 +17,4 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.
- [ ] This PR does not require documentation updates.
## Refer to a related PR or issue link (optional)
- [x] This PR does not require documentation updates.

View File

@@ -1,7 +1,7 @@
on:
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
- 'config/**'
@@ -19,8 +19,8 @@ jobs:
apidoc:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master

View File

@@ -101,7 +101,7 @@ jobs:
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -155,12 +155,12 @@ jobs:
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout greptimedb
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
@@ -184,12 +184,12 @@ jobs:
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Checkout greptimedb
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
@@ -216,7 +216,7 @@ jobs:
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -247,7 +247,7 @@ jobs:
runs-on: ubuntu-20.04
continue-on-error: true
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -281,7 +281,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -306,7 +306,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -330,14 +330,14 @@ jobs:
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- name: Notifiy nightly build successful result
- name: Notifiy dev build successful result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-result == 'success' }}
with:
payload: |
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has completed successfully."}
- name: Notifiy nightly build failed result
- name: Notifiy dev build failed result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-result != 'success' }}
with:

View File

@@ -1,7 +1,7 @@
on:
merge_group:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
types: [ opened, synchronize, reopened, ready_for_review ]
paths-ignore:
- 'docs/**'
- 'config/**'
@@ -9,9 +9,9 @@ on:
- '.dockerignore'
- 'docker/**'
- '.gitignore'
- 'grafana/**'
push:
branches:
- develop
- main
paths-ignore:
- 'docs/**'
@@ -20,6 +20,7 @@ on:
- '.dockerignore'
- 'docker/**'
- '.gitignore'
- 'grafana/**'
workflow_dispatch:
name: CI
@@ -36,20 +37,19 @@ jobs:
name: Spell Check with Typos
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: crate-ci/typos@v1.13.10
check:
name: Check
if: github.event.pull_request.draft == false
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ windows-latest-8-cores, ubuntu-20.04 ]
os: [ windows-latest, ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
@@ -57,62 +57,130 @@ jobs:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
# Shares with `Clippy` job
shared-key: "check-lint"
- name: Run cargo check
run: cargo check --locked --workspace --all-targets
toml:
name: Toml Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@master
with:
toolchain: stable
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "check-toml"
- name: Install taplo
run: cargo +stable install taplo-cli --version ^0.8 --locked
run: cargo +stable install taplo-cli --version ^0.9 --locked
- name: Run taplo
run: taplo format --check
sqlness:
name: Sqlness Test
if: github.event.pull_request.draft == false
build:
name: Build GreptimeDB binaries
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04-8-cores ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "build-binaries"
- name: Build greptime binaries
shell: bash
run: cargo build
- name: Pack greptime binaries
shell: bash
run: |
mkdir bins && \
mv ./target/debug/greptime bins && \
mv ./target/debug/sqlness-runner bins
- name: Print greptime binaries info
run: ls -lh bins
- name: Upload artifacts
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: bins
version: current
sqlness:
name: Sqlness Test
needs: build
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- name: Run sqlness
run: cargo sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -c ./tests/cases --bins-dir ./bins
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
path: /tmp/greptime-*.log
retention-days: 3
sqlness-kafka-wal:
name: Sqlness Test with Kafka Wal
needs: build
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: tar -xvf ./bins.tar.gz
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run sqlness
run: RUST_BACKTRACE=1 ./bins/sqlness-runner -w kafka -k 127.0.0.1:9092 -c ./tests/cases --bins-dir ./bins
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v4
with:
name: sqlness-logs-with-kafka-wal
path: /tmp/greptime-*.log
retention-days: 3
fmt:
name: Rustfmt
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
@@ -121,17 +189,19 @@ jobs:
components: rustfmt
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "check-rust-fmt"
- name: Run cargo fmt
run: cargo fmt --all -- --check
clippy:
name: Clippy
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
@@ -140,6 +210,10 @@ jobs:
components: clippy
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
# Shares with `Check` job
shared-key: "check-lint"
- name: Run cargo clippy
run: cargo clippy --workspace --all-targets -- -D warnings
@@ -148,8 +222,8 @@ jobs:
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: KyleMayes/install-llvm-action@v1
@@ -162,12 +236,15 @@ jobs:
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares cross multiple jobs
shared-key: "coverage-test"
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install PyArrow Package
@@ -192,10 +269,28 @@ jobs:
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Codecov upload
uses: codecov/codecov-action@v2
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: ./lcov.info
flags: rust
fail_ci_if_error: false
verbose: true
compat:
name: Compatibility Test
needs: build
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
name: bins
path: .
- name: Unzip binaries
run: |
mkdir -p ./bins/current
tar -xvf ./bins.tar.gz --strip-components=1 -C ./bins/current
- run: ./tests/compat/test-compat.sh 0.6.0

View File

@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: create an issue in doc repo
uses: dacbd/create-issue-action@main
uses: dacbd/create-issue-action@v1.2.1
with:
owner: GreptimeTeam
repo: docs
@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: create an issue in cloud repo
uses: dacbd/create-issue-action@main
uses: dacbd/create-issue-action@v1.2.1
with:
owner: GreptimeTeam
repo: greptimedb-cloud

View File

@@ -12,9 +12,25 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
steps:
- uses: github/issue-labeler@v3.3
- uses: github/issue-labeler@v3.4
with:
configuration-path: .github/doc-label-config.yml
enable-versioned-regex: false
repo-token: ${{ secrets.GITHUB_TOKEN }}
sync-labels: 1
- name: create an issue in doc repo
uses: dacbd/create-issue-action@v1.2.1
if: ${{ github.event.action == 'opened' && contains(github.event.pull_request.body, '- [ ] This PR does not require documentation updates.') }}
with:
owner: GreptimeTeam
repo: docs
token: ${{ secrets.DOCS_REPO_TOKEN }}
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
body: |
A document change request is generated from
${{ github.event.issue.html_url || github.event.pull_request.html_url }}
- name: Check doc labels
uses: docker://agilepathway/pull-request-label-checker:latest
with:
one_of: Doc update required,Doc not needed
repo_token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -9,9 +9,9 @@ on:
- '.dockerignore'
- 'docker/**'
- '.gitignore'
- 'grafana/**'
push:
branches:
- develop
- main
paths:
- 'docs/**'
@@ -20,6 +20,7 @@ on:
- '.dockerignore'
- 'docker/**'
- '.gitignore'
- 'grafana/**'
workflow_dispatch:
name: CI
@@ -32,39 +33,46 @@ jobs:
name: Spell Check with Typos
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: crate-ci/typos@v1.13.10
check:
name: Check
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
fmt:
name: Rustfmt
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
clippy:
name: Clippy
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
coverage:
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
sqlness:
name: Sqlness Test
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
steps:
- run: 'echo "No action required"'
sqlness-kafka-wal:
name: Sqlness Test with Kafka Wal
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
steps:
- run: 'echo "No action required"'


@@ -3,7 +3,7 @@ name: License checker
on:
push:
branches:
- develop
- main
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
jobs:
@@ -11,6 +11,6 @@ jobs:
runs-on: ubuntu-20.04
name: license-header-check
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Check License Header
uses: korandoru/hawkeye@v3
uses: korandoru/hawkeye@v4


@@ -85,7 +85,7 @@ jobs:
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -137,7 +137,7 @@ jobs:
]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -156,7 +156,7 @@ jobs:
]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -179,7 +179,7 @@ jobs:
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -211,7 +211,7 @@ jobs:
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -245,7 +245,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -270,7 +270,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0


@@ -24,8 +24,8 @@ jobs:
os: [ windows-latest-8-cores ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4.1.0
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
@@ -45,10 +45,10 @@ jobs:
{"text": "Nightly CI failed for sqlness tests"}
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
path: /tmp/greptime-*.log
retention-days: 3
test-on-windows:
@@ -57,8 +57,8 @@ jobs:
timeout-minutes: 60
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v4.1.0
- uses: arduino/setup-protoc@v1
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Rust toolchain
@@ -71,7 +71,7 @@ jobs:
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install PyArrow Package


@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0


@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: thehanimo/pr-title-checker@v1.3.4
- uses: thehanimo/pr-title-checker@v1.4.2
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false
@@ -22,7 +22,7 @@ jobs:
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: thehanimo/pr-title-checker@v1.3.4
- uses: thehanimo/pr-title-checker@v1.4.2
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false


@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-20.04-16-cores
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0


@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.6.0
NEXT_RELEASE_VERSION: v0.8.0
jobs:
allocate-runners:
@@ -114,7 +114,7 @@ jobs:
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -168,7 +168,7 @@ jobs:
]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -187,7 +187,7 @@ jobs:
]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -221,12 +221,14 @@ jobs:
arch: x86_64-apple-darwin
artifacts-dir-prefix: greptime-darwin-amd64-pyo3
runs-on: ${{ matrix.os }}
outputs:
build-macos-result: ${{ steps.set-build-macos-result.outputs.build-macos-result }}
needs: [
allocate-runners,
]
if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -240,6 +242,11 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
- name: Set build macos result
id: set-build-macos-result
run: |
echo "build-macos-result=success" >> $GITHUB_OUTPUT
build-windows-artifacts:
name: Build Windows artifacts
strategy:
@@ -255,6 +262,8 @@ jobs:
features: pyo3_backend,servers/dashboard
artifacts-dir-prefix: greptime-windows-amd64-pyo3
runs-on: ${{ matrix.os }}
outputs:
build-windows-result: ${{ steps.set-build-windows-result.outputs.build-windows-result }}
needs: [
allocate-runners,
]
@@ -262,7 +271,7 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -276,6 +285,11 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
- name: Set build windows result
id: set-build-windows-result
run: |
echo "build-windows-result=success" >> $Env:GITHUB_OUTPUT
release-images-to-dockerhub:
name: Build and push images to DockerHub
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
@@ -285,8 +299,10 @@ jobs:
build-linux-arm64-artifacts,
]
runs-on: ubuntu-2004-16-cores
outputs:
build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -299,6 +315,11 @@ jobs:
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
- name: Set build image result
id: set-build-image-result
run: |
echo "build-image-result=success" >> $GITHUB_OUTPUT
release-cn-artifacts:
name: Release artifacts to CN region
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
@@ -316,7 +337,7 @@ jobs:
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -352,7 +373,7 @@ jobs:
]
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -375,7 +396,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -400,7 +421,7 @@ jobs:
]
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -413,3 +434,29 @@ jobs:
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
notification:
if: ${{ always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [
release-images-to-dockerhub,
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- name: Notify successful release result
uses: slackapi/slack-github-action@v1.25.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result == 'success' && needs.build-windows-artifacts.outputs.build-windows-result == 'success' && needs.build-macos-artifacts.outputs.build-macos-result == 'success' }}
with:
payload: |
{"text": "GreptimeDB's release version has completed successfully."}
- name: Notify failed release result
uses: slackapi/slack-github-action@v1.25.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-image-result != 'success' || needs.build-windows-artifacts.outputs.build-windows-result != 'success' || needs.build-macos-artifacts.outputs.build-macos-result != 'success' }}
with:
payload: |
{"text": "GreptimeDB's release version has failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/release.yml'."}


@@ -1,25 +0,0 @@
name: size-labeler
on: [pull_request_target]
jobs:
labeler:
runs-on: ubuntu-latest
name: Label the PR size
permissions:
issues: write
pull-requests: write
steps:
- uses: codelytv/pr-size-labeler@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
s_label: 'Size: S'
s_max_size: '100'
m_label: 'Size: M'
m_max_size: '500'
l_label: 'Size: L'
l_max_size: '1000'
xl_label: 'Size: XL'
fail_if_xl: 'false'
message_if_xl: ""
files_to_ignore: 'Cargo.lock'


@@ -1,19 +0,0 @@
name: Check user doc labels
on:
pull_request:
types:
- opened
- reopened
- labeled
- unlabeled
jobs:
check_labels:
name: Check doc labels
runs-on: ubuntu-latest
steps:
- uses: docker://agilepathway/pull-request-label-checker:latest
with:
one_of: Doc update required,Doc not needed
repo_token: ${{ secrets.GITHUB_TOKEN }}


@@ -10,7 +10,7 @@ Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get th
It can feel intimidating to contribute to a complex project, but it can also be exciting and fun. These general notes will help everyone participate in this communal activity.
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md)
- Follow the [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md)
- Small changes make huge differences. We will happily accept a PR making a single character change if it helps move forward. Don't wait to have everything working.
- Check the closed issues before opening your issue.
- Try to follow the existing style of the code.
@@ -26,7 +26,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such
## Code of Conduct
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/develop/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
Also, there are things that we are not looking for because they don't match the goals of the product or benefit the community. Please read [Code of Conduct](https://github.com/GreptimeTeam/greptimedb/blob/main/CODE_OF_CONDUCT.md); we hope everyone can keep good manners and become an honored member.
## License

Cargo.lock (generated, 1151 changed lines): diff suppressed because it is too large.


@@ -18,6 +18,7 @@ members = [
"src/common/grpc-expr",
"src/common/mem-prof",
"src/common/meta",
"src/common/plugins",
"src/common/procedure",
"src/common/procedure-test",
"src/common/query",
@@ -29,9 +30,11 @@ members = [
"src/common/time",
"src/common/decimal",
"src/common/version",
"src/common/wal",
"src/datanode",
"src/datatypes",
"src/file-engine",
"src/flow",
"src/frontend",
"src/log-store",
"src/meta-client",
@@ -52,31 +55,41 @@ members = [
"src/store-api",
"src/table",
"src/index",
"tests-fuzz",
"tests-integration",
"tests/runner",
]
resolver = "2"
[workspace.package]
version = "0.5.0"
version = "0.7.0"
edition = "2021"
license = "Apache-2.0"
[workspace.lints]
clippy.print_stdout = "warn"
clippy.print_stderr = "warn"
clippy.implicit_clone = "warn"
rust.unknown_lints = "deny"
[workspace.dependencies]
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.3"
arrow = { version = "47.0" }
arrow-array = "47.0"
arrow-flight = "47.0"
arrow-ipc = { version = "47.0", features = ["lz4"] }
arrow-schema = { version = "47.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
axum = { version = "0.6", features = ["headers"] }
base64 = "0.21"
bigdecimal = "0.4.2"
bitflags = "2.4.1"
bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" }
@@ -90,13 +103,14 @@ etcd-client = "0.12"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a31ea166fc015ea7ff111ac94e26c3a5d64364d2" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96f1f0404f421ee560a4310c73c5071e49168168" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80b72716dcde47ec4161478416a5c6c21343364d" }
mockall = "0.11.4"
moka = "0.12"
num_cpus = "1.16"
once_cell = "1.18"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
"gen-tonic",
@@ -108,10 +122,10 @@ paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
regex-automata = { version = "0.2", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
@@ -121,8 +135,10 @@ rskafka = "0.5"
rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1"
serde_with = "3"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.7"
sysinfo = "0.30"
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
"visitor",
@@ -155,7 +171,7 @@ common-grpc-expr = { path = "src/common/grpc-expr" }
common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-pprof = { path = "src/common/pprof" }
common-plugins = { path = "src/common/plugins" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" }
@@ -165,10 +181,12 @@ common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" }
common-version = { path = "src/common/version" }
common-wal = { path = "src/common/wal" }
datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" }
frontend = { path = "src/frontend" }
index = { path = "src/index" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
@@ -179,6 +197,7 @@ operator = { path = "src/operator" }
partition = { path = "src/partition" }
plugins = { path = "src/plugins" }
promql = { path = "src/promql" }
puffin = { path = "src/puffin" }
query = { path = "src/query" }
script = { path = "src/script" }
servers = { path = "src/servers" }
@@ -190,7 +209,7 @@ table = { path = "src/table" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "abbd357c1e193cd270ea65ee7652334a150b628f"
rev = "80b72716dcde47ec4161478416a5c6c21343364d"
[profile.release]
debug = 1


@@ -65,7 +65,7 @@ endif
build: ## Build debug version greptime.
cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS}
.POHNY: build-by-dev-builder
.PHONY: build-by-dev-builder
build-by-dev-builder: ## Build greptime by dev-builder.
docker run --network=host \
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
@@ -144,11 +144,12 @@ multi-platform-buildx: ## Create buildx multi-platform builder.
docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use
##@ Test
.PHONY: test
test: nextest ## Run unit and integration tests.
cargo nextest run ${NEXTEST_OPTS}
.PHONY: nextest ## Install nextest tools.
nextest:
.PHONY: nextest
nextest: ## Install nextest tools.
cargo --list | grep nextest || cargo install cargo-nextest --locked
.PHONY: sqlness-test


@@ -1,8 +1,8 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@develop/docs/logo-text-padding.png" width="400px">
<source media="(prefers-color-scheme: light)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png">
<source media="(prefers-color-scheme: dark)" srcset="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding-dark.png">
<img alt="GreptimeDB Logo" src="https://cdn.jsdelivr.net/gh/GreptimeTeam/greptimedb@main/docs/logo-text-padding.png" width="400px">
</picture>
</p>
@@ -12,11 +12,11 @@
</h3>
<p align="center">
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/develop/graph/badge.svg?token=FITFDI3J3C"></img></a>
<a href="https://codecov.io/gh/GrepTimeTeam/greptimedb"><img src="https://codecov.io/gh/GrepTimeTeam/greptimedb/branch/main/graph/badge.svg?token=FITFDI3J3C"></img></a>
&nbsp;
<a href="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml"><img src="https://github.com/GreptimeTeam/greptimedb/actions/workflows/develop.yml/badge.svg" alt="CI"></img></a>
&nbsp;
<a href="https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
<a href="https://github.com/greptimeTeam/greptimedb/blob/main/LICENSE"><img src="https://img.shields.io/github/license/greptimeTeam/greptimedb"></a>
</p>
<p align="center">
@@ -29,21 +29,17 @@
## What is GreptimeDB
GreptimeDB is an open-source time-series database with a special focus on
scalability, analytical capabilities and efficiency. It's designed to work on
infrastructure of the cloud era, and users benefit from its elasticity and commodity
storage.
GreptimeDB is an open-source time-series database focusing on efficiency, scalability, and analytical capabilities.
It's designed to work on infrastructure of the cloud era, and users benefit from its elasticity and commodity storage.
Our core developers have been building time-series data platform
for years. Based on their best-practices, GreptimeDB is born to give you:
Our core developers have been building time-series data platforms for years. Based on their best-practices, GreptimeDB is born to give you:
- A standalone binary that scales to highly-available distributed cluster, providing a transparent experience for cluster users
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends
- Flexible indexes, tackling high cardinality issues down
- Distributed, parallel query execution, leveraging elastic computing resource
- Native SQL, and Python scripting for advanced analytical scenarios
- Widely adopted database protocols and APIs, native PromQL supports
- Extensible table engine architecture for extensive workloads
- Optimized columnar layout for handling time-series data; compacted, compressed, and stored on various storage backends, particularly cloud object storage with 50x cost efficiency.
- Fully open-source distributed cluster architecture that harnesses the power of cloud-native elastic computing resources.
- Seamless scalability from a standalone binary at edge to a robust, highly available distributed cluster in cloud, with a transparent experience for both developers and administrators.
- Native SQL and PromQL for queries, and Python scripting to facilitate complex analytical tasks.
- Flexible indexing capabilities and a distributed, parallel-processing query engine, tackling high-cardinality issues.
- Widely adopted database protocols and APIs, including MySQL, PostgreSQL, and Prometheus Remote Storage, etc.
## Quick Start
@@ -131,12 +127,16 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp)
- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl)
- [GreptimeDB Go Client](https://github.com/GreptimeTeam/greptimedb-client-go)
- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java)
- [GreptimeDB Go Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-go)
- [GreptimeDB Java Ingester](https://github.com/GreptimeTeam/greptimedb-ingester-java)
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
- [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk)
### Grafana Dashboard
Our official Grafana dashboard is available at [grafana](./grafana/README.md) directory.
## Project Status
This project is in its early stage and under heavy development. We move fast and
@@ -165,18 +165,17 @@ In addition, you may:
## License
GreptimeDB uses the [Apache 2.0 license][1] to strike a balance between
GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between
open contributions and allowing you to use the software however you want.
[1]: <https://github.com/greptimeTeam/greptimedb/blob/develop/LICENSE>
## Contributing
Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
## Acknowledgement
- GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
- [Apache OpenDAL (incubating)](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB uses [Apache Arrow™](https://arrow.apache.org/) as the memory model and [Apache Parquet™](https://parquet.apache.org/) as the persistent file format.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion™](https://arrow.apache.org/datafusion/).
- [Apache OpenDAL™](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.


@@ -4,10 +4,13 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
arrow.workspace = true
chrono.workspace = true
clap = { version = "4.0", features = ["derive"] }
clap.workspace = true
client.workspace = true
futures-util.workspace = true
indicatif = "0.17.1"


@@ -258,7 +258,7 @@ fn create_table_expr(table_name: &str) -> CreateTableExpr {
catalog_name: CATALOG_NAME.to_string(),
schema_name: SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
desc: "".to_string(),
desc: String::default(),
column_defs: vec![
ColumnDef {
name: "VendorID".to_string(),
@@ -504,7 +504,7 @@ async fn do_query(num_iter: usize, db: &Database, table_name: &str) {
let res = db.sql(&query).await.unwrap();
match res {
Output::AffectedRows(_) | Output::RecordBatches(_) => (),
Output::Stream(stream) => {
Output::Stream(stream, _) => {
stream.try_collect::<Vec<_>>().await.unwrap();
}
}


@@ -8,5 +8,6 @@ coverage:
ignore:
- "**/error*.rs" # ignore all error.rs files
- "tests/runner/*.rs" # ignore integration test runner
- "tests-integration/**/*.rs" # ignore integration tests
comment: # this is a top-level key
layout: "diff"


@@ -14,7 +14,7 @@ require_lease_before_startup = false
# Initialize all regions in the background during the startup.
# By default, it provides services after all regions have been initialized.
initialize_region_in_background = false
init_regions_in_background = false
[heartbeat]
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default.
@@ -34,11 +34,7 @@ connect_timeout = "1s"
tcp_nodelay = true
# WAL options.
# Currently, users are expected to choose the wal through the provider field.
# When a wal provider is chose, the user should comment out all other wal config
# except those corresponding to the chosen one.
[wal]
# WAL data directory
provider = "raft_engine"
# Raft-engine wal options, see `standalone.example.toml`.
@@ -51,9 +47,10 @@ sync_write = false
# Kafka wal options, see `standalone.example.toml`.
# broker_endpoints = ["127.0.0.1:9092"]
# max_batch_size = "4MB"
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# linger = "200ms"
# produce_record_timeout = "100ms"
# consumer_wait_timeout = "100ms"
# backoff_init = "500ms"
# backoff_max = "10s"
# backoff_base = 2
@@ -97,15 +94,18 @@ compress_manifest = false
max_background_jobs = 4
# Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
# Global write buffer size for all regions.
# Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory, with a maximum of 1GB.
global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G).
# Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`.
global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
# Cache size for SST metadata. Set it to 0 to disable the cache.
# If not set, it defaults to 1/32 of OS memory, with a maximum of 128MB.
sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
# Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.
# If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
vector_cache_size = "512MB"
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache.
# Cache size for pages of SST row groups. Set it to 0 to disable the cache.
# If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
page_cache_size = "512MB"
# Buffer size for SST writing.
sst_write_buffer_size = "8MB"
@@ -116,6 +116,39 @@ sst_write_buffer_size = "8MB"
scan_parallelism = 0
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
parallel_scan_channel_size = 32
# Whether to allow stale WAL entries to be read during replay.
allow_stale_entries = false
[region_engine.mito.inverted_index]
# Whether to create the index on flush.
# - "auto": automatically
# - "disable": never
create_on_flush = "auto"
# Whether to create the index on compaction.
# - "auto": automatically
# - "disable": never
create_on_compaction = "auto"
# Whether to apply the index on query
# - "auto": automatically
# - "disable": never
apply_on_query = "auto"
# Memory threshold for performing an external sort during index creation.
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64M"
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
intermediate_path = ""
[region_engine.mito.memtable]
# Memtable type.
# - "experimental": experimental memtable
# - "time_series": time-series memtable (deprecated)
type = "experimental"
# The max number of keys in one shard.
index_max_keys_per_shard = 8192
# The max rows of data inside the actively writing buffer in one shard.
data_freeze_threshold = 32768
# Max dictionary bytes.
fork_dictionary_bytes = "1GiB"
# Log options, see `standalone.example.toml`
# [logging]
@@ -129,11 +162,10 @@ parallel_scan_channel_size = 32
# [export_metrics]
# Whether to enable export metrics; default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The URL the metrics are sent to. Empty by default; example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers to carry in Prometheus remote-write requests
# headers = {}


@@ -31,6 +31,7 @@ runtime_size = 2
mode = "disable"
cert_path = ""
key_path = ""
watch = false
# PostgreSQL server options, see `standalone.example.toml`.
[postgres]
@@ -43,6 +44,7 @@ runtime_size = 2
mode = "disable"
cert_path = ""
key_path = ""
watch = false
# OpenTSDB protocol options, see `standalone.example.toml`.
[opentsdb]
@@ -57,6 +59,9 @@ enable = true
# Prometheus remote storage options, see `standalone.example.toml`.
[prom_store]
enable = true
# Whether to store the data from Prometheus remote write in metric engine.
# true by default
with_metric_engine = true
# Metasrv client options, see `datanode.example.toml`.
[meta_client]
@@ -66,6 +71,13 @@ timeout = "3s"
ddl_timeout = "10s"
connect_timeout = "1s"
tcp_nodelay = true
# Configuration of the metadata cache.
# default: 100000
metadata_cache_max_capacity = 100000
# default: 10m
metadata_cache_ttl = "10m"
# default: 5m
metadata_cache_tti = "5m"
# Log options, see `standalone.example.toml`
# [logging]
@@ -87,11 +99,8 @@ tcp_nodelay = true
# [export_metrics]
# Whether to enable export metrics; default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers to carry in Prometheus remote-write requests
# headers = {}
# For `frontend`, `self_import` is recommended to collect metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"


@@ -64,8 +64,6 @@ provider = "raft_engine"
# selector_type = "round_robin"
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# Expected number of replicas of each partition.
# replication_factor = 1
# Above which a topic creation operation will be cancelled.
@@ -86,11 +84,10 @@ provider = "raft_engine"
# [export_metrics]
# Whether to enable export metrics; default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# [export_metrics.remote_write]
# The URL the metrics are sent to. Empty by default; example: `http://127.0.0.1:4000/v1/prometheus/write?db=information_schema`
# url = ""
# HTTP headers to carry in Prometheus remote-write requests
# headers = {}


@@ -44,6 +44,8 @@ mode = "disable"
cert_path = ""
# Private key file path.
key_path = ""
# Watch for certificate and key file changes and reload automatically
watch = false
# PostgreSQL server options.
[postgres]
@@ -62,6 +64,8 @@ mode = "disable"
cert_path = ""
# private key file path.
key_path = ""
# Watch for certificate and key file changes and reload automatically
watch = false
# OpenTSDB protocol options.
[opentsdb]
@@ -81,6 +85,9 @@ enable = true
[prom_store]
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true
# Whether to store the data from Prometheus remote write in metric engine.
# true by default
with_metric_engine = true
[wal]
# Available wal providers:
@@ -88,43 +95,7 @@ enable = true
# - "kafka"
provider = "raft_engine"
# There're none raft-engine wal config since meta srv only involves in remote wal currently.
# Kafka wal options.
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
# broker_endpoints = ["127.0.0.1:9092"]
# Number of topics to be created upon start.
# num_topics = 64
# Topic selector type.
# Available selector types:
# - "round_robin" (default)
# selector_type = "round_robin"
# A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
# topic_name_prefix = "greptimedb_wal_topic"
# Number of partitions per topic.
# num_partitions = 1
# Expected number of replicas of each partition.
# replication_factor = 1
# The maximum log size a kafka batch producer could buffer.
# max_batch_size = "4MB"
# The linger duration of a kafka batch producer.
# linger = "200ms"
# The maximum amount of time (in milliseconds) to wait for Kafka records to be returned.
# produce_record_timeout = "100ms"
# Above which a topic creation operation will be cancelled.
# create_topic_timeout = "30s"
# The initial backoff for kafka clients.
# backoff_init = "500ms"
# The maximum backoff for kafka clients.
# backoff_max = "10s"
# Exponential backoff rate, i.e. next backoff = base * current backoff.
# backoff_base = 2
# Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate.
# backoff_deadline = "5mins"
# Raft-engine wal options.
# WAL data directory
# dir = "/tmp/greptimedb/wal"
# WAL file size in bytes.
@@ -137,6 +108,47 @@ purge_interval = "10m"
read_batch_size = 128
# Whether to sync log file after every write.
sync_write = false
# Whether to reuse logically truncated log files.
enable_log_recycle = true
# Whether to pre-create log files on start up
prefill_log_files = false
# Duration for fsyncing log files.
sync_period = "1000ms"
# Kafka wal options.
# The broker endpoints of the Kafka cluster. ["127.0.0.1:9092"] by default.
# broker_endpoints = ["127.0.0.1:9092"]
# Number of topics to be created upon start.
# num_topics = 64
# Topic selector type.
# Available selector types:
# - "round_robin" (default)
# selector_type = "round_robin"
# The prefix of topic name.
# topic_name_prefix = "greptimedb_wal_topic"
# The number of replicas of each partition.
# Warning: the replication factor must be positive and must not be greater than the number of broker endpoints.
# replication_factor = 1
# The max size of a single producer batch.
# Warning: Kafka has a default limit of 1MB per message in a topic.
# max_batch_size = "1MB"
# The linger duration.
# linger = "200ms"
# The consumer wait timeout.
# consumer_wait_timeout = "100ms"
# Create topic timeout.
# create_topic_timeout = "30s"
# The initial backoff delay.
# backoff_init = "500ms"
# The maximum backoff delay.
# backoff_max = "10s"
# Exponential backoff rate, i.e. next backoff = base * current backoff.
# backoff_base = 2
# The deadline of retries.
# backoff_deadline = "5mins"
# Metadata storage options.
[metadata_store]
@@ -188,15 +200,18 @@ compress_manifest = false
max_background_jobs = 4
# Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
# Global write buffer size for all regions.
# Global write buffer size for all regions. If not set, it defaults to 1/8 of OS memory, with a maximum of 1GB.
global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G).
# Global write buffer size threshold to reject write requests. If not set, it defaults to 2 times `global_write_buffer_size`.
global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
# Cache size for SST metadata. Set it to 0 to disable the cache.
# If not set, it defaults to 1/32 of OS memory, with a maximum of 128MB.
sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
# Cache size for vectors and arrow arrays. Set it to 0 to disable the cache.
# If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
vector_cache_size = "512MB"
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache.
# Cache size for pages of SST row groups. Set it to 0 to disable the cache.
# If not set, it defaults to 1/16 of OS memory, with a maximum of 512MB.
page_cache_size = "512MB"
# Buffer size for SST writing.
sst_write_buffer_size = "8MB"
@@ -207,6 +222,39 @@ sst_write_buffer_size = "8MB"
scan_parallelism = 0
# Capacity of the channel to send data from parallel scan tasks to the main task (default 32).
parallel_scan_channel_size = 32
# Whether to allow stale WAL entries to be read during replay.
allow_stale_entries = false
[region_engine.mito.inverted_index]
# Whether to create the index on flush.
# - "auto": automatically
# - "disable": never
create_on_flush = "auto"
# Whether to create the index on compaction.
# - "auto": automatically
# - "disable": never
create_on_compaction = "auto"
# Whether to apply the index on query
# - "auto": automatically
# - "disable": never
apply_on_query = "auto"
# Memory threshold for performing an external sort during index creation.
# Setting to empty will disable external sorting, forcing all sorting operations to happen in memory.
mem_threshold_on_create = "64M"
# File system path to store intermediate files for external sorting (default `{data_home}/index_intermediate`).
intermediate_path = ""
[region_engine.mito.memtable]
# Memtable type.
# - "experimental": experimental memtable
# - "time_series": time-series memtable (deprecated)
type = "experimental"
# The max number of keys in one shard.
index_max_keys_per_shard = 8192
# The max rows of data inside the actively writing buffer in one shard.
data_freeze_threshold = 32768
# Max dictionary bytes.
fork_dictionary_bytes = "1GiB"
# Log options
# [logging]
@@ -218,10 +266,11 @@ parallel_scan_channel_size = 32
# enable_otlp_tracing = false
# Tracing exporter endpoint with format `ip:port`. We use gRPC OTLP as the exporter; the default endpoint is `localhost:4317`
# otlp_endpoint = "localhost:4317"
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
# tracing_sample_ratio = 1.0
# Whether to append logs to stdout. Defaults to true.
# append_stdout = true
# The percentage of traces that will be sampled and exported. Valid range `[0, 1]`: 1 means all traces are sampled, 0 means none are sampled; the default value is 1. Ratios > 1 are treated as 1, and fractions < 0 are treated as 0
# [logging.tracing_sample_ratio]
# default_ratio = 0.0
# Standalone export the metrics generated by itself
# encoded to Prometheus remote-write format
@@ -230,11 +279,8 @@ parallel_scan_channel_size = 32
# [export_metrics]
# Whether to enable export metrics; default is false
# enable = false
# The url of metrics export endpoint, default is `frontend` default HTTP endpoint.
# endpoint = "127.0.0.1:4000"
# The database name of exported metrics stores, user needs to specify a valid database
# db = ""
# The interval of export metrics
# write_interval = "30s"
# HTTP headers to carry in Prometheus remote-write requests
# headers = {}
# For `standalone`, `self_import` is recommended to collect metrics generated by itself
# [export_metrics.self_import]
# db = "information_schema"

View File

@@ -1,5 +1,11 @@
FROM ubuntu:22.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
# The binary name of GreptimeDB executable.
# Defaults to "greptime", but sometimes in other projects it might be different.
ARG TARGET_BIN=greptime
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ca-certificates \
python3.10 \
@@ -7,14 +13,16 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
python3-pip \
curl
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
ARG TARGETARCH
ADD $TARGETARCH/greptime /greptime/bin/
ADD $TARGETARCH/$TARGET_BIN /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]
ENV TARGET_BIN=$TARGET_BIN
ENTRYPOINT ["sh", "-c", "exec $TARGET_BIN \"$@\"", "--"]


@@ -1,5 +1,8 @@
FROM ubuntu:20.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
ENV LANG en_US.utf8
WORKDIR /greptimedb
@@ -27,10 +30,20 @@ RUN apt-get -y purge python3.8 && \
ln -s /usr/bin/python3.10 /usr/bin/python3 && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
RUN git config --global --add safe.directory /greptimedb
# Silence all `safe.directory` warnings, to avoid the "detect dubious repository" error when building with submodules.
# Disabling the safe directory check here won't pose extra security issues, because in our usage for this dev build
# image, we use it solely in our own environment (GitHub Actions VMs, or ECS instances created dynamically by ourselves),
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that has prepared the submodules.
RUN git config --global --add safe.directory *
# Install Python dependencies.
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
# Install Rust.


@@ -1,6 +1,6 @@
---
Feature Name: Inverted Index for SST File
Tracking Issue: TBD
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2705
Date: 2023-11-03
Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>"
---


@@ -0,0 +1,97 @@
---
Feature Name: Dataflow Framework
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/3187
Date: 2024-01-17
Author: "Discord9 <discord9@163.com>"
---
# Summary
This RFC proposes a Lightweight Module for executing continuous aggregation queries on a stream of data.
# Motivation
Being able to do continuous aggregation is a very powerful tool. It allows you to do things like:
1. downsample data, e.g. from 1 millisecond to 1 second resolution
2. calculate the average of a stream of data
3. keep a sliding window of data in memory
In order to do those things while maintaining a low memory footprint, you need to be able to manage the data in a smart way. Hence, we only store necessary data in memory, and send/recv data deltas to/from the client.
# Details
## System boundary / What it is and isn't
- GreptimeFlow provides a way to perform continuous aggregation over time-series data.
- It's not a complete stream-processing system. Only a necessary subset of functionalities is provided.
- Flow can process a configured range of fresh data. Data exceeding this range is dropped directly, so it cannot handle datasets with random timestamps.
- Both sliding windows (e.g., the latest 5m from the present) and fixed windows (every 5m from some start time) are supported. These two are the major target scenarios.
- Flow can handle most aggregate operators within one table (e.g. sum, avg, min, max and comparison operators), but others (join, trigger, txn, etc.) are not target features.
## Framework
- Greptime Flow is built on top of [Hydroflow](https://github.com/hydro-project/hydroflow).
- We had three choices of dataflow/stream-processing framework for our simple continuous aggregation feature:
1. Build on the timely/differential dataflow crates that [materialize](https://github.com/MaterializeInc/materialize) is based on. This proved too obscure for simple usage, and it is hard to customize memory usage control.
2. Build a simple dataflow framework from the ground up, like [arroyo](https://www.arroyo.dev/) or [risingwave](https://www.risingwave.dev/) did; for example, the core streaming logic of [arroyo](https://github.com/ArroyoSystems/arroyo/blob/master/arroyo-datastream/src/lib.rs) takes only about 2000 lines of code. However, this means maintaining another dataflow layer, which might seem easy in the beginning but could become too burdensome once we need more features.
3. Build on a simple, lower-level dataflow framework written by someone else, like [hydroflow](https://github.com/hydro-project/hydroflow). This approach combines the best of both worlds: it is easy to comprehend and customize, and the framework offers precisely the features needed for crafting uncomplicated single-node dataflow programs while delivering decent performance.
Hence, we choose the third option and use a simple logical plan that is agnostic to the underlying dataflow framework, as it only describes what the dataflow graph should do, not how it does it. We build operators in Hydroflow to execute the plan, and the resulting Hydroflow graph is wrapped in an engine that only supports data in/out and a tick event to flush and compute the result. This provides a thin middle layer that is easy to maintain and allows switching to another dataflow framework if necessary.
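To illustrate the shape of this thin middle layer, a minimal, self-contained Rust sketch follows: an engine that only supports data in and a tick that flushes per-window averages. The names (`FlowEngine`, `handle_inserts`, `tick`) and the accumulator-based state are illustrative assumptions for this discussion, not the actual Flownode API.
```rust
use std::collections::BTreeMap;

/// One incoming data point mirrored from the frontend (illustrative).
struct Row {
    ts_millis: i64,
    value: f64,
}

/// A minimal engine wrapper: it only supports data in (`handle_inserts`) and a
/// tick event (`tick`) that computes and flushes results, mirroring the thin
/// middle layer described above.
struct FlowEngine {
    window_millis: i64,
    /// Intermediate state per window: (sum, count) is enough to emit an average.
    state: BTreeMap<i64, (f64, u64)>,
}

impl FlowEngine {
    fn new(window_millis: i64) -> Self {
        Self { window_millis, state: BTreeMap::new() }
    }

    /// Data in: fold fresh rows into the per-window accumulators.
    fn handle_inserts(&mut self, rows: &[Row]) {
        for row in rows {
            let window_start = row.ts_millis - row.ts_millis.rem_euclid(self.window_millis);
            let entry = self.state.entry(window_start).or_insert((0.0, 0));
            entry.0 += row.value;
            entry.1 += 1;
        }
    }

    /// Tick: compute (window_start, avg) pairs from the current state. A real
    /// implementation would also evict windows outside the materialize window.
    fn tick(&self) -> Vec<(i64, f64)> {
        self.state
            .iter()
            .map(|(start, (sum, count))| (*start, *sum / *count as f64))
            .collect()
    }
}

fn main() {
    let mut engine = FlowEngine::new(60_000); // 1-minute windows
    engine.handle_inserts(&[
        Row { ts_millis: 5_000, value: 1.0 },
        Row { ts_millis: 10_000, value: 3.0 },
        Row { ts_millis: 65_000, value: 10.0 },
    ]);
    for (window_start, avg) in engine.tick() {
        println!("window {window_start}: avg = {avg}");
    }
}
```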
## Deploy mode and protocol
- Greptime Flow is an independent streaming compute component. It can be used either within a standalone node or as a dedicated node at the same level as frontend in distributed mode.
- It accepts insert requests in the `Rows` format, which is used between frontend and datanode.
- A new flow job is submitted as a modified SQL query, as Snowflake does, e.g.: `CREATE TASK avg_over_5m WINDOW_SIZE = "5m" AS SELECT avg(value) FROM table WHERE time > now() - 5m GROUP BY time(1m)`. The flow job is then stored in MetaSrv.
- It also persists results to the frontend in the `Rows` format.
- The query plan uses Substrait as the codec format, the same as GreptimeDB's query engine.
- Greptime Flow needs a WAL for recovery. It's possible to reuse the datanode's.
The workflow is shown in the following diagram
```mermaid
graph TB
subgraph Flownode["Flownode"]
subgraph Dataflows
df1("Dataflow_1")
df2("Dataflow_2")
end
end
subgraph Frontend["Frontend"]
newLines["Mirror Insert
Create Task From Query
Write result from flow node"]
end
subgraph Datanode["Datanode"]
end
User --> Frontend
Frontend -->|Register Task| Metasrv
Metasrv -->|Read Task Metadata| Frontend
Frontend -->|Create Task| Flownode
Frontend -->|Mirror Insert| Flownode
Flownode -->|Write back| Frontend
Frontend --> Datanode
Datanode --> Frontend
```
## Lifecycle of data
- New data is inserted into the frontend as before. The frontend mirrors the insert request to the flow node if there is a configured flow job.
- Depending on the timestamp of incoming data, flow will either drop it (outdated data) or process it (fresh data).
- Greptime Flow will periodically write results back to the result table through the frontend.
- Those results are then written into a result table stored in the datanode.
- A small table of intermediate state is kept in memory, which is used to calculate the result.
## Supported operations
- Greptime Flow accepts a configurable "materialize window"; data points exceeding that time window are discarded (see the sketch after this list).
- Data within that "materialize window" is queryable and updatable.
- Greptime Flow can handle partitioning if and only if the input query can be transformed into a fully partitioned plan according to the existing commutative rules. Otherwise the corresponding flow job has to be calculated on a single node.
- Notice that Greptime Flow has to see all the data that belongs to one partition.
- Deletion and duplicate insertion are not supported at this early stage.
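As a minimal sketch of the materialize-window rule above (the `classify`/`Disposition` names and the millisecond-based arguments are assumptions of this sketch, not GreptimeFlow's actual interface):
```rust
/// Classify an incoming row against the configurable "materialize window".
enum Disposition {
    /// Outdated data: older than the materialize window, dropped directly.
    Drop,
    /// Fresh data: processed and folded into the in-memory intermediate state.
    Process,
}

fn classify(row_ts_millis: i64, now_millis: i64, materialize_window_millis: i64) -> Disposition {
    if now_millis - row_ts_millis > materialize_window_millis {
        Disposition::Drop
    } else {
        Disposition::Process
    }
}

fn main() {
    let now = 1_000_000;
    let window = 5 * 60 * 1000; // a 5-minute materialize window
    assert!(matches!(classify(now - 1_000, now, window), Disposition::Process));
    assert!(matches!(classify(now - 600_000, now, window), Disposition::Drop));
}
```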
## Miscellaneous
- Greptime Flow can translate SQL to its own plan; however, only a selected few aggregate functions are supported for now, such as min/max/sum/count/avg.
- Greptime Flow's operators are configurable in terms of the size of the materialize window, whether to allow delayed incoming data, etc., so the simplest operator can choose not to tolerate any delay in order to save memory.
# Future Work
- Support UDFs that do one-to-one mapping. Preferably, we can reuse the UDF mechanism in GreptimeDB.
- Support the join operator.
- Design syntax to configure operators with different materialize windows and delay tolerances.
- Support a cross-partition merge operator, so that complex query plans that do not necessarily accord with the partitioning rule can communicate between nodes and create the final materialized result.
- Support duplicate insertion, which can be reverted easily within the current framework, so supporting it should be straightforward.
- Support deletion within the "materialize window"; this requires operators like min/max to store all inputs within the window, which might require further optimization.

Binary image file added (65 KiB); not shown.


@@ -0,0 +1,101 @@
---
Feature Name: Multi-dimension Partition Rule
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/3351
Date: 2024-02-21
Author: "Ruihang Xia <waynestxia@gmail.com>"
---
# Summary
A new region partition scheme that runs on multiple dimensions of the key space. The partition rule is defined by a set of simple expressions on the partition key columns.
# Motivation
The current partition rule is from MySQL's [`RANGE Partition`](https://dev.mysql.com/doc/refman/8.0/en/partitioning-range.html), which is based on a single dimension. It is sort of a [Hilbert Curve](https://en.wikipedia.org/wiki/Hilbert_curve) that picks several points on the curve to divide the space. It is neither easy to understand how the data gets partitioned nor flexible enough to handle complex partitioning requirements.
Considering future requirements like region repartitioning or autonomous rebalancing, where both the workload and the partitions may change frequently, this RFC proposes a new region partition scheme that uses a set of simple expressions on the partition key columns to divide the key space.
# Details
## Partition rule
First, we define the simple expressions that can be used to define the partition rule. A simple expression is a binary expression on the partition key columns that can be evaluated to a boolean value. The binary operator is limited to comparison operators only, like `=`, `!=`, `>`, `>=`, `<` and `<=`, and each operand is either a literal value or a partition column.
Examples of valid simple expressions are $`col_A = 10`$, $`col_A \gt 10 \& col_B \gt 20`$ or $`col_A \ne 10`$.
Those expressions can be used as predicates to divide the key space into different regions. The following example has two partition columns, `Col A` and `Col B`, and four partitioned regions.
```math
\left\{\begin{aligned}
&col_A \le 10 &Region_1 \\
&10 \lt col_A \& col_A \le 20 &Region_2 \\
&20 \lt col_A \space \& \space col_B \lt 100 &Region_3 \\
&20 \lt col_A \space \& \space col_B \ge 100 &Region_4
\end{aligned}\right\}
```
An advantage of this scheme is that it is easy to understand how the data gets partitioned. The above example can be visualized in a 2D space (two partition columns are involved in the example).
![example](2d-example.png)
Here each expression draws a line in the 2D space. Managing data partitioning becomes a matter of drawing lines in the key space.
To make it easy to use, there is a "default region" which catches all the data that doesn't match any of the previous expressions. The default region exists implicitly and does not need to be specified. It is also possible to remove this default region if the DB finds it is not necessary.
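To illustrate how a row would be routed under such a rule set, here is a minimal, self-contained Rust sketch; `PartitionRule`, `find_region` and the closure-based expressions are illustrative assumptions of this write-up, not GreptimeDB's actual partition API.
```rust
use std::collections::HashMap;

/// Partition-column values of one row; a plain map is enough for this sketch.
type RowValues = HashMap<&'static str, i64>;
/// A partition expression, modeled here as a boolean predicate over the row.
type Pred = Box<dyn Fn(&RowValues) -> bool>;

/// An ordered list of (expression, region) pairs plus the default region
/// described above.
struct PartitionRule {
    exprs: Vec<(Pred, u64)>,
    default_region: u64,
}

impl PartitionRule {
    /// Route a row to the first region whose expression matches,
    /// falling back to the default region.
    fn find_region(&self, row: &RowValues) -> u64 {
        self.exprs
            .iter()
            .find(|(expr, _)| expr(row))
            .map(|(_, region)| *region)
            .unwrap_or(self.default_region)
    }
}

/// Helper to pair a predicate with its target region.
fn pred<F>(f: F, region: u64) -> (Pred, u64)
where
    F: Fn(&RowValues) -> bool + 'static,
{
    let boxed: Pred = Box::new(f);
    (boxed, region)
}

fn main() {
    // The four-region example above, expressed over `col_A` and `col_B`.
    let rule = PartitionRule {
        exprs: vec![
            pred(|r: &RowValues| r["col_A"] <= 10, 1),
            pred(|r: &RowValues| r["col_A"] > 10 && r["col_A"] <= 20, 2),
            pred(|r: &RowValues| r["col_A"] > 20 && r["col_B"] < 100, 3),
            pred(|r: &RowValues| r["col_A"] > 20 && r["col_B"] >= 100, 4),
        ],
        default_region: 0,
    };
    let row = RowValues::from([("col_A", 25), ("col_B", 150)]);
    assert_eq!(rule.find_region(&row), 4);
}
```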
## SQL interface
The SQL interface is responsible for two parts: specifying the partition columns and the partition rule. Though we are targeting an autonomous system, it's still allowed to give some bootstrap rules or hints when creating a table.
Partition columns are specified by the `PARTITION ON COLUMNS` sub-clause in `CREATE TABLE`:
```sql
CREATE TABLE t (...)
PARTITION ON COLUMNS (...) ();
```
The two following brackets are for the partition columns and the partition rule, respectively.
Columns provided here are only used as an allow-list for how the partition rule can be defined, which means (a) the order of the columns doesn't matter, and (b) the columns provided here are not necessarily used in the partition rule.
The partition rule part is a list of comma-separated simple expressions. Expressions here do not correspond one-to-one to regions, as they might be changed by the system to fit various workloads.
A full example of `CREATE TABLE` with partition rule is:
```sql
CREATE TABLE IF NOT EXISTS demo (
a STRING,
b STRING,
c STRING,
d STRING,
ts TIMESTAMP,
memory DOUBLE,
TIME INDEX (ts),
PRIMARY KEY (a, b, c, d)
)
PARTITION ON COLUMNS (c, b, a) (
a < 10,
a >= 10 AND a < 20,
a >= 20 AND b < 100,
a >= 20 AND b >= 100
)
```
## Combine with storage
Examining columns separately suits our columnar storage very well in two aspects.
1. Simple expressions can be pushed down to the storage and file format, and are likely to hit existing indexes, which makes pruning very efficient.
2. Columns in columnar storage are not tightly coupled as they are in traditional row storage, which means we can easily add or remove columns from the partition rule without much impact on the data (such as a global reshuffle).
The data file itself can be "projected" onto the key space as a polyhedron; each face is guaranteed to be parallel to some coordinate plane (in a 2D scenario, this means every file can be projected to a rectangle). Thus partitioning or repartitioning also only needs to consider the related columns.
![sst-project](sst-project.png)
An additional limitation is that considering how the index works and how we organize the primary keys at present, the partition columns are limited to be a subset of primary keys for better performance.
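To make the pruning idea in point 1 above concrete, here is a minimal, self-contained Rust sketch that checks a simple expression against a file's per-column min/max statistics. The `SimpleExpr`/`may_match` names and the numeric-only literals are assumptions of this sketch, not the engine's real pruning code.
```rust
use std::collections::HashMap;

/// A "simple expression" as defined above: `column <op> literal`.
#[allow(dead_code)] // not every operator is exercised in the tiny demo below
enum Op { Eq, NotEq, Gt, GtEq, Lt, LtEq }

struct SimpleExpr {
    column: String,
    op: Op,
    literal: f64,
}

/// Per-column (min, max) statistics of one data file (e.g. from SST metadata).
type FileStats = HashMap<String, (f64, f64)>;

/// Returns false when the file can be pruned, i.e. no row in the file can
/// possibly satisfy the expression given its min/max statistics.
fn may_match(expr: &SimpleExpr, stats: &FileStats) -> bool {
    let Some(&(min, max)) = stats.get(&expr.column) else {
        return true; // no statistics: cannot prune
    };
    match expr.op {
        Op::Eq => expr.literal >= min && expr.literal <= max,
        Op::NotEq => !(min == max && min == expr.literal),
        Op::Gt => max > expr.literal,
        Op::GtEq => max >= expr.literal,
        Op::Lt => min < expr.literal,
        Op::LtEq => min <= expr.literal,
    }
}

fn main() {
    // A file whose `col_A` values all lie in [0, 9] can be pruned for `col_A > 20`,
    // but not for `col_A <= 5`.
    let mut stats = FileStats::new();
    stats.insert("col_A".to_string(), (0.0, 9.0));
    assert!(!may_match(&SimpleExpr { column: "col_A".into(), op: Op::Gt, literal: 20.0 }, &stats));
    assert!(may_match(&SimpleExpr { column: "col_A".into(), op: Op::LtEq, literal: 5.0 }, &stats));
}
```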
# Drawbacks
This is a breaking change.

Binary image file added (71 KiB); not shown.

grafana/README.md (new file, 10 lines)

@@ -0,0 +1,10 @@
Grafana dashboard for GreptimeDB
--------------------------------
GreptimeDB's official Grafana dashboard.
Status note: we are still working on this dashboard config. It is expected to change frequently in the coming days. Please feel free to submit feedback and/or contributions to this dashboard 🤗
# How to use
Open the Grafana dashboard page, choose `New` -> `Import`, and upload the `greptimedb.json` file.

grafana/greptimedb.json Normal file

File diff suppressed because it is too large.

View File

@@ -1,8 +1,7 @@
#!/usr/bin/env bash
# This script is used to download built dashboard assets from the "GreptimeTeam/dashboard" repository.
set -e -x
set -ex
declare -r SCRIPT_DIR=$(cd $(dirname ${0}) >/dev/null 2>&1 && pwd)
declare -r ROOT_DIR=$(dirname ${SCRIPT_DIR})
@@ -13,13 +12,34 @@ RELEASE_VERSION="$(cat $STATIC_DIR/VERSION | tr -d '\t\r\n ')"
echo "Downloading assets to dir: $OUT_DIR"
cd $OUT_DIR
if [[ -z "$GITHUB_PROXY_URL" ]]; then
GITHUB_URL="https://github.com"
else
GITHUB_URL="${GITHUB_PROXY_URL%/}"
fi
function retry_fetch() {
local url=$1
local filename=$2
curl --connect-timeout 10 --retry 3 -fsSL $url --output $filename || {
echo "Failed to download $url"
echo "You may try to set http_proxy and https_proxy environment variables."
if [[ -z "$GITHUB_PROXY_URL" ]]; then
echo "You may try to set GITHUB_PROXY_URL=http://mirror.ghproxy.com/"
fi
exit 1
}
}
# Download the SHA256 checksum attached to the release. To verify the integrity
# of the download, this checksum will be used to check the downloaded tar file
# containing the built dashboard assets.
curl -Ls https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/sha256.txt --output sha256.txt
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/${RELEASE_VERSION}/sha256.txt" sha256.txt
# Download the tar file containing the built dashboard assets.
curl -L https://github.com/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz --output build.tar.gz
retry_fetch "${GITHUB_URL}/GreptimeTeam/dashboard/releases/download/$RELEASE_VERSION/build.tar.gz" build.tar.gz
# Verify the checksums match; exit if they don't.
case "$(uname -s)" in

View File

@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
common-base.workspace = true
common-decimal.workspace = true

View File

@@ -8,6 +8,9 @@ license.workspace = true
default = []
testing = []
[lints]
workspace = true
[dependencies]
api.workspace = true
async-trait.workspace = true

View File

@@ -7,13 +7,16 @@ license.workspace = true
[features]
testing = []
[lints]
workspace = true
[dependencies]
api.workspace = true
arc-swap = "1.0"
arrow.workspace = true
arrow-schema.workspace = true
async-stream.workspace = true
async-trait = "0.1"
build-data = "0.1"
common-catalog.workspace = true
common-error.workspace = true
common-grpc.workspace = true
@@ -24,14 +27,16 @@ common-recordbatch.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true
dashmap.workspace = true
datafusion.workspace = true
datatypes.workspace = true
futures = "0.3"
futures-util.workspace = true
itertools.workspace = true
lazy_static.workspace = true
meta-client.workspace = true
moka = { workspace = true, features = ["future"] }
moka = { workspace = true, features = ["future", "sync"] }
parking_lot = "0.12"
partition.workspace = true
paste = "1.0"

View File

@@ -41,6 +41,14 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Failed to list {}.{}'s tables", catalog, schema))]
ListTables {
location: Location,
catalog: String,
schema: String,
source: BoxedError,
},
#[snafu(display("Failed to re-compile script due to internal error"))]
CompileScriptInternal {
location: Location,
@@ -156,6 +164,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to find table partitions"))]
FindPartitions { source: partition::error::Error },
#[snafu(display("Failed to find region routes"))]
FindRegionRoutes { source: partition::error::Error },
#[snafu(display("Failed to read system catalog table records"))]
ReadSystemCatalog {
location: Location,
@@ -237,6 +251,12 @@ pub enum Error {
source: common_meta::error::Error,
location: Location,
},
#[snafu(display("Get null from table cache, key: {}", key))]
TableCacheNotGet { key: String, location: Location },
#[snafu(display("Failed to get table cache, err: {}", err_msg))]
GetTableCache { err_msg: String },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -246,11 +266,14 @@ impl ErrorExt for Error {
match self {
Error::InvalidKey { .. }
| Error::SchemaNotFound { .. }
| Error::TableNotFound { .. }
| Error::CatalogNotFound { .. }
| Error::FindPartitions { .. }
| Error::FindRegionRoutes { .. }
| Error::InvalidEntryType { .. }
| Error::ParallelOpenTable { .. } => StatusCode::Unexpected,
Error::TableNotFound { .. } => StatusCode::TableNotFound,
Error::SystemCatalog { .. }
| Error::EmptyValue { .. }
| Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,
@@ -270,9 +293,9 @@ impl ErrorExt for Error {
StatusCode::InvalidArguments
}
Error::ListCatalogs { source, .. } | Error::ListSchemas { source, .. } => {
source.status_code()
}
Error::ListCatalogs { source, .. }
| Error::ListSchemas { source, .. }
| Error::ListTables { source, .. } => source.status_code(),
Error::OpenSystemCatalog { source, .. }
| Error::CreateSystemCatalog { source, .. }
@@ -294,6 +317,7 @@ impl ErrorExt for Error {
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
Error::Datafusion { .. } => StatusCode::EngineExecuteQuery,
Error::TableMetadataManager { source, .. } => source.status_code(),
Error::TableCacheNotGet { .. } | Error::GetTableCache { .. } => StatusCode::Internal,
}
}
@@ -333,7 +357,7 @@ mod tests {
assert_eq!(
StatusCode::StorageUnavailable,
Error::SystemCatalog {
msg: "".to_string(),
msg: String::default(),
location: Location::generate(),
}
.status_code()

View File

@@ -13,20 +13,27 @@
// limitations under the License.
mod columns;
mod key_column_usage;
mod memory_table;
mod partitions;
mod predicate;
mod region_peers;
mod runtime_metrics;
pub mod schemata;
mod table_names;
mod tables;
pub mod tables;
use std::collections::HashMap;
use std::sync::{Arc, Weak};
use common_catalog::consts::{self, INFORMATION_SCHEMA_NAME};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
use datatypes::schema::SchemaRef;
use futures_util::StreamExt;
use lazy_static::lazy_static;
use paste::paste;
pub(crate) use predicate::Predicates;
use snafu::ResultExt;
use store_api::data_source::DataSource;
use store_api::storage::{ScanRequest, TableId};
@@ -40,7 +47,12 @@ pub use table_names::*;
use self::columns::InformationSchemaColumns;
use crate::error::Result;
use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage;
use crate::information_schema::memory_table::{get_schema_columns, MemoryTable};
use crate::information_schema::partitions::InformationSchemaPartitions;
use crate::information_schema::region_peers::InformationSchemaRegionPeers;
use crate::information_schema::runtime_metrics::InformationSchemaMetrics;
use crate::information_schema::schemata::InformationSchemaSchemata;
use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager;
@@ -50,7 +62,23 @@ lazy_static! {
ENGINES,
COLUMN_PRIVILEGES,
COLUMN_STATISTICS,
BUILD_INFO,
CHARACTER_SETS,
COLLATIONS,
COLLATION_CHARACTER_SET_APPLICABILITY,
CHECK_CONSTRAINTS,
EVENTS,
FILES,
OPTIMIZER_TRACE,
PARAMETERS,
PROFILING,
REFERENTIAL_CONSTRAINTS,
ROUTINES,
SCHEMA_PRIVILEGES,
TABLE_PRIVILEGES,
TRIGGERS,
GLOBAL_STATUS,
SESSION_STATUS,
PARTITIONS,
];
}
@@ -120,12 +148,36 @@ impl InformationSchemaProvider {
fn build_tables(&mut self) {
let mut tables = HashMap::new();
// Carefully consider the tables that may expose sensitive cluster configurations,
// authentication details, and other critical information.
// Only put these tables under `greptime` catalog to prevent info leak.
if self.catalog_name == DEFAULT_CATALOG_NAME {
tables.insert(
RUNTIME_METRICS.to_string(),
self.build_table(RUNTIME_METRICS).unwrap(),
);
tables.insert(
BUILD_INFO.to_string(),
self.build_table(BUILD_INFO).unwrap(),
);
tables.insert(
REGION_PEERS.to_string(),
self.build_table(REGION_PEERS).unwrap(),
);
}
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
tables.insert(SCHEMATA.to_string(), self.build_table(SCHEMATA).unwrap());
tables.insert(COLUMNS.to_string(), self.build_table(COLUMNS).unwrap());
tables.insert(
KEY_COLUMN_USAGE.to_string(),
self.build_table(KEY_COLUMN_USAGE).unwrap(),
);
// Add memory tables
for name in MEMORY_TABLES.iter() {
tables.insert((*name).to_string(), self.build_table(name).unwrap());
tables.insert((*name).to_string(), self.build_table(name).expect(name));
}
self.tables = tables;
@@ -134,7 +186,7 @@ impl InformationSchemaProvider {
fn build_table(&self, name: &str) -> Option<TableRef> {
self.information_table(name).map(|table| {
let table_info = Self::table_info(self.catalog_name.clone(), &table);
let filter_pushdown = FilterPushDownType::Unsupported;
let filter_pushdown = FilterPushDownType::Inexact;
let thin_table = ThinTable::new(table_info, filter_pushdown);
let data_source = Arc::new(InformationTableDataSource::new(table));
@@ -156,6 +208,41 @@ impl InformationSchemaProvider {
COLUMN_PRIVILEGES => setup_memory_table!(COLUMN_PRIVILEGES),
COLUMN_STATISTICS => setup_memory_table!(COLUMN_STATISTICS),
BUILD_INFO => setup_memory_table!(BUILD_INFO),
CHARACTER_SETS => setup_memory_table!(CHARACTER_SETS),
COLLATIONS => setup_memory_table!(COLLATIONS),
COLLATION_CHARACTER_SET_APPLICABILITY => {
setup_memory_table!(COLLATION_CHARACTER_SET_APPLICABILITY)
}
CHECK_CONSTRAINTS => setup_memory_table!(CHECK_CONSTRAINTS),
EVENTS => setup_memory_table!(EVENTS),
FILES => setup_memory_table!(FILES),
OPTIMIZER_TRACE => setup_memory_table!(OPTIMIZER_TRACE),
PARAMETERS => setup_memory_table!(PARAMETERS),
PROFILING => setup_memory_table!(PROFILING),
REFERENTIAL_CONSTRAINTS => setup_memory_table!(REFERENTIAL_CONSTRAINTS),
ROUTINES => setup_memory_table!(ROUTINES),
SCHEMA_PRIVILEGES => setup_memory_table!(SCHEMA_PRIVILEGES),
TABLE_PRIVILEGES => setup_memory_table!(TABLE_PRIVILEGES),
TRIGGERS => setup_memory_table!(TRIGGERS),
GLOBAL_STATUS => setup_memory_table!(GLOBAL_STATUS),
SESSION_STATUS => setup_memory_table!(SESSION_STATUS),
KEY_COLUMN_USAGE => Some(Arc::new(InformationSchemaKeyColumnUsage::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
SCHEMATA => Some(Arc::new(InformationSchemaSchemata::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
RUNTIME_METRICS => Some(Arc::new(InformationSchemaMetrics::new())),
PARTITIONS => Some(Arc::new(InformationSchemaPartitions::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
REGION_PEERS => Some(Arc::new(InformationSchemaRegionPeers::new(
self.catalog_name.clone(),
self.catalog_manager.clone(),
)) as _),
_ => None,
}
}
@@ -187,7 +274,7 @@ trait InformationTable {
fn schema(&self) -> SchemaRef;
fn to_stream(&self) -> Result<SendableRecordBatchStream>;
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
fn table_type(&self) -> TableType {
TableType::Temporary
@@ -221,7 +308,7 @@ impl DataSource for InformationTableDataSource {
&self,
request: ScanRequest,
) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
let projection = request.projection;
let projection = request.projection.clone();
let projected_schema = match &projection {
Some(projection) => self.try_project(projection)?,
None => self.table.schema(),
@@ -229,7 +316,7 @@ impl DataSource for InformationTableDataSource {
let stream = self
.table
.to_stream()
.to_stream(request)
.map_err(BoxedError::new)
.context(TablesRecordBatchSnafu)
.map_err(BoxedError::new)?
@@ -242,6 +329,7 @@ impl DataSource for InformationTableDataSource {
schema: projected_schema,
stream: Box::pin(stream),
output_ordering: None,
metrics: Default::default(),
};
Ok(Box::pin(stream))

View File

@@ -29,14 +29,17 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, VectorRef};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, COLUMNS};
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::Predicates;
use crate::CatalogManager;
pub(super) struct InformationSchemaColumns {
@@ -51,6 +54,11 @@ const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type";
const COLUMN_DEFAULT: &str = "column_default";
const IS_NULLABLE: &str = "is_nullable";
const COLUMN_TYPE: &str = "column_type";
const COLUMN_COMMENT: &str = "column_comment";
const INIT_CAPACITY: usize = 42;
impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
@@ -69,6 +77,10 @@ impl InformationSchemaColumns {
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_DEFAULT, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(IS_NULLABLE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_COMMENT, ConcreteDataType::string_datatype(), true),
]))
}
@@ -94,14 +106,14 @@ impl InformationTable for InformationSchemaColumns {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_columns()
.make_columns(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
@@ -126,6 +138,11 @@ struct InformationSchemaColumnsBuilder {
column_names: StringVectorBuilder,
data_types: StringVectorBuilder,
semantic_types: StringVectorBuilder,
column_defaults: StringVectorBuilder,
is_nullables: StringVectorBuilder,
column_types: StringVectorBuilder,
column_comments: StringVectorBuilder,
}
impl InformationSchemaColumnsBuilder {
@@ -138,62 +155,52 @@ impl InformationSchemaColumnsBuilder {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(42),
schema_names: StringVectorBuilder::with_capacity(42),
table_names: StringVectorBuilder::with_capacity(42),
column_names: StringVectorBuilder::with_capacity(42),
data_types: StringVectorBuilder::with_capacity(42),
semantic_types: StringVectorBuilder::with_capacity(42),
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
data_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
semantic_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_defaults: StringVectorBuilder::with_capacity(INIT_CAPACITY),
is_nullables: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_comments: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.columns` virtual table
async fn make_columns(&mut self) -> Result<RecordBatch> {
async fn make_columns(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
while let Some(table) = stream.try_next().await? {
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() {
let semantic_type = if column.is_time_index() {
SEMANTIC_TYPE_TIME_INDEX
} else if keys.contains(&idx) {
SEMANTIC_TYPE_PRIMARY_KEY
} else {
SEMANTIC_TYPE_FIELD
};
for (idx, column) in schema.column_schemas().iter().enumerate() {
let semantic_type = if column.is_time_index() {
SEMANTIC_TYPE_TIME_INDEX
} else if keys.contains(&idx) {
SEMANTIC_TYPE_PRIMARY_KEY
} else {
SEMANTIC_TYPE_FIELD
};
self.add_column(
&catalog_name,
&schema_name,
&table_name,
&column.name,
&column.data_type.name(),
semantic_type,
);
}
} else {
unreachable!();
self.add_column(
&predicates,
&catalog_name,
&schema_name,
&table.table_info().name,
semantic_type,
column,
);
}
}
}
@@ -203,19 +210,48 @@ impl InformationSchemaColumnsBuilder {
fn add_column(
&mut self,
predicates: &Predicates,
catalog_name: &str,
schema_name: &str,
table_name: &str,
column_name: &str,
data_type: &str,
semantic_type: &str,
column_schema: &ColumnSchema,
) {
let data_type = &column_schema.data_type.name();
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_schema.name.as_str())),
(DATA_TYPE, &Value::from(data_type.as_str())),
(SEMANTIC_TYPE, &Value::from(semantic_type)),
];
if !predicates.eval(&row) {
return;
}
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.column_names.push(Some(column_name));
self.column_names.push(Some(&column_schema.name));
self.data_types.push(Some(data_type));
self.semantic_types.push(Some(semantic_type));
self.column_defaults.push(
column_schema
.default_constraint()
.map(|s| format!("{}", s))
.as_deref(),
);
if column_schema.is_nullable() {
self.is_nullables.push(Some("Yes"));
} else {
self.is_nullables.push(Some("No"));
}
self.column_types.push(Some(data_type));
self.column_comments
.push(column_schema.column_comment().map(|x| x.as_ref()));
}
fn finish(&mut self) -> Result<RecordBatch> {
@@ -226,6 +262,10 @@ impl InformationSchemaColumnsBuilder {
Arc::new(self.column_names.finish()),
Arc::new(self.data_types.finish()),
Arc::new(self.semantic_types.finish()),
Arc::new(self.column_defaults.finish()),
Arc::new(self.is_nullables.finish()),
Arc::new(self.column_types.finish()),
Arc::new(self.column_comments.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
@@ -244,7 +284,7 @@ impl DfPartitionStream for InformationSchemaColumns {
schema,
futures::stream::once(async move {
builder
.make_columns()
.make_columns(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)

View File

@@ -0,0 +1,345 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use super::KEY_COLUMN_USAGE;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
const CONSTRAINT_SCHEMA: &str = "constraint_schema";
const CONSTRAINT_NAME: &str = "constraint_name";
const TABLE_CATALOG: &str = "table_catalog";
const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const COLUMN_NAME: &str = "column_name";
const ORDINAL_POSITION: &str = "ordinal_position";
const INIT_CAPACITY: usize = 42;
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaKeyColumnUsage {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(
"constraint_catalog",
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
CONSTRAINT_SCHEMA,
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(CONSTRAINT_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(ORDINAL_POSITION, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(
"position_in_unique_constraint",
ConcreteDataType::uint32_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_schema",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_table_name",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"referenced_column_name",
ConcreteDataType::string_datatype(),
true,
),
]))
}
fn builder(&self) -> InformationSchemaKeyColumnUsageBuilder {
InformationSchemaKeyColumnUsageBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaKeyColumnUsage {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID
}
fn table_name(&self) -> &'static str {
KEY_COLUMN_USAGE
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_key_column_usage(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
/// Builds the `information_schema.KEY_COLUMN_USAGE` table row by row
///
/// Columns are based on <https://dev.mysql.com/doc/refman/8.2/en/information-schema-key-column-usage-table.html>
struct InformationSchemaKeyColumnUsageBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
constraint_catalog: StringVectorBuilder,
constraint_schema: StringVectorBuilder,
constraint_name: StringVectorBuilder,
table_catalog: StringVectorBuilder,
table_schema: StringVectorBuilder,
table_name: StringVectorBuilder,
column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder,
}
impl InformationSchemaKeyColumnUsageBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
constraint_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
constraint_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
constraint_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_catalog: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_schema: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.KEY_COLUMN_USAGE` virtual table
async fn make_key_column_usage(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
let mut primary_constraints = vec![];
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() {
if column.is_time_index() {
self.add_key_column_usage(
&predicates,
&schema_name,
"TIME INDEX",
&schema_name,
&table_name,
&column.name,
1, //always 1 for time index
);
}
if keys.contains(&idx) {
primary_constraints.push((
schema_name.clone(),
table_name.clone(),
column.name.clone(),
));
}
// TODO(dimbtp): foreign key constraint not supported yet
}
} else {
unreachable!();
}
}
}
for (i, (schema_name, table_name, column_name)) in
primary_constraints.into_iter().enumerate()
{
self.add_key_column_usage(
&predicates,
&schema_name,
"PRIMARY",
&schema_name,
&table_name,
&column_name,
i as u32 + 1,
);
}
self.finish()
}
// TODO(dimbtp): Foreign key constraints would provide non-`None` values for
// the last 4 fields, but they are not supported yet.
#[allow(clippy::too_many_arguments)]
fn add_key_column_usage(
&mut self,
predicates: &Predicates,
constraint_schema: &str,
constraint_name: &str,
table_schema: &str,
table_name: &str,
column_name: &str,
ordinal_position: u32,
) {
let row = [
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
(CONSTRAINT_NAME, &Value::from(constraint_name)),
(TABLE_SCHEMA, &Value::from(table_schema)),
(TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_name)),
(ORDINAL_POSITION, &Value::from(ordinal_position)),
];
if !predicates.eval(&row) {
return;
}
self.constraint_catalog.push(Some("def"));
self.constraint_schema.push(Some(constraint_schema));
self.constraint_name.push(Some(constraint_name));
self.table_catalog.push(Some("def"));
self.table_schema.push(Some(table_schema));
self.table_name.push(Some(table_name));
self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
let rows_num = self.table_catalog.len();
let null_string_vector = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec![None as Option<&str>])),
rows_num,
));
let columns: Vec<VectorRef> = vec![
Arc::new(self.constraint_catalog.finish()),
Arc::new(self.constraint_schema.finish()),
Arc::new(self.constraint_name.finish()),
Arc::new(self.table_catalog.finish()),
Arc::new(self.table_schema.finish()),
Arc::new(self.table_name.finish()),
Arc::new(self.column_name.finish()),
Arc::new(self.ordinal_position.finish()),
Arc::new(self.position_in_unique_constraint.finish()),
null_string_vector.clone(),
null_string_vector.clone(),
null_string_vector,
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaKeyColumnUsage {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_key_column_usage(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -26,7 +26,7 @@ use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatc
use datatypes::schema::SchemaRef;
use datatypes::vectors::VectorRef;
use snafu::ResultExt;
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
pub use tables::get_schema_columns;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
@@ -74,7 +74,7 @@ impl InformationTable for MemoryTable {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, _request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
@@ -169,7 +169,7 @@ mod tests {
assert_eq!("test", table.table_name());
assert_eq!(schema, InformationTable::schema(&table));
let stream = table.to_stream().unwrap();
let stream = table.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
@@ -198,7 +198,7 @@ mod tests {
assert_eq!("test", table.table_name());
assert_eq!(schema, InformationTable::schema(&table));
let stream = table.to_stream().unwrap();
let stream = table.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();

View File

@@ -17,12 +17,10 @@ use std::sync::Arc;
use common_catalog::consts::MITO_ENGINE;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::StringVector;
use datatypes::vectors::{Int64Vector, StringVector};
use crate::information_schema::table_names::*;
const UNKNOWN: &str = "unknown";
/// Find the schema and columns by the table_name, only valid for memory tables.
/// Safety: the user MUST ensure the table schema exists, panic otherwise.
pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
@@ -72,29 +70,340 @@ pub fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>) {
],
),
BUILD_INFO => (
string_columns(&[
"GIT_BRANCH",
"GIT_COMMIT",
"GIT_COMMIT_SHORT",
"GIT_DIRTY",
"PKG_VERSION",
]),
BUILD_INFO => {
let build_info = common_version::build_info();
(
string_columns(&[
"GIT_BRANCH",
"GIT_COMMIT",
"GIT_COMMIT_SHORT",
"GIT_DIRTY",
"PKG_VERSION",
]),
vec![
Arc::new(StringVector::from(vec![build_info.branch.to_string()])),
Arc::new(StringVector::from(vec![build_info.commit.to_string()])),
Arc::new(StringVector::from(vec![build_info
.commit_short
.to_string()])),
Arc::new(StringVector::from(vec![build_info.dirty.to_string()])),
Arc::new(StringVector::from(vec![build_info.version.to_string()])),
],
)
}
CHARACTER_SETS => (
vec![
Arc::new(StringVector::from(vec![
build_data::get_git_branch().unwrap_or_else(|_| UNKNOWN.to_string())
])),
Arc::new(StringVector::from(vec![
build_data::get_git_commit().unwrap_or_else(|_| UNKNOWN.to_string())
])),
Arc::new(StringVector::from(vec![
build_data::get_git_commit_short().unwrap_or_else(|_| UNKNOWN.to_string())
])),
Arc::new(StringVector::from(vec![
build_data::get_git_dirty().map_or(UNKNOWN.to_string(), |v| v.to_string())
])),
Arc::new(StringVector::from(vec![option_env!("CARGO_PKG_VERSION")])),
string_column("CHARACTER_SET_NAME"),
string_column("DEFAULT_COLLATE_NAME"),
string_column("DESCRIPTION"),
bigint_column("MAXLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["UTF-8 Unicode"])),
Arc::new(Int64Vector::from_slice([4])),
],
),
COLLATIONS => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
bigint_column("ID"),
string_column("IS_DEFAULT"),
string_column("IS_COMPILED"),
bigint_column("SORTLEN"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
Arc::new(Int64Vector::from_slice([1])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(StringVector::from(vec!["Yes"])),
Arc::new(Int64Vector::from_slice([1])),
],
),
COLLATION_CHARACTER_SET_APPLICABILITY => (
vec![
string_column("COLLATION_NAME"),
string_column("CHARACTER_SET_NAME"),
],
vec![
Arc::new(StringVector::from(vec!["utf8_bin"])),
Arc::new(StringVector::from(vec!["utf8"])),
],
),
CHECK_CONSTRAINTS => (
string_columns(&[
"CONSTRAINT_CATALOG",
"CONSTRAINT_SCHEMA",
"CONSTRAINT_NAME",
"CHECK_CLAUSE",
]),
// Check constraints are not supported yet
vec![],
),
EVENTS => (
vec![
string_column("EVENT_CATALOG"),
string_column("EVENT_SCHEMA"),
string_column("EVENT_NAME"),
string_column("DEFINER"),
string_column("TIME_ZONE"),
string_column("EVENT_BODY"),
string_column("EVENT_DEFINITION"),
string_column("EVENT_TYPE"),
datetime_column("EXECUTE_AT"),
bigint_column("INTERVAL_VALUE"),
string_column("INTERVAL_FIELD"),
string_column("SQL_MODE"),
datetime_column("STARTS"),
datetime_column("ENDS"),
string_column("STATUS"),
string_column("ON_COMPLETION"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
datetime_column("LAST_EXECUTED"),
string_column("EVENT_COMMENT"),
bigint_column("ORIGINATOR"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
FILES => (
vec![
bigint_column("FILE_ID"),
string_column("FILE_NAME"),
string_column("FILE_TYPE"),
string_column("TABLESPACE_NAME"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("TABLE_NAME"),
string_column("LOGFILE_GROUP_NAME"),
bigint_column("LOGFILE_GROUP_NUMBER"),
string_column("ENGINE"),
string_column("FULLTEXT_KEYS"),
bigint_column("DELETED_ROWS"),
bigint_column("UPDATE_COUNT"),
bigint_column("FREE_EXTENTS"),
bigint_column("TOTAL_EXTENTS"),
bigint_column("EXTENT_SIZE"),
bigint_column("INITIAL_SIZE"),
bigint_column("MAXIMUM_SIZE"),
bigint_column("AUTOEXTEND_SIZE"),
datetime_column("CREATION_TIME"),
datetime_column("LAST_UPDATE_TIME"),
datetime_column("LAST_ACCESS_TIME"),
datetime_column("RECOVER_TIME"),
bigint_column("TRANSACTION_COUNTER"),
string_column("VERSION"),
string_column("ROW_FORMAT"),
bigint_column("TABLE_ROWS"),
bigint_column("AVG_ROW_LENGTH"),
bigint_column("DATA_LENGTH"),
bigint_column("MAX_DATA_LENGTH"),
bigint_column("INDEX_LENGTH"),
bigint_column("DATA_FREE"),
datetime_column("CREATE_TIME"),
datetime_column("UPDATE_TIME"),
datetime_column("CHECK_TIME"),
string_column("CHECKSUM"),
string_column("STATUS"),
string_column("EXTRA"),
],
vec![],
),
OPTIMIZER_TRACE => (
vec![
string_column("QUERY"),
string_column("TRACE"),
bigint_column("MISSING_BYTES_BEYOND_MAX_MEM_SIZE"),
bigint_column("INSUFFICIENT_PRIVILEGES"),
],
vec![],
),
// MySQL(https://dev.mysql.com/doc/refman/8.2/en/information-schema-parameters-table.html)
// has the spec that is different from
// PostgreSQL(https://www.postgresql.org/docs/current/infoschema-parameters.html).
// Follow `MySQL` spec here.
PARAMETERS => (
vec![
string_column("SPECIFIC_CATALOG"),
string_column("SPECIFIC_SCHEMA"),
string_column("SPECIFIC_NAME"),
bigint_column("ORDINAL_POSITION"),
string_column("PARAMETER_MODE"),
string_column("PARAMETER_NAME"),
string_column("DATA_TYPE"),
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
bigint_column("CHARACTER_OCTET_LENGTH"),
bigint_column("NUMERIC_PRECISION"),
bigint_column("NUMERIC_SCALE"),
bigint_column("DATETIME_PRECISION"),
string_column("CHARACTER_SET_NAME"),
string_column("COLLATION_NAME"),
string_column("DTD_IDENTIFIER"),
string_column("ROUTINE_TYPE"),
],
vec![],
),
PROFILING => (
vec![
bigint_column("QUERY_ID"),
bigint_column("SEQ"),
string_column("STATE"),
bigint_column("DURATION"),
bigint_column("CPU_USER"),
bigint_column("CPU_SYSTEM"),
bigint_column("CONTEXT_VOLUNTARY"),
bigint_column("CONTEXT_INVOLUNTARY"),
bigint_column("BLOCK_OPS_IN"),
bigint_column("BLOCK_OPS_OUT"),
bigint_column("MESSAGES_SENT"),
bigint_column("MESSAGES_RECEIVED"),
bigint_column("PAGE_FAULTS_MAJOR"),
bigint_column("PAGE_FAULTS_MINOR"),
bigint_column("SWAPS"),
string_column("SOURCE_FUNCTION"),
string_column("SOURCE_FILE"),
bigint_column("SOURCE_LINE"),
],
vec![],
),
// TODO: _Must_ reimplement this table when foreign key constraints are supported.
REFERENTIAL_CONSTRAINTS => (
vec![
string_column("CONSTRAINT_CATALOG"),
string_column("CONSTRAINT_SCHEMA"),
string_column("CONSTRAINT_NAME"),
string_column("UNIQUE_CONSTRAINT_CATALOG"),
string_column("UNIQUE_CONSTRAINT_SCHEMA"),
string_column("UNIQUE_CONSTRAINT_NAME"),
string_column("MATCH_OPTION"),
string_column("UPDATE_RULE"),
string_column("DELETE_RULE"),
string_column("TABLE_NAME"),
string_column("REFERENCED_TABLE_NAME"),
],
vec![],
),
ROUTINES => (
vec![
string_column("SPECIFIC_NAME"),
string_column("ROUTINE_CATALOG"),
string_column("ROUTINE_SCHEMA"),
string_column("ROUTINE_NAME"),
string_column("ROUTINE_TYPE"),
string_column("DATA_TYPE"),
bigint_column("CHARACTER_MAXIMUM_LENGTH"),
bigint_column("CHARACTER_OCTET_LENGTH"),
bigint_column("NUMERIC_PRECISION"),
bigint_column("NUMERIC_SCALE"),
bigint_column("DATETIME_PRECISION"),
string_column("CHARACTER_SET_NAME"),
string_column("COLLATION_NAME"),
string_column("DTD_IDENTIFIER"),
string_column("ROUTINE_BODY"),
string_column("ROUTINE_DEFINITION"),
string_column("EXTERNAL_NAME"),
string_column("EXTERNAL_LANGUAGE"),
string_column("PARAMETER_STYLE"),
string_column("IS_DETERMINISTIC"),
string_column("SQL_DATA_ACCESS"),
string_column("SQL_PATH"),
string_column("SECURITY_TYPE"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
string_column("SQL_MODE"),
string_column("ROUTINE_COMMENT"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
SCHEMA_PRIVILEGES => (
vec![
string_column("GRANTEE"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("PRIVILEGE_TYPE"),
string_column("IS_GRANTABLE"),
],
vec![],
),
TABLE_PRIVILEGES => (
vec![
string_column("GRANTEE"),
string_column("TABLE_CATALOG"),
string_column("TABLE_SCHEMA"),
string_column("TABLE_NAME"),
string_column("PRIVILEGE_TYPE"),
string_column("IS_GRANTABLE"),
],
vec![],
),
TRIGGERS => (
vec![
string_column("TRIGGER_CATALOG"),
string_column("TRIGGER_SCHEMA"),
string_column("TRIGGER_NAME"),
string_column("EVENT_MANIPULATION"),
string_column("EVENT_OBJECT_CATALOG"),
string_column("EVENT_OBJECT_SCHEMA"),
string_column("EVENT_OBJECT_TABLE"),
bigint_column("ACTION_ORDER"),
string_column("ACTION_CONDITION"),
string_column("ACTION_STATEMENT"),
string_column("ACTION_ORIENTATION"),
string_column("ACTION_TIMING"),
string_column("ACTION_REFERENCE_OLD_TABLE"),
string_column("ACTION_REFERENCE_NEW_TABLE"),
string_column("ACTION_REFERENCE_OLD_ROW"),
string_column("ACTION_REFERENCE_NEW_ROW"),
datetime_column("CREATED"),
string_column("SQL_MODE"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),
string_column("COLLATION_CONNECTION"),
string_column("DATABASE_COLLATION"),
],
vec![],
),
// TODO: Consider storing internal metrics in the `global_status` and
// `session_status` tables.
GLOBAL_STATUS => (
vec![
string_column("VARIABLE_NAME"),
string_column("VARIABLE_VALUE"),
],
vec![],
),
SESSION_STATUS => (
vec![
string_column("VARIABLE_NAME"),
string_column("VARIABLE_VALUE"),
],
vec![],
),
_ => unreachable!("Unknown table in information_schema: {}", table_name),
@@ -115,6 +424,22 @@ fn string_column(name: &str) -> ColumnSchema {
)
}
fn bigint_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::int64_datatype(),
false,
)
}
fn datetime_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::datetime_datatype(),
false,
)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -0,0 +1,424 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use core::pin::pin;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::datetime::DateTime;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
};
use futures::{StreamExt, TryStreamExt};
use partition::manager::PartitionInfo;
use partition::partition::PartitionDef;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{RegionId, ScanRequest, TableId};
use table::metadata::{TableInfo, TableType};
use super::PARTITIONS;
use crate::error::{
CreateRecordBatchSnafu, FindPartitionsSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
const TABLE_CATALOG: &str = "table_catalog";
const TABLE_SCHEMA: &str = "table_schema";
const TABLE_NAME: &str = "table_name";
const PARTITION_NAME: &str = "partition_name";
const PARTITION_EXPRESSION: &str = "partition_expression";
/// The region id
const GREPTIME_PARTITION_ID: &str = "greptime_partition_id";
const INIT_CAPACITY: usize = 42;
/// The `PARTITIONS` table provides information about partitioned tables.
/// See https://dev.mysql.com/doc/refman/8.0/en/information-schema-partitions-table.html
/// We provide an extra column `greptime_partition_id` for the GreptimeDB region id.
pub(super) struct InformationSchemaPartitions {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaPartitions {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PARTITION_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
"subpartition_name",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"partition_ordinal_position",
ConcreteDataType::int64_datatype(),
true,
),
ColumnSchema::new(
"subpartition_ordinal_position",
ConcreteDataType::int64_datatype(),
true,
),
ColumnSchema::new(
"partition_method",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"subpartition_method",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
PARTITION_EXPRESSION,
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"subpartition_expression",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
"partition_description",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new("table_rows", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("avg_row_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new(
"partition_comment",
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new("nodegroup", ConcreteDataType::string_datatype(), true),
ColumnSchema::new("tablespace_name", ConcreteDataType::string_datatype(), true),
ColumnSchema::new(
GREPTIME_PARTITION_ID,
ConcreteDataType::uint64_datatype(),
true,
),
]))
}
fn builder(&self) -> InformationSchemaPartitionsBuilder {
InformationSchemaPartitionsBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaPartitions {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_PARTITIONS_TABLE_ID
}
fn table_name(&self) -> &'static str {
PARTITIONS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_partitions(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaPartitionsBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder,
table_names: StringVectorBuilder,
partition_names: StringVectorBuilder,
partition_ordinal_positions: Int64VectorBuilder,
partition_expressions: StringVectorBuilder,
create_times: DateTimeVectorBuilder,
partition_ids: UInt64VectorBuilder,
}
impl InformationSchemaPartitionsBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.partitions` virtual table
async fn make_partitions(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let partition_manager = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|catalog_manager| catalog_manager.partition_manager());
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let table_info_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.await
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {
Ok(None)
} else {
Ok(Some(table_info))
}
});
const BATCH_SIZE: usize = 128;
// Split table infos into chunks
let mut table_info_chunks = pin!(table_info_stream.ready_chunks(BATCH_SIZE));
while let Some(table_infos) = table_info_chunks.next().await {
let table_infos = table_infos.into_iter().collect::<Result<Vec<_>>>()?;
let table_ids: Vec<TableId> =
table_infos.iter().map(|info| info.ident.table_id).collect();
let mut table_partitions = if let Some(partition_manager) = &partition_manager {
partition_manager
.batch_find_table_partitions(&table_ids)
.await
.context(FindPartitionsSnafu)?
} else {
// The current node must be a standalone instance, which contains only one partition by default.
// TODO(dennis): change it when we support multi-regions for standalone.
table_ids
.into_iter()
.map(|table_id| {
(
table_id,
vec![PartitionInfo {
id: RegionId::new(table_id, 0),
partition: PartitionDef::new(vec![], vec![]),
}],
)
})
.collect()
};
for table_info in table_infos {
let partitions = table_partitions
.remove(&table_info.ident.table_id)
.unwrap_or(vec![]);
self.add_partitions(
&predicates,
&table_info,
&catalog_name,
&schema_name,
&table_info.name,
&partitions,
);
}
}
}
self.finish()
}
#[allow(clippy::too_many_arguments)]
fn add_partitions(
&mut self,
predicates: &Predicates,
table_info: &TableInfo,
catalog_name: &str,
schema_name: &str,
table_name: &str,
partitions: &[PartitionInfo],
) {
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
];
if !predicates.eval(&row) {
return;
}
for (index, partition) in partitions.iter().enumerate() {
let partition_name = format!("p{index}");
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.partition_names.push(Some(&partition_name));
self.partition_ordinal_positions
.push(Some((index + 1) as i64));
let expressions = if partition.partition.partition_columns().is_empty() {
None
} else {
Some(partition.partition.to_string())
};
self.partition_expressions.push(expressions.as_deref());
self.create_times.push(Some(DateTime::from(
table_info.meta.created_on.timestamp_millis(),
)));
self.partition_ids.push(Some(partition.id.as_u64()));
}
}
fn finish(&mut self) -> Result<RecordBatch> {
let rows_num = self.catalog_names.len();
let null_string_vector = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec![None as Option<&str>])),
rows_num,
));
let null_i64_vector = Arc::new(ConstantVector::new(
Arc::new(Int64Vector::from(vec![None])),
rows_num,
));
let null_datetime_vector = Arc::new(ConstantVector::new(
Arc::new(DateTimeVector::from(vec![None])),
rows_num,
));
let partition_methods = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec![Some("RANGE")])),
rows_num,
));
let columns: Vec<VectorRef> = vec![
Arc::new(self.catalog_names.finish()),
Arc::new(self.schema_names.finish()),
Arc::new(self.table_names.finish()),
Arc::new(self.partition_names.finish()),
null_string_vector.clone(),
Arc::new(self.partition_ordinal_positions.finish()),
null_i64_vector.clone(),
partition_methods,
null_string_vector.clone(),
Arc::new(self.partition_expressions.finish()),
null_string_vector.clone(),
null_string_vector.clone(),
// TODO(dennis): rows and index statistics info
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
null_i64_vector.clone(),
Arc::new(self.create_times.finish()),
// TODO(dennis): supports update_time
null_datetime_vector.clone(),
null_datetime_vector,
null_i64_vector,
null_string_vector.clone(),
null_string_vector.clone(),
null_string_vector,
Arc::new(self.partition_ids.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaPartitions {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_partitions(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -0,0 +1,609 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use arrow::array::StringArray;
use arrow::compute::kernels::comparison;
use common_query::logical_plan::DfExpr;
use datafusion::common::ScalarValue;
use datafusion::logical_expr::expr::Like;
use datafusion::logical_expr::Operator;
use datatypes::value::Value;
use store_api::storage::ScanRequest;
type ColumnName = String;
/// A predicate to filter `information_schema` table streams;
/// we only support these simple predicates currently.
/// TODO(dennis): support more predicate types.
#[derive(Clone, PartialEq, Eq, Debug)]
enum Predicate {
Eq(ColumnName, Value),
Like(ColumnName, String, bool),
NotEq(ColumnName, Value),
InList(ColumnName, Vec<Value>),
And(Box<Predicate>, Box<Predicate>),
Or(Box<Predicate>, Box<Predicate>),
Not(Box<Predicate>),
}
impl Predicate {
/// Evaluate the predicate against the row, returns:
/// - `None` when the predicate can't be evaluated against the row,
/// - `Some(true)` when the predicate is satisfied,
/// - `Some(false)` when the predicate is not satisfied.
fn eval(&self, row: &[(&str, &Value)]) -> Option<bool> {
match self {
Predicate::Eq(c, v) => {
for (column, value) in row {
if c != column {
continue;
}
return Some(v == *value);
}
}
Predicate::Like(c, pattern, case_insensitive) => {
for (column, value) in row {
if c != column {
continue;
}
let Value::String(bs) = value else {
continue;
};
return like_utf8(bs.as_utf8(), pattern, case_insensitive);
}
}
Predicate::NotEq(c, v) => {
for (column, value) in row {
if c != column {
continue;
}
return Some(v != *value);
}
}
Predicate::InList(c, values) => {
for (column, value) in row {
if c != column {
continue;
}
return Some(values.iter().any(|v| v == *value));
}
}
Predicate::And(left, right) => {
let left = left.eval(row);
// short-circuit
if matches!(left, Some(false)) {
return Some(false);
}
return match (left, right.eval(row)) {
(Some(left), Some(right)) => Some(left && right),
(None, Some(false)) => Some(false),
_ => None,
};
}
Predicate::Or(left, right) => {
let left = left.eval(row);
// short-circuit
if matches!(left, Some(true)) {
return Some(true);
}
return match (left, right.eval(row)) {
(Some(left), Some(right)) => Some(left || right),
(None, Some(true)) => Some(true),
_ => None,
};
}
Predicate::Not(p) => {
let Some(b) = p.eval(row) else {
return None;
};
return Some(!b);
}
}
// Can't evaluate predicate with the row
None
}
/// Try to create a predicate from a datafusion [`Expr`]; returns `None` if it fails.
fn from_expr(expr: DfExpr) -> Option<Predicate> {
match expr {
// NOT expr
DfExpr::Not(expr) => {
let Some(p) = Self::from_expr(*expr) else {
return None;
};
Some(Predicate::Not(Box::new(p)))
}
// expr LIKE pattern
DfExpr::Like(Like {
negated,
expr,
pattern,
case_insensitive,
..
}) if is_column(&expr) && is_string_literal(&pattern) => {
// Safety: ensured by the guard above
let DfExpr::Column(c) = *expr else {
unreachable!();
};
let DfExpr::Literal(ScalarValue::Utf8(Some(pattern))) = *pattern else {
unreachable!();
};
let p = Predicate::Like(c.name, pattern, case_insensitive);
if negated {
Some(Predicate::Not(Box::new(p)))
} else {
Some(p)
}
}
// left OP right
DfExpr::BinaryExpr(bin) => match (*bin.left, bin.op, *bin.right) {
// left == right
(DfExpr::Literal(scalar), Operator::Eq, DfExpr::Column(c))
| (DfExpr::Column(c), Operator::Eq, DfExpr::Literal(scalar)) => {
let Ok(v) = Value::try_from(scalar) else {
return None;
};
Some(Predicate::Eq(c.name, v))
}
// left != right
(DfExpr::Literal(scalar), Operator::NotEq, DfExpr::Column(c))
| (DfExpr::Column(c), Operator::NotEq, DfExpr::Literal(scalar)) => {
let Ok(v) = Value::try_from(scalar) else {
return None;
};
Some(Predicate::NotEq(c.name, v))
}
// left AND right
(left, Operator::And, right) => {
let Some(left) = Self::from_expr(left) else {
return None;
};
let Some(right) = Self::from_expr(right) else {
return None;
};
Some(Predicate::And(Box::new(left), Box::new(right)))
}
// left OR right
(left, Operator::Or, right) => {
let Some(left) = Self::from_expr(left) else {
return None;
};
let Some(right) = Self::from_expr(right) else {
return None;
};
Some(Predicate::Or(Box::new(left), Box::new(right)))
}
_ => None,
},
// [NOT] IN (LIST)
DfExpr::InList(list) => {
match (*list.expr, list.list, list.negated) {
// column [NOT] IN (v1, v2, v3, ...)
(DfExpr::Column(c), list, negated) if is_all_scalars(&list) => {
let mut values = Vec::with_capacity(list.len());
for scalar in list {
// Safety: checked by `is_all_scalars`
let DfExpr::Literal(scalar) = scalar else {
unreachable!();
};
let Ok(value) = Value::try_from(scalar) else {
return None;
};
values.push(value);
}
let predicate = Predicate::InList(c.name, values);
if negated {
Some(Predicate::Not(Box::new(predicate)))
} else {
Some(predicate)
}
}
_ => None,
}
}
_ => None,
}
}
}
/// Performs SQL `left LIKE right`; returns `None` if the evaluation fails.
/// - `s`: the target string
/// - `pattern`: the pattern, e.g. '%abc'
/// - `case_insensitive`: whether to perform a case-insensitive match.
fn like_utf8(s: &str, pattern: &str, case_insensitive: &bool) -> Option<bool> {
let array = StringArray::from(vec![s]);
let patterns = StringArray::new_scalar(pattern);
let Ok(booleans) = (if *case_insensitive {
comparison::ilike(&array, &patterns)
} else {
comparison::like(&array, &patterns)
}) else {
return None;
};
// Safety: at least one value in result
Some(booleans.value(0))
}
fn is_string_literal(expr: &DfExpr) -> bool {
matches!(expr, DfExpr::Literal(ScalarValue::Utf8(Some(_))))
}
fn is_column(expr: &DfExpr) -> bool {
matches!(expr, DfExpr::Column(_))
}
/// A list of predicates.
pub struct Predicates {
predicates: Vec<Predicate>,
}
impl Predicates {
/// Tries its best to create predicates from a [`ScanRequest`].
pub fn from_scan_request(request: &Option<ScanRequest>) -> Predicates {
if let Some(request) = request {
let mut predicates = Vec::with_capacity(request.filters.len());
for filter in &request.filters {
if let Some(predicate) = Predicate::from_expr(filter.df_expr().clone()) {
predicates.push(predicate);
}
}
Self { predicates }
} else {
Self {
predicates: Vec::new(),
}
}
}
/// Evaluates the predicates against the row.
/// Returns `true` when all the predicates are satisfied or can't be evaluated.
pub fn eval(&self, row: &[(&str, &Value)]) -> bool {
// fast path
if self.predicates.is_empty() {
return true;
}
self.predicates
.iter()
.filter_map(|p| p.eval(row))
.all(|b| b)
}
}
/// Returns true when the values are all [`DfExpr::Literal`].
fn is_all_scalars(list: &[DfExpr]) -> bool {
list.iter().all(|v| matches!(v, DfExpr::Literal(_)))
}
#[cfg(test)]
mod tests {
use datafusion::common::{Column, ScalarValue};
use datafusion::logical_expr::expr::InList;
use datafusion::logical_expr::BinaryExpr;
use super::*;
#[test]
fn test_predicate_eval() {
let a_col = "a".to_string();
let b_col = "b".to_string();
let a_value = Value::from("a_value");
let b_value = Value::from("b_value");
let wrong_value = Value::from("wrong_value");
let a_row = [(a_col.as_str(), &a_value)];
let b_row = [("b", &wrong_value)];
let wrong_row = [(a_col.as_str(), &wrong_value)];
// Predicate::Eq
let p = Predicate::Eq(a_col.clone(), a_value.clone());
assert!(p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(!p.eval(&wrong_row).unwrap());
// Predicate::NotEq
let p = Predicate::NotEq(a_col.clone(), a_value.clone());
assert!(!p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(p.eval(&wrong_row).unwrap());
// Predicate::InList
let p = Predicate::InList(a_col.clone(), vec![a_value.clone(), b_value.clone()]);
assert!(p.eval(&a_row).unwrap());
assert!(p.eval(&b_row).is_none());
assert!(!p.eval(&wrong_row).unwrap());
assert!(p.eval(&[(&a_col, &b_value)]).unwrap());
let p1 = Predicate::Eq(a_col.clone(), a_value.clone());
let p2 = Predicate::Eq(b_col.clone(), b_value.clone());
let row = [(a_col.as_str(), &a_value), (b_col.as_str(), &b_value)];
let wrong_row = [(a_col.as_str(), &a_value), (b_col.as_str(), &wrong_value)];
// Predicate::And
let p = Predicate::And(Box::new(p1.clone()), Box::new(p2.clone()));
assert!(p.eval(&row).unwrap());
assert!(!p.eval(&wrong_row).unwrap());
assert!(p.eval(&[]).is_none());
assert!(p.eval(&[("c", &a_value)]).is_none());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
.unwrap());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
.unwrap());
assert!(p
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
.is_none());
assert!(!p
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
.unwrap());
// Predicate::Or
let p = Predicate::Or(Box::new(p1), Box::new(p2));
assert!(p.eval(&row).unwrap());
assert!(p.eval(&wrong_row).unwrap());
assert!(p.eval(&[]).is_none());
assert!(p.eval(&[("c", &a_value)]).is_none());
assert!(!p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &a_value)])
.unwrap());
assert!(p
.eval(&[(a_col.as_str(), &b_value), (b_col.as_str(), &b_value)])
.unwrap());
assert!(p
.eval(&[(a_col.as_ref(), &a_value), ("c", &a_value)])
.unwrap());
assert!(p
.eval(&[(a_col.as_ref(), &b_value), ("c", &a_value)])
.is_none());
}
#[test]
fn test_predicate_like() {
// case insensitive
let expr = DfExpr::Like(Like {
negated: false,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: true,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
c == "a"
&& pattern == "%abc"
&& *case_insensitive)
);
let match_row = [
("a", &Value::from("hello AbC")),
("b", &Value::from("b value")),
];
let unmatch_row = [("a", &Value::from("bca")), ("b", &Value::from("b value"))];
assert!(p.eval(&match_row).unwrap());
assert!(!p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
// case sensitive
let expr = DfExpr::Like(Like {
negated: false,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: false,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(
matches!(&p, Predicate::Like(c, pattern, case_insensitive) if
c == "a"
&& pattern == "%abc"
&& !*case_insensitive)
);
assert!(!p.eval(&match_row).unwrap());
assert!(!p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
// not like
let expr = DfExpr::Like(Like {
negated: true,
expr: Box::new(column("a")),
pattern: Box::new(string_literal("%abc")),
case_insensitive: true,
escape_char: None,
});
let p = Predicate::from_expr(expr).unwrap();
assert!(!p.eval(&match_row).unwrap());
assert!(p.eval(&unmatch_row).unwrap());
assert!(p.eval(&[]).is_none());
}
fn column(name: &str) -> DfExpr {
DfExpr::Column(Column {
relation: None,
name: name.to_string(),
})
}
fn string_literal(v: &str) -> DfExpr {
DfExpr::Literal(ScalarValue::Utf8(Some(v.to_string())))
}
fn match_string_value(v: &Value, expected: &str) -> bool {
matches!(v, Value::String(bs) if bs.as_utf8() == expected)
}
fn match_string_values(vs: &[Value], expected: &[&str]) -> bool {
assert_eq!(vs.len(), expected.len());
let mut result = true;
for (i, v) in vs.iter().enumerate() {
result = result && match_string_value(v, expected[i]);
}
result
}
fn mock_exprs() -> (DfExpr, DfExpr) {
let expr1 = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(column("a")),
op: Operator::Eq,
right: Box::new(string_literal("a_value")),
});
let expr2 = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(column("b")),
op: Operator::NotEq,
right: Box::new(string_literal("b_value")),
});
(expr1, expr2)
}
#[test]
fn test_predicate_from_expr() {
let (expr1, expr2) = mock_exprs();
let p1 = Predicate::from_expr(expr1.clone()).unwrap();
assert!(matches!(&p1, Predicate::Eq(column, v) if column == "a"
&& match_string_value(v, "a_value")));
let p2 = Predicate::from_expr(expr2.clone()).unwrap();
assert!(matches!(&p2, Predicate::NotEq(column, v) if column == "b"
&& match_string_value(v, "b_value")));
let and_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(expr1.clone()),
op: Operator::And,
right: Box::new(expr2.clone()),
});
let or_expr = DfExpr::BinaryExpr(BinaryExpr {
left: Box::new(expr1.clone()),
op: Operator::Or,
right: Box::new(expr2.clone()),
});
let not_expr = DfExpr::Not(Box::new(expr1.clone()));
let and_p = Predicate::from_expr(and_expr).unwrap();
assert!(matches!(and_p, Predicate::And(left, right) if *left == p1 && *right == p2));
let or_p = Predicate::from_expr(or_expr).unwrap();
assert!(matches!(or_p, Predicate::Or(left, right) if *left == p1 && *right == p2));
let not_p = Predicate::from_expr(not_expr).unwrap();
assert!(matches!(not_p, Predicate::Not(p) if *p == p1));
let inlist_expr = DfExpr::InList(InList {
expr: Box::new(column("a")),
list: vec![string_literal("a1"), string_literal("a2")],
negated: false,
});
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
assert!(matches!(&inlist_p, Predicate::InList(c, values) if c == "a"
&& match_string_values(values, &["a1", "a2"])));
let inlist_expr = DfExpr::InList(InList {
expr: Box::new(column("a")),
list: vec![string_literal("a1"), string_literal("a2")],
negated: true,
});
let inlist_p = Predicate::from_expr(inlist_expr).unwrap();
assert!(matches!(inlist_p, Predicate::Not(p) if
matches!(&*p,
Predicate::InList(c, values) if c == "a"
&& match_string_values(values, &["a1", "a2"]))));
}
#[test]
fn test_predicates_from_scan_request() {
let predicates = Predicates::from_scan_request(&None);
assert!(predicates.predicates.is_empty());
let (expr1, expr2) = mock_exprs();
let request = ScanRequest {
filters: vec![expr1.into(), expr2.into()],
..Default::default()
};
let predicates = Predicates::from_scan_request(&Some(request));
assert_eq!(2, predicates.predicates.len());
assert!(
matches!(&predicates.predicates[0], Predicate::Eq(column, v) if column == "a"
&& match_string_value(v, "a_value"))
);
assert!(
matches!(&predicates.predicates[1], Predicate::NotEq(column, v) if column == "b"
&& match_string_value(v, "b_value"))
);
}
#[test]
fn test_predicates_eval_row() {
let wrong_row = [
("a", &Value::from("a_value")),
("b", &Value::from("b_value")),
("c", &Value::from("c_value")),
];
let row = [
("a", &Value::from("a_value")),
("b", &Value::from("not_b_value")),
("c", &Value::from("c_value")),
];
let c_row = [("c", &Value::from("c_value"))];
// Test empty predicates; they always return true
let predicates = Predicates::from_scan_request(&None);
assert!(predicates.eval(&row));
assert!(predicates.eval(&wrong_row));
assert!(predicates.eval(&c_row));
let (expr1, expr2) = mock_exprs();
let request = ScanRequest {
filters: vec![expr1.into(), expr2.into()],
..Default::default()
};
let predicates = Predicates::from_scan_request(&Some(request));
assert!(predicates.eval(&row));
assert!(!predicates.eval(&wrong_row));
assert!(predicates.eval(&c_row));
}
}
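
For illustration, a minimal sketch of how an information_schema table builder might consume these predicates. The `example` function is hypothetical and assumes the items defined in this file (notably `Predicates`) are in scope; the imports mirror the ones at the top of the file.

use common_query::logical_plan::DfExpr;
use datafusion::common::{Column, ScalarValue};
use datafusion::logical_expr::{BinaryExpr, Operator};
use datatypes::value::Value;
use store_api::storage::ScanRequest;

// Hypothetical caller: builds a pushed-down filter `schema_name = 'public'`,
// wraps it into a `ScanRequest`, and prunes candidate rows with `Predicates`.
fn example() {
    let filter = DfExpr::BinaryExpr(BinaryExpr {
        left: Box::new(DfExpr::Column(Column::from_name("schema_name"))),
        op: Operator::Eq,
        right: Box::new(DfExpr::Literal(ScalarValue::Utf8(Some("public".to_string())))),
    });
    let request = ScanRequest {
        filters: vec![filter.into()],
        ..Default::default()
    };
    let predicates = Predicates::from_scan_request(&Some(request));

    // A table builder evaluates each candidate row before pushing it into the
    // output vectors; rows that no predicate can reject are kept.
    let row = [("schema_name", &Value::from("public"))];
    assert!(predicates.eval(&row));
}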


@@ -0,0 +1,279 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use core::pin::pin;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{Int64VectorBuilder, StringVectorBuilder, UInt64VectorBuilder};
use futures::{StreamExt, TryStreamExt};
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType;
use super::REGION_PEERS;
use crate::error::{
CreateRecordBatchSnafu, FindRegionRoutesSnafu, InternalSnafu, Result,
UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const PEER_ID: &str = "peer_id";
const PEER_ADDR: &str = "peer_addr";
const IS_LEADER: &str = "is_leader";
const STATUS: &str = "status";
const DOWN_SECONDS: &str = "down_seconds";
const INIT_CAPACITY: usize = 42;
/// The `REGION_PEERS` table provides information about the region distribution and routes, including the following fields:
///
/// - `region_id`: the region id
/// - `peer_id`: the region storage datanode peer id
/// - `peer_addr`: the region storage datanode peer address
/// - `is_leader`: whether the peer is the leader
/// - `status`: the region status, `ALIVE` or `DOWNGRADED`.
/// - `down_seconds`: the duration of being offline, in seconds.
///
pub(super) struct InformationSchemaRegionPeers {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaRegionPeers {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(PEER_ID, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(IS_LEADER, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(DOWN_SECONDS, ConcreteDataType::int64_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaRegionPeersBuilder {
InformationSchemaRegionPeersBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaRegionPeers {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID
}
fn table_name(&self) -> &'static str {
REGION_PEERS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_peers(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaRegionPeersBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
region_ids: UInt64VectorBuilder,
peer_ids: UInt64VectorBuilder,
peer_addrs: StringVectorBuilder,
is_leaders: StringVectorBuilder,
statuses: StringVectorBuilder,
down_seconds: Int64VectorBuilder,
}
impl InformationSchemaRegionPeersBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
is_leaders: StringVectorBuilder::with_capacity(INIT_CAPACITY),
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
down_seconds: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.region_peers` virtual table
async fn make_region_peers(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let partition_manager = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|catalog_manager| catalog_manager.partition_manager());
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
let table_id_stream = catalog_manager
.tables(&catalog_name, &schema_name)
.await
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {
Ok(None)
} else {
Ok(Some(table_info.ident.table_id))
}
});
const BATCH_SIZE: usize = 128;
// Split table ids into chunks
let mut table_id_chunks = pin!(table_id_stream.ready_chunks(BATCH_SIZE));
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids.into_iter().collect::<Result<Vec<_>>>()?;
let table_routes = if let Some(partition_manager) = &partition_manager {
partition_manager
.batch_find_region_routes(&table_ids)
.await
.context(FindRegionRoutesSnafu)?
} else {
table_ids.into_iter().map(|id| (id, vec![])).collect()
};
for routes in table_routes.values() {
self.add_region_peers(&predicates, routes);
}
}
}
self.finish()
}
fn add_region_peers(&mut self, predicates: &Predicates, routes: &[RegionRoute]) {
for route in routes {
let region_id = route.region.id.as_u64();
let peer_id = route.leader_peer.clone().map(|p| p.id);
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
let status = if let Some(status) = route.leader_status {
Some(status.as_ref().to_string())
} else {
// Alive by default
Some("ALIVE".to_string())
};
let row = [(REGION_ID, &Value::from(region_id))];
if !predicates.eval(&row) {
return;
}
// TODO(dennis): add followers.
self.region_ids.push(Some(region_id));
self.peer_ids.push(peer_id);
self.peer_addrs.push(peer_addr.as_deref());
self.is_leaders.push(Some("Yes"));
self.statuses.push(status.as_deref());
self.down_seconds
.push(route.leader_down_millis().map(|m| m / 1000));
}
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.region_ids.finish()),
Arc::new(self.peer_ids.finish()),
Arc::new(self.peer_addrs.finish()),
Arc::new(self.is_leaders.finish()),
Arc::new(self.statuses.finish()),
Arc::new(self.down_seconds.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaRegionPeers {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_peers(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}


@@ -0,0 +1,250 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::util::current_time_millis;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector};
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{
ConstantVector, Float64VectorBuilder, StringVector, StringVectorBuilder,
TimestampMillisecondVector, VectorRef,
};
use itertools::Itertools;
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, RUNTIME_METRICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
pub(super) struct InformationSchemaMetrics {
schema: SchemaRef,
}
const METRIC_NAME: &str = "metric_name";
const METRIC_VALUE: &str = "value";
const METRIC_LABELS: &str = "labels";
const NODE: &str = "node";
const NODE_TYPE: &str = "node_type";
const TIMESTAMP: &str = "timestamp";
/// The `information_schema.runtime_metrics` virtual table.
/// It exposes GreptimeDB runtime metrics to users via SQL.
impl InformationSchemaMetrics {
pub(super) fn new() -> Self {
Self {
schema: Self::schema(),
}
}
fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(METRIC_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(METRIC_VALUE, ConcreteDataType::float64_datatype(), false),
ColumnSchema::new(METRIC_LABELS, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(NODE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(NODE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
TIMESTAMP,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
]))
}
fn builder(&self) -> InformationSchemaMetricsBuilder {
InformationSchemaMetricsBuilder::new(self.schema.clone())
}
}
impl InformationTable for InformationSchemaMetrics {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID
}
fn table_name(&self) -> &'static str {
RUNTIME_METRICS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_metrics(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaMetricsBuilder {
schema: SchemaRef,
metric_names: StringVectorBuilder,
metric_values: Float64VectorBuilder,
metric_labels: StringVectorBuilder,
}
impl InformationSchemaMetricsBuilder {
fn new(schema: SchemaRef) -> Self {
Self {
schema,
metric_names: StringVectorBuilder::with_capacity(42),
metric_values: Float64VectorBuilder::with_capacity(42),
metric_labels: StringVectorBuilder::with_capacity(42),
}
}
fn add_metric(&mut self, metric_name: &str, labels: String, metric_value: f64) {
self.metric_names.push(Some(metric_name));
self.metric_values.push(Some(metric_value));
self.metric_labels.push(Some(&labels));
}
async fn make_metrics(&mut self, _request: Option<ScanRequest>) -> Result<RecordBatch> {
let metric_families = prometheus::gather();
let write_request =
common_telemetry::metric::convert_metric_to_write_request(metric_families, None, 0);
for ts in write_request.timeseries {
// Safety: a time series always has the `__name__` label
let metric_name = ts
.labels
.iter()
.find_map(|label| {
if label.name == "__name__" {
Some(label.value.clone())
} else {
None
}
})
.unwrap();
self.add_metric(
&metric_name,
ts.labels
.into_iter()
.filter_map(|label| {
if label.name == "__name__" {
None
} else {
Some(format!("{}={}", label.name, label.value))
}
})
.join(", "),
// Safety: a time series always has at least one sample
ts.samples[0].value,
);
}
self.finish()
}
fn finish(&mut self) -> Result<RecordBatch> {
let rows_num = self.metric_names.len();
let unknowns = Arc::new(ConstantVector::new(
Arc::new(StringVector::from(vec!["unknown"])),
rows_num,
));
let timestamps = Arc::new(ConstantVector::new(
Arc::new(TimestampMillisecondVector::from_slice([
current_time_millis(),
])),
rows_num,
));
let columns: Vec<VectorRef> = vec![
Arc::new(self.metric_names.finish()),
Arc::new(self.metric_values.finish()),
Arc::new(self.metric_labels.finish()),
// TODO(dennis): support `node` and `node_type` in cluster mode
unknowns.clone(),
unknowns,
timestamps,
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaMetrics {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_metrics(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use super::*;
#[tokio::test]
async fn test_make_metrics() {
let metrics = InformationSchemaMetrics::new();
let stream = metrics.to_stream(ScanRequest::default()).unwrap();
let batches = RecordBatches::try_collect(stream).await.unwrap();
let result_literal = batches.pretty_print().unwrap();
assert!(result_literal.contains(METRIC_NAME));
assert!(result_literal.contains(METRIC_VALUE));
assert!(result_literal.contains(METRIC_LABELS));
assert!(result_literal.contains(NODE));
assert!(result_literal.contains(NODE_TYPE));
assert!(result_literal.contains(TIMESTAMP));
}
}
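
For illustration, a minimal standalone sketch of the label handling in `make_metrics` above, assuming only the `itertools` crate; the `split_name_and_labels` helper is hypothetical. The `__name__` label becomes the metric name and the remaining labels are rendered as a comma-separated `key=value` string.

use itertools::Itertools;

// Hypothetical helper mirroring the label handling in `make_metrics`.
fn split_name_and_labels(labels: Vec<(String, String)>) -> (String, String) {
    // The `__name__` label carries the metric name.
    let metric_name = labels
        .iter()
        .find(|(name, _)| name.as_str() == "__name__")
        .map(|(_, value)| value.clone())
        .unwrap_or_default();
    // The remaining labels are rendered as a comma-separated `key=value` list.
    let rendered_labels = labels
        .into_iter()
        .filter(|(name, _)| name.as_str() != "__name__")
        .map(|(name, value)| format!("{name}={value}"))
        .join(", ");
    (metric_name, rendered_labels)
}

fn main() {
    let labels = vec![
        ("__name__".to_string(), "process_cpu_seconds_total".to_string()),
        ("pod".to_string(), "frontend-0".to_string()),
    ];
    let (name, labels) = split_name_and_labels(labels);
    assert_eq!(name, "process_cpu_seconds_total");
    assert_eq!(labels, "pod=frontend-0");
}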


@@ -0,0 +1,222 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_SCHEMATA_TABLE_ID;
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::StringVectorBuilder;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use super::SCHEMATA;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
pub const CATALOG_NAME: &str = "catalog_name";
pub const SCHEMA_NAME: &str = "schema_name";
const DEFAULT_CHARACTER_SET_NAME: &str = "default_character_set_name";
const DEFAULT_COLLATION_NAME: &str = "default_collation_name";
const INIT_CAPACITY: usize = 42;
/// The `information_schema.schemata` table implementation.
pub(super) struct InformationSchemaSchemata {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaSchemata {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(CATALOG_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SCHEMA_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
DEFAULT_CHARACTER_SET_NAME,
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new(
DEFAULT_COLLATION_NAME,
ConcreteDataType::string_datatype(),
false,
),
ColumnSchema::new("sql_path", ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaSchemataBuilder {
InformationSchemaSchemataBuilder::new(
self.schema.clone(),
self.catalog_name.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaSchemata {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_SCHEMATA_TABLE_ID
}
fn table_name(&self) -> &'static str {
SCHEMATA
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_schemata(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
/// Builds the `information_schema.schemata` table row by row
///
/// Columns are based on <https://docs.pingcap.com/tidb/stable/information-schema-schemata>
struct InformationSchemaSchemataBuilder {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder,
charset_names: StringVectorBuilder,
collation_names: StringVectorBuilder,
sql_paths: StringVectorBuilder,
}
impl InformationSchemaSchemataBuilder {
fn new(
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
charset_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
collation_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
sql_paths: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.schemata` virtual table
async fn make_schemata(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
self.add_schema(&predicates, &catalog_name, &schema_name);
}
self.finish()
}
fn add_schema(&mut self, predicates: &Predicates, catalog_name: &str, schema_name: &str) {
let row = [
(CATALOG_NAME, &Value::from(catalog_name)),
(SCHEMA_NAME, &Value::from(schema_name)),
(DEFAULT_CHARACTER_SET_NAME, &Value::from("utf8")),
(DEFAULT_COLLATION_NAME, &Value::from("utf8_bin")),
];
if !predicates.eval(&row) {
return;
}
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.charset_names.push(Some("utf8"));
self.collation_names.push(Some("utf8_bin"));
self.sql_paths.push(None);
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.catalog_names.finish()),
Arc::new(self.schema_names.finish()),
Arc::new(self.charset_names.finish()),
Arc::new(self.collation_names.finish()),
Arc::new(self.sql_paths.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaSchemata {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_schemata(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}


@@ -20,3 +20,24 @@ pub const ENGINES: &str = "engines";
pub const COLUMN_PRIVILEGES: &str = "column_privileges";
pub const COLUMN_STATISTICS: &str = "column_statistics";
pub const BUILD_INFO: &str = "build_info";
pub const CHARACTER_SETS: &str = "character_sets";
pub const COLLATIONS: &str = "collations";
pub const COLLATION_CHARACTER_SET_APPLICABILITY: &str = "collation_character_set_applicability";
pub const CHECK_CONSTRAINTS: &str = "check_constraints";
pub const EVENTS: &str = "events";
pub const FILES: &str = "files";
pub const SCHEMATA: &str = "schemata";
pub const KEY_COLUMN_USAGE: &str = "key_column_usage";
pub const OPTIMIZER_TRACE: &str = "optimizer_trace";
pub const PARAMETERS: &str = "parameters";
pub const PROFILING: &str = "profiling";
pub const REFERENTIAL_CONSTRAINTS: &str = "referential_constraints";
pub const ROUTINES: &str = "routines";
pub const SCHEMA_PRIVILEGES: &str = "schema_privileges";
pub const TABLE_PRIVILEGES: &str = "table_privileges";
pub const TRIGGERS: &str = "triggers";
pub const GLOBAL_STATUS: &str = "global_status";
pub const SESSION_STATUS: &str = "session_status";
pub const RUNTIME_METRICS: &str = "runtime_metrics";
pub const PARTITIONS: &str = "partitions";
pub const REGION_PEERS: &str = "greptime_region_peers";


@@ -25,18 +25,28 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::TableId;
use store_api::storage::{ScanRequest, TableId};
use table::metadata::TableType;
use super::TABLES;
use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::InformationTable;
use crate::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
pub const TABLE_CATALOG: &str = "table_catalog";
pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name";
pub const TABLE_TYPE: &str = "table_type";
const TABLE_ID: &str = "table_id";
const ENGINE: &str = "engine";
const INIT_CAPACITY: usize = 42;
pub(super) struct InformationSchemaTables {
schema: SchemaRef,
catalog_name: String,
@@ -54,12 +64,12 @@ impl InformationSchemaTables {
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
]))
}
@@ -85,14 +95,14 @@ impl InformationTable for InformationSchemaTables {
self.schema.clone()
}
fn to_stream(&self) -> Result<SendableRecordBatchStream> {
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_tables()
.make_tables(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
@@ -132,59 +142,48 @@ impl InformationSchemaTablesBuilder {
schema,
catalog_name,
catalog_manager,
catalog_names: StringVectorBuilder::with_capacity(42),
schema_names: StringVectorBuilder::with_capacity(42),
table_names: StringVectorBuilder::with_capacity(42),
table_types: StringVectorBuilder::with_capacity(42),
table_ids: UInt32VectorBuilder::with_capacity(42),
engines: StringVectorBuilder::with_capacity(42),
catalog_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
schema_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.tables` virtual table
async fn make_tables(&mut self) -> Result<RecordBatch> {
async fn make_tables(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exists(&catalog_name, &schema_name)
.await?
{
continue;
}
let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await;
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let table_info = table.table_info();
self.add_table(
&catalog_name,
&schema_name,
&table_name,
table.table_type(),
Some(table_info.ident.table_id),
Some(&table_info.meta.engine),
);
} else {
unreachable!();
}
while let Some(table) = stream.try_next().await? {
let table_info = table.table_info();
self.add_table(
&predicates,
&catalog_name,
&schema_name,
&table_info.name,
table.table_type(),
Some(table_info.ident.table_id),
Some(&table_info.meta.engine),
);
}
}
self.finish()
}
#[allow(clippy::too_many_arguments)]
fn add_table(
&mut self,
predicates: &Predicates,
catalog_name: &str,
schema_name: &str,
table_name: &str,
@@ -192,14 +191,27 @@ impl InformationSchemaTablesBuilder {
table_id: Option<u32>,
engine: Option<&str>,
) {
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.table_types.push(Some(match table_type {
let table_type = match table_type {
TableType::Base => "BASE TABLE",
TableType::View => "VIEW",
TableType::Temporary => "LOCAL TEMPORARY",
}));
};
let row = [
(TABLE_CATALOG, &Value::from(catalog_name)),
(TABLE_SCHEMA, &Value::from(schema_name)),
(TABLE_NAME, &Value::from(table_name)),
(TABLE_TYPE, &Value::from(table_type)),
];
if !predicates.eval(&row) {
return;
}
self.catalog_names.push(Some(catalog_name));
self.schema_names.push(Some(schema_name));
self.table_names.push(Some(table_name));
self.table_types.push(Some(table_type));
self.table_ids.push(table_id);
self.engines.push(engine);
}
@@ -229,7 +241,7 @@ impl DfPartitionStream for InformationSchemaTables {
schema,
futures::stream::once(async move {
builder
.make_tables()
.make_tables(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)


@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub use client::{CachedMetaKvBackend, MetaKvBackend};
pub use client::{CachedMetaKvBackend, CachedMetaKvBackendBuilder, MetaKvBackend};
mod client;
mod manager;
#[cfg(feature = "testing")]
pub mod mock;
pub use manager::KvBackendCatalogManager;


@@ -14,8 +14,10 @@
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::usize;
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::KvCacheInvalidator;
@@ -33,18 +35,89 @@ use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt};
use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
use crate::metrics::{
METRIC_CATALOG_KV_BATCH_GET, METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET,
};
const CACHE_MAX_CAPACITY: u64 = 10000;
const CACHE_TTL_SECOND: u64 = 10 * 60;
const CACHE_TTI_SECOND: u64 = 5 * 60;
const DEFAULT_CACHE_MAX_CAPACITY: u64 = 10000;
const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
pub type CacheBackendRef = Arc<Cache<Vec<u8>, KeyValue>>;
pub struct CachedMetaKvBackendBuilder {
cache_max_capacity: Option<u64>,
cache_ttl: Option<Duration>,
cache_tti: Option<Duration>,
meta_client: Arc<MetaClient>,
}
impl CachedMetaKvBackendBuilder {
pub fn new(meta_client: Arc<MetaClient>) -> Self {
Self {
cache_max_capacity: None,
cache_ttl: None,
cache_tti: None,
meta_client,
}
}
pub fn cache_max_capacity(mut self, cache_max_capacity: u64) -> Self {
self.cache_max_capacity.replace(cache_max_capacity);
self
}
pub fn cache_ttl(mut self, cache_ttl: Duration) -> Self {
self.cache_ttl.replace(cache_ttl);
self
}
pub fn cache_tti(mut self, cache_tti: Duration) -> Self {
self.cache_tti.replace(cache_tti);
self
}
pub fn build(self) -> CachedMetaKvBackend {
let cache_max_capacity = self
.cache_max_capacity
.unwrap_or(DEFAULT_CACHE_MAX_CAPACITY);
let cache_ttl = self.cache_ttl.unwrap_or(DEFAULT_CACHE_TTL);
let cache_tti = self.cache_tti.unwrap_or(DEFAULT_CACHE_TTI);
let cache = CacheBuilder::new(cache_max_capacity)
.time_to_live(cache_ttl)
.time_to_idle(cache_tti)
.build();
let kv_backend = Arc::new(MetaKvBackend {
client: self.meta_client,
});
let name = format!("CachedKvBackend({})", kv_backend.name());
let version = AtomicUsize::new(0);
CachedMetaKvBackend {
kv_backend,
cache,
name,
version,
}
}
}
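
For illustration, a sketch of how the builder might be used. The `build_cached_backend` function is hypothetical, the values are arbitrary, and `Arc`, `Duration`, and `MetaClient` come from the imports of this file.

// Hypothetical helper showing the intended builder usage; the values are arbitrary.
fn build_cached_backend(meta_client: Arc<MetaClient>) -> CachedMetaKvBackend {
    CachedMetaKvBackendBuilder::new(meta_client)
        .cache_max_capacity(20_000)
        .cache_ttl(Duration::from_secs(15 * 60))
        .cache_tti(Duration::from_secs(5 * 60))
        .build()
}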
pub type CacheBackend = Cache<Vec<u8>, KeyValue>;
/// A wrapper of `MetaKvBackend` with cache support.
///
/// `CachedMetaKvBackend` is mainly used to read metadata from Metasrv, and it caches the
/// results of `get` and `batch_get`. One way the cache is invalidated: when metadata
/// changes, Metasrv broadcasts a metadata invalidation request to the nodes.
///
/// Therefore, it is recommended to use `CachedMetaKvBackend` only for reading
/// metadata-related information. Reading other information through it may return stale
/// data, depending on the cache's TTL and TTI settings.
pub struct CachedMetaKvBackend {
kv_backend: KvBackendRef,
cache: CacheBackendRef,
cache: CacheBackend,
name: String,
version: AtomicUsize,
}
impl TxnService for CachedMetaKvBackend {
@@ -96,7 +169,38 @@ impl KvBackend for CachedMetaKvBackend {
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
self.kv_backend.batch_get(req).await
let _timer = METRIC_CATALOG_KV_BATCH_GET.start_timer();
let mut kvs = Vec::with_capacity(req.keys.len());
let mut miss_keys = Vec::with_capacity(req.keys.len());
for key in req.keys {
if let Some(val) = self.cache.get(&key).await {
kvs.push(val);
} else {
miss_keys.push(key);
}
}
let batch_get_req = BatchGetRequest::new().with_keys(miss_keys.clone());
let pre_version = self.version();
let unhit_kvs = self.kv_backend.batch_get(batch_get_req).await?.kvs;
for kv in unhit_kvs.iter() {
self.cache.insert(kv.key().to_vec(), kv.clone()).await;
}
if !self.validate_version(pre_version) {
for key in miss_keys.iter() {
self.cache.invalidate(key).await;
}
}
kvs.extend(unhit_kvs);
Ok(BatchGetResponse { kvs })
}
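
For illustration, a simplified, synchronous sketch of the read-through batch lookup above: serve what the cache already holds, fetch only the misses in a single remote call, then backfill the cache. The `batch_get_through_cache` function is hypothetical and uses only the standard library.

use std::collections::HashMap;

// Hypothetical, simplified version of the read-through batch lookup.
fn batch_get_through_cache(
    cache: &mut HashMap<String, String>,
    fetch_remote: impl Fn(&[String]) -> Vec<(String, String)>,
    keys: Vec<String>,
) -> Vec<(String, String)> {
    let mut hits = Vec::new();
    let mut misses = Vec::new();
    for key in keys {
        match cache.get(&key) {
            // Cache hit: answer directly from the cache.
            Some(value) => hits.push((key.clone(), value.clone())),
            // Cache miss: remember the key for the remote round trip.
            None => misses.push(key),
        }
    }
    // Fetch all misses in one remote call and backfill the cache.
    let fetched = fetch_remote(&misses);
    for (key, value) in &fetched {
        cache.insert(key.clone(), value.clone());
    }
    hits.extend(fetched);
    hits
}

fn main() {
    let mut cache = HashMap::from([("k1".to_string(), "v1".to_string())]);
    let kvs = batch_get_through_cache(
        &mut cache,
        |keys| keys.iter().map(|k| (k.clone(), format!("remote-{k}"))).collect::<Vec<_>>(),
        vec!["k1".to_string(), "k2".to_string()],
    );
    assert_eq!(kvs.len(), 2);
    assert!(cache.contains_key("k2")); // the miss was backfilled
}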
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
@@ -154,8 +258,14 @@ impl KvBackend for CachedMetaKvBackend {
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
let _timer = METRIC_CATALOG_KV_GET.start_timer();
let pre_version = Arc::new(Mutex::new(None));
let init = async {
let version_clone = pre_version.clone();
let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer();
version_clone.lock().unwrap().replace(self.version());
self.kv_backend.get(key).await.map(|val| {
val.with_context(|| CacheNotGetSnafu {
key: String::from_utf8_lossy(key),
@@ -166,7 +276,7 @@ impl KvBackend for CachedMetaKvBackend {
// currently moka doesn't have `optionally_try_get_with_by_ref`
// TODO(fys): change to moka method when available
// https://github.com/moka-rs/moka/issues/254
match self.cache.try_get_with_by_ref(key, init).await {
let ret = match self.cache.try_get_with_by_ref(key, init).await {
Ok(val) => Ok(Some(val)),
Err(e) => match e.as_ref() {
CacheNotGet { .. } => Ok(None),
@@ -175,43 +285,65 @@ impl KvBackend for CachedMetaKvBackend {
}
.map_err(|e| GetKvCache {
err_msg: e.to_string(),
})
});
// "cache.invalidate_key" and "cache.try_get_with_by_ref" are not mutually exclusive. So we need
// to use the version mechanism to prevent expired data from being put into the cache.
if pre_version
.lock()
.unwrap()
.as_ref()
.map_or(false, |v| !self.validate_version(*v))
{
self.cache.invalidate(key).await;
}
ret
}
}
#[async_trait::async_trait]
impl KvCacheInvalidator for CachedMetaKvBackend {
async fn invalidate_key(&self, key: &[u8]) {
self.create_new_version();
self.cache.invalidate(key).await;
debug!("invalidated cache key: {}", String::from_utf8_lossy(key));
}
}
impl CachedMetaKvBackend {
pub fn new(client: Arc<MetaClient>) -> Self {
let kv_backend = Arc::new(MetaKvBackend { client });
Self::wrap(kv_backend)
}
pub fn wrap(kv_backend: KvBackendRef) -> Self {
let cache = Arc::new(
CacheBuilder::new(CACHE_MAX_CAPACITY)
.time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
.time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
.build(),
);
// only for test
#[cfg(test)]
fn wrap(kv_backend: KvBackendRef) -> Self {
let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY)
.time_to_live(DEFAULT_CACHE_TTL)
.time_to_idle(DEFAULT_CACHE_TTI)
.build();
let name = format!("CachedKvBackend({})", kv_backend.name());
Self {
kv_backend,
cache,
name,
version: AtomicUsize::new(0),
}
}
pub fn cache(&self) -> &CacheBackendRef {
pub fn cache(&self) -> &CacheBackend {
&self.cache
}
fn version(&self) -> usize {
self.version.load(Ordering::Relaxed)
}
fn validate_version(&self, pre_version: usize) -> bool {
self.version() == pre_version
}
fn create_new_version(&self) -> usize {
self.version.fetch_add(1, Ordering::Relaxed) + 1
}
}
#[derive(Debug)]
@@ -308,3 +440,162 @@ impl KvBackend for MetaKvBackend {
self
}
}
#[cfg(test)]
mod tests {
use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use common_meta::kv_backend::{KvBackend, TxnService};
use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse,
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, PutRequest, PutResponse, RangeRequest,
RangeResponse,
};
use common_meta::rpc::KeyValue;
use dashmap::DashMap;
use super::CachedMetaKvBackend;
#[derive(Default)]
pub struct SimpleKvBackend {
inner_map: DashMap<Vec<u8>, Vec<u8>>,
get_execute_times: Arc<AtomicU32>,
}
impl TxnService for SimpleKvBackend {
type Error = common_meta::error::Error;
}
#[async_trait]
impl KvBackend for SimpleKvBackend {
fn name(&self) -> &str {
"SimpleKvBackend"
}
fn as_any(&self) -> &dyn Any {
self
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse, Self::Error> {
let mut kvs = Vec::with_capacity(req.keys.len());
for key in req.keys.iter() {
if let Some(kv) = self.get(key).await? {
kvs.push(kv);
}
}
Ok(BatchGetResponse { kvs })
}
async fn put(&self, req: PutRequest) -> Result<PutResponse, Self::Error> {
self.inner_map.insert(req.key, req.value);
// always return None as prev_kv, since we don't use it in this test.
Ok(PutResponse { prev_kv: None })
}
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>, Self::Error> {
self.get_execute_times
.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
Ok(self.inner_map.get(key).map(|v| KeyValue {
key: key.to_vec(),
value: v.value().clone(),
}))
}
async fn range(&self, _req: RangeRequest) -> Result<RangeResponse, Self::Error> {
todo!()
}
async fn batch_put(&self, _req: BatchPutRequest) -> Result<BatchPutResponse, Self::Error> {
todo!()
}
async fn compare_and_put(
&self,
_req: CompareAndPutRequest,
) -> Result<CompareAndPutResponse, Self::Error> {
todo!()
}
async fn delete_range(
&self,
_req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse, Self::Error> {
todo!()
}
async fn batch_delete(
&self,
_req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse, Self::Error> {
todo!()
}
}
#[tokio::test]
async fn test_cached_kv_backend() {
let simple_kv = Arc::new(SimpleKvBackend::default());
let get_execute_times = simple_kv.get_execute_times.clone();
let cached_kv = CachedMetaKvBackend::wrap(simple_kv);
add_some_vals(&cached_kv).await;
let batch_get_req = BatchGetRequest {
keys: vec![b"k1".to_vec(), b"k2".to_vec()],
};
assert_eq!(get_execute_times.load(Ordering::SeqCst), 0);
for _ in 0..10 {
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 2);
}
let batch_get_req = BatchGetRequest {
keys: vec![b"k1".to_vec(), b"k2".to_vec(), b"k3".to_vec()],
};
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
for _ in 0..10 {
let _batch_get_resp = cached_kv.batch_get(batch_get_req.clone()).await.unwrap();
assert_eq!(get_execute_times.load(Ordering::SeqCst), 3);
}
}
async fn add_some_vals(kv_backend: &impl KvBackend) {
kv_backend
.put(PutRequest {
key: b"k1".to_vec(),
value: b"v1".to_vec(),
prev_kv: false,
})
.await
.unwrap();
kv_backend
.put(PutRequest {
key: b"k2".to_vec(),
value: b"v2".to_vec(),
prev_kv: false,
})
.await
.unwrap();
kv_backend
.put(PutRequest {
key: b"k3".to_vec(),
value: b"v3".to_vec(),
prev_kv: false,
})
.await
.unwrap();
}
}
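
For illustration, a minimal standalone sketch of the version-guard idea used above: a reader snapshots the version before fetching from the remote store, and the fetched value is not kept in the cache if an invalidation bumped the version in the meantime. The `VersionGuard` type is hypothetical.

use std::sync::atomic::{AtomicUsize, Ordering};

// Hypothetical sketch of the version mechanism in `CachedMetaKvBackend`.
struct VersionGuard {
    version: AtomicUsize,
}

impl VersionGuard {
    // Mirrors `version()`.
    fn current(&self) -> usize {
        self.version.load(Ordering::Relaxed)
    }

    // Mirrors `create_new_version()`, called on invalidation.
    fn bump(&self) -> usize {
        self.version.fetch_add(1, Ordering::Relaxed) + 1
    }

    // Mirrors `validate_version()`: true if no invalidation happened since `pre_version`.
    fn is_still_valid(&self, pre_version: usize) -> bool {
        self.current() == pre_version
    }
}

fn main() {
    let guard = VersionGuard { version: AtomicUsize::new(0) };
    let pre_version = guard.current();
    // ... fetch a value from the remote store here ...
    guard.bump(); // a concurrent invalidation arrives
    // The fetched value must not be kept in the cache.
    assert!(!guard.is_still_valid(pre_version));
}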


@@ -15,18 +15,27 @@
use std::any::Any;
use std::collections::BTreeSet;
use std::sync::{Arc, Weak};
use std::time::Duration;
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
use async_stream::try_stream;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
};
use common_catalog::format_full_table_name;
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::error::Result as MetaResult;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::schema_name::SchemaNameKey;
use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::table_name::TableName;
use futures_util::TryStreamExt;
use futures_util::stream::BoxStream;
use futures_util::{StreamExt, TryStreamExt};
use moka::future::{Cache as AsyncCache, CacheBuilder};
use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use snafu::prelude::*;
use table::dist_table::DistTable;
@@ -34,9 +43,10 @@ use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::TableRef;
use crate::error::Error::{GetTableCache, TableCacheNotGet};
use crate::error::{
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, Result as CatalogResult,
TableMetadataManagerSnafu,
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, ListTablesSnafu,
Result as CatalogResult, TableCacheNotGetSnafu, TableMetadataManagerSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::CatalogManager;
@@ -56,23 +66,45 @@ pub struct KvBackendCatalogManager {
table_metadata_manager: TableMetadataManagerRef,
/// A sub-CatalogManager that handles system tables
system_catalog: SystemCatalog,
table_cache: AsyncCache<String, TableRef>,
}
fn make_table(table_info_value: TableInfoValue) -> CatalogResult<TableRef> {
let table_info = table_info_value
.table_info
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?;
Ok(DistTable::table(Arc::new(table_info)))
}
#[async_trait::async_trait]
impl CacheInvalidator for KvBackendCatalogManager {
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
self.cache_invalidator
.invalidate_table_name(ctx, table_name)
.await
}
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
self.cache_invalidator
.invalidate_table_id(ctx, table_id)
.await
}
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
let table_cache_key = format_full_table_name(
&table_name.catalog_name,
&table_name.schema_name,
&table_name.table_name,
);
self.cache_invalidator
.invalidate_table_name(ctx, table_name)
.await?;
self.table_cache.invalidate(&table_cache_key).await;
Ok(())
}
}
const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
const TABLE_CACHE_MAX_CAPACITY: u64 = 65536;
const TABLE_CACHE_TTL: Duration = Duration::from_secs(10 * 60);
const TABLE_CACHE_TTI: Duration = Duration::from_secs(5 * 60);
impl KvBackendCatalogManager {
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
@@ -81,12 +113,16 @@ impl KvBackendCatalogManager {
cache_invalidator,
system_catalog: SystemCatalog {
catalog_manager: me.clone(),
catalog_cache: Cache::new(CATALOG_CACHE_MAX_CAPACITY),
information_schema_provider: Arc::new(InformationSchemaProvider::new(
// The catalog name is not used in system_catalog, so leave it empty.
"".to_string(),
DEFAULT_CATALOG_NAME.to_string(),
me.clone(),
)),
},
table_cache: CacheBuilder::new(TABLE_CACHE_MAX_CAPACITY)
.time_to_live(TABLE_CACHE_TTL)
.time_to_idle(TABLE_CACHE_TTI)
.build(),
})
}
@@ -101,6 +137,10 @@ impl KvBackendCatalogManager {
#[async_trait::async_trait]
impl CatalogManager for KvBackendCatalogManager {
fn as_any(&self) -> &dyn Any {
self
}
async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
let stream = self
.table_metadata_manager
@@ -135,18 +175,22 @@ impl CatalogManager for KvBackendCatalogManager {
}
async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
let mut tables = self
let stream = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
.await;
let mut tables = stream
.try_collect::<Vec<_>>()
.await
.context(TableMetadataManagerSnafu)?
.map_err(BoxedError::new)
.context(ListTablesSnafu { catalog, schema })?
.into_iter()
.map(|(k, _)| k)
.collect::<Vec<String>>();
.collect::<Vec<_>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema));
Ok(tables)
Ok(tables.into_iter().collect())
}
async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
@@ -193,39 +237,101 @@ impl CatalogManager for KvBackendCatalogManager {
return Ok(Some(table));
}
let key = TableNameKey::new(catalog, schema, table_name);
let Some(table_name_value) = self
.table_metadata_manager
.table_name_manager()
.get(key)
.await
.context(TableMetadataManagerSnafu)?
else {
return Ok(None);
};
let table_id = table_name_value.table_id();
let init = async {
let table_name_key = TableNameKey::new(catalog, schema, table_name);
let Some(table_name_value) = self
.table_metadata_manager
.table_name_manager()
.get(table_name_key)
.await
.context(TableMetadataManagerSnafu)?
else {
return TableCacheNotGetSnafu {
key: table_name_key.to_string(),
}
.fail();
};
let table_id = table_name_value.table_id();
let Some(table_info_value) = self
.table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(TableMetadataManagerSnafu)?
.map(|v| v.into_inner())
else {
return Ok(None);
let Some(table_info_value) = self
.table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(TableMetadataManagerSnafu)?
.map(|v| v.into_inner())
else {
return TableCacheNotGetSnafu {
key: table_name_key.to_string(),
}
.fail();
};
make_table(table_info_value)
};
let table_info = Arc::new(
table_info_value
.table_info
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
);
Ok(Some(DistTable::table(table_info)))
match self
.table_cache
.try_get_with_by_ref(&format_full_table_name(catalog, schema, table_name), init)
.await
{
Ok(table) => Ok(Some(table)),
Err(err) => match err.as_ref() {
TableCacheNotGet { .. } => Ok(None),
_ => Err(err),
},
}
.map_err(|err| GetTableCache {
err_msg: err.to_string(),
})
}
fn as_any(&self) -> &dyn Any {
self
async fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
) -> BoxStream<'a, CatalogResult<TableRef>> {
let sys_tables = try_stream!({
// System tables
let sys_table_names = self.system_catalog.table_names(schema);
for table_name in sys_table_names {
if let Some(table) = self.system_catalog.table(catalog, schema, &table_name) {
yield table;
}
}
});
let table_id_stream = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
.await
.map_ok(|(_, v)| v.table_id());
const BATCH_SIZE: usize = 128;
let user_tables = try_stream!({
// Split table ids into chunks
let mut table_id_chunks = table_id_stream.ready_chunks(BATCH_SIZE);
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids
.into_iter()
.collect::<Result<Vec<_>, _>>()
.map_err(BoxedError::new)
.context(ListTablesSnafu { catalog, schema })?;
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(&table_ids)
.await
.context(TableMetadataManagerSnafu)?;
for table_info_value in table_info_values.into_values() {
yield make_table(table_info_value)?;
}
}
});
Box::pin(sys_tables.chain(user_tables))
}
}
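The new streaming `tables` implementation above batches table-id lookups before hitting the table-info manager. Below is a minimal, self-contained sketch of the `ready_chunks` batching pattern it relies on; the id range, the printing, and the tokio runtime are illustrative only and not part of the change:

```rust
use futures_util::{stream, StreamExt};

#[tokio::main]
async fn main() {
    const BATCH_SIZE: usize = 128;
    // Pretend these are table ids coming off the table-name manager's stream.
    let table_ids = stream::iter(0u32..300);
    // `ready_chunks` groups items that are already available into vectors of at
    // most BATCH_SIZE, letting the caller issue one batched `table_info` lookup
    // per chunk instead of one request per table.
    let mut chunks = table_ids.ready_chunks(BATCH_SIZE);
    while let Some(batch) = chunks.next().await {
        println!("would batch-get {} table infos", batch.len());
    }
}
```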
@@ -238,6 +344,7 @@ impl CatalogManager for KvBackendCatalogManager {
#[derive(Clone)]
struct SystemCatalog {
catalog_manager: Weak<KvBackendCatalogManager>,
catalog_cache: Cache<String, Arc<InformationSchemaProvider>>,
information_schema_provider: Arc<InformationSchemaProvider>,
}
@@ -273,7 +380,12 @@ impl SystemCatalog {
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
if schema == INFORMATION_SCHEMA_NAME {
let information_schema_provider =
InformationSchemaProvider::new(catalog.to_string(), self.catalog_manager.clone());
self.catalog_cache.get_with_by_ref(catalog, move || {
Arc::new(InformationSchemaProvider::new(
catalog.to_string(),
self.catalog_manager.clone(),
))
});
information_schema_provider.table(table_name)
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
Some(NumbersTable::table(NUMBERS_TABLE_ID))


@@ -1,128 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::{Arc, RwLock as StdRwLock};
use common_recordbatch::RecordBatch;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use table::engine::{CloseTableResult, EngineContext, TableEngine};
use table::metadata::TableId;
use table::requests::{
AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
TruncateTableRequest,
};
use table::test_util::MemTable;
use table::TableRef;
#[derive(Default)]
pub struct MockTableEngine {
tables: StdRwLock<HashMap<TableId, TableRef>>,
}
#[async_trait::async_trait]
impl TableEngine for MockTableEngine {
fn name(&self) -> &str {
"MockTableEngine"
}
/// Create a table with only one column
async fn create_table(
&self,
_ctx: &EngineContext,
request: CreateTableRequest,
) -> table::Result<TableRef> {
let table_id = request.id;
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]));
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap();
let table = MemTable::new_with_catalog(
&request.table_name,
record_batch,
table_id,
request.catalog_name,
request.schema_name,
vec![0],
);
let mut tables = self.tables.write().unwrap();
let _ = tables.insert(table_id, table.clone() as TableRef);
Ok(table)
}
async fn open_table(
&self,
_ctx: &EngineContext,
request: OpenTableRequest,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&request.table_id).cloned())
}
async fn alter_table(
&self,
_ctx: &EngineContext,
_request: AlterTableRequest,
) -> table::Result<TableRef> {
unimplemented!()
}
fn get_table(
&self,
_ctx: &EngineContext,
table_id: TableId,
) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&table_id).cloned())
}
fn table_exists(&self, _ctx: &EngineContext, table_id: TableId) -> bool {
self.tables.read().unwrap().contains_key(&table_id)
}
async fn drop_table(
&self,
_ctx: &EngineContext,
_request: DropTableRequest,
) -> table::Result<bool> {
unimplemented!()
}
async fn close_table(
&self,
_ctx: &EngineContext,
request: CloseTableRequest,
) -> table::Result<CloseTableResult> {
let _ = self.tables.write().unwrap().remove(&request.table_id);
Ok(CloseTableResult::Released(vec![]))
}
async fn close(&self) -> table::Result<()> {
Ok(())
}
async fn truncate_table(
&self,
_ctx: &EngineContext,
_request: TruncateTableRequest,
) -> table::Result<bool> {
Ok(true)
}
}


@@ -20,6 +20,7 @@ use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use futures::future::BoxFuture;
use futures_util::stream::BoxStream;
use table::metadata::TableId;
use table::requests::CreateTableRequest;
use table::TableRef;
@@ -56,6 +57,13 @@ pub trait CatalogManager: Send + Sync {
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>>;
/// Returns a stream of all tables under the given catalog and schema.
async fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
) -> BoxStream<'a, Result<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
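Since `tables` now returns a `BoxStream` of `Result<TableRef>` rather than a `Vec`, callers drain it with the usual stream combinators. A hedged sketch follows; the helper name is made up, and `Result`, `TableRef`, and `CatalogManagerRef` are assumed to be the catalog crate's own types:

```rust
use futures_util::TryStreamExt;

// Illustrative helper only: collect every table of a schema from the streaming API.
async fn all_tables(
    catalog_manager: &CatalogManagerRef,
    catalog: &str,
    schema: &str,
) -> Result<Vec<TableRef>> {
    let stream = catalog_manager.tables(catalog, schema).await;
    // The stream yields `Result<TableRef>`; `try_collect` stops at the first error.
    stream.try_collect::<Vec<_>>().await
}
```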


@@ -17,10 +17,12 @@ use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};
use async_stream::{stream, try_stream};
use common_catalog::build_db_string;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_PRIVATE_SCHEMA_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME,
};
use futures_util::stream::BoxStream;
use snafu::OptionExt;
use table::TableRef;
@@ -39,10 +41,64 @@ pub struct MemoryCatalogManager {
#[async_trait::async_trait]
impl CatalogManager for MemoryCatalogManager {
fn as_any(&self) -> &dyn Any {
self
}
async fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.keys()
.cloned()
.collect())
}
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.keys()
.cloned()
.collect())
}
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
self.catalog_exist_sync(catalog)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
self.schema_exist_sync(catalog, schema)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.contains_key(table))
}
async fn table(
&self,
catalog: &str,
@@ -61,57 +117,35 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
self.catalog_exist_sync(catalog)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
async fn tables<'a>(
&'a self,
catalog: &'a str,
schema: &'a str,
) -> BoxStream<'a, Result<TableRef>> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.contains_key(table))
}
async fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
let Some(schemas) = catalogs.get(catalog) else {
return Box::pin(stream!({
yield CatalogNotFoundSnafu {
catalog_name: catalog,
}
.fail();
}));
};
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.keys()
.cloned()
.collect())
}
let Some(tables) = schemas.get(schema) else {
return Box::pin(stream!({
yield SchemaNotFoundSnafu { catalog, schema }.fail();
}));
};
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.get(schema_name)
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?
.keys()
.cloned()
.collect())
}
let tables = tables.values().cloned().collect::<Vec<_>>();
fn as_any(&self) -> &dyn Any {
self
return Box::pin(try_stream!({
for table in tables {
yield table;
}
}));
}
}
@@ -307,6 +341,7 @@ pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
#[cfg(test)]
mod tests {
use common_catalog::consts::*;
use futures_util::TryStreamExt;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use super::*;
@@ -331,8 +366,18 @@ mod tests {
NUMBERS_TABLE_NAME,
)
.await
.unwrap()
.unwrap();
let _ = table.unwrap();
let stream = catalog_list
.tables(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
.await;
let tables = stream.try_collect::<Vec<_>>().await.unwrap();
assert_eq!(tables.len(), 1);
assert_eq!(
table.table_info().table_id(),
tables[0].table_info().table_id()
);
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.await


@@ -19,17 +19,19 @@ use prometheus::*;
lazy_static! {
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge =
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
register_int_gauge!("greptime_catalog_catalog_count", "catalog catalog count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge =
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap();
register_int_gauge!("greptime_catalog_schema_count", "catalog schema count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!(
"catalog_table_count",
"greptime_catalog_table_count",
"catalog table count",
&[METRIC_DB_LABEL]
)
.unwrap();
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
register_histogram!("greptime_catalog_kv_get_remote", "catalog kv get remote").unwrap();
pub static ref METRIC_CATALOG_KV_GET: Histogram =
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
register_histogram!("greptime_catalog_kv_get", "catalog kv get").unwrap();
pub static ref METRIC_CATALOG_KV_BATCH_GET: Histogram =
register_histogram!("greptime_catalog_kv_batch_get", "catalog kv batch get").unwrap();
}


@@ -15,7 +15,6 @@
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::INFORMATION_SCHEMA_NAME;
use common_catalog::format_full_table_name;
use datafusion::common::{ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
@@ -30,7 +29,7 @@ use crate::CatalogManagerRef;
pub struct DfTableSourceProvider {
catalog_manager: CatalogManagerRef,
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
disallow_cross_schema_query: bool,
disallow_cross_catalog_query: bool,
default_catalog: String,
default_schema: String,
}
@@ -38,12 +37,12 @@ pub struct DfTableSourceProvider {
impl DfTableSourceProvider {
pub fn new(
catalog_manager: CatalogManagerRef,
disallow_cross_schema_query: bool,
disallow_cross_catalog_query: bool,
query_ctx: &QueryContext,
) -> Self {
Self {
catalog_manager,
disallow_cross_schema_query,
disallow_cross_catalog_query,
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog().to_owned(),
default_schema: query_ctx.current_schema().to_owned(),
@@ -54,29 +53,18 @@ impl DfTableSourceProvider {
&'a self,
table_ref: TableReference<'a>,
) -> Result<ResolvedTableReference<'a>> {
if self.disallow_cross_schema_query {
if self.disallow_cross_catalog_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { schema, .. } => {
ensure!(
schema.as_ref() == self.default_schema
|| schema.as_ref() == INFORMATION_SCHEMA_NAME,
QueryAccessDeniedSnafu {
catalog: &self.default_catalog,
schema: schema.as_ref(),
}
);
}
TableReference::Partial { .. } => {}
TableReference::Full {
catalog, schema, ..
} => {
ensure!(
catalog.as_ref() == self.default_catalog
&& (schema.as_ref() == self.default_schema
|| schema.as_ref() == INFORMATION_SCHEMA_NAME),
catalog.as_ref() == self.default_catalog,
QueryAccessDeniedSnafu {
catalog: catalog.as_ref(),
schema: schema.as_ref()
schema: schema.as_ref(),
}
);
}
@@ -136,21 +124,21 @@ mod tests {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("wrong_schema"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
@@ -158,7 +146,7 @@ mod tests {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap();
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("wrong_catalog"),
@@ -172,14 +160,15 @@ mod tests {
schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"),
};
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"),
};
let _ = table_provider.resolve_table_ref(table_ref).unwrap();
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("dummy"),
@@ -187,5 +176,12 @@ mod tests {
table: Cow::Borrowed("columns"),
};
assert!(table_provider.resolve_table_ref(table_ref).is_err());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("greptime_private"),
table: Cow::Borrowed("columns"),
};
assert!(table_provider.resolve_table_ref(table_ref).is_ok());
}
}


@@ -7,8 +7,12 @@ license.workspace = true
[features]
testing = []
[lints]
workspace = true
[dependencies]
api.workspace = true
arc-swap = "1.6"
arrow-flight.workspace = true
async-stream.workspace = true
async-trait.workspace = true
@@ -33,10 +37,12 @@ parking_lot = "0.12"
prometheus.workspace = true
prost.workspace = true
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
session.workspace = true
snafu.workspace = true
tokio-stream = { workspace = true, features = ["net"] }
tokio.workspace = true
tokio-stream = { workspace = true, features = ["net"] }
tonic.workspace = true
[dev-dependencies]


@@ -37,7 +37,7 @@ async fn run() {
catalog_name: "greptime".to_string(),
schema_name: "public".to_string(),
table_name: "test_logical_dist_exec".to_string(),
desc: "".to_string(),
desc: String::default(),
column_defs: vec![
ColumnDef {
name: "timestamp".to_string(),


@@ -122,7 +122,7 @@ impl Client {
self.inner.set_peers(urls);
}
fn find_channel(&self) -> Result<(String, Channel)> {
pub fn find_channel(&self) -> Result<(String, Channel)> {
let addr = self
.inner
.get_peer()


@@ -47,6 +47,9 @@ pub struct Database {
// The dbname follows the same naming rules as our MySQL, PostgreSQL and HTTP
// protocols. The server treats dbname with higher priority than catalog/schema.
dbname: String,
// The time zone indicates the time zone where the user is located.
// Some queries need to be aware of the user's time zone to perform some specific actions.
timezone: String,
client: Client,
ctx: FlightContext,
@@ -58,7 +61,8 @@ impl Database {
Self {
catalog: catalog.into(),
schema: schema.into(),
dbname: "".to_string(),
dbname: String::default(),
timezone: String::default(),
client,
ctx: FlightContext::default(),
}
@@ -73,8 +77,9 @@ impl Database {
/// environment
pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
Self {
catalog: "".to_string(),
schema: "".to_string(),
catalog: String::default(),
schema: String::default(),
timezone: String::default(),
dbname: dbname.into(),
client,
ctx: FlightContext::default(),
@@ -105,6 +110,14 @@ impl Database {
self.dbname = dbname.into();
}
pub fn timezone(&self) -> &String {
&self.timezone
}
pub fn set_timezone(&mut self, timezone: impl Into<String>) {
self.timezone = timezone.into();
}
pub fn set_auth(&mut self, auth: AuthScheme) {
self.ctx.auth_header = Some(AuthHeader {
auth_scheme: Some(auth),
@@ -161,6 +174,7 @@ impl Database {
schema: self.schema.clone(),
authorization: self.ctx.auth_header.clone(),
dbname: self.dbname.clone(),
timezone: self.timezone.clone(),
// TODO(Taylor-lagrange): add client grpc tracing
tracing_context: W3cTrace::new(),
}),
@@ -295,32 +309,38 @@ impl Database {
);
Ok(Output::AffectedRows(rows))
}
FlightMessage::Recordbatch(_) => IllegalFlightMessagesSnafu {
reason: "The first flight message cannot be a RecordBatch message",
FlightMessage::Recordbatch(_) | FlightMessage::Metrics(_) => {
IllegalFlightMessagesSnafu {
reason: "The first flight message cannot be a RecordBatch or Metrics message",
}
.fail()
}
.fail(),
FlightMessage::Schema(schema) => {
let stream = Box::pin(stream!({
while let Some(flight_message) = flight_message_stream.next().await {
let flight_message = flight_message
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let FlightMessage::Recordbatch(record_batch) = flight_message else {
yield IllegalFlightMessagesSnafu {reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"}
match flight_message {
FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
FlightMessage::Metrics(_) => {}
FlightMessage::AffectedRows(_) | FlightMessage::Schema(_) => {
yield IllegalFlightMessagesSnafu {reason: format!("A Schema message must be succeeded exclusively by a set of RecordBatch messages, flight_message: {:?}", flight_message)}
.fail()
.map_err(BoxedError::new)
.context(ExternalSnafu);
break;
};
yield Ok(record_batch);
break;
}
}
}
}));
let record_batch_stream = RecordBatchStreamWrapper {
schema,
stream,
output_ordering: None,
metrics: Default::default(),
};
Ok(Output::Stream(Box::pin(record_batch_stream)))
Ok(Output::new_stream(Box::pin(record_batch_stream)))
}
}
}
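The `Database` handle now carries a session timezone that is sent in every gRPC request header. A minimal usage sketch, using only the APIs visible in this diff (construction of the `Client` itself is elided):

```rust
// Hedged sketch: wire a user's timezone into the client-side Database handle.
fn configure_session(client: Client) -> Database {
    let mut db = Database::new_with_dbname("public", client);
    // Time-zone-sensitive queries on the server will be evaluated in this zone.
    db.set_timezone("Asia/Shanghai");
    assert_eq!(db.timezone(), "Asia/Shanghai");
    db
}
```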


@@ -16,7 +16,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode;
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG};
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
use tonic::{Code, Status};
@@ -115,7 +115,7 @@ impl From<Status> for Error {
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
}
let code = get_metadata_value(&e, GREPTIME_ERROR_CODE)
let code = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_CODE)
.and_then(|s| {
if let Ok(code) = s.parse::<u32>() {
StatusCode::from_u32(code)
@@ -125,8 +125,8 @@ impl From<Status> for Error {
})
.unwrap_or(StatusCode::Unknown);
let msg =
get_metadata_value(&e, GREPTIME_ERROR_MSG).unwrap_or_else(|| e.message().to_string());
let msg = get_metadata_value(&e, GREPTIME_DB_HEADER_ERROR_MSG)
.unwrap_or_else(|| e.message().to_string());
Self::Server { code, msg }
}
@@ -134,10 +134,17 @@ impl From<Status> for Error {
impl Error {
pub fn should_retry(&self) -> bool {
!matches!(
// TODO(weny): figure out each case of these codes.
matches!(
self,
Self::RegionServer {
code: Code::InvalidArgument,
code: Code::Cancelled,
..
} | Self::RegionServer {
code: Code::DeadlineExceeded,
..
} | Self::RegionServer {
code: Code::Unavailable,
..
}
)
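`should_retry` now whitelists only transient gRPC codes (`Cancelled`, `DeadlineExceeded`, `Unavailable`) instead of retrying everything except `InvalidArgument`. Below is a hedged sketch of a caller-side retry loop built on top of it; the attempt limit and backoff are illustrative, and `Result` is assumed to be the client crate's result type:

```rust
use std::time::Duration;

// Illustrative wrapper only; not part of the diff.
async fn with_retry<F, Fut, T>(mut do_request: F) -> Result<T>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T>>,
{
    let mut attempt: u64 = 0;
    loop {
        match do_request().await {
            Ok(value) => return Ok(value),
            // Only retry the transient codes acknowledged by `should_retry`.
            Err(err) if err.should_retry() && attempt < 3 => {
                attempt += 1;
                tokio::time::sleep(Duration::from_millis(100 * attempt)).await;
            }
            Err(err) => return Err(err),
        }
    }
}
```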


@@ -17,27 +17,30 @@ use prometheus::*;
lazy_static! {
pub static ref METRIC_GRPC_CREATE_TABLE: Histogram =
register_histogram!("grpc_create_table", "grpc create table").unwrap();
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram =
register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap();
register_histogram!("greptime_grpc_create_table", "grpc create table").unwrap();
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram = register_histogram!(
"greptime_grpc_promql_range_query",
"grpc promql range query"
)
.unwrap();
pub static ref METRIC_GRPC_INSERT: Histogram =
register_histogram!("grpc_insert", "grpc insert").unwrap();
register_histogram!("greptime_grpc_insert", "grpc insert").unwrap();
pub static ref METRIC_GRPC_DELETE: Histogram =
register_histogram!("grpc_delete", "grpc delete").unwrap();
register_histogram!("greptime_grpc_delete", "grpc delete").unwrap();
pub static ref METRIC_GRPC_SQL: Histogram =
register_histogram!("grpc_sql", "grpc sql").unwrap();
register_histogram!("greptime_grpc_sql", "grpc sql").unwrap();
pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
register_histogram!("greptime_grpc_logical_plan", "grpc logical plan").unwrap();
pub static ref METRIC_GRPC_ALTER: Histogram =
register_histogram!("grpc_alter", "grpc alter").unwrap();
register_histogram!("greptime_grpc_alter", "grpc alter").unwrap();
pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
register_histogram!("greptime_grpc_drop_table", "grpc drop table").unwrap();
pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
register_histogram!("greptime_grpc_truncate_table", "grpc truncate table").unwrap();
pub static ref METRIC_GRPC_DO_GET: Histogram =
register_histogram!("grpc_do_get", "grpc do get").unwrap();
register_histogram!("greptime_grpc_do_get", "grpc do get").unwrap();
pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
"grpc_region_request",
"greptime_grpc_region_request",
"grpc region request",
&["request_type"]
)


@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse};
use api::v1::ResponseHeader;
use arc_swap::ArcSwapOption;
use arrow_flight::Ticket;
use async_stream::stream;
use async_trait::async_trait;
@@ -25,6 +28,7 @@ use common_meta::error::{self as meta_error, Result as MetaResult};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{RecordBatchStreamWrapper, SendableRecordBatchStream};
use common_telemetry::error;
use common_telemetry::tracing_context::TracingContext;
use prost::Message;
use snafu::{location, Location, OptionExt, ResultExt};
use tokio_stream::StreamExt;
@@ -119,27 +123,44 @@ impl RegionRequester {
.fail();
};
let metrics = Arc::new(ArcSwapOption::from(None));
let metrics_ref = metrics.clone();
let tracing_context = TracingContext::from_current_span();
let stream = Box::pin(stream!({
let _span = tracing_context.attach(common_telemetry::tracing::info_span!(
"poll_flight_data_stream"
));
while let Some(flight_message) = flight_message_stream.next().await {
let flight_message = flight_message
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let FlightMessage::Recordbatch(record_batch) = flight_message else {
yield IllegalFlightMessagesSnafu {
match flight_message {
FlightMessage::Recordbatch(record_batch) => yield Ok(record_batch),
FlightMessage::Metrics(s) => {
let m = serde_json::from_str(&s).ok().map(Arc::new);
metrics_ref.swap(m);
break;
}
_ => {
yield IllegalFlightMessagesSnafu {
reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"
}
.fail()
.map_err(BoxedError::new)
.context(ExternalSnafu);
break;
};
yield Ok(record_batch);
break;
}
}
}
}));
let record_batch_stream = RecordBatchStreamWrapper {
schema,
stream,
output_ordering: None,
metrics,
};
Ok(Box::pin(record_batch_stream))
}
@@ -176,7 +197,7 @@ impl RegionRequester {
check_response_header(header)?;
Ok(affected_rows)
Ok(affected_rows as _)
}
pub async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
@@ -230,7 +251,7 @@ mod test {
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: StatusCode::Success as u32,
err_msg: "".to_string(),
err_msg: String::default(),
}),
}));
assert!(result.is_ok());
@@ -238,7 +259,7 @@ mod test {
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: u32::MAX,
err_msg: "".to_string(),
err_msg: String::default(),
}),
}));
assert!(matches!(


@@ -39,7 +39,7 @@ use crate::from_grpc_response;
/// ```
///
/// If you want to see a concrete usage example, please see
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
/// [stream_inserter.rs](https://github.com/GreptimeTeam/greptimedb/blob/main/src/client/examples/stream_ingest.rs).
pub struct StreamInserter {
sender: mpsc::Sender<GreptimeRequest>,


@@ -12,13 +12,16 @@ path = "src/bin/greptime.rs"
[features]
tokio-console = ["common-telemetry/tokio-console"]
[lints]
workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
async-trait.workspace = true
auth.workspace = true
catalog.workspace = true
chrono.workspace = true
clap = { version = "4.4", features = ["derive"] }
clap.workspace = true
client.workspace = true
common-base.workspace = true
common-catalog.workspace = true
@@ -29,10 +32,12 @@ common-meta.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-telemetry = { workspace = true, features = [
"deadlock_detection",
] }
common-time.workspace = true
common-wal.workspace = true
config = "0.13"
datanode.workspace = true
datatypes.workspace = true


@@ -13,5 +13,5 @@
// limitations under the License.
fn main() {
common_version::setup_git_versions();
common_version::setup_build_info();
}


@@ -156,6 +156,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
}),
follower_peers: vec![],
leader_status: None,
leader_down_since: None,
});
}


@@ -58,8 +58,8 @@ pub struct ExportCommand {
#[clap(long)]
output_dir: String,
/// The name of the catalog to export. Default to "greptime-*"".
#[clap(long, default_value = "")]
/// The name of the catalog to export.
#[clap(long, default_value = "greptime-*")]
database: String,
/// Parallelism of the export.
@@ -142,7 +142,7 @@ impl Export {
.with_context(|_| RequestDatabaseSnafu {
sql: "show databases".to_string(),
})?;
let Output::Stream(stream) = result else {
let Output::Stream(stream, _) = result else {
NotDataFromOutputSnafu.fail()?
};
let record_batch = collect(stream)
@@ -183,7 +183,7 @@ impl Export {
.sql(&sql)
.await
.with_context(|_| RequestDatabaseSnafu { sql })?;
let Output::Stream(stream) = result else {
let Output::Stream(stream, _) = result else {
NotDataFromOutputSnafu.fail()?
};
let Some(record_batch) = collect(stream)
@@ -235,7 +235,7 @@ impl Export {
.sql(&sql)
.await
.with_context(|_| RequestDatabaseSnafu { sql })?;
let Output::Stream(stream) = result else {
let Output::Stream(stream, _) = result else {
NotDataFromOutputSnafu.fail()?
};
let record_batch = collect(stream)


@@ -16,7 +16,9 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager};
use catalog::kvbackend::{
CachedMetaKvBackend, CachedMetaKvBackendBuilder, KvBackendCatalogManager,
};
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins;
use common_error::ext::ErrorExt;
@@ -157,19 +159,20 @@ impl Repl {
let start = Instant::now();
let output = if let Some(query_engine) = &self.query_engine {
let stmt = QueryLanguageParser::parse_sql(&sql)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
let query_ctx = QueryContext::with(self.database.catalog(), self.database.schema());
let stmt = QueryLanguageParser::parse_sql(&sql, &query_ctx)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
let plan = query_engine
.planner()
.plan(stmt, query_ctx)
.plan(stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
let LogicalPlan::DfPlan(plan) =
query_engine.optimize(&plan).context(PlanStatementSnafu)?;
let LogicalPlan::DfPlan(plan) = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;
let plan = DFLogicalSubstraitConvertor {}
.encode(&plan)
@@ -182,7 +185,7 @@ impl Repl {
.context(RequestDatabaseSnafu { sql: &sql })?;
let either = match output {
Output::Stream(s) => {
Output::Stream(s, _) => {
let x = RecordBatches::try_collect(s)
.await
.context(CollectRecordBatchesSnafu)?;
@@ -247,7 +250,8 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);
let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let cached_meta_backend =
Arc::new(CachedMetaKvBackendBuilder::new(meta_client.clone()).build());
let catalog_list =
KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend);
@@ -256,6 +260,7 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
catalog_list,
None,
None,
None,
false,
plugins.clone(),
));


@@ -18,10 +18,11 @@ use std::time::Duration;
use async_trait::async_trait;
use catalog::kvbackend::MetaKvBackend;
use clap::Parser;
use common_config::WalConfig;
use common_telemetry::{info, logging};
use common_wal::config::DatanodeWalConfig;
use datanode::config::DatanodeOptions;
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::service::DatanodeServiceBuilder;
use meta_client::MetaClientOptions;
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -35,9 +36,17 @@ pub struct Instance {
}
impl Instance {
fn new(datanode: Datanode) -> Self {
pub fn new(datanode: Datanode) -> Self {
Self { datanode }
}
pub fn datanode_mut(&mut self) -> &mut Datanode {
&mut self.datanode
}
pub fn datanode(&self) -> &Datanode {
&self.datanode
}
}
#[async_trait]
@@ -169,7 +178,7 @@ impl StartCommand {
// `wal_dir` only affects raft-engine config.
if let Some(wal_dir) = &self.wal_dir
&& let WalConfig::RaftEngine(raft_engine_config) = &mut opts.wal
&& let DatanodeWalConfig::RaftEngine(raft_engine_config) = &mut opts.wal
{
if raft_engine_config
.dir
@@ -219,14 +228,20 @@ impl StartCommand {
client: Arc::new(meta_client.clone()),
});
let datanode = DatanodeBuilder::new(opts, plugins)
let mut datanode = DatanodeBuilder::new(opts.clone(), plugins)
.with_meta_client(meta_client)
.with_kv_backend(meta_backend)
.enable_region_server_service()
.build()
.await
.context(StartDatanodeSnafu)?;
let services = DatanodeServiceBuilder::new(&opts)
.with_default_grpc_server(&datanode.region_server())
.enable_http_service()
.build()
.await
.context(StartDatanodeSnafu)?;
datanode.setup_services(services);
Ok(Instance::new(datanode))
}
@@ -306,7 +321,7 @@ mod tests {
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
assert_eq!(Some(42), options.node_id);
let WalConfig::RaftEngine(raft_engine_config) = options.wal else {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
unreachable!()
};
assert_eq!("/other/wal", raft_engine_config.dir.unwrap());
@@ -494,7 +509,7 @@ mod tests {
};
// Should be read from env, env > default values.
let WalConfig::RaftEngine(raft_engine_config) = opts.wal else {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
unreachable!()
};
assert_eq!(raft_engine_config.read_batch_size, 100);


@@ -249,6 +249,12 @@ pub enum Error {
source: BoxedError,
location: Location,
},
#[snafu(display("Failed to build runtime"))]
BuildRuntime {
location: Location,
source: common_runtime::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -298,6 +304,8 @@ impl ErrorExt for Error {
Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected,
Error::Other { source, .. } => source.status_code(),
Error::BuildRuntime { source, .. } => source.status_code(),
}
}


@@ -16,7 +16,7 @@ use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use catalog::kvbackend::CachedMetaKvBackend;
use catalog::kvbackend::CachedMetaKvBackendBuilder;
use clap::Parser;
use client::client_manager::DatanodeClients;
use common_meta::heartbeat::handler::parse_mailbox_message::ParseMailboxMessageHandler;
@@ -28,6 +28,7 @@ use frontend::heartbeat::handler::invalidate_table_cache::InvalidateTableCacheHa
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::MetaClientOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
@@ -42,9 +43,17 @@ pub struct Instance {
}
impl Instance {
fn new(frontend: FeInstance) -> Self {
pub fn new(frontend: FeInstance) -> Self {
Self { frontend }
}
pub fn mut_inner(&mut self) -> &mut FeInstance {
&mut self.frontend
}
pub fn inner(&self) -> &FeInstance {
&self.frontend
}
}
#[async_trait]
@@ -223,15 +232,27 @@ impl StartCommand {
let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
msg: "'meta_client'",
})?;
let cache_max_capacity = meta_client_options.metadata_cache_max_capacity;
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;
let meta_client = FeInstance::create_meta_client(meta_client_options)
.await
.context(StartFrontendSnafu)?;
let meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let cached_meta_backend = CachedMetaKvBackendBuilder::new(meta_client.clone())
.cache_max_capacity(cache_max_capacity)
.cache_ttl(cache_ttl)
.cache_tti(cache_tti)
.build();
let cached_meta_backend = Arc::new(cached_meta_backend);
let executor = HandlerGroupExecutor::new(vec![
Arc::new(ParseMailboxMessageHandler),
Arc::new(InvalidateTableCacheHandler::new(meta_backend.clone())),
Arc::new(InvalidateTableCacheHandler::new(
cached_meta_backend.clone(),
)),
]);
let heartbeat_task = HeartbeatTask::new(
@@ -241,25 +262,24 @@ impl StartCommand {
);
let mut instance = FrontendBuilder::new(
meta_backend.clone(),
cached_meta_backend.clone(),
Arc::new(DatanodeClients::default()),
meta_client,
)
.with_cache_invalidator(meta_backend)
.with_plugin(plugins)
.with_cache_invalidator(cached_meta_backend)
.with_plugin(plugins.clone())
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.context(StartFrontendSnafu)?;
instance
.build_export_metrics_task(&opts.export_metrics)
.context(StartFrontendSnafu)?;
instance
.build_servers(opts)
let servers = Services::new(opts.clone(), Arc::new(instance.clone()), plugins)
.build()
.await
.context(StartFrontendSnafu)?;
instance
.build_servers(opts, servers)
.context(StartFrontendSnafu)?;
Ok(Instance::new(instance))
}


@@ -28,35 +28,39 @@ pub mod standalone;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
prometheus::register_int_gauge_vec!("greptime_app_version", "app version", &["short_version", "version"]).unwrap();
}
#[async_trait]
pub trait App {
pub trait App: Send {
fn name(&self) -> &str;
/// A hook for the implementor to do something before the actual startup. Defaults to a no-op.
async fn pre_start(&mut self) -> error::Result<()> {
Ok(())
}
async fn start(&mut self) -> error::Result<()>;
async fn stop(&self) -> error::Result<()>;
}
pub async fn start_app(mut app: Box<dyn App>) -> error::Result<()> {
let name = app.name().to_string();
info!("Starting app: {}", app.name());
tokio::select! {
result = app.start() => {
if let Err(err) = result {
error!(err; "Failed to start app {name}!");
}
}
_ = tokio::signal::ctrl_c() => {
if let Err(err) = app.stop().await {
error!(err; "Failed to stop app {name}!");
}
info!("Goodbye!");
}
app.pre_start().await?;
app.start().await?;
if let Err(e) = tokio::signal::ctrl_c().await {
error!("Failed to listen for ctrl-c signal: {}", e);
// It's unusual to fail to listen for the ctrl-c signal; there may be something unexpected in
// the underlying system. So we stop the app instead of carrying on regardless, to let people
// investigate the issue.
}
app.stop().await?;
info!("Goodbye!");
Ok(())
}
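With `App` now requiring `Send` and exposing a default `pre_start` hook, an implementor only has to provide `name`, `start`, and `stop`. A minimal, hypothetical implementor for illustration only (`error::Result` is the cmd crate's result type):

```rust
// Hypothetical no-op App; not part of the diff.
struct NoopApp;

#[async_trait]
impl App for NoopApp {
    fn name(&self) -> &str {
        "noop-app"
    }

    // `pre_start` keeps its default no-op implementation.

    async fn start(&mut self) -> error::Result<()> {
        Ok(())
    }

    async fn stop(&self) -> error::Result<()> {
        Ok(())
    }
}
```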


@@ -128,7 +128,7 @@ impl StartCommand {
let mut opts: MetaSrvOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
None,
MetaSrvOptions::env_list_keys(),
)?;
if let Some(dir) = &cli_options.log_dir {


@@ -14,8 +14,8 @@
use clap::ArgMatches;
use common_config::KvBackendConfig;
use common_meta::wal::WalConfig as MetaSrvWalConfig;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_wal::config::MetaSrvWalConfig;
use config::{Config, Environment, File, FileFormat};
use datanode::config::{DatanodeOptions, ProcedureConfig};
use frontend::error::{Result as FeResult, TomlFormatSnafu};
@@ -173,8 +173,8 @@ impl Options {
mod tests {
use std::io::Write;
use common_config::WalConfig;
use common_test_util::temp_dir::create_named_temp_file;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, ObjectStoreConfig};
use super::*;
@@ -281,7 +281,7 @@ mod tests {
);
// Should be the values from config file, not environment variables.
let WalConfig::RaftEngine(raft_engine_config) = opts.wal else {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
unreachable!()
};
assert_eq!(raft_engine_config.dir.unwrap(), "/tmp/greptimedb/wal");


@@ -18,28 +18,29 @@ use std::{fs, path};
use async_trait::async_trait;
use clap::Parser;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_config::wal::StandaloneWalConfig;
use common_config::{metadata_store_dir, KvBackendConfig};
use common_meta::cache_invalidator::DummyCacheInvalidator;
use common_meta::datanode_manager::DatanodeManagerRef;
use common_meta::ddl::{DdlTaskExecutorRef, TableMetadataAllocatorRef};
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
use common_meta::ddl::ProcedureExecutorRef;
use common_meta::ddl_manager::DdlManager;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_procedure::ProcedureManagerRef;
use common_telemetry::info;
use common_telemetry::logging::LoggingOptions;
use common_time::timezone::set_default_timezone;
use common_wal::config::StandaloneWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::standalone::StandaloneTableMetadataAllocator;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
GrpcOptions, InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
};
@@ -118,6 +119,12 @@ pub struct StandaloneOptions {
pub export_metrics: ExportMetricsOption,
}
impl StandaloneOptions {
pub fn env_list_keys() -> Option<&'static [&'static str]> {
Some(&["wal.broker_endpoints"])
}
}
impl Default for StandaloneOptions {
fn default() -> Self {
Self {
@@ -206,6 +213,10 @@ impl App for Instance {
.await
.context(StartWalOptionsAllocatorSnafu)?;
plugins::start_frontend_plugins(self.frontend.plugins().clone())
.await
.context(StartFrontendSnafu)?;
self.frontend.start().await.context(StartFrontendSnafu)?;
Ok(())
}
@@ -267,7 +278,7 @@ impl StartCommand {
let opts: StandaloneOptions = Options::load_layered_options(
self.config_file.as_deref(),
self.env_prefix.as_ref(),
None,
StandaloneOptions::env_list_keys(),
)?;
self.convert_options(cli_options, opts)
@@ -361,20 +372,18 @@ impl StartCommand {
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(self, opts: MixOptions) -> Result<Instance> {
let mut fe_opts = opts.frontend.clone();
info!("Standalone start command: {:#?}", self);
info!("Building standalone instance with {opts:#?}");
let mut fe_opts = opts.frontend;
#[allow(clippy::unnecessary_mut_passed)]
let fe_plugins = plugins::setup_frontend_plugins(&mut fe_opts) // mut ref is MUST, DO NOT change it
.await
.context(StartFrontendSnafu)?;
let dn_opts = opts.datanode.clone();
let dn_opts = opts.datanode;
info!("Standalone start command: {:#?}", self);
info!("Building standalone instance with {opts:#?}");
set_default_timezone(opts.frontend.default_timezone.as_deref())
.context(InitTimezoneSnafu)?;
set_default_timezone(fe_opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;
// Ensure the data_home directory exists.
fs::create_dir_all(path::Path::new(&opts.data_home)).context(CreateDirSnafu {
@@ -406,13 +415,18 @@ impl StartCommand {
opts.wal_meta.clone(),
kv_backend.clone(),
));
let table_meta_allocator = Arc::new(StandaloneTableMetadataAllocator::new(
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),
table_metadata_manager.table_name_manager().clone(),
));
let ddl_task_executor = Self::create_ddl_task_executor(
kv_backend.clone(),
table_metadata_manager,
procedure_manager.clone(),
datanode_manager.clone(),
table_meta_allocator,
@@ -420,19 +434,18 @@ impl StartCommand {
.await?;
let mut frontend = FrontendBuilder::new(kv_backend, datanode_manager, ddl_task_executor)
.with_plugin(fe_plugins)
.with_plugin(fe_plugins.clone())
.try_build()
.await
.context(StartFrontendSnafu)?;
frontend
.build_export_metrics_task(&opts.frontend.export_metrics)
.context(StartFrontendSnafu)?;
frontend
.build_servers(opts)
let servers = Services::new(fe_opts.clone(), Arc::new(frontend.clone()), fe_plugins)
.build()
.await
.context(StartFrontendSnafu)?;
frontend
.build_servers(fe_opts, servers)
.context(StartFrontendSnafu)?;
Ok(Instance {
datanode,
@@ -443,15 +456,12 @@ impl StartCommand {
}
pub async fn create_ddl_task_executor(
kv_backend: KvBackendRef,
table_metadata_manager: TableMetadataManagerRef,
procedure_manager: ProcedureManagerRef,
datanode_manager: DatanodeManagerRef,
table_meta_allocator: TableMetadataAllocatorRef,
) -> Result<DdlTaskExecutorRef> {
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let ddl_task_executor: DdlTaskExecutorRef = Arc::new(
) -> Result<ProcedureExecutorRef> {
let procedure_executor: ProcedureExecutorRef = Arc::new(
DdlManager::try_new(
procedure_manager,
datanode_manager,
@@ -463,10 +473,10 @@ impl StartCommand {
.context(InitDdlManagerSnafu)?,
);
Ok(ddl_task_executor)
Ok(procedure_executor)
}
async fn create_table_metadata_manager(
pub async fn create_table_metadata_manager(
kv_backend: KvBackendRef,
) -> Result<TableMetadataManagerRef> {
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
@@ -488,8 +498,8 @@ mod tests {
use auth::{Identity, Password, UserProviderRef};
use common_base::readable_size::ReadableSize;
use common_config::WalConfig;
use common_test_util::temp_dir::create_named_temp_file;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{FileConfig, GcsConfig};
use servers::Mode;
@@ -596,7 +606,7 @@ mod tests {
assert_eq!(None, fe_opts.mysql.reject_no_database);
assert!(fe_opts.influxdb.enable);
let WalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
unreachable!()
};
assert_eq!("/tmp/greptimedb/test/wal", raft_engine_config.dir.unwrap());


@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
bitvec = "1.0"


@@ -216,7 +216,7 @@ mod tests {
let bytes = StringBytes::from(hello.clone());
assert_eq!(bytes.len(), hello.len());
let zero = "".to_string();
let zero = String::default();
let bytes = StringBytes::from(zero);
assert!(bytes.is_empty());
}


@@ -21,6 +21,8 @@ pub mod readable_size;
use core::any::Any;
use std::sync::{Arc, Mutex, MutexGuard};
pub type AffectedRows = usize;
pub use bit_vec::BitVec;
/// [`Plugins`] is a wrapper of Arc contents.


@@ -33,7 +33,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE;
pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE;
pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE;
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)]
pub struct ReadableSize(pub u64);
impl ReadableSize {


@@ -4,6 +4,9 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
common-error.workspace = true
common-macro.workspace = true


@@ -44,6 +44,48 @@ pub const INFORMATION_SCHEMA_COLUMN_PRIVILEGES_TABLE_ID: u32 = 6;
pub const INFORMATION_SCHEMA_COLUMN_STATISTICS_TABLE_ID: u32 = 7;
/// id for information_schema.build_info
pub const INFORMATION_SCHEMA_BUILD_INFO_TABLE_ID: u32 = 8;
/// id for information_schema.CHARACTER_SETS
pub const INFORMATION_SCHEMA_CHARACTER_SETS_TABLE_ID: u32 = 9;
/// id for information_schema.COLLATIONS
pub const INFORMATION_SCHEMA_COLLATIONS_TABLE_ID: u32 = 10;
/// id for information_schema.COLLATIONS
pub const INFORMATION_SCHEMA_COLLATION_CHARACTER_SET_APPLICABILITY_TABLE_ID: u32 = 11;
/// id for information_schema.CHECK_CONSTRAINTS
pub const INFORMATION_SCHEMA_CHECK_CONSTRAINTS_TABLE_ID: u32 = 12;
/// id for information_schema.EVENTS
pub const INFORMATION_SCHEMA_EVENTS_TABLE_ID: u32 = 13;
/// id for information_schema.FILES
pub const INFORMATION_SCHEMA_FILES_TABLE_ID: u32 = 14;
/// id for information_schema.SCHEMATA
pub const INFORMATION_SCHEMA_SCHEMATA_TABLE_ID: u32 = 15;
/// id for information_schema.KEY_COLUMN_USAGE
pub const INFORMATION_SCHEMA_KEY_COLUMN_USAGE_TABLE_ID: u32 = 16;
/// id for information_schema.OPTIMIZER_TRACE
pub const INFORMATION_SCHEMA_OPTIMIZER_TRACE_TABLE_ID: u32 = 17;
/// id for information_schema.PARAMETERS
pub const INFORMATION_SCHEMA_PARAMETERS_TABLE_ID: u32 = 18;
/// id for information_schema.PROFILING
pub const INFORMATION_SCHEMA_PROFILING_TABLE_ID: u32 = 19;
/// id for information_schema.REFERENTIAL_CONSTRAINTS
pub const INFORMATION_SCHEMA_REFERENTIAL_CONSTRAINTS_TABLE_ID: u32 = 20;
/// id for information_schema.ROUTINES
pub const INFORMATION_SCHEMA_ROUTINES_TABLE_ID: u32 = 21;
/// id for information_schema.SCHEMA_PRIVILEGES
pub const INFORMATION_SCHEMA_SCHEMA_PRIVILEGES_TABLE_ID: u32 = 22;
/// id for information_schema.TABLE_PRIVILEGES
pub const INFORMATION_SCHEMA_TABLE_PRIVILEGES_TABLE_ID: u32 = 23;
/// id for information_schema.TRIGGERS
pub const INFORMATION_SCHEMA_TRIGGERS_TABLE_ID: u32 = 24;
/// id for information_schema.GLOBAL_STATUS
pub const INFORMATION_SCHEMA_GLOBAL_STATUS_TABLE_ID: u32 = 25;
/// id for information_schema.SESSION_STATUS
pub const INFORMATION_SCHEMA_SESSION_STATUS_TABLE_ID: u32 = 26;
/// id for information_schema.RUNTIME_METRICS
pub const INFORMATION_SCHEMA_RUNTIME_METRICS_TABLE_ID: u32 = 27;
/// id for information_schema.PARTITIONS
pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28;
/// id for information_schema.REGION_PEERS
pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29;
/// ----- End of information_schema tables -----
pub const MITO_ENGINE: &str = "mito";


@@ -17,6 +17,11 @@ use consts::DEFAULT_CATALOG_NAME;
pub mod consts;
pub mod error;
#[inline]
pub fn format_schema_name(catalog: &str, schema: &str) -> String {
format!("{catalog}.{schema}")
}
/// Formats table fully-qualified name
#[inline]
pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
@@ -51,11 +56,22 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
/// - if `[<catalog>-]` is provided, we split database name with `-` and use
/// `<catalog>` and `<schema>`.
pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (&str, &str) {
match parse_optional_catalog_and_schema_from_db_string(db) {
(Some(catalog), schema) => (catalog, schema),
(None, schema) => (DEFAULT_CATALOG_NAME, schema),
}
}
/// Attempts to parse the catalog and schema from the given database name.
///
/// Similar to [`parse_catalog_and_schema_from_db_string`], but returns the catalog as an
/// `Option`, which is `None` when the database name doesn't provide one.
pub fn parse_optional_catalog_and_schema_from_db_string(db: &str) -> (Option<&str>, &str) {
let parts = db.splitn(2, '-').collect::<Vec<&str>>();
if parts.len() == 2 {
(parts[0], parts[1])
(Some(parts[0]), parts[1])
} else {
(DEFAULT_CATALOG_NAME, db)
(None, db)
}
}
@@ -85,5 +101,20 @@ mod tests {
("catalog", "schema1-schema2"),
parse_catalog_and_schema_from_db_string("catalog-schema1-schema2")
);
assert_eq!(
(None, "fullschema"),
parse_optional_catalog_and_schema_from_db_string("fullschema")
);
assert_eq!(
(Some("catalog"), "schema"),
parse_optional_catalog_and_schema_from_db_string("catalog-schema")
);
assert_eq!(
(Some("catalog"), "schema1-schema2"),
parse_optional_catalog_and_schema_from_db_string("catalog-schema1-schema2")
);
}
}


@@ -4,11 +4,12 @@ version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
common-base.workspace = true
humantime-serde.workspace = true
rskafka.workspace = true
num_cpus.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with = "3"
toml.workspace = true
sysinfo.workspace = true

Some files were not shown because too many files have changed in this diff.