Compare commits


88 Commits

Author SHA1 Message Date
Ruihang Xia
11bab0c47c feat: add sqlness test for bloom filter index (#5240)
* feat: add sqlness test for bloom filter index

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* drop table after finished

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* redact more variables

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-27 06:40:18 +00:00
shuiyisong
588f6755f0 fix: disable path label in opendal for now (#5247)
* fix: remove path label in opendal for now

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-27 04:34:19 +00:00
Kould
dad8ac6f71 feat(vector): add vector functions vec_sub & vec_sum & vec_elem_sum (#5230)
* feat(vector): add sub function

* chore: added check for vector length misalignment

* feat(vector): add `vec_sum` & `vec_elem_sum`

* chore: codefmt

* update lock file

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-26 15:07:13 +00:00
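
A note on the functions above: they operate element-wise on vectors. The Rust sketch below only illustrates the assumed semantics (`vec_sub` subtracts element-wise and rejects misaligned lengths, as the length check in the commit suggests; `vec_elem_sum` sums the elements of one vector); it is not the GreptimeDB implementation, which works on the engine's own vector types.

```rust
/// Element-wise subtraction; returns None when the lengths are misaligned,
/// mirroring the length check mentioned in the commit above.
fn vec_sub(a: &[f32], b: &[f32]) -> Option<Vec<f32>> {
    if a.len() != b.len() {
        return None;
    }
    Some(a.iter().zip(b).map(|(x, y)| x - y).collect())
}

/// Sum of the elements of a single vector (the assumed `vec_elem_sum` semantics).
fn vec_elem_sum(a: &[f32]) -> f32 {
    a.iter().sum()
}

fn main() {
    assert_eq!(vec_sub(&[3.0, 4.0], &[1.0, 2.0]), Some(vec![2.0, 2.0]));
    assert_eq!(vec_elem_sum(&[1.0, 2.0, 3.0]), 6.0);
}
```
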
Yohan Wal
ef13c52814 feat: init PgElection with candidate registration (#5209)
* feat: init PgElection

fix: release advisory lock

fix: handle duplicate keys

chore: update comments

fix: unlock if acquired the lock

chore: add TODO and avoid unwrap

refactor: check both lock and expire time, add more comments

chore: fmt

fix: deal with multiple edge cases

feat: init PgElection with candidate registration

chore: fmt

chore: remove

* test: add unit test for pg candidate registration

* test: add unit test for pg candidate registration

* chore: update pg env

* chore: make ci happy

* fix: spawn a background connection thread

* chore: typo

* fix: shadow the election client for now

* fix: fix ci

* chore: readability

* chore: follow review comments

* refactor: use kvbackend for pg election

* chore: rename

* chore: make clippy happy

* refactor: use pg server time instead of local ones

* chore: typo

* chore: rename infancy to leader_infancy for clarification

* chore: clean up

* chore: follow review comments

* chore: follow review comments

* ci: unit test should test all features

* ci: fix

* ci: just test pg
2024-12-26 12:39:32 +00:00
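
The PgElection work above builds leader election and candidate registration on PostgreSQL primitives such as advisory locks and server-side time. Purely as background, the sketch below shows the advisory-lock primitive using the tokio-postgres crate; the function name and key are invented for illustration, and this is not the actual PgElection code.

```rust
use tokio_postgres::Client;

/// Try to become leader by taking a session-level advisory lock keyed by `election_key`.
/// Returns Ok(true) if the calling connection now holds the lock. The lock is released
/// automatically if that connection drops, which is why a dedicated background
/// connection has to stay alive for as long as leadership should be held.
async fn try_acquire_leadership(
    client: &Client,
    election_key: i64,
) -> Result<bool, tokio_postgres::Error> {
    // `pg_try_advisory_lock` returns a boolean immediately instead of blocking.
    let row = client
        .query_one("SELECT pg_try_advisory_lock($1)", &[&election_key])
        .await?;
    Ok(row.get::<_, bool>(0))
}
```

A caller would keep the client (and the task driving its connection) alive for as long as it wants to remain leader, since a session-level advisory lock disappears when the session ends; that matches the commit's note about spawning a background connection.
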
Zhenchi
7471f55c2e feat(mito): add bloom filter read metrics (#5239)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-26 04:44:03 +00:00
Zhenchi
f4b2d393be feat(config): add bloom filter config (#5237)
* feat(bloom-filter): integrate indexer with mito2

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(config) add bloom filter config

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix docs

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* merge

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* remove cache config

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-26 04:38:45 +00:00
localhost
0cf44e1e47 chore: add more info for pipeline dryrun API (#5232) 2024-12-26 03:06:25 +00:00
Ruihang Xia
00ad27dd2e feat(bloom-filter): bloom filter applier (#5220)
* wip

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* draft search logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use defined BloomFilterReader

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* round the range end

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* finish index applier

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* integrate applier into mito2 with cache layer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix cache key and add unit test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* provide bloom filter index size hint

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* revert BloomFilterReaderImpl::read_vec

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove dead code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* ignore null on eq

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more tests and fix bloom filter logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-26 02:51:18 +00:00
discord9
5ba8bd09fb fix: flow compare null values (#5234)
* fix: flow compare null values

* fix: fix again ck ty before cmp

* chore: rm comment

* fix: handle null

* chore: typo

* docs: update comment

* refactor: per review

* tests: more sqlness

* tests: sqlness not show create table
2024-12-25 15:31:27 +00:00
Zhenchi
a9f21915ef feat(bloom-filter): integrate indexer with mito2 (#5236)
* feat(bloom-filter): integrate indexer with mito2

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* rename skippingindextype

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-25 14:30:07 +00:00
Lin Yihai
039989f77b feat: Add vec_mul function. (#5205) 2024-12-25 14:17:22 +00:00
discord9
abf34b845c feat(flow): check sink table mismatch on flow creation (#5112)
* tests: more mismatch errors

* feat: check sink table schema if exists&prompt nice err msg

* chore: rm unused variant

* chore: fmt

* chore: cargo clippy

* feat: check schema on create

* feat: better err msg when mismatch

* tests: fix a schema mismatch

* todo: create sink table

* feat: create sink table

* fix: find time index

* tests: auto created sink table

* fix: remove empty keys

* refactor: per review

* chore: fmt

* test: sqlness

* chore: after rebase
2024-12-25 13:42:37 +00:00
Ruihang Xia
4051be4214 feat: add some critical metrics to flownode (#5235)
* feat: add some critical metrics to flownode

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-25 10:57:21 +00:00
zyy17
5e88c80394 feat: introduce the Limiter in frontend to limit the requests by in-flight write bytes size. (#5231)
feat: introduce Limiter to limit in-flight write bytes size in frontend
2024-12-25 09:11:30 +00:00
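
The limiter described above caps the total byte size of write requests that are in flight in the frontend at any moment. Below is a minimal, std-only sketch of that idea; the type and method names are hypothetical and the real Limiter is more involved.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

/// Rejects a write when admitting it would push the in-flight byte total past the cap.
struct InflightByteLimiter {
    limit: usize,
    in_flight: AtomicUsize,
}

impl InflightByteLimiter {
    fn new(limit: usize) -> Self {
        Self { limit, in_flight: AtomicUsize::new(0) }
    }

    /// Returns true if the request was admitted; call `release` with the same size
    /// once the request completes.
    fn try_acquire(&self, bytes: usize) -> bool {
        let mut current = self.in_flight.load(Ordering::Relaxed);
        loop {
            if current + bytes > self.limit {
                return false;
            }
            match self.in_flight.compare_exchange_weak(
                current,
                current + bytes,
                Ordering::AcqRel,
                Ordering::Relaxed,
            ) {
                Ok(_) => return true,
                Err(actual) => current = actual,
            }
        }
    }

    fn release(&self, bytes: usize) {
        self.in_flight.fetch_sub(bytes, Ordering::AcqRel);
    }
}
```

On the request path, the frontend would call `try_acquire` with the encoded request size, reject the write when it fails, and release the bytes after the write finishes.
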
discord9
6a46f391cc ci: upload .pdb files too for better windows debug (#5224)
ci: upload .pdb files too
2024-12-25 08:10:57 +00:00
Zhenchi
c96903e60c feat(bloom-filter): impl batch push to creator (#5225)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-25 07:53:53 +00:00
Ruihang Xia
a23f269bb1 fix: correct write cache's metric labels (#5227)
* refactor: remove unused field in WriteCache

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: unify read and write cache path

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update config and fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unnecessary methods and adapt test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change the default path

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove remote-home

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-25 07:26:21 +00:00
Weny Xu
f33b378e45 chore: add log for converting region to follower (#5222)
* chore: add log for converting region to follower

* chore: apply suggestions from CR
2024-12-25 02:38:47 +00:00
zyy17
267941bbb5 ci: support to pack multiple files in upload-artifacts action (#5228) 2024-12-25 02:37:32 +00:00
Lei, HUANG
074846bbc2 feat(mito): parquet memtable reader (#4967)
* wip: row group reader base

* wip: memtable row group reader

* Refactor MemtableRowGroupReader to streamline data fetching

 - Added early return when fetch_ranges is empty to optimize performance.
 - Replaced inline chunk data assignment with a call to `assign_dense_chunk` for cleaner code.

* wip: row group reader

* wip: reuse RowGroupReader

* wip: bulk part reader

* Enhance BulkPart Iteration with Filtering

 - Introduced `RangeBase` to `BulkIterContext` for improved filter handling.
 - Implemented filter application in `BulkPartIter` to prune batches based on predicates.
 - Updated `SimpleFilterContext::new_opt` to be public for broader access.

* chore: add prune test

* fix: clippy

* fix: introduce prune reader for memtable and add more prune test

* Enhance BulkPart read method to return Option<BoxedBatchIterator>

 - Modified `BulkPart::read` to return `Option<BoxedBatchIterator>` to handle cases where no row groups are selected.
 - Added logic to return `None` when all row groups are filtered out.
 - Updated tests to handle the new return type and added a test case to verify behavior when no row groups match the predicate.

* refactor/separate-paraquet-reader: Add helper function to parse parquet metadata and integrate it into BulkPartEncoder

* refactor/separate-paraquet-reader:
 Change BulkPartEncoder row_group_size from Option to usize and update tests

* refactor/separate-paraquet-reader: Add context module for bulk memtable iteration and refactor part reading

 • Introduce context module to encapsulate context for bulk memtable iteration.
 • Refactor BulkPart to use BulkIterContextRef for reading operations.
 • Remove redundant code in BulkPart by centralizing context creation and row group pruning logic in the new context module.
 • Create new file context.rs with structures and logic for handling iteration context.
 • Adjust part_reader.rs and row_group_reader.rs to reference the new BulkIterContextRef.

* refactor/separate-paraquet-reader: Refactor RowGroupReader traits and implementations in memtable and parquet reader modules

 • Rename RowGroupReaderVirtual to RowGroupReaderContext for clarity.
 • Replace BulkPartVirt with direct usage of BulkIterContextRef in MemtableRowGroupReader.
 • Simplify MemtableRowGroupReaderBuilder by directly passing context instead of creating a BulkPartVirt instance.
 • Update RowGroupReaderBase to use context field instead of virt, reflecting the trait renaming and usage.
 • Modify FileRangeVirt to FileRangeContextRef and adjust implementations accordingly.

* refactor/separate-paraquet-reader: Refactor column page reader creation and remove unused code

 • Centralize creation of SerializedPageReader in RowGroupBase::column_reader method.
 • Remove unused RowGroupCachedReader and related code from MemtableRowGroupPageFetcher.
 • Eliminate redundant error handling for invalid column index in multiple places.

* chore: rebase main and resolve conflicts

* fix: some comments

* chore: resolve conflicts

* chore: resolve conflicts
2024-12-24 09:59:26 +00:00
Ruihang Xia
88d46a38ae chore: bump opendal to fork version to fix prometheus layer (#5223)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-24 08:54:59 +00:00
Weny Xu
de0beabf34 refactor: remove unnecessary wrap (#5221)
* chore: remove unnecessary arc

* chore: remove unnecessary box
2024-12-24 08:43:14 +00:00
Ruihang Xia
68dd2916fb feat: logs query endpoint (#5202)
* define endpoint

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* planner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update lock file

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add unit test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix toml format

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* revert metric change

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/query/src/log_query/planner.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix compile

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor and tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2024-12-24 06:21:19 +00:00
Zhenchi
d51b65a8bf feat(index-cache): abstract IndexCache to be shared by multi types of indexes (#5219)
* feat(index-cache): abstract `IndexCache` to be shared by multi types of indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix typo

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: remove added label

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: simplify cached reader impl

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* rename func

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-24 05:10:30 +00:00
zyy17
2082c4b6e4 docs: add greptimedb-operator project link in 'Tools & Extensions' and other small improvements (#5216) 2024-12-24 03:09:41 +00:00
Ning Sun
c623404fff ci: fix nightly ci task on nix build (#5198) 2024-12-21 10:09:32 +00:00
Yingwen
fa3b7ed5ea build: use 8xlarge as arm default (#5214) 2024-12-21 08:39:24 +00:00
Yiran
8ece853076 fix: dead links (#5212) 2024-12-20 12:01:57 +00:00
Zhenchi
4245bff8f2 feat(bloom-filter): add bloom filter reader (#5204)
* feat(bloom-filter): add bloom filter reader

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* chore: remove unused dep

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix conflict

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-20 08:29:18 +00:00
Zhenchi
3d4121aefb feat(bloom-filter): add memory control for creator (#5185)
* feat(bloom-filter): add memory control for creator

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: remove meaningless buf

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat: add codec for intermediate

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-20 06:59:44 +00:00
Weny Xu
1910d71cb3 chore: adjust fuzz tests cfg (#5207) 2024-12-20 06:58:51 +00:00
LFC
a578eea801 ci: install latest protobuf in dev-builder image (#5196) 2024-12-20 02:45:53 +00:00
discord9
6bf574f098 fix: auto created table ttl check (#5203)
* fix: auto created table ttl check

* tests: with hint
2024-12-19 11:23:01 +00:00
discord9
a4d61bcaf1 fix(flow): batch builder with type (#5195)
* fix: typed builder

* chore: clippy

* chore: rename

* fix: unit tests

* refactor: per review
2024-12-19 09:16:56 +00:00
dennis zhuang
7ea8a44d3a chore: update PR template (#5199) 2024-12-19 08:28:20 +00:00
discord9
2d6f63a504 feat: show flow's mem usage in INFORMATION_SCHEMA.FLOWS (#4890)
* feat: add flow mem size to sys table

* chore: rm dup def

* chore: remove unused variant

* chore: minor refactor

* refactor: per review
2024-12-19 08:24:04 +00:00
Ruihang Xia
422d18da8b feat: bump opendal and switch prometheus layer to the upstream impl (#5179)
* feat: bump opendal and switch prometheus layer to the upstream impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused files

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused things

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove root dir on recovering cache

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* filter out non-files entry in test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-19 03:42:05 +00:00
Weny Xu
66f0581f5b fix: ensure table route metadata is eventually rolled back on failure (#5174)
* fix: ensure table route metadata is eventually rolled back on procedure failure

* fix(fuzz): enhance procedure condition checking

* chore: add logs

* feat: close downgraded leader region actively

* chore: apply suggestions from CR
2024-12-19 03:29:34 +00:00
Ning Sun
c9ad8c7101 feat: show create postgresql foreign table (#5143)
* feat: add show create table for pg in parser

* feat: implement show create table operation

* fix: adopt upstream changes
2024-12-18 15:15:55 +00:00
Ning Sun
2107737db1 chore: make nix compilation environment config more robust (#5183)
* chore: improve nix-shell support

* fix: add pkg-config

* ci: add a github action to ensure build on clean system

* ci: optimise dependencies of task

* ci: move clean build to nightly
2024-12-18 12:41:24 +00:00
Ruihang Xia
548e1988ab refactor: remove unused symbols (#5193)
chore: remove unused symbols

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-18 11:24:43 +00:00
Yingwen
218236cc5b docs: fix grafana dashboard row (#5192) 2024-12-18 09:10:56 +00:00
Ruihang Xia
f04d380259 fix: validate matcher op for __name__ in promql (#5191)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-18 08:51:46 +00:00
Lei, HUANG
fa773cf480 fix(sqlness): enforce order in union tests (#5190)
Add ORDER BY clause to subquery union tests

 Updated the SQL and result files for subquery union tests to include an ORDER BY clause, ensuring consistent result ordering. This change aligns with the test case from the DuckDB repository.
2024-12-18 08:24:15 +00:00
jeremyhi
9b4e8555e2 feat: extract hints from http header (#5128)
* feat: extract hints from http header

* Update src/servers/src/http/hints.rs

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* chore: by comment

* refactor: get instead of loop

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-12-18 08:17:34 +00:00
Yingwen
c6b7caa2ec feat: do not remove time filters in ScanRegion (#5180)
* feat: do not remove time filters

* chore: remove `time_range` from parquet reader

* chore: print more message in the check script

* chore: fix unused error
2024-12-18 06:39:49 +00:00
Yingwen
58d6982c93 feat: do not keep MemtableRefs in ScanInput (#5184) 2024-12-18 06:37:22 +00:00
dennis zhuang
e662c241e6 feat: impl label_join and label_replace for promql (#5153)
* feat: impl label_join and label_replace for promql

* chore: style

* fix: dst_label is equal to src_label

* fix: forgot to sort the results

* fix: processing empty source label
2024-12-18 06:35:45 +00:00
Lanqing Yang
266919c226 fix: display inverted and fulltext index in show index (#5169) 2024-12-18 06:10:59 +00:00
Yohan Wal
7d1bcc9d49 feat: introduce Buffer for non-continuous bytes (#5164)
* feat: introduce Buffer for non-continuous bytes

* Update src/mito2/src/cache/index.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: apply review comments

* refactor: use opendal::Buffer

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-12-18 03:45:38 +00:00
LFC
18e8c45384 refactor: produce BatchBuilder from a Batch to modify it again (#5186)
chore: pub some mods
2024-12-18 02:42:33 +00:00
Lei, HUANG
c33cf59398 perf: avoid holding memtable during compaction (#5157)
* perf/avoid-holding-memtable-during-compaction: Refactor Compaction Version Handling

 • Introduced CompactionVersion struct to encapsulate region version details for compaction, removing dependency on VersionRef.
 • Updated CompactionRequest and CompactionRegion to use CompactionVersion.
 • Modified open_compaction_region to construct CompactionVersion without memtables.
 • Adjusted WindowedCompactionPicker to work with CompactionVersion.
 • Enhanced flush logic in WriteBufferManager to improve memory usage checks and logging.

* reformat code

* chore: change log level

* reformat code

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-12-17 07:06:07 +00:00
discord9
421088a868 test: sqlness upgrade compatibility tests (#5126)
* feat: simple version switch

* chore: remove debug print

* chore: add common folder

* tests: add drop table

* feat: pull versioned binary

* chore: don't use native-tls

* chore: rm outdated docs

* chore: new line

* fix: save old bin dir

* fix: switch version restart all node

* feat: use etcd

* fix: wait for election

* fix: normal sqlness

* refactor: hashmap for bin dir

* test: past 3 major version compat crate table

* refactor: allow using without setup etcd
2024-12-17 07:00:02 +00:00
Zhenchi
d821dc5a3e feat(bloom-filter): add basic bloom filter creator (Part 1) (#5177)
* feat(bloom-filter): add a simple bloom filter creator (Part 1)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: clippy

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: header

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* docs: add format comment

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-17 06:55:42 +00:00
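
Several commits in this range (creator, reader, applier, cache, config, sqlness tests) revolve around a bloom filter index. As background only, here is a toy bloom filter in plain Rust, unrelated to the on-disk format those commits define: an insert sets k bit positions derived from the item's hash, and a lookup may report a false positive but never a false negative.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// A toy bloom filter: membership checks can yield false positives, never false negatives.
struct ToyBloomFilter {
    bits: Vec<bool>,
    num_hashes: u64,
}

impl ToyBloomFilter {
    fn new(num_bits: usize, num_hashes: u64) -> Self {
        Self { bits: vec![false; num_bits], num_hashes }
    }

    /// Derive `num_hashes` probe positions from the two halves of one 64-bit hash
    /// (double hashing); a real implementation would use stronger, seeded hashes.
    fn probe_positions<T: Hash>(&self, item: &T) -> Vec<usize> {
        let mut hasher = DefaultHasher::new();
        item.hash(&mut hasher);
        let h = hasher.finish();
        let (h1, h2) = (h & 0xffff_ffff, (h >> 32) | 1);
        let len = self.bits.len() as u64;
        (0..self.num_hashes)
            .map(|i| (h1.wrapping_add(i.wrapping_mul(h2)) % len) as usize)
            .collect()
    }

    fn insert<T: Hash>(&mut self, item: &T) {
        for pos in self.probe_positions(item) {
            self.bits[pos] = true;
        }
    }

    fn contains<T: Hash>(&self, item: &T) -> bool {
        self.probe_positions(item).into_iter().all(|pos| self.bits[pos])
    }
}

fn main() {
    let mut filter = ToyBloomFilter::new(1024, 4);
    filter.insert(&"host-1");
    assert!(filter.contains(&"host-1"));
    // A lookup for an absent value is *probably* negative; false positives are possible.
}
```

A bloom filter index exploits exactly this property: when the filter reports a value absent, the corresponding data block can be skipped without reading it.
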
Yingwen
bfc777e6ac fix: deletion between two put may not work in last_non_null mode (#5168)
* fix: deletion between rows with the same key may not work

* test: add sqlness test case

* chore: comments
2024-12-17 04:01:32 +00:00
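
For readers unfamiliar with the merge mode named above: under last_non_null semantics a read folds row versions from newest to oldest and fills each still-missing field with the first non-null value it meets, so a DELETE sitting between two PUTs has to stop that back-fill. The sketch below captures that interpretation with invented types; it is illustrative only and not the mito2 code.

```rust
/// One stored version of a single field, newest first in the input slice.
#[derive(Clone, Copy)]
enum Version {
    Put(Option<i64>), // a write; `None` models a NULL field value
    Delete,           // a tombstone
}

/// Resolve the visible value under last_non_null semantics.
fn merge_last_non_null(newest_first: &[Version]) -> Option<i64> {
    for v in newest_first {
        match v {
            Version::Put(Some(value)) => return Some(*value),
            Version::Put(None) => continue, // NULL: keep looking at older versions
            Version::Delete => return None, // tombstone: older puts must not resurface
        }
    }
    None
}

fn main() {
    // put(1), then delete, then put(NULL): the NULL in the newest put must not
    // fall through past the delete back to 1 (the bug described in the fix above).
    let versions = [Version::Put(None), Version::Delete, Version::Put(Some(1))];
    assert_eq!(merge_last_non_null(&versions), None);
}
```
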
Yingwen
8a5384697b chore: add aquamarine to dep lists (#5181) 2024-12-17 01:45:50 +00:00
Weny Xu
d0245473a9 fix: correct set_region_role_state_gracefully behaviors (#5171)
* fix: reduce default max rows for fuzz testing

* chore: remove Postgres setup from fuzz test workflow

* chore(fuzz): increase resource limits for GreptimeDB cluster

* chore(fuzz): increase resource limits for kafka

* fix: correct `set_region_role_state_gracefully` behaviors

* chore: remove Postgres setup from fuzz test workflow

* chore(fuzz): reduce resource limits for GreptimeDB & kafka
2024-12-16 14:01:40 +00:00
discord9
043d0bd7c2 test: flow rebuild (#5162)
* tests: rebuild flow

* tests: more rebuild

* tests: restart

* chore: drop clean
2024-12-16 12:25:23 +00:00
Ning Sun
acedff030b chore: add nix-shell configure for a minimal environment for development (#5175)
* chore: add nix-shell development environment

* chore: add rust-analyzer

* chore: use .envrc as a private file
2024-12-16 11:47:18 +00:00
ZonaHe
88f7075a2a feat: update dashboard to v0.7.3 (#5172)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2024-12-16 10:56:41 +00:00
Ruihang Xia
54698325b6 feat: introduce SKIPPING index (part 1) (#5155)
* skip index parser

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* wip: sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl show create part

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add empty line

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change keyword to SKIPPING INDEX

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* rename local variables

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-16 09:21:00 +00:00
Lei, HUANG
5ffda7e971 chore: gauge for flush compaction (#5156)
* add metrics

* chore/bench-metrics: Add INFLIGHT_FLUSH_COUNT Metric to Flush Process

 • Introduced INFLIGHT_FLUSH_COUNT metric to track the number of ongoing flush operations.
 • Incremented INFLIGHT_FLUSH_COUNT in FlushScheduler to monitor active flushes.
 • Removed redundant increment of INFLIGHT_FLUSH_COUNT in RegionWorkerLoop to prevent double counting.

* chore/bench-metrics: Add Metrics for Compaction and Flush Operations

 • Introduced INFLIGHT_COMPACTION_COUNT and INFLIGHT_FLUSH_COUNT metrics to track the number of ongoing compaction and flush operations.
 • Incremented INFLIGHT_COMPACTION_COUNT when scheduling remote and local compaction jobs, and decremented it upon completion.
 • Added INFLIGHT_FLUSH_COUNT increment and decrement logic around flush tasks to monitor active flush operations.
 • Removed redundant metric updates in worker.rs and handle_compaction.rs to streamline metric handling.

* chore: add metrics for remote compaction jobs

* chore: format

* chore: also add dashboard
2024-12-16 07:08:07 +00:00
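
The gauges added above follow a standard pattern: increment when a flush or compaction job starts and decrement when it finishes, including on early exits. Below is a std-only illustration of that pattern with an RAII guard; the actual metrics are registered gauges rather than a bare atomic, and only the metric name is borrowed from the commit message.

```rust
use std::sync::atomic::{AtomicI64, Ordering};

/// How many flush jobs are currently running (a metrics gauge in the real code).
static INFLIGHT_FLUSH_COUNT: AtomicI64 = AtomicI64::new(0);

/// RAII guard: the count is decremented even if the job returns early or panics.
struct InflightGuard;

impl InflightGuard {
    fn start() -> Self {
        INFLIGHT_FLUSH_COUNT.fetch_add(1, Ordering::Relaxed);
        InflightGuard
    }
}

impl Drop for InflightGuard {
    fn drop(&mut self) {
        INFLIGHT_FLUSH_COUNT.fetch_sub(1, Ordering::Relaxed);
    }
}

fn run_flush_job() {
    let _guard = InflightGuard::start();
    // ... perform the flush; the gauge drops back when `_guard` goes out of scope.
}

fn main() {
    run_flush_job();
    assert_eq!(INFLIGHT_FLUSH_COUNT.load(Ordering::Relaxed), 0);
}
```

Incrementing in exactly one place (here, the guard constructor) is also what prevents the double counting the commit message mentions.
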
Lin Yihai
f82af15eba feat: Add vector_scalar_mul function. (#5166) 2024-12-16 06:46:38 +00:00
shuiyisong
9d7fea902e chore: remove unused dep (#5163)
* chore: remove unused dep

* chore: remove more unused dep
2024-12-16 06:17:27 +00:00
Niwaka
358d5e1d63 fix: support alter table ~ add ~ custom_type (#5165) 2024-12-15 09:05:29 +00:00
Yingwen
579059d99f ci: use 4xlarge for nightly build (#5158) 2024-12-13 12:53:11 +00:00
localhost
53d55c0b6b fix: loki write row len error (#5161) 2024-12-13 10:10:59 +00:00
Yingwen
bef6896280 docs: Add index panels to standalone grafana dashboard (#5140)
* docs: Add index panels to standalone grafana dashboard

* docs: fix flush/compaction op
2024-12-13 08:17:49 +00:00
Yohan Wal
4b4c6dbb66 refactor: cache inverted index with fixed-size page (#5114)
* feat: cache inverted index by page instead of file

* fix: add unit test and fix bugs

* chore: typo

* chore: ci

* fix: math

* chore: apply review comments

* chore: renames

* test: add unit test for index key calculation

* refactor: use ReadableSize

* feat: add config for inverted index page size

* chore: update config file

* refactor: handle multiple range read and fix some related bugs

* fix: add config

* test: turn to a fs reader to match behaviors of object store
2024-12-13 07:34:24 +00:00
localhost
e8e9526738 chore: pipeline dryrun api can currently receives pipeline raw content (#5142)
* chore: pipeline dryrun api can currently receives pipeline raw content

* chore: remove dryrun v1 and add test

* chore: change dryrun pipeline api body schema

* chore: remove useless struct PipelineInfo

* chore: update PipelineDryrunParams doc

* chore: increase code readability

* chore: add some comment for pipeline dryrun test

* Apply suggestions from code review

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* chore: format code

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-12-12 11:47:21 +00:00
Yingwen
fee75a1fad feat: collect reader metrics from prune reader (#5152) 2024-12-12 11:27:22 +00:00
localhost
b8a78b7838 chore: decide tag column in log api follow table schema if table exists (#5138)
* chore: decide tag column in log api follow table schema if table exists

* chore: add more test for greptime_identity pipeline

* chore: change pipeline get_table function signature

* chore: change identity_pipeline_inner tag_column_names type
2024-12-12 09:01:21 +00:00
Weny Xu
2137c53274 feat(index): add file_size_hint for remote blob reader (#5147)
feat(index): add file_size_hint for remote blob reader
2024-12-12 04:45:40 +00:00
Yohan Wal
03ad6e2a8d feat(fuzz): add alter table options for alter fuzzer (#5074)
* feat(fuzz): add set table options to alter fuzzer

* chore: clippy is happy, I'm sad

* chore: happy ci happy

* fix: unit test

* feat(fuzz): add unset table options to alter fuzzer

* fix: unit test

* feat(fuzz): add table option validator

* fix: make clippy happy

* chore: add comments

* chore: apply review comments

* fix: unit test

* feat(fuzz): add more ttl options

* fix: #5108

* chore: add comments

* chore: add comments
2024-12-12 04:21:38 +00:00
Weny Xu
d53fbcb936 feat: introduce PuffinMetadataCache (#5148)
* feat: introduce `PuffinMetadataCache`

* refactor: remove too_many_arguments

* chore: fmt toml
2024-12-12 04:09:36 +00:00
Weny Xu
8c1959c580 feat: add prefetch support to InvertedIndexFooterReader for reduced I/O time (#5146)
* feat: add prefetch support to `InvertedIndeFooterReader`

* chore: correct struct name

* chore: apply suggestions from CR
2024-12-12 03:49:54 +00:00
Weny Xu
e2a41ccaec feat: add prefetch support to PuffinFileFooterReader for reduced I/O time (#5145)
* feat: introduce `PuffinFileFooterReader`

* refactor: remove `SyncReader` trait and impl

* refactor: replace `FooterParser` with `PuffinFileFooterReader`

* chore: remove unused errors
2024-12-12 03:13:36 +00:00
Niwaka
a8012147ab feat: support push down IN filter (#5129)
* feat: support push down IN filter

* chore: move tests to prune.sql
2024-12-11 13:46:23 +00:00
Ruihang Xia
60f8dbf7f0 feat: implement v1/sql/parse endpoint to parse GreptimeDB's SQL dialect (#5144)
* derive ser/de

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl method

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove deserialize

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-12-11 13:33:54 +00:00
ZonaHe
9da2e17d0e feat: update dashboard to v0.7.2 (#5141)
Co-authored-by: sunchanglong <sunchanglong@users.noreply.github.com>
2024-12-11 12:47:59 +00:00
Yohan Wal
1a8e77a480 test: part of parser test migrated from duckdb (#5125)
* test: update test

* fix: fix test
2024-12-11 09:28:13 +00:00
Zhenchi
e1e39993f7 feat(vector): add scalar add function (#5119)
* refactor: extract implicit conversion helper functions of vector

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(vector): add scalar add function

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix fmt

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-12-11 09:25:56 +00:00
Lei, HUANG
a30d918df2 perf: avoid cache during compaction (#5135)
* Revert "refactor: Avoid wrapping Option for CacheManagerRef (#4996)"

This reverts commit 42bf7e9965.

* fix: memory usage during log ingestion

* fix: fmt
2024-12-11 08:24:41 +00:00
dennis zhuang
2c4ac76754 feat: adjust WAL purge default configurations (#5107)
* feat: adjust WAL purge default configurations

* fix: config

* feat: change raft engine file_size default to 128Mib
2024-12-11 08:08:05 +00:00
jeremyhi
a6893aad42 chore: set store_key_prefix for all kvbackend (#5132) 2024-12-11 08:04:02 +00:00
discord9
d91517688a chore: fix aws_lc not in depend tree check in CI (#5121)
* chore: fix aws_lc check in CI

* chore: update lock file
2024-12-11 07:02:03 +00:00
shuiyisong
3d1b8c4fac chore: add /ready api for health checking (#5124)
* chore: add ready endpoint for health checking

* chore: add test
2024-12-11 02:56:48 +00:00
Yingwen
7c69ca0502 chore: bump main branch version to 0.12 (#5133)
chore: bump version to v0.12.0
2024-12-10 13:10:37 +00:00
160 changed files with 1149 additions and 4991 deletions


@@ -41,8 +41,8 @@ runs:
image-name: ${{ inputs.image-name }}
image-tag: ${{ inputs.version }}
docker-file: docker/ci/ubuntu/Dockerfile
amd64-artifact-name: greptime-linux-amd64-${{ inputs.version }}
arm64-artifact-name: greptime-linux-arm64-${{ inputs.version }}
amd64-artifact-name: greptime-linux-amd64-pyo3-${{ inputs.version }}
arm64-artifact-name: greptime-linux-arm64-pyo3-${{ inputs.version }}
platforms: linux/amd64,linux/arm64
push-latest-tag: ${{ inputs.push-latest-tag }}


@@ -48,7 +48,19 @@ runs:
path: /tmp/greptime-*.log
retention-days: 3
- name: Build greptime
- name: Build standard greptime
uses: ./.github/actions/build-greptime-binary
with:
base-image: ubuntu
features: pyo3_backend,servers/dashboard
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Build greptime without pyo3
if: ${{ inputs.dev-mode == 'false' }}
uses: ./.github/actions/build-greptime-binary
with:


@@ -33,6 +33,15 @@ runs:
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install PyArrow Package
shell: pwsh
run: pip install pyarrow numpy
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:


@@ -1,6 +1,9 @@
name: Check Dependencies
on:
push:
branches:
- main
pull_request:
branches:
- main


@@ -29,7 +29,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.8xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G


@@ -10,6 +10,17 @@ on:
- 'docker/**'
- '.gitignore'
- 'grafana/**'
push:
branches:
- main
paths-ignore:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
- 'grafana/**'
workflow_dispatch:
name: CI
@@ -43,7 +54,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ windows-2022, ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -57,8 +68,6 @@ jobs:
# Shares across multiple jobs
# Shares with `Clippy` job
shared-key: "check-lint"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Run cargo check
run: cargo check --locked --workspace --all-targets
@@ -69,8 +78,13 @@ jobs:
steps:
- uses: actions/checkout@v4
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "check-toml"
- name: Install taplo
run: cargo +stable install taplo-cli --version ^0.9 --locked --force
run: cargo +stable install taplo-cli --version ^0.9 --locked
- name: Run taplo
run: taplo format --check
@@ -91,15 +105,13 @@ jobs:
with:
# Shares across multiple jobs
shared-key: "build-binaries"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin --force
run: cargo install cargo-gc-bin
- name: Build greptime binaries
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
run: cargo gc -- --bin greptime --bin sqlness-runner
- name: Pack greptime binaries
shell: bash
run: |
@@ -141,12 +153,17 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz cargo-gc-bin --force
cargo +nightly install cargo-fuzz cargo-gc-bin
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -194,11 +211,16 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt update && sudo apt install -y libfuzzer-14-dev
cargo install cargo-fuzz cargo-gc-bin --force
cargo install cargo-fuzz cargo-gc-bin
- name: Download pre-built binariy
uses: actions/download-artifact@v4
with:
@@ -244,15 +266,13 @@ jobs:
with:
# Shares across multiple jobs
shared-key: "build-greptime-ci"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin --force
run: cargo install cargo-gc-bin
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
run: cargo gc --profile ci -- --bin greptime
- name: Pack greptime binary
shell: bash
run: |
@@ -308,12 +328,17 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz cargo-gc-bin --force
cargo +nightly install cargo-fuzz cargo-gc-bin
# Downloads ci image
- name: Download pre-built binariy
uses: actions/download-artifact@v4
@@ -452,12 +477,17 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "fuzz-test-targets"
- name: Set Rust Fuzz
shell: bash
run: |
sudo apt-get install -y libfuzzer-14-dev
rustup install nightly
cargo +nightly install cargo-fuzz cargo-gc-bin --force
cargo +nightly install cargo-fuzz cargo-gc-bin
# Downloads ci image
- name: Download pre-built binariy
uses: actions/download-artifact@v4
@@ -559,8 +589,8 @@ jobs:
- uses: actions/checkout@v4
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures
run: docker compose up -d --wait kafka
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -590,6 +620,11 @@ jobs:
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares across multiple jobs
shared-key: "check-rust-fmt"
- name: Check format
run: make fmt-check
@@ -611,69 +646,11 @@ jobs:
# Shares across multiple jobs
# Shares with `Check` job
shared-key: "check-lint"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Run cargo clippy
run: make clippy
conflict-check:
name: Check for conflict
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Merge Conflict Finder
uses: olivernybroe/action-conflict-finder@v4.0
test:
if: github.event_name != 'merge_group'
runs-on: ubuntu-24.04-arm
timeout-minutes: 60
needs: [conflict-check, clippy, fmt]
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: rui314/setup-mold@v1
- name: Install toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
cache: false
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares cross multiple jobs
shared-key: "coverage-test"
cache-all-crates: "true"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Run nextest cases
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
GT_MINIO_BUCKET: greptime
GT_MINIO_ACCESS_KEY_ID: superpower_ci_user
GT_MINIO_ACCESS_KEY: superpower_password
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
coverage:
if: github.event_name == 'merge_group'
if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
@@ -681,29 +658,48 @@ jobs:
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: rui314/setup-mold@v1
- uses: KyleMayes/install-llvm-action@v1
with:
version: "14.0"
- name: Install toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: llvm-tools
cache: false
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares cross multiple jobs
shared-key: "coverage-test"
save-if: ${{ github.ref == 'refs/heads/main' }}
- name: Docker Cache
uses: ScribeMD/docker-cache@0.3.7
with:
key: docker-${{ runner.os }}-coverage
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow numpy
- name: Setup etcd server
working-directory: tests-integration/fixtures/etcd
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Setup kafka server
working-directory: tests-integration/fixtures/kafka
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Setup minio
working-directory: tests-integration/fixtures/minio
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Setup postgres server
working-directory: tests-integration/fixtures/postgres
run: docker compose -f docker-compose-standalone.yml up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard -F pg_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}


@@ -27,7 +27,7 @@ on:
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.8xlarge-arm64
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G


@@ -1,6 +1,6 @@
on:
schedule:
- cron: "0 23 * * 1-4"
- cron: "0 23 * * 1-5"
workflow_dispatch:
name: Nightly CI
@@ -91,12 +91,18 @@ jobs:
uses: Swatinem/rust-cache@v2
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install PyArrow Package
run: pip install pyarrow numpy
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
distribution: Ubuntu-22.04
- name: Running tests
run: cargo nextest run -F dashboard
run: cargo nextest run -F pyo3_backend,dashboard
env:
CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
RUST_BACKTRACE: 1
@@ -108,53 +114,7 @@ jobs:
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"
## this is designed for generating cache that usable for pull requests
test-on-linux:
name: Run tests on Linux
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: rui314/setup-mold@v1
- name: Install Rust toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Rust Cache
uses: Swatinem/rust-cache@v2
with:
# Shares cross multiple jobs
shared-key: "coverage-test"
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Running tests
run: cargo nextest run -F dashboard -F pg_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
GT_MINIO_BUCKET: greptime
GT_MINIO_ACCESS_KEY_ID: superpower_ci_user
GT_MINIO_ACCESS_KEY: superpower_password
GT_MINIO_REGION: us-west-2
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
cleanbuild-linux-nix:
name: Run clean build on Linux
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
steps:


@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.11.0
NEXT_RELEASE_VERSION: v0.12.0
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
@@ -222,10 +222,18 @@ jobs:
arch: aarch64-apple-darwin
features: servers/dashboard
artifacts-dir-prefix: greptime-darwin-arm64
- os: ${{ needs.allocate-runners.outputs.macos-runner }}
arch: aarch64-apple-darwin
features: pyo3_backend,servers/dashboard
artifacts-dir-prefix: greptime-darwin-arm64-pyo3
- os: ${{ needs.allocate-runners.outputs.macos-runner }}
features: servers/dashboard
arch: x86_64-apple-darwin
artifacts-dir-prefix: greptime-darwin-amd64
- os: ${{ needs.allocate-runners.outputs.macos-runner }}
features: pyo3_backend,servers/dashboard
arch: x86_64-apple-darwin
artifacts-dir-prefix: greptime-darwin-amd64-pyo3
runs-on: ${{ matrix.os }}
outputs:
build-macos-result: ${{ steps.set-build-macos-result.outputs.build-macos-result }}
@@ -263,6 +271,10 @@ jobs:
arch: x86_64-pc-windows-msvc
features: servers/dashboard
artifacts-dir-prefix: greptime-windows-amd64
- os: ${{ needs.allocate-runners.outputs.windows-runner }}
arch: x86_64-pc-windows-msvc
features: pyo3_backend,servers/dashboard
artifacts-dir-prefix: greptime-windows-amd64-pyo3
runs-on: ${{ matrix.os }}
outputs:
build-windows-result: ${{ steps.set-build-windows-result.outputs.build-windows-result }}
@@ -436,22 +448,6 @@ jobs:
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
bump-doc-version:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-cyborg
- name: Bump doc version
working-directory: cyborg
run: pnpm tsx bin/bump-doc-version.ts
env:
VERSION: ${{ needs.allocate-runners.outputs.version }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
notification:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'push' || github.event_name == 'schedule') && always() }}
name: Send notification to Greptime team

Cargo.lock (generated, 178 changed lines)

@@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-base",
"common-decimal",
@@ -773,7 +773,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -1314,7 +1314,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"catalog",
"common-error",
@@ -1348,7 +1348,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -1684,7 +1684,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "cli"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1727,7 +1727,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tempfile",
"tokio",
@@ -1736,7 +1736,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -1763,7 +1763,7 @@ dependencies = [
"rand",
"serde_json",
"snafu 0.8.5",
"substrait 0.11.3",
"substrait 0.12.0",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1804,7 +1804,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"auth",
@@ -1864,7 +1864,7 @@ dependencies = [
"similar-asserts",
"snafu 0.8.5",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"temp-env",
"tempfile",
@@ -1916,7 +1916,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"anymap2",
"async-trait",
@@ -1938,11 +1938,11 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.11.3"
version = "0.12.0"
[[package]]
name = "common-config"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -1965,7 +1965,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-schema",
@@ -2001,7 +2001,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"bigdecimal 0.4.5",
"common-error",
@@ -2014,7 +2014,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"http 0.2.12",
"snafu 0.8.5",
@@ -2024,7 +2024,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"common-error",
@@ -2034,7 +2034,7 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"approx 0.5.1",
@@ -2078,7 +2078,7 @@ dependencies = [
[[package]]
name = "common-greptimedb-telemetry"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"common-runtime",
@@ -2095,7 +2095,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -2121,7 +2121,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"common-base",
@@ -2140,7 +2140,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-query",
@@ -2154,7 +2154,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2167,7 +2167,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"anymap2",
"api",
@@ -2224,7 +2224,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2233,11 +2233,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.11.3"
version = "0.12.0"
[[package]]
name = "common-pprof"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-error",
"common-macro",
@@ -2249,7 +2249,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -2276,7 +2276,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"common-procedure",
@@ -2284,7 +2284,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -2310,7 +2310,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arc-swap",
"common-error",
@@ -2329,7 +2329,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -2351,15 +2351,13 @@ dependencies = [
"snafu 0.8.5",
"tempfile",
"tokio",
"tokio-metrics",
"tokio-metrics-collector",
"tokio-test",
"tokio-util",
]
[[package]]
name = "common-telemetry"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"atty",
"backtrace",
@@ -2387,7 +2385,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"client",
"common-query",
@@ -2399,7 +2397,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arrow",
"chrono",
@@ -2417,7 +2415,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"build-data",
"const_format",
@@ -2427,7 +2425,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"common-base",
"common-error",
@@ -3226,7 +3224,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -3277,7 +3275,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tokio",
"toml 0.8.19",
@@ -3286,7 +3284,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arrow",
"arrow-array",
@@ -3910,7 +3908,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -4026,7 +4024,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arrow",
@@ -4085,7 +4083,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"strum 0.25.0",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tokio",
"tonic 0.11.0",
@@ -4123,7 +4121,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -4558,7 +4556,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=43ddd8dea69f4df0fe2e8b5cdc0044d2cfa35908#43ddd8dea69f4df0fe2e8b5cdc0044d2cfa35908"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a875e976441188028353f7274a46a7e6e065c5d4#a875e976441188028353f7274a46a7e6e065c5d4"
dependencies = [
"prost 0.12.6",
"serde",
@@ -5273,7 +5271,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -6123,7 +6121,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "log-query"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"chrono",
"common-error",
@@ -6135,7 +6133,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-stream",
"async-trait",
@@ -6479,7 +6477,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6506,7 +6504,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -6585,7 +6583,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -6679,7 +6677,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -7416,7 +7414,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"anyhow",
"bytes",
@@ -7669,7 +7667,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -7717,7 +7715,7 @@ dependencies = [
"sql",
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tokio",
"tokio-util",
@@ -7967,7 +7965,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -8253,7 +8251,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8415,7 +8413,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"auth",
"clap 4.5.19",
@@ -8703,7 +8701,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"async-trait",
@@ -8938,7 +8936,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-compression 0.4.13",
"async-trait",
@@ -9063,7 +9061,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -9128,7 +9126,7 @@ dependencies = [
"sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)",
"statrs",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tokio",
"tokio-stream",
@@ -10612,7 +10610,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "script"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -10904,7 +10902,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"ahash 0.8.11",
"api",
@@ -11016,7 +11014,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arc-swap",
@@ -11370,7 +11368,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"chrono",
@@ -11434,7 +11432,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -11652,7 +11650,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"aquamarine",
@@ -11814,7 +11812,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"async-trait",
"bytes",
@@ -12013,7 +12011,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"async-trait",
@@ -12290,7 +12288,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-fuzz"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"arbitrary",
"async-trait",
@@ -12333,7 +12331,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.11.3"
version = "0.12.0"
dependencies = [
"api",
"arrow-flight",
@@ -12373,7 +12371,6 @@ dependencies = [
"futures-util",
"hex",
"itertools 0.10.5",
"log-query",
"loki-api",
"meta-client",
"meta-srv",
@@ -12398,7 +12395,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.11.3",
"substrait 0.12.0",
"table",
"tempfile",
"time",
@@ -12622,9 +12619,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.42.0"
version = "1.40.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551"
checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998"
dependencies = [
"backtrace",
"bytes",
@@ -12660,31 +12657,6 @@ dependencies = [
"syn 2.0.90",
]
[[package]]
name = "tokio-metrics"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112"
dependencies = [
"futures-util",
"pin-project-lite",
"tokio",
"tokio-stream",
]
[[package]]
name = "tokio-metrics-collector"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8092b7a97ed5dac2f44892db190eca8f476ede0fa585bc87664de4151cd0b64"
dependencies = [
"lazy_static",
"parking_lot 0.12.3",
"prometheus",
"tokio",
"tokio-metrics",
]
[[package]]
name = "tokio-postgres"
version = "0.7.12"


@@ -68,7 +68,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.11.3"
version = "0.12.0"
edition = "2021"
license = "Apache-2.0"
@@ -124,7 +124,7 @@ etcd-client = "0.13"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "43ddd8dea69f4df0fe2e8b5cdc0044d2cfa35908" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a875e976441188028353f7274a46a7e6e065c5d4" }
hex = "0.4"
http = "0.2"
humantime = "2.1"


@@ -151,7 +151,7 @@
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
@@ -485,7 +485,7 @@
| `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. |
| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. |
| `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. |
| `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.<br/>- `auto`: automatically (default)<br/>- `disable`: never |
| `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.<br/>- `auto`: automatically (default)<br/>- `disable`: never |


@@ -550,7 +550,7 @@ metadata_cache_size = "64MiB"
content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "64KiB"
content_cache_page_size = "8MiB"
## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]


@@ -593,7 +593,7 @@ metadata_cache_size = "64MiB"
content_cache_size = "128MiB"
## Page size for inverted index content cache.
content_cache_page_size = "64KiB"
content_cache_page_size = "8MiB"
## The options for full-text index in Mito engine.
[region_engine.mito.fulltext_index]


@@ -1,75 +0,0 @@
/*
* Copyright 2023 Greptime Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import * as core from "@actions/core";
import {obtainClient} from "@/common";
async function triggerWorkflow(workflowId: string, version: string) {
const docsClient = obtainClient("DOCS_REPO_TOKEN")
try {
await docsClient.rest.actions.createWorkflowDispatch({
owner: "GreptimeTeam",
repo: "docs",
workflow_id: workflowId,
ref: "main",
inputs: {
version,
},
});
console.log(`Successfully triggered ${workflowId} workflow with version ${version}`);
} catch (error) {
core.setFailed(`Failed to trigger workflow: ${error.message}`);
}
}
function determineWorkflow(version: string): [string, string] {
// Check if it's a nightly version
if (version.includes('nightly')) {
return ['bump-nightly-version.yml', version];
}
const parts = version.split('.');
if (parts.length !== 3) {
throw new Error('Invalid version format');
}
// If patch version (last number) is 0, it's a major version
// Return only major.minor version
if (parts[2] === '0') {
return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
}
// Otherwise it's a patch version, use full version
return ['bump-patch-version.yml', version];
}
const version = process.env.VERSION;
if (!version) {
core.setFailed("VERSION environment variable is required");
process.exit(1);
}
// Remove 'v' prefix if exists
const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
try {
const [workflowId, apiVersion] = determineWorkflow(cleanVersion);
triggerWorkflow(workflowId, apiVersion);
} catch (error) {
core.setFailed(`Error processing version: ${error.message}`);
process.exit(1);
}
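
Note: the deleted TypeScript script above hinges on the version classification in `determineWorkflow`. Below is a re-expression of that branching, written in Rust purely for illustration; the workflow file names are copied verbatim from the script, everything else is a sketch.

```rust
// Sketch only: mirrors the branching of `determineWorkflow` in the deleted script.
fn determine_workflow(version: &str) -> Result<(&'static str, String), String> {
    // Nightly builds dispatch the nightly bump workflow with the full version string.
    if version.contains("nightly") {
        return Ok(("bump-nightly-version.yml", version.to_string()));
    }
    let parts: Vec<&str> = version.split('.').collect();
    if parts.len() != 3 {
        return Err("Invalid version format".to_string());
    }
    // A ".0" patch component means a major/minor release: pass only "major.minor".
    if parts[2] == "0" {
        return Ok(("bump-version.yml", format!("{}.{}", parts[0], parts[1])));
    }
    // Otherwise it is a patch release and the full version is forwarded.
    Ok(("bump-patch-version.yml", version.to_string()))
}

fn main() {
    assert_eq!(
        determine_workflow("0.12.0").unwrap(),
        ("bump-version.yml", "0.12".to_string())
    );
    assert_eq!(
        determine_workflow("0.11.3").unwrap(),
        ("bump-patch-version.yml", "0.11.3".to_string())
    );
}
```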


@@ -13,6 +13,8 @@ RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel \
which
# Install protoc
@@ -41,6 +43,8 @@ RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel \
which
WORKDIR /greptime


@@ -20,7 +20,10 @@ RUN --mount=type=cache,target=/var/cache/apt \
curl \
git \
build-essential \
pkg-config
pkg-config \
python3.10 \
python3.10-dev \
python3-pip
# Install Rust.
SHELL ["/bin/bash", "-c"]
@@ -43,8 +46,15 @@ ARG OUTPUT_DIR
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get \
-y install ca-certificates \
python3.10 \
python3.10-dev \
python3-pip \
curl
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
WORKDIR /greptime
COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH


@@ -7,7 +7,9 @@ RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl
centos-release-scl \
rh-python38 \
rh-python38-python-devel
ARG TARGETARCH


@@ -8,8 +8,15 @@ ARG TARGET_BIN=greptime
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ca-certificates \
python3.10 \
python3.10-dev \
python3-pip \
curl
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
ARG TARGETARCH
ADD $TARGETARCH/$TARGET_BIN /greptime/bin/


@@ -1,3 +1,3 @@
[toolchain]
channel = "nightly-2024-10-19"
components = ["rust-analyzer", "llvm-tools"]
components = ["rust-analyzer"]


@@ -14,7 +14,6 @@
import os
import re
from multiprocessing import Pool
def find_rust_files(directory):
@@ -34,11 +33,13 @@ def extract_branch_names(file_content):
return pattern.findall(file_content)
def check_snafu_in_files(branch_name, rust_files_content):
def check_snafu_in_files(branch_name, rust_files):
branch_name_snafu = f"{branch_name}Snafu"
for content in rust_files_content.values():
if branch_name_snafu in content:
return True
for rust_file in rust_files:
with open(rust_file, "r") as file:
content = file.read()
if branch_name_snafu in content:
return True
return False
@@ -48,24 +49,21 @@ def main():
for error_file in error_files:
with open(error_file, "r") as file:
branch_names.extend(extract_branch_names(file.read()))
content = file.read()
branch_names.extend(extract_branch_names(content))
# Read all rust files into memory once
rust_files_content = {}
for rust_file in other_rust_files:
with open(rust_file, "r") as file:
rust_files_content[rust_file] = file.read()
with Pool() as pool:
results = pool.starmap(
check_snafu_in_files, [(bn, rust_files_content) for bn in branch_names]
)
unused_snafu = [bn for bn, found in zip(branch_names, results) if not found]
unused_snafu = [
branch_name
for branch_name in branch_names
if not check_snafu_in_files(branch_name, other_rust_files)
]
if unused_snafu:
print("Unused error variants:")
for name in unused_snafu:
print(name)
if unused_snafu:
raise SystemExit(1)


@@ -1,5 +1,5 @@
let
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-24.11";
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable";
fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {};
pkgs = import nixpkgs { config = {}; overlays = []; };
in
@@ -11,20 +11,16 @@ pkgs.mkShell rec {
clang
gcc
protobuf
gnumake
mold
(fenix.fromToolchainFile {
dir = ./.;
})
cargo-nextest
cargo-llvm-cov
taplo
curl
];
buildInputs = with pkgs; [
libgit2
libz
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;


@@ -15,7 +15,7 @@ cache.workspace = true
catalog.workspace = true
chrono.workspace = true
clap.workspace = true
client = { workspace = true, features = ["testing"] }
client.workspace = true
common-base.workspace = true
common-catalog.workspace = true
common-config.workspace = true
@@ -56,6 +56,7 @@ tokio.workspace = true
tracing-appender.workspace = true
[dev-dependencies]
client = { workspace = true, features = ["testing"] }
common-test-util.workspace = true
common-version.workspace = true
serde.workspace = true


@@ -35,23 +35,10 @@ data = {
"bigint_other": [5, -5, 1, 5, 5],
"utf8_increase": ["a", "bb", "ccc", "dddd", "eeeee"],
"utf8_decrease": ["eeeee", "dddd", "ccc", "bb", "a"],
"timestamp_simple": [
datetime.datetime(2023, 4, 1, 20, 15, 30, 2000),
datetime.datetime.fromtimestamp(int("1629617204525777000") / 1000000000),
datetime.datetime(2023, 1, 1),
datetime.datetime(2023, 2, 1),
datetime.datetime(2023, 3, 1),
],
"date_simple": [
datetime.date(2023, 4, 1),
datetime.date(2023, 3, 1),
datetime.date(2023, 1, 1),
datetime.date(2023, 2, 1),
datetime.date(2023, 3, 1),
],
"timestamp_simple": [datetime.datetime(2023, 4, 1, 20, 15, 30, 2000), datetime.datetime.fromtimestamp(int('1629617204525777000')/1000000000), datetime.datetime(2023, 1, 1), datetime.datetime(2023, 2, 1), datetime.datetime(2023, 3, 1)],
"date_simple": [datetime.date(2023, 4, 1), datetime.date(2023, 3, 1), datetime.date(2023, 1, 1), datetime.date(2023, 2, 1), datetime.date(2023, 3, 1)]
}
def infer_schema(data):
schema = "struct<"
for key, value in data.items():
@@ -69,7 +56,7 @@ def infer_schema(data):
elif key.startswith("date"):
dt = "date"
else:
print(key, value, dt)
print(key,value,dt)
raise NotImplementedError
if key.startswith("double"):
dt = "double"
@@ -81,6 +68,7 @@ def infer_schema(data):
return schema
def _write(
schema: str,
data,


@@ -725,8 +725,7 @@ struct Tokenizer {
impl Tokenizer {
pub fn tokenize(mut self, pattern: &str) -> Result<Vec<Token>> {
let mut tokens = vec![];
let char_len = pattern.chars().count();
while self.cursor < char_len {
while self.cursor < pattern.len() {
// TODO: collect pattern into Vec<char> if this tokenizer is bottleneck in the future
let c = pattern.chars().nth(self.cursor).unwrap();
match c {
@@ -795,8 +794,7 @@ impl Tokenizer {
let mut phase = String::new();
let mut is_quote_present = false;
let char_len = pattern.chars().count();
while self.cursor < char_len {
while self.cursor < pattern.len() {
let mut c = pattern.chars().nth(self.cursor).unwrap();
match c {
@@ -901,26 +899,6 @@ mod test {
Phase("c".to_string()),
],
),
(
r#"中文 测试"#,
vec![Phase("中文".to_string()), Phase("测试".to_string())],
),
(
r#"中文 AND 测试"#,
vec![Phase("中文".to_string()), And, Phase("测试".to_string())],
),
(
r#"中文 +测试"#,
vec![Phase("中文".to_string()), Must, Phase("测试".to_string())],
),
(
r#"中文 -测试"#,
vec![
Phase("中文".to_string()),
Negative,
Phase("测试".to_string()),
],
),
];
for (query, expected) in cases {
@@ -1052,61 +1030,6 @@ mod test {
],
},
),
(
r#"中文 测试"#,
PatternAst::Binary {
op: BinaryOp::Or,
children: vec![
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "测试".to_string(),
},
],
},
),
(
r#"中文 AND 测试"#,
PatternAst::Binary {
op: BinaryOp::And,
children: vec![
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "测试".to_string(),
},
],
},
),
(
r#"中文 +测试"#,
PatternAst::Literal {
op: UnaryOp::Must,
pattern: "测试".to_string(),
},
),
(
r#"中文 -测试"#,
PatternAst::Binary {
op: BinaryOp::And,
children: vec![
PatternAst::Literal {
op: UnaryOp::Negative,
pattern: "测试".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
],
},
),
];
for (query, expected) in cases {


@@ -20,7 +20,6 @@ mod scalar_add;
mod scalar_mul;
mod sub;
pub(crate) mod sum;
mod vector_div;
mod vector_mul;
use std::sync::Arc;
@@ -46,7 +45,6 @@ impl VectorFunction {
// vector calculation
registry.register(Arc::new(vector_mul::VectorMulFunction));
registry.register(Arc::new(vector_div::VectorDivFunction));
registry.register(Arc::new(sub::SubFunction));
registry.register(Arc::new(elem_sum::ElemSumFunction));
}


@@ -1,218 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use nalgebra::DVectorView;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
const NAME: &str = "vec_div";
/// Divides corresponding elements of two vectors.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_div("[2, 4, 6]", "[2, 2, 2]")) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [1,2,3] |
/// +---------+
///
/// ```
#[derive(Debug, Clone, Default)]
pub struct VectorDivFunction;
impl Function for VectorDivFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::binary_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
let len = arg0.len();
let mut result = BinaryVectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
}
let arg0_const = as_veclit_if_const(arg0)?;
let arg1_const = as_veclit_if_const(arg1)?;
for i in 0..len {
let arg0 = match arg0_const.as_ref() {
Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
None => as_veclit(arg0.get_ref(i))?,
};
let arg1 = match arg1_const.as_ref() {
Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
None => as_veclit(arg1.get_ref(i))?,
};
if let (Some(arg0), Some(arg1)) = (arg0, arg1) {
ensure!(
arg0.len() == arg1.len(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the vectors must match for division, have: {} vs {}",
arg0.len(),
arg1.len()
),
}
);
let vec0 = DVectorView::from_slice(&arg0, arg0.len());
let vec1 = DVectorView::from_slice(&arg1, arg1.len());
let vec_res = vec0.component_div(&vec1);
let veclit = vec_res.as_slice();
let binlit = veclit_to_binlit(veclit);
result.push(Some(&binlit));
} else {
result.push_null();
}
}
Ok(result.to_vector())
}
}
impl Display for VectorDivFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::error;
use datatypes::vectors::StringVector;
use super::*;
#[test]
fn test_vector_mul() {
let func = VectorDivFunction;
let vec0 = vec![1.0, 2.0, 3.0];
let vec1 = vec![1.0, 1.0];
let (len0, len1) = (vec0.len(), vec1.len());
let input0 = Arc::new(StringVector::from(vec![Some(format!("{vec0:?}"))]));
let input1 = Arc::new(StringVector::from(vec![Some(format!("{vec1:?}"))]));
let err = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap_err();
match err {
error::Error::InvalidFuncArgs { err_msg, .. } => {
assert_eq!(
err_msg,
format!(
"The length of the vectors must match for division, have: {} vs {}",
len0, len1
)
)
}
_ => unreachable!(),
}
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[8.0,10.0,12.0]".to_string()),
Some("[7.0,8.0,9.0]".to_string()),
None,
]));
let input1 = Arc::new(StringVector::from(vec![
Some("[1.0,1.0,1.0]".to_string()),
Some("[2.0,2.0,2.0]".to_string()),
None,
Some("[3.0,3.0,3.0]".to_string()),
]));
let result = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[1.0, 2.0, 3.0]).as_slice())
);
assert_eq!(
result.get_ref(1).as_binary().unwrap(),
Some(veclit_to_binlit(&[4.0, 5.0, 6.0]).as_slice())
);
assert!(result.get_ref(2).is_null());
assert!(result.get_ref(3).is_null());
let input0 = Arc::new(StringVector::from(vec![Some("[1.0,-2.0]".to_string())]));
let input1 = Arc::new(StringVector::from(vec![Some("[0.0,0.0]".to_string())]));
let result = func
.eval(FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[f64::INFINITY as f32, f64::NEG_INFINITY as f32]).as_slice())
);
}
}
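
Note: as a standalone illustration of what the deleted `vec_div` function above computes, here is a minimal sketch that reuses the same nalgebra calls (`DVectorView::from_slice`, `component_div`) on plain `f32` slices. It is not the engine code, only the arithmetic core.

```rust
use nalgebra::DVectorView;

/// Element-wise division of two equal-length vectors, mirroring the length
/// check and the `component_div` call in `VectorDivFunction::eval`.
fn vec_div(a: &[f32], b: &[f32]) -> Option<Vec<f32>> {
    if a.len() != b.len() {
        return None; // the real function raises InvalidFuncArgs here
    }
    let va = DVectorView::from_slice(a, a.len());
    let vb = DVectorView::from_slice(b, b.len());
    Some(va.component_div(&vb).as_slice().to_vec())
}

fn main() {
    // Matches the SQL example in the doc comment: [2, 4, 6] / [2, 2, 2] = [1, 2, 3].
    assert_eq!(
        vec_div(&[2.0, 4.0, 6.0], &[2.0, 2.0, 2.0]),
        Some(vec![1.0, 2.0, 3.0])
    );
    // Division by zero follows IEEE 754, as the zero-vector test above expects.
    assert_eq!(vec_div(&[1.0], &[0.0]), Some(vec![f32::INFINITY]));
}
```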


@@ -60,7 +60,6 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
column_schema: schema,
is_key: column_def.semantic_type == SemanticType::Tag as i32,
location: parse_location(ac.location)?,
add_if_not_exists: ac.add_if_not_exists,
})
})
.collect::<Result<Vec<_>>>()?;
@@ -221,7 +220,6 @@ mod tests {
..Default::default()
}),
location: None,
add_if_not_exists: true,
}],
})),
};
@@ -242,7 +240,6 @@ mod tests {
add_column.column_schema.data_type
);
assert_eq!(None, add_column.location);
assert!(add_column.add_if_not_exists);
}
#[test]
@@ -268,7 +265,6 @@ mod tests {
location_type: LocationType::First.into(),
after_column_name: String::default(),
}),
add_if_not_exists: false,
},
AddColumn {
column_def: Some(ColumnDef {
@@ -284,7 +280,6 @@ mod tests {
location_type: LocationType::After.into(),
after_column_name: "ts".to_string(),
}),
add_if_not_exists: true,
},
],
})),
@@ -313,7 +308,6 @@ mod tests {
}),
add_column.location
);
assert!(add_column.add_if_not_exists);
let add_column = add_columns.pop().unwrap();
assert!(!add_column.is_key);
@@ -323,7 +317,6 @@ mod tests {
add_column.column_schema.data_type
);
assert_eq!(Some(AddColumnLocation::First), add_column.location);
assert!(!add_column.add_if_not_exists);
}
#[test]


@@ -299,7 +299,6 @@ mod tests {
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let memory_column = &add_columns.add_columns[1];
assert_eq!(
@@ -312,7 +311,6 @@ mod tests {
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let time_column = &add_columns.add_columns[2];
assert_eq!(
@@ -325,7 +323,6 @@ mod tests {
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let interval_column = &add_columns.add_columns[3];
assert_eq!(
@@ -338,7 +335,6 @@ mod tests {
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
let decimal_column = &add_columns.add_columns[4];
assert_eq!(
@@ -356,7 +352,6 @@ mod tests {
.unwrap()
)
);
assert!(host_column.add_if_not_exists);
}
#[test]


@@ -192,9 +192,6 @@ pub fn build_create_table_expr(
Ok(expr)
}
/// Find columns that are not present in the schema and return them as `AddColumns`
/// for adding columns automatically.
/// It always sets `add_if_not_exists` to `true` for now.
pub fn extract_new_columns(
schema: &Schema,
column_exprs: Vec<ColumnExpr>,
@@ -216,7 +213,6 @@ pub fn extract_new_columns(
AddColumn {
column_def,
location: None,
add_if_not_exists: true,
}
})
.collect::<Vec<_>>();
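
Note: a self-contained sketch of the column-extraction step shown in the hunk above, where only requested columns missing from the table schema are returned. Plain strings stand in for the real `ColumnDef`/`AddColumn` types.

```rust
use std::collections::HashSet;

// Sketch only: keep the requested columns whose names are not already in the schema.
fn extract_new_columns(existing: &[&str], requested: &[&str]) -> Vec<String> {
    let existing: HashSet<&str> = existing.iter().copied().collect();
    requested
        .iter()
        .filter(|name| !existing.contains(**name))
        .map(|name| name.to_string())
        .collect()
}

fn main() {
    // "host" is already in the schema, so only "cpu" comes back as a new column.
    assert_eq!(
        extract_new_columns(&["ts", "host"], &["host", "cpu"]),
        vec!["cpu".to_string()]
    );
}
```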


@@ -105,7 +105,7 @@ impl AlterLogicalTablesProcedure {
.context(ConvertAlterTableRequestSnafu)?;
let new_meta = table_info
.meta
.builder_with_alter_kind(table_ref.table, &request.alter_kind)
.builder_with_alter_kind(table_ref.table, &request.alter_kind, true)
.context(error::TableSnafu)?
.build()
.with_context(|_| error::BuildTableMetaSnafu {


@@ -28,13 +28,13 @@ use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSn
use common_procedure::{
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
};
use common_telemetry::{debug, error, info};
use common_telemetry::{debug, info};
use futures::future;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use store_api::storage::RegionId;
use strum::AsRefStr;
use table::metadata::{RawTableInfo, TableId, TableInfo};
use table::metadata::{RawTableInfo, TableId};
use table::table_reference::TableReference;
use crate::cache_invalidator::Context;
@@ -51,14 +51,10 @@ use crate::{metrics, ClusterId};
/// The alter table procedure
pub struct AlterTableProcedure {
/// The runtime context.
// The runtime context.
context: DdlContext,
/// The serialized data.
// The serialized data.
data: AlterTableData,
/// Cached new table metadata in the prepare step.
/// If we recover the procedure from json, then the table info value is not cached.
/// But we already validated it in the prepare step.
new_table_info: Option<TableInfo>,
}
impl AlterTableProcedure {
@@ -74,31 +70,18 @@ impl AlterTableProcedure {
Ok(Self {
context,
data: AlterTableData::new(task, table_id, cluster_id),
new_table_info: None,
})
}
pub fn from_json(json: &str, context: DdlContext) -> ProcedureResult<Self> {
let data: AlterTableData = serde_json::from_str(json).context(FromJsonSnafu)?;
Ok(AlterTableProcedure {
context,
data,
new_table_info: None,
})
Ok(AlterTableProcedure { context, data })
}
// Checks whether the table exists.
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
self.check_alter().await?;
self.fill_table_info().await?;
// Validates the request and builds the new table info.
// We need to build the new table info here because we should ensure the alteration
// is valid in `UpdateMeta` state as we already altered the region.
// Safety: `fill_table_info()` already set it.
let table_info_value = self.data.table_info_value.as_ref().unwrap();
self.new_table_info = Some(self.build_new_table_info(&table_info_value.table_info)?);
// Safety: Checked in `AlterTableProcedure::new`.
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
if matches!(alter_kind, Kind::RenameTable { .. }) {
@@ -123,14 +106,6 @@ impl AlterTableProcedure {
let leaders = find_leaders(&physical_table_route.region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());
let alter_kind = self.make_region_alter_kind()?;
info!(
"Submitting alter region requests for table {}, table_id: {}, alter_kind: {:?}",
self.data.table_ref(),
table_id,
alter_kind,
);
for datanode in leaders {
let requester = self.context.node_manager.datanode(&datanode).await;
@@ -138,7 +113,7 @@ impl AlterTableProcedure {
for region in regions {
let region_id = RegionId::new(table_id, region);
let request = self.make_alter_region_request(region_id, alter_kind.clone())?;
let request = self.make_alter_region_request(region_id)?;
debug!("Submitting {request:?} to {datanode}");
let datanode = datanode.clone();
@@ -175,15 +150,7 @@ impl AlterTableProcedure {
let table_ref = self.data.table_ref();
// Safety: checked before.
let table_info_value = self.data.table_info_value.as_ref().unwrap();
// Gets the table info from the cache or builds it.
let new_info = match &self.new_table_info {
Some(cached) => cached.clone(),
None => self.build_new_table_info(&table_info_value.table_info)
.inspect_err(|e| {
// We already check the table info in the prepare step so this should not happen.
error!(e; "Unable to build info for table {} in update metadata step, table_id: {}", table_ref, table_id);
})?,
};
let new_info = self.build_new_table_info(&table_info_value.table_info)?;
debug!(
"Starting update table: {} metadata, new table info {:?}",
@@ -207,7 +174,7 @@ impl AlterTableProcedure {
.await?;
}
info!("Updated table metadata for table {table_ref}, table_id: {table_id}, kind: {alter_kind:?}");
info!("Updated table metadata for table {table_ref}, table_id: {table_id}");
self.data.state = AlterTableState::InvalidateTableCache;
Ok(Status::executing(true))
}


@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use api::v1::alter_table_expr::Kind;
use api::v1::region::region_request::Body;
use api::v1::region::{
@@ -29,15 +27,13 @@ use crate::ddl::alter_table::AlterTableProcedure;
use crate::error::{InvalidProtoMsgSnafu, Result};
impl AlterTableProcedure {
/// Makes alter region request from existing an alter kind.
/// Region alter request always add columns if not exist.
pub(crate) fn make_alter_region_request(
&self,
region_id: RegionId,
kind: Option<alter_request::Kind>,
) -> Result<RegionRequest> {
/// Makes alter region request.
pub(crate) fn make_alter_region_request(&self, region_id: RegionId) -> Result<RegionRequest> {
// Safety: Checked in `AlterTableProcedure::new`.
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
// Safety: checked
let table_info = self.data.table_info().unwrap();
let kind = create_proto_alter_kind(table_info, alter_kind)?;
Ok(RegionRequest {
header: Some(RegionRequestHeader {
@@ -51,66 +47,45 @@ impl AlterTableProcedure {
})),
})
}
/// Makes alter kind proto that all regions can reuse.
/// Region alter request always add columns if not exist.
pub(crate) fn make_region_alter_kind(&self) -> Result<Option<alter_request::Kind>> {
// Safety: Checked in `AlterTableProcedure::new`.
let alter_kind = self.data.task.alter_table.kind.as_ref().unwrap();
// Safety: checked
let table_info = self.data.table_info().unwrap();
let kind = create_proto_alter_kind(table_info, alter_kind)?;
Ok(kind)
}
}
/// Creates region proto alter kind from `table_info` and `alter_kind`.
///
/// It always adds column if not exists and drops column if exists.
/// It skips the column if it already exists in the table.
/// Returns the kind and next column id if it adds new columns.
fn create_proto_alter_kind(
table_info: &RawTableInfo,
alter_kind: &Kind,
) -> Result<Option<alter_request::Kind>> {
match alter_kind {
Kind::AddColumns(x) => {
// Construct a set of existing columns in the table.
let existing_columns: HashSet<_> = table_info
.meta
.schema
.column_schemas
.iter()
.map(|col| &col.name)
.collect();
let mut next_column_id = table_info.meta.next_column_id;
let mut add_columns = Vec::with_capacity(x.add_columns.len());
for add_column in &x.add_columns {
let column_def = add_column
.column_def
.as_ref()
.context(InvalidProtoMsgSnafu {
err_msg: "'column_def' is absent",
})?;
let add_columns = x
.add_columns
.iter()
.map(|add_column| {
let column_def =
add_column
.column_def
.as_ref()
.context(InvalidProtoMsgSnafu {
err_msg: "'column_def' is absent",
})?;
// Skips existing columns.
if existing_columns.contains(&column_def.name) {
continue;
}
let column_id = next_column_id;
next_column_id += 1;
let column_id = next_column_id;
next_column_id += 1;
let column_def = RegionColumnDef {
column_def: Some(column_def.clone()),
column_id,
};
let column_def = RegionColumnDef {
column_def: Some(column_def.clone()),
column_id,
};
add_columns.push(AddColumn {
column_def: Some(column_def),
location: add_column.location.clone(),
});
}
Ok(AddColumn {
column_def: Some(column_def),
location: add_column.location.clone(),
})
})
.collect::<Result<Vec<_>>>()?;
Ok(Some(alter_request::Kind::AddColumns(AddColumns {
add_columns,
@@ -168,7 +143,6 @@ mod tests {
use crate::rpc::router::{Region, RegionRoute};
use crate::test_util::{new_ddl_context, MockDatanodeManager};
/// Prepares a region with schema `[ts: Timestamp, host: Tag, cpu: Field]`.
async fn prepare_ddl_context() -> (DdlContext, u64, TableId, RegionId, String) {
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager);
@@ -197,7 +171,6 @@ mod tests {
.name("cpu")
.data_type(ColumnDataType::Float64)
.semantic_type(SemanticType::Field)
.is_nullable(true)
.build()
.unwrap()
.into(),
@@ -252,16 +225,15 @@ mod tests {
name: "my_tag3".to_string(),
data_type: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: Vec::new(),
default_constraint: b"hello".to_vec(),
semantic_type: SemanticType::Tag as i32,
comment: String::new(),
..Default::default()
}),
location: Some(AddColumnLocation {
location_type: LocationType::After as i32,
after_column_name: "host".to_string(),
after_column_name: "my_tag2".to_string(),
}),
add_if_not_exists: false,
}],
})),
},
@@ -270,11 +242,8 @@ mod tests {
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure
.make_alter_region_request(region_id, alter_kind)
.unwrap()
.body
let Some(Body::Alter(alter_region_request)) =
procedure.make_alter_region_request(region_id).unwrap().body
else {
unreachable!()
};
@@ -290,7 +259,7 @@ mod tests {
name: "my_tag3".to_string(),
data_type: ColumnDataType::String as i32,
is_nullable: true,
default_constraint: Vec::new(),
default_constraint: b"hello".to_vec(),
semantic_type: SemanticType::Tag as i32,
comment: String::new(),
..Default::default()
@@ -299,7 +268,7 @@ mod tests {
}),
location: Some(AddColumnLocation {
location_type: LocationType::After as i32,
after_column_name: "host".to_string(),
after_column_name: "my_tag2".to_string(),
}),
}]
}
@@ -330,11 +299,8 @@ mod tests {
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure
.make_alter_region_request(region_id, alter_kind)
.unwrap()
.body
let Some(Body::Alter(alter_region_request)) =
procedure.make_alter_region_request(region_id).unwrap().body
else {
unreachable!()
};


@@ -23,9 +23,7 @@ use crate::key::table_info::TableInfoValue;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
impl AlterTableProcedure {
/// Builds new table info after alteration.
/// It bumps the column id of the table by the number of the add column requests.
/// So there may be holes in the column id sequence.
/// Builds new_meta
pub(crate) fn build_new_table_info(&self, table_info: &RawTableInfo) -> Result<TableInfo> {
let table_info =
TableInfo::try_from(table_info.clone()).context(error::ConvertRawTableInfoSnafu)?;
@@ -36,7 +34,7 @@ impl AlterTableProcedure {
let new_meta = table_info
.meta
.builder_with_alter_kind(table_ref.table, &request.alter_kind)
.builder_with_alter_kind(table_ref.table, &request.alter_kind, false)
.context(error::TableSnafu)?
.build()
.with_context(|_| error::BuildTableMetaSnafu {
@@ -48,9 +46,6 @@ impl AlterTableProcedure {
new_info.ident.version = table_info.ident.version + 1;
match request.alter_kind {
AlterKind::AddColumns { columns } => {
// Bumps the column id for the new columns.
// It may bump more than the actual number of columns added if there are
// existing columns, but it's fine.
new_info.meta.next_column_id += columns.len() as u32;
}
AlterKind::RenameTable { new_table_name } => {
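
Note: a tiny numeric sketch of the column-id bump described in the removed doc comments above, for the variant that skips pre-existing columns: `next_column_id` grows by the number of AddColumn requests, so skipped columns can leave unused ids ("holes") behind. The numbers below are hypothetical.

```rust
fn main() {
    let mut next_column_id: u32 = 5;  // the table already uses column ids 0..5
    let add_column_requests: u32 = 3; // ALTER TABLE adds three columns...
    let already_present: u32 = 1;     // ...but one of them already exists and is skipped

    // Bumped by the request count, not by the number of columns actually added.
    next_column_id += add_column_requests;

    let newly_assigned = add_column_requests - already_present;
    println!("assigned {newly_assigned} new ids, next_column_id is now {next_column_id}");
    // The two new columns received ids 5 and 6, yet next_column_id is 8: id 7 is a hole.
}
```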


@@ -30,8 +30,6 @@ pub struct TestAlterTableExpr {
add_columns: Vec<ColumnDef>,
#[builder(setter(into, strip_option))]
new_table_name: Option<String>,
#[builder(setter)]
add_if_not_exists: bool,
}
impl From<TestAlterTableExpr> for AlterTableExpr {
@@ -55,7 +53,6 @@ impl From<TestAlterTableExpr> for AlterTableExpr {
.map(|col| AddColumn {
column_def: Some(col),
location: None,
add_if_not_exists: value.add_if_not_exists,
})
.collect(),
})),


@@ -56,7 +56,6 @@ fn make_alter_logical_table_add_column_task(
let alter_table = alter_table
.table_name(table.to_string())
.add_columns(add_columns)
.add_if_not_exists(true)
.build()
.unwrap();


@@ -139,7 +139,7 @@ async fn test_on_submit_alter_request() {
table_name: table_name.to_string(),
kind: Some(Kind::DropColumns(DropColumns {
drop_columns: vec![DropColumn {
name: "cpu".to_string(),
name: "my_field_column".to_string(),
}],
})),
},
@@ -225,7 +225,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
table_name: table_name.to_string(),
kind: Some(Kind::DropColumns(DropColumns {
drop_columns: vec![DropColumn {
name: "cpu".to_string(),
name: "my_field_column".to_string(),
}],
})),
},
@@ -330,7 +330,6 @@ async fn test_on_update_metadata_add_columns() {
..Default::default()
}),
location: None,
add_if_not_exists: false,
}],
})),
},


@@ -28,10 +28,13 @@ pub type SchemaMetadataManagerRef = Arc<SchemaMetadataManager>;
pub struct SchemaMetadataManager {
table_id_schema_cache: TableSchemaCacheRef,
schema_cache: SchemaCacheRef,
#[cfg(any(test, feature = "testing"))]
kv_backend: crate::kv_backend::KvBackendRef,
}
impl SchemaMetadataManager {
/// Creates a new database meta
#[cfg(not(any(test, feature = "testing")))]
pub fn new(table_id_schema_cache: TableSchemaCacheRef, schema_cache: SchemaCacheRef) -> Self {
Self {
table_id_schema_cache,
@@ -39,6 +42,20 @@ impl SchemaMetadataManager {
}
}
/// Creates a new database meta
#[cfg(any(test, feature = "testing"))]
pub fn new(
kv_backend: crate::kv_backend::KvBackendRef,
table_id_schema_cache: TableSchemaCacheRef,
schema_cache: SchemaCacheRef,
) -> Self {
Self {
table_id_schema_cache,
schema_cache,
kv_backend,
}
}
/// Gets schema options by table id.
pub async fn get_schema_options_by_table_id(
&self,
@@ -63,7 +80,6 @@ impl SchemaMetadataManager {
schema_name: &str,
catalog_name: &str,
schema_value: Option<crate::key::schema_name::SchemaNameValue>,
kv_backend: crate::kv_backend::KvBackendRef,
) {
use table::metadata::{RawTableInfo, TableType};
let value = crate::key::table_info::TableInfoValue::new(RawTableInfo {
@@ -75,18 +91,19 @@ impl SchemaMetadataManager {
meta: Default::default(),
table_type: TableType::Base,
});
let table_info_manager = crate::key::table_info::TableInfoManager::new(kv_backend.clone());
let table_info_manager =
crate::key::table_info::TableInfoManager::new(self.kv_backend.clone());
let (txn, _) = table_info_manager
.build_create_txn(table_id, &value)
.unwrap();
let resp = kv_backend.txn(txn).await.unwrap();
let resp = self.kv_backend.txn(txn).await.unwrap();
assert!(resp.succeeded, "Failed to create table metadata");
let key = crate::key::schema_name::SchemaNameKey {
catalog: catalog_name,
schema: schema_name,
};
crate::key::schema_name::SchemaManager::new(kv_backend.clone())
crate::key::schema_name::SchemaManager::new(self.kv_backend.clone())
.create(key, schema_value, false)
.await
.expect("Failed to create schema metadata");


@@ -39,7 +39,3 @@ tokio-util.workspace = true
[dev-dependencies]
tokio-test = "0.4"
[target.'cfg(tokio_unstable)'.dependencies]
tokio-metrics = { version = "0.3" }
tokio-metrics-collector = { version = "0.2" }


@@ -224,6 +224,7 @@ impl DatanodeBuilder {
cache_registry.get().context(MissingCacheSnafu)?;
let schema_metadata_manager = Arc::new(SchemaMetadataManager::new(
kv_backend.clone(),
table_id_schema_cache,
schema_cache,
));


@@ -123,14 +123,6 @@ impl ColumnSchema {
self.default_constraint.as_ref()
}
/// Check if the default constraint is a impure function.
pub fn is_default_impure(&self) -> bool {
self.default_constraint
.as_ref()
.map(|c| c.is_function())
.unwrap_or(false)
}
#[inline]
pub fn metadata(&self) -> &Metadata {
&self.metadata
@@ -291,15 +283,6 @@ impl ColumnSchema {
}
}
/// Creates an impure default value for this column, only if it have a impure default constraint.
/// Otherwise, returns `Ok(None)`.
pub fn create_impure_default(&self) -> Result<Option<Value>> {
match &self.default_constraint {
Some(c) => c.create_impure_default(&self.data_type),
None => Ok(None),
}
}
/// Retrieves the fulltext options for the column.
pub fn fulltext_options(&self) -> Result<Option<FulltextOptions>> {
match self.metadata.get(FULLTEXT_KEY) {


@@ -178,63 +178,12 @@ impl ColumnDefaultConstraint {
}
}
/// Only create default vector if it's impure, i.e., it's a function.
///
/// This helps to delay creating constant default values to mito engine while also keeps impure default have consistent values
pub fn create_impure_default_vector(
&self,
data_type: &ConcreteDataType,
num_rows: usize,
) -> Result<Option<VectorRef>> {
assert!(num_rows > 0);
match self {
ColumnDefaultConstraint::Function(expr) => {
// Functions should also ensure its return value is not null when
// is_nullable is true.
match &expr[..] {
// TODO(dennis): we only supports current_timestamp right now,
// it's better to use a expression framework in future.
CURRENT_TIMESTAMP | CURRENT_TIMESTAMP_FN | NOW_FN => {
create_current_timestamp_vector(data_type, num_rows).map(Some)
}
_ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
}
}
ColumnDefaultConstraint::Value(_) => Ok(None),
}
}
/// Only create default value if it's impure, i.e., it's a function.
///
/// This helps to delay creating constant default values to mito engine while also keeps impure default have consistent values
pub fn create_impure_default(&self, data_type: &ConcreteDataType) -> Result<Option<Value>> {
match self {
ColumnDefaultConstraint::Function(expr) => {
// Functions should also ensure its return value is not null when
// is_nullable is true.
match &expr[..] {
CURRENT_TIMESTAMP | CURRENT_TIMESTAMP_FN | NOW_FN => {
create_current_timestamp(data_type).map(Some)
}
_ => error::UnsupportedDefaultExprSnafu { expr }.fail(),
}
}
ColumnDefaultConstraint::Value(_) => Ok(None),
}
}
/// Returns true if this constraint might creates NULL.
fn maybe_null(&self) -> bool {
// Once we support more functions, we may return true if given function
// could return null.
matches!(self, ColumnDefaultConstraint::Value(Value::Null))
}
/// Returns true if this constraint is a function.
pub fn is_function(&self) -> bool {
matches!(self, ColumnDefaultConstraint::Function(_))
}
}
fn create_current_timestamp(data_type: &ConcreteDataType) -> Result<Value> {


@@ -45,12 +45,17 @@ use tokio::sync::broadcast::error::TryRecvError;
use tokio::sync::{broadcast, watch, Mutex, RwLock};
pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
use crate::adapter::table_source::TableSource;
use crate::adapter::util::{
relation_desc_to_column_schemas_with_fallback, table_info_value_to_relation_desc,
};
use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::error::{
EvalSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu, InvalidQuerySnafu,
UnexpectedSnafu,
};
use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
@@ -64,7 +69,7 @@ mod util;
mod worker;
pub(crate) mod node_context;
pub(crate) mod table_source;
mod table_source;
use crate::error::Error;
use crate::utils::StateReportHandler;
@@ -124,7 +129,7 @@ pub struct FlowWorkerManager {
/// The query engine that will be used to parse the query and convert it to a dataflow plan
pub query_engine: Arc<dyn QueryEngine>,
/// Getting table name and table schema from table info manager
table_info_source: ManagedTableSource,
table_info_source: TableSource,
frontend_invoker: RwLock<Option<FrontendInvoker>>,
/// contains mapping from table name to global id, and table schema
node_context: RwLock<FlownodeContext>,
@@ -153,11 +158,11 @@ impl FlowWorkerManager {
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
) -> Self {
let srv_map = ManagedTableSource::new(
let srv_map = TableSource::new(
table_meta.table_info_manager().clone(),
table_meta.table_name_manager().clone(),
);
let node_context = FlownodeContext::new(Box::new(srv_map.clone()) as _);
let node_context = FlownodeContext::default();
let tick_manager = FlowTickManager::new();
let worker_handles = Vec::new();
FlowWorkerManager {
@@ -404,7 +409,7 @@ impl FlowWorkerManager {
) -> Result<Option<(Vec<String>, Option<usize>, Vec<ColumnSchema>)>, Error> {
if let Some(table_id) = self
.table_info_source
.get_opt_table_id_from_name(table_name)
.get_table_id_from_name(table_name)
.await?
{
let table_info = self
@@ -724,6 +729,43 @@ impl FlowWorkerManager {
query_ctx,
} = args;
let already_exist = {
let mut flag = false;
// check if the task already exists
for handle in self.worker_handles.iter() {
if handle.lock().await.contains_flow(flow_id).await? {
flag = true;
break;
}
}
flag
};
match (create_if_not_exists, or_replace, already_exist) {
// do replace
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
self.remove_flow(flow_id).await?;
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// do nothing if exists
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// create if not exists
(_, _, false) => (),
}
if create_if_not_exists {
// check if the task already exists
for handle in self.worker_handles.iter() {
if handle.lock().await.contains_flow(flow_id).await? {
return Ok(None);
}
}
}
let mut node_ctx = self.node_context.write().await;
// assign global id to source and sink table
for source in &source_table_ids {
@@ -786,9 +828,27 @@ impl FlowWorkerManager {
.fail()?,
}
}
let table_id = self
.table_info_source
.get_table_id_from_name(&sink_table_name)
.await?
.context(UnexpectedSnafu {
reason: format!("Can't get table id for table name {:?}", sink_table_name),
})?;
let table_info_value = self
.table_info_source
.get_table_info_value(&table_id)
.await?
.context(UnexpectedSnafu {
reason: format!("Can't get table info value for table id {:?}", table_id),
})?;
let real_schema = table_info_value_to_relation_desc(table_info_value)?;
node_ctx.assign_table_schema(&sink_table_name, real_schema.clone())?;
} else {
// assign inferred schema to sink table
// create sink table
node_ctx.assign_table_schema(&sink_table_name, flow_plan.schema.clone())?;
let did_create = self
.create_table_from_relation(
&format!("flow-id={flow_id}"),
@@ -837,11 +897,9 @@ impl FlowWorkerManager {
source_ids,
src_recvs: source_receivers,
expire_after,
or_replace,
create_if_not_exists,
err_collector,
};
handle.create_flow(create_request).await?;
info!("Successfully create flow with id={}", flow_id);
Ok(Some(flow_id))
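
Note: the `(create_if_not_exists, or_replace, already_exist)` match introduced in `create_flow` above collapses to four outcomes; below is a standalone sketch of just that decision table. `Action` and `decide` are hypothetical names used only for this illustration; the real code replaces, fails, returns early, or proceeds in place.

```rust
#[derive(Debug, PartialEq)]
enum Action {
    Replace,
    FailAlreadyExists,
    DoNothing,
    Create,
}

fn decide(create_if_not_exists: bool, or_replace: bool, already_exist: bool) -> Action {
    match (create_if_not_exists, or_replace, already_exist) {
        (_, true, true) => Action::Replace,       // OR REPLACE on an existing flow
        (false, false, true) => Action::FailAlreadyExists,
        (true, false, true) => Action::DoNothing, // CREATE IF NOT EXISTS and the flow exists
        (_, _, false) => Action::Create,          // flow does not exist yet
    }
}

fn main() {
    assert_eq!(decide(true, false, true), Action::DoNothing);
    assert_eq!(decide(false, true, false), Action::Create);
}
```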


@@ -24,26 +24,21 @@ use common_error::ext::BoxedError;
use common_meta::error::{ExternalSnafu, Result, UnexpectedSnafu};
use common_meta::node_manager::Flownode;
use common_telemetry::{debug, trace};
use datatypes::value::Value;
use itertools::Itertools;
use snafu::{IntoError, OptionExt, ResultExt};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
use super::util::from_proto_to_data_type;
use crate::adapter::{CreateFlowArgs, FlowWorkerManager};
use crate::error::{CreateFlowSnafu, InsertIntoFlowSnafu, InternalSnafu};
use crate::error::InternalSnafu;
use crate::metrics::METRIC_FLOW_TASK_COUNT;
use crate::repr::{self, DiffRow};
/// return a function to convert `crate::error::Error` to `common_meta::error::Error`
fn to_meta_err(
location: snafu::Location,
) -> impl FnOnce(crate::error::Error) -> common_meta::error::Error {
move |err: crate::error::Error| -> common_meta::error::Error {
common_meta::error::Error::External {
location,
source: BoxedError::new(err),
}
}
fn to_meta_err(err: crate::error::Error) -> common_meta::error::Error {
// TODO(discord9): refactor this
Err::<(), _>(BoxedError::new(err))
.with_context(|_| ExternalSnafu)
.unwrap_err()
}
#[async_trait::async_trait]
@@ -80,16 +75,11 @@ impl Flownode for FlowWorkerManager {
or_replace,
expire_after,
comment: Some(comment),
sql: sql.clone(),
sql,
flow_options,
query_ctx,
};
let ret = self
.create_flow(args)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateFlowSnafu { sql: sql.clone() })
.map_err(to_meta_err(snafu::location!()))?;
let ret = self.create_flow(args).await.map_err(to_meta_err)?;
METRIC_FLOW_TASK_COUNT.inc();
Ok(FlowResponse {
affected_flows: ret
@@ -104,7 +94,7 @@ impl Flownode for FlowWorkerManager {
})) => {
self.remove_flow(flow_id.id as u64)
.await
.map_err(to_meta_err(snafu::location!()))?;
.map_err(to_meta_err)?;
METRIC_FLOW_TASK_COUNT.dec();
Ok(Default::default())
}
@@ -122,15 +112,9 @@ impl Flownode for FlowWorkerManager {
.await
.flush_all_sender()
.await
.map_err(to_meta_err(snafu::location!()))?;
let rows_send = self
.run_available(true)
.await
.map_err(to_meta_err(snafu::location!()))?;
let row = self
.send_writeback_requests()
.await
.map_err(to_meta_err(snafu::location!()))?;
.map_err(to_meta_err)?;
let rows_send = self.run_available(true).await.map_err(to_meta_err)?;
let row = self.send_writeback_requests().await.map_err(to_meta_err)?;
debug!(
"Done to flush flow_id={:?} with {} input rows flushed, {} rows sended and {} output rows flushed",
@@ -170,41 +154,17 @@ impl Flownode for FlowWorkerManager {
// TODO(discord9): reconsider time assignment mechanism
let now = self.tick_manager.tick();
let (table_types, fetch_order) = {
let fetch_order = {
let ctx = self.node_context.read().await;
// TODO(discord9): also check schema version so that altered table can be reported
let table_schema = ctx
.table_source
.table_from_id(&table_id)
.await
.map_err(to_meta_err(snafu::location!()))?;
let default_vals = table_schema
.default_values
.iter()
.zip(table_schema.relation_desc.typ().column_types.iter())
.map(|(v, ty)| {
v.as_ref().and_then(|v| {
match v.create_default(ty.scalar_type(), ty.nullable()) {
Ok(v) => Some(v),
Err(err) => {
common_telemetry::error!(err; "Failed to create default value");
None
}
}
})
})
.collect_vec();
let table_types = table_schema
.relation_desc
.typ()
.column_types
.clone()
.into_iter()
.map(|t| t.scalar_type)
.collect_vec();
let table_col_names = table_schema.relation_desc.names;
let table_col_names = ctx
.table_repr
.get_by_table_id(&table_id)
.map(|r| r.1)
.and_then(|id| ctx.schema.get(&id))
.map(|desc| &desc.names)
.context(UnexpectedSnafu {
err_msg: format!("Table not found: {}", table_id),
})?;
let table_col_names = table_col_names
.iter().enumerate()
.map(|(idx,name)| match name {
@@ -221,80 +181,44 @@ impl Flownode for FlowWorkerManager {
.enumerate()
.map(|(i, name)| (&name.column_name, i)),
);
let fetch_order: Vec<FetchFromRow> = table_col_names
let fetch_order: Vec<usize> = table_col_names
.iter()
.zip(default_vals.into_iter())
.map(|(col_name, col_default_val)| {
name_to_col
.get(col_name)
.copied()
.map(FetchFromRow::Idx)
.or_else(|| col_default_val.clone().map(FetchFromRow::Default))
.with_context(|| UnexpectedSnafu {
err_msg: format!(
"Column not found: {}, default_value: {:?}",
col_name, col_default_val
),
})
.map(|names| {
name_to_col.get(names).copied().context(UnexpectedSnafu {
err_msg: format!("Column not found: {}", names),
})
})
.try_collect()?;
trace!("Reordering columns: {:?}", fetch_order);
(table_types, fetch_order)
if !fetch_order.iter().enumerate().all(|(i, &v)| i == v) {
trace!("Reordering columns: {:?}", fetch_order)
}
fetch_order
};
// TODO(discord9): use column instead of row
let rows: Vec<DiffRow> = rows_proto
.into_iter()
.map(|r| {
let r = repr::Row::from(r);
let reordered = fetch_order.iter().map(|i| i.fetch(&r)).collect_vec();
let reordered = fetch_order
.iter()
.map(|&i| r.inner[i].clone())
.collect_vec();
repr::Row::new(reordered)
})
.map(|r| (r, now, 1))
.collect_vec();
if let Err(err) = self
.handle_write_request(region_id.into(), rows, &table_types)
let batch_datatypes = insert_schema
.iter()
.map(from_proto_to_data_type)
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(to_meta_err)?;
self.handle_write_request(region_id.into(), rows, &batch_datatypes)
.await
{
let err = BoxedError::new(err);
let flow_ids = self
.node_context
.read()
.await
.get_flow_ids(table_id)
.into_iter()
.flatten()
.cloned()
.collect_vec();
let err = InsertIntoFlowSnafu {
region_id,
flow_ids,
}
.into_error(err);
common_telemetry::error!(err; "Failed to handle write request");
let err = to_meta_err(snafu::location!())(err);
return Err(err);
}
.map_err(|err| {
common_telemetry::error!(err;"Failed to handle write request");
to_meta_err(err)
})?;
}
Ok(Default::default())
}
}
/// Simple helper enum for fetching value from row with default value
#[derive(Debug, Clone)]
enum FetchFromRow {
Idx(usize),
Default(Value),
}
impl FetchFromRow {
/// Panic if idx is out of bound
fn fetch(&self, row: &repr::Row) -> Value {
match self {
FetchFromRow::Idx(idx) => row.get(*idx).unwrap().clone(),
FetchFromRow::Default(v) => v.clone(),
}
}
}


@@ -25,8 +25,7 @@ use snafu::{OptionExt, ResultExt};
use table::metadata::TableId;
use tokio::sync::{broadcast, mpsc, RwLock};
use crate::adapter::table_source::FlowTableSource;
use crate::adapter::{FlowId, ManagedTableSource, TableName};
use crate::adapter::{FlowId, TableName, TableSource};
use crate::error::{Error, EvalSnafu, TableNotFoundSnafu};
use crate::expr::error::InternalSnafu;
use crate::expr::{Batch, GlobalId};
@@ -34,7 +33,7 @@ use crate::metrics::METRIC_FLOW_INPUT_BUF_SIZE;
use crate::repr::{DiffRow, RelationDesc, BATCH_SIZE, BROADCAST_CAP, SEND_BUF_CAP};
/// A context that holds the information of the dataflow
#[derive(Debug)]
#[derive(Default, Debug)]
pub struct FlownodeContext {
/// mapping from source table to tasks, useful for schedule which task to run when a source table is updated
pub source_to_tasks: BTreeMap<TableId, BTreeSet<FlowId>>,
@@ -51,32 +50,13 @@ pub struct FlownodeContext {
/// note that the sink receiver should only have one, and we are using broadcast as mpsc channel here
pub sink_receiver:
BTreeMap<TableName, (mpsc::UnboundedSender<Batch>, mpsc::UnboundedReceiver<Batch>)>,
/// can query the schema of the table source, from metasrv with local cache
pub table_source: Box<dyn FlowTableSource>,
/// the schema of the table, query from metasrv or inferred from TypedPlan
pub schema: HashMap<GlobalId, RelationDesc>,
/// All the tables that have been registered in the worker
pub table_repr: IdToNameMap,
pub query_context: Option<Arc<QueryContext>>,
}
impl FlownodeContext {
pub fn new(table_source: Box<dyn FlowTableSource>) -> Self {
Self {
source_to_tasks: Default::default(),
flow_to_sink: Default::default(),
sink_to_flow: Default::default(),
source_sender: Default::default(),
sink_receiver: Default::default(),
table_source,
table_repr: Default::default(),
query_context: Default::default(),
}
}
pub fn get_flow_ids(&self, table_id: TableId) -> Option<&BTreeSet<FlowId>> {
self.source_to_tasks.get(&table_id)
}
}
/// a simple broadcast sender with backpressure, bounded capacity and blocking on send when send buf is full
/// note that it wouldn't evict old data, so it's possible to block forever if the receiver is slow
///
@@ -304,7 +284,7 @@ impl FlownodeContext {
/// Retrieves a GlobalId and table schema representing a table previously registered by calling the [register_table] function.
///
/// Returns an error if no table has been registered with the provided names
pub async fn table(&self, name: &TableName) -> Result<(GlobalId, RelationDesc), Error> {
pub fn table(&self, name: &TableName) -> Result<(GlobalId, RelationDesc), Error> {
let id = self
.table_repr
.get_by_name(name)
@@ -312,8 +292,14 @@ impl FlownodeContext {
.with_context(|| TableNotFoundSnafu {
name: name.join("."),
})?;
let schema = self.table_source.table(name).await?;
Ok((id, schema.relation_desc))
let schema = self
.schema
.get(&id)
.cloned()
.with_context(|| TableNotFoundSnafu {
name: name.join("."),
})?;
Ok((id, schema))
}
/// Assign a global id to a table, if already assigned, return the existing global id
@@ -326,7 +312,7 @@ impl FlownodeContext {
/// merely creating a mapping from table id to global id
pub async fn assign_global_id_to_table(
&mut self,
srv_map: &ManagedTableSource,
srv_map: &TableSource,
mut table_name: Option<TableName>,
table_id: Option<TableId>,
) -> Result<GlobalId, Error> {
@@ -347,9 +333,10 @@ impl FlownodeContext {
// table id is Some meaning db must have created the table
if let Some(table_id) = table_id {
let known_table_name = srv_map.get_table_name(&table_id).await?;
let (known_table_name, schema) = srv_map.get_table_name_schema(&table_id).await?;
table_name = table_name.or(Some(known_table_name));
} // if we don't have a table id, it means the database hasn't assigned one yet or we don't need it
self.schema.insert(global_id, schema);
} // if we don't have a table id, it means the database hasn't assigned one yet or we don't need it
// still update the mapping with new global id
self.table_repr.insert(table_name, table_id, global_id);
@@ -357,6 +344,26 @@ impl FlownodeContext {
}
}
/// Assign a schema to a table
///
pub fn assign_table_schema(
&mut self,
table_name: &TableName,
schema: RelationDesc,
) -> Result<(), Error> {
let gid = self
.table_repr
.get_by_name(table_name)
.map(|(_, gid)| gid)
.context(TableNotFoundSnafu {
name: format!("Table not found: {:?} in flownode cache", table_name),
})?;
self.schema.insert(gid, schema);
Ok(())
}
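// A hedged usage sketch (not part of the crate) for the two registration steps
// shown above: insert the name/id/global-id mapping first, then attach the
// inferred schema. It uses only the fields and methods visible in this context.
fn register_inferred_table(
    ctx: &mut FlownodeContext,
    name: TableName,
    table_id: TableId,
    desc: RelationDesc,
) -> Result<(), Error> {
    let gid = ctx.new_global_id();
    ctx.table_repr.insert(Some(name.clone()), Some(table_id), gid);
    // Fails with TableNotFound if the mapping above was not inserted for `name`.
    ctx.assign_table_schema(&name, desc)
}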
/// Get a new global id
pub fn new_global_id(&self) -> GlobalId {
GlobalId::User(self.table_repr.global_id_to_name_id.len() as u64)

View File

@@ -17,8 +17,6 @@
use common_error::ext::BoxedError;
use common_meta::key::table_info::{TableInfoManager, TableInfoValue};
use common_meta::key::table_name::{TableNameKey, TableNameManager};
use datatypes::schema::ColumnDefaultConstraint;
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use table::metadata::TableId;
@@ -29,82 +27,16 @@ use crate::error::{
};
use crate::repr::RelationDesc;
/// Table description, including the relation desc and default values, which is the minimal information flow needs for a table
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TableDesc {
pub relation_desc: RelationDesc,
pub default_values: Vec<Option<ColumnDefaultConstraint>>,
}
impl TableDesc {
pub fn new(
relation_desc: RelationDesc,
default_values: Vec<Option<ColumnDefaultConstraint>>,
) -> Self {
Self {
relation_desc,
default_values,
}
}
pub fn new_no_default(relation_desc: RelationDesc) -> Self {
Self {
relation_desc,
default_values: vec![],
}
}
}
/// Table source for flow, providing the table schema by table name or id
#[async_trait::async_trait]
pub trait FlowTableSource: Send + Sync + std::fmt::Debug {
async fn table_name_from_id(&self, table_id: &TableId) -> Result<TableName, Error>;
async fn table_id_from_name(&self, name: &TableName) -> Result<TableId, Error>;
/// Get the table schema by table name
async fn table(&self, name: &TableName) -> Result<TableDesc, Error> {
let id = self.table_id_from_name(name).await?;
self.table_from_id(&id).await
}
async fn table_from_id(&self, table_id: &TableId) -> Result<TableDesc, Error>;
}
/// Managed table source information, queried from the table info manager and table name manager
#[derive(Clone)]
pub struct ManagedTableSource {
/// mapping of table name <-> table id should be queried from the table info manager
pub struct TableSource {
/// for querying the `TableId -> TableName` mapping
table_info_manager: TableInfoManager,
table_name_manager: TableNameManager,
}
#[async_trait::async_trait]
impl FlowTableSource for ManagedTableSource {
async fn table_from_id(&self, table_id: &TableId) -> Result<TableDesc, Error> {
let table_info_value = self
.get_table_info_value(table_id)
.await?
.with_context(|| TableNotFoundSnafu {
name: format!("TableId = {:?}, Can't found table info", table_id),
})?;
let desc = table_info_value_to_relation_desc(table_info_value)?;
Ok(desc)
}
async fn table_name_from_id(&self, table_id: &TableId) -> Result<TableName, Error> {
self.get_table_name(table_id).await
}
async fn table_id_from_name(&self, name: &TableName) -> Result<TableId, Error> {
self.get_opt_table_id_from_name(name)
.await?
.with_context(|| TableNotFoundSnafu {
name: name.join("."),
})
}
}
impl ManagedTableSource {
impl TableSource {
pub fn new(table_info_manager: TableInfoManager, table_name_manager: TableNameManager) -> Self {
ManagedTableSource {
TableSource {
table_info_manager,
table_name_manager,
}
@@ -130,11 +62,8 @@ impl ManagedTableSource {
.map(|id| id.table_id())
}
/// If the table hasn't been created in the database, the returned table id will be None
pub async fn get_opt_table_id_from_name(
&self,
name: &TableName,
) -> Result<Option<TableId>, Error> {
/// If the table hasn't been created in the database, the returned table id will be None
pub async fn get_table_id_from_name(&self, name: &TableName) -> Result<Option<TableId>, Error> {
let ret = self
.table_name_manager
.get(TableNameKey::new(&name[0], &name[1], &name[2]))
@@ -178,7 +107,7 @@ impl ManagedTableSource {
pub async fn get_table_name_schema(
&self,
table_id: &TableId,
) -> Result<(TableName, TableDesc), Error> {
) -> Result<(TableName, RelationDesc), Error> {
let table_info_value = self
.get_table_info_value(table_id)
.await?
@@ -197,121 +126,3 @@ impl ManagedTableSource {
Ok((table_name, desc))
}
}
impl std::fmt::Debug for ManagedTableSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("KvBackendTableSource").finish()
}
}
#[cfg(test)]
pub(crate) mod test {
use std::collections::HashMap;
use datatypes::data_type::ConcreteDataType as CDT;
use super::*;
use crate::repr::{ColumnType, RelationType};
pub struct FlowDummyTableSource {
pub id_names_to_desc: Vec<(TableId, TableName, TableDesc)>,
id_to_idx: HashMap<TableId, usize>,
name_to_idx: HashMap<TableName, usize>,
}
impl Default for FlowDummyTableSource {
fn default() -> Self {
let id_names_to_desc = vec![
(
1024,
[
"greptime".to_string(),
"public".to_string(),
"numbers".to_string(),
],
TableDesc::new_no_default(
RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)])
.into_named(vec![Some("number".to_string())]),
),
),
(
1025,
[
"greptime".to_string(),
"public".to_string(),
"numbers_with_ts".to_string(),
],
TableDesc::new_no_default(
RelationType::new(vec![
ColumnType::new(CDT::uint32_datatype(), false),
ColumnType::new(CDT::timestamp_millisecond_datatype(), false),
])
.into_named(vec![Some("number".to_string()), Some("ts".to_string())]),
),
),
];
let id_to_idx = id_names_to_desc
.iter()
.enumerate()
.map(|(idx, (id, _name, _desc))| (*id, idx))
.collect();
let name_to_idx = id_names_to_desc
.iter()
.enumerate()
.map(|(idx, (_id, name, _desc))| (name.clone(), idx))
.collect();
Self {
id_names_to_desc,
id_to_idx,
name_to_idx,
}
}
}
#[async_trait::async_trait]
impl FlowTableSource for FlowDummyTableSource {
async fn table_from_id(&self, table_id: &TableId) -> Result<TableDesc, Error> {
let idx = self.id_to_idx.get(table_id).context(TableNotFoundSnafu {
name: format!("Table id = {:?}, couldn't found table desc", table_id),
})?;
let desc = self
.id_names_to_desc
.get(*idx)
.map(|x| x.2.clone())
.context(TableNotFoundSnafu {
name: format!("Table id = {:?}, couldn't found table desc", table_id),
})?;
Ok(desc)
}
async fn table_name_from_id(&self, table_id: &TableId) -> Result<TableName, Error> {
let idx = self.id_to_idx.get(table_id).context(TableNotFoundSnafu {
name: format!("Table id = {:?}, couldn't found table desc", table_id),
})?;
self.id_names_to_desc
.get(*idx)
.map(|x| x.1.clone())
.context(TableNotFoundSnafu {
name: format!("Table id = {:?}, couldn't found table desc", table_id),
})
}
async fn table_id_from_name(&self, name: &TableName) -> Result<TableId, Error> {
for (id, table_name, _desc) in &self.id_names_to_desc {
if name == table_name {
return Ok(*id);
}
}
TableNotFoundSnafu {
name: format!("Table name = {:?}, couldn't found table id", name),
}
.fail()?
}
}
impl std::fmt::Debug for FlowDummyTableSource {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("DummyTableSource").finish()
}
}
}
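A hedged sketch of a test that could live in the module above, resolving one of the pre-registered dummy tables by name; it assumes `TableName` is the three-part `[catalog, schema, table]` array used in the test data and that tokio is available for async tests.
#[tokio::test]
async fn lookup_numbers_table() {
    let source = FlowDummyTableSource::default();
    let name = [
        "greptime".to_string(),
        "public".to_string(),
        "numbers".to_string(),
    ];
    // 1024 is the id registered for `greptime.public.numbers` above.
    let id = source.table_id_from_name(&name).await.unwrap();
    assert_eq!(id, 1024);
}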

View File

@@ -27,7 +27,6 @@ use session::context::QueryContextBuilder;
use snafu::{OptionExt, ResultExt};
use table::table_reference::TableReference;
use crate::adapter::table_source::TableDesc;
use crate::adapter::{TableName, AUTO_CREATED_PLACEHOLDER_TS_COL};
use crate::error::{Error, ExternalSnafu, UnexpectedSnafu};
use crate::repr::{ColumnType, RelationDesc, RelationType};
@@ -127,7 +126,7 @@ impl FlowWorkerManager {
pub fn table_info_value_to_relation_desc(
table_info_value: TableInfoValue,
) -> Result<TableDesc, Error> {
) -> Result<RelationDesc, Error> {
let raw_schema = table_info_value.table_info.meta.schema;
let (column_types, col_names): (Vec<_>, Vec<_>) = raw_schema
.column_schemas
@@ -148,7 +147,8 @@ pub fn table_info_value_to_relation_desc(
let keys = vec![crate::repr::Key::from(key)];
let time_index = raw_schema.timestamp_index;
let relation_desc = RelationDesc {
Ok(RelationDesc {
typ: RelationType {
column_types,
keys,
@@ -157,14 +157,7 @@ pub fn table_info_value_to_relation_desc(
auto_columns: vec![],
},
names: col_names,
};
let default_values = raw_schema
.column_schemas
.iter()
.map(|c| c.default_constraint().cloned())
.collect_vec();
Ok(TableDesc::new(relation_desc, default_values))
})
}
pub fn from_proto_to_data_type(

View File

@@ -247,25 +247,15 @@ impl<'s> Worker<'s> {
src_recvs: Vec<broadcast::Receiver<Batch>>,
// TODO(discord9): set expire duration for all arrangement and compare to sys timestamp instead
expire_after: Option<repr::Duration>,
or_replace: bool,
create_if_not_exists: bool,
err_collector: ErrCollector,
) -> Result<Option<FlowId>, Error> {
let already_exist = self.task_states.contains_key(&flow_id);
match (create_if_not_exists, or_replace, already_exist) {
// if replace, ignore that old flow exists
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// already exists, and not replace, return None
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// continue as normal
(_, _, false) => (),
}
let already_exists = self.task_states.contains_key(&flow_id);
match (already_exists, create_if_not_exists) {
(true, true) => return Ok(None),
(true, false) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
(false, _) => (),
};
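// A hedged restatement (illustrative names only) of the
// (create_if_not_exists, or_replace, already_exists) decision handled above,
// pulled into a small helper so the truth table is explicit.
enum CreateDecision {
    Replace,       // or_replace overrides the existing flow
    AlreadyExists, // create_if_not_exists: silently return Ok(None)
    Error,         // neither flag set but the flow already exists
    Create,        // flow does not exist yet
}
fn decide(create_if_not_exists: bool, or_replace: bool, already_exists: bool) -> CreateDecision {
    match (create_if_not_exists, or_replace, already_exists) {
        (_, true, true) => CreateDecision::Replace,
        (true, false, true) => CreateDecision::AlreadyExists,
        (false, false, true) => CreateDecision::Error,
        (_, _, false) => CreateDecision::Create,
    }
}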
let mut cur_task_state = ActiveDataflowState::<'s> {
err_collector,
@@ -351,7 +341,6 @@ impl<'s> Worker<'s> {
source_ids,
src_recvs,
expire_after,
or_replace,
create_if_not_exists,
err_collector,
} => {
@@ -363,7 +352,6 @@ impl<'s> Worker<'s> {
&source_ids,
src_recvs,
expire_after,
or_replace,
create_if_not_exists,
err_collector,
);
@@ -410,7 +398,6 @@ pub enum Request {
source_ids: Vec<GlobalId>,
src_recvs: Vec<broadcast::Receiver<Batch>>,
expire_after: Option<repr::Duration>,
or_replace: bool,
create_if_not_exists: bool,
err_collector: ErrCollector,
},
@@ -560,7 +547,6 @@ mod test {
source_ids: src_ids,
src_recvs: vec![rx],
expire_after: None,
or_replace: false,
create_if_not_exists: true,
err_collector: ErrCollector::default(),
};

View File

@@ -492,7 +492,7 @@ impl ScalarUDFImpl for TumbleExpand {
if let Some(start_time) = opt{
if !matches!(start_time, Utf8 | Date32 | Date64 | Timestamp(_, _)){
return Err(DataFusionError::Plan(
format!("Expect start_time to either be date, timestamp or string, found {:?}", start_time)
format!("Expect start_time to either be date, timestampe or string, found {:?}", start_time)
));
}
}
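A hedged, self-contained version of the start_time type check above; the import paths are assumptions and may not match the exact re-exports used in this repository.
use arrow_schema::DataType::{self, Date32, Date64, Timestamp, Utf8};
use datafusion_common::DataFusionError;
fn check_start_time(start_time: &DataType) -> Result<(), DataFusionError> {
    if matches!(start_time, Utf8 | Date32 | Date64 | Timestamp(_, _)) {
        Ok(())
    } else {
        Err(DataFusionError::Plan(format!(
            "Expect start_time to either be date, timestamp or string, found {:?}",
            start_time
        )))
    }
}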

View File

@@ -32,27 +32,6 @@ use crate::expr::EvalError;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display(
"Failed to insert into flow: region_id={}, flow_ids={:?}",
region_id,
flow_ids
))]
InsertIntoFlow {
region_id: u64,
flow_ids: Vec<u64>,
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Error encountered while creating flow: {sql}"))]
CreateFlow {
sql: String,
source: BoxedError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -228,17 +207,16 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::Eval { .. } | Self::JoinTask { .. } | Self::Datafusion { .. } => {
StatusCode::Internal
}
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
Self::TableNotFound { .. }
| Self::TableNotFoundMeta { .. }
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
Self::InvalidQuery { .. } => StatusCode::EngineExecuteQuery,
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported

View File

@@ -57,7 +57,7 @@ pub const BROADCAST_CAP: usize = 1024;
/// The maximum capacity of the send buffer, to prevent the buffer from growing too large
pub const SEND_BUF_CAP: usize = BROADCAST_CAP * 2;
/// Flow worker will try to accumulate at least this many rows before processing them (if one second hasn't passed)
pub const BATCH_SIZE: usize = 32 * 16384;
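A minimal sketch of the accumulate-then-process policy the comment describes: a batch is flushed once it reaches BATCH_SIZE rows or once roughly one second has passed, whichever comes first. The function is illustrative, not the worker's actual loop.
use std::time::Duration;
fn should_flush(buffered_rows: usize, since_last_flush: Duration) -> bool {
    const BATCH_SIZE: usize = 32 * 16384;
    buffered_rows >= BATCH_SIZE || since_last_flush >= Duration::from_secs(1)
}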
/// Convert a value that is or can be converted to Datetime to internal timestamp

View File

@@ -50,8 +50,8 @@ use tonic::{Request, Response, Status};
use crate::adapter::{CreateFlowArgs, FlowWorkerManagerRef};
use crate::error::{
to_status_with_last_err, CacheRequiredSnafu, CreateFlowSnafu, ExternalSnafu, FlowNotFoundSnafu,
ListFlowsSnafu, ParseAddrSnafu, ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu,
to_status_with_last_err, CacheRequiredSnafu, ExternalSnafu, FlowNotFoundSnafu, ListFlowsSnafu,
ParseAddrSnafu, ShutdownServerSnafu, StartServerSnafu, UnexpectedSnafu,
};
use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
@@ -392,13 +392,7 @@ impl FlownodeBuilder {
.build(),
),
};
manager
.create_flow(args)
.await
.map_err(BoxedError::new)
.with_context(|_| CreateFlowSnafu {
sql: info.raw_sql().clone(),
})?;
manager.create_flow(args).await?;
}
Ok(cnt)

View File

@@ -173,11 +173,12 @@ mod test {
use super::*;
use crate::adapter::node_context::IdToNameMap;
use crate::adapter::table_source::test::FlowDummyTableSource;
use crate::df_optimizer::apply_df_optimizer;
use crate::expr::GlobalId;
use crate::repr::{ColumnType, RelationType};
pub fn create_test_ctx() -> FlownodeContext {
let mut schemas = HashMap::new();
let mut tri_map = IdToNameMap::new();
{
let gid = GlobalId::User(0);
@@ -186,7 +187,10 @@ mod test {
"public".to_string(),
"numbers".to_string(),
];
let schema = RelationType::new(vec![ColumnType::new(CDT::uint32_datatype(), false)]);
tri_map.insert(Some(name.clone()), Some(1024), gid);
schemas.insert(gid, schema.into_named(vec![Some("number".to_string())]));
}
{
@@ -196,16 +200,23 @@ mod test {
"public".to_string(),
"numbers_with_ts".to_string(),
];
let schema = RelationType::new(vec![
ColumnType::new(CDT::uint32_datatype(), false),
ColumnType::new(CDT::timestamp_millisecond_datatype(), false),
]);
schemas.insert(
gid,
schema.into_named(vec![Some("number".to_string()), Some("ts".to_string())]),
);
tri_map.insert(Some(name.clone()), Some(1025), gid);
}
let dummy_source = FlowDummyTableSource::default();
let mut ctx = FlownodeContext::new(Box::new(dummy_source));
ctx.table_repr = tri_map;
ctx.query_context = Some(Arc::new(QueryContext::with("greptime", "public")));
ctx
FlownodeContext {
schema: schemas,
table_repr: tri_map,
query_context: Some(Arc::new(QueryContext::with("greptime", "public"))),
..Default::default()
}
}
pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {

View File

@@ -128,11 +128,7 @@ impl AggregateExpr {
}
if args.len() != 1 {
let fn_name = extensions.get(&f.function_reference).cloned();
return not_impl_err!(
"Aggregated function (name={:?}) with multiple arguments is not supported",
fn_name
);
return not_impl_err!("Aggregated function with multiple arguments is not supported");
}
let arg = if let Some(first) = args.first() {

View File

@@ -176,7 +176,7 @@ impl TypedPlan {
}
.fail()?,
};
let table = ctx.table(&table_reference).await?;
let table = ctx.table(&table_reference)?;
let get_table = Plan::Get {
id: crate::expr::Id::Global(table.0),
};

View File

@@ -30,8 +30,8 @@ pub struct LogQuery {
pub time_filter: TimeFilter,
/// Columns with filters to query.
pub columns: Vec<ColumnFilters>,
/// Controls row skipping and fetch count for logs.
pub limit: Limit,
/// Maximum number of logs to return. If not provided, it will return all matched logs.
pub limit: Option<usize>,
/// Adjacent lines to return.
pub context: Context,
}
@@ -42,7 +42,7 @@ impl Default for LogQuery {
table: TableName::new("", "", ""),
time_filter: Default::default(),
columns: vec![],
limit: Limit::default(),
limit: None,
context: Default::default(),
}
}
@@ -266,15 +266,6 @@ pub enum Context {
Seconds(usize, usize),
}
/// Represents limit and offset parameters for query pagination.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Limit {
/// Optional number of items to skip before starting to return results
pub skip: Option<usize>,
/// Optional number of items to return after skipping
pub fetch: Option<usize>,
}
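A hedged sketch of how the skip/fetch semantics of `Limit` could be applied to an in-memory collection; the real engine would push these parameters down into the query plan instead.
fn apply_limit<T>(rows: Vec<T>, limit: &Limit) -> Vec<T> {
    rows.into_iter()
        .skip(limit.skip.unwrap_or(0))
        .take(limit.fetch.unwrap_or(usize::MAX))
        .collect()
}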
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -19,7 +19,6 @@ use object_store::util::{join_dir, with_instrument_layers};
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::SequenceNumber;
use crate::cache::write_cache::SstUploadRequest;
use crate::cache::CacheManagerRef;
@@ -165,9 +164,7 @@ impl AccessLayer {
request.metadata,
indexer,
);
writer
.write_all(request.source, request.max_sequence, write_opts)
.await?
writer.write_all(request.source, write_opts).await?
};
// Put parquet metadata to cache manager.
@@ -197,7 +194,6 @@ pub(crate) struct SstWriteRequest {
pub(crate) cache_manager: CacheManagerRef,
#[allow(dead_code)]
pub(crate) storage: Option<String>,
pub(crate) max_sequence: Option<SequenceNumber>,
/// Configs for index
pub(crate) index_options: IndexOptions,

View File

@@ -55,195 +55,6 @@ const FILE_TYPE: &str = "file";
/// Metrics type key for selector result cache.
const SELECTOR_RESULT_TYPE: &str = "selector_result";
/// Cache strategies that may only enable a subset of caches.
#[derive(Clone)]
pub enum CacheStrategy {
/// Strategy for normal operations.
/// Doesn't disable any cache.
EnableAll(CacheManagerRef),
/// Strategy for compaction.
/// Disables some caches during compaction to avoid affecting queries.
/// Enables the write cache so that the compaction can read files cached
/// in the write cache and write the compacted files back to the write cache.
Compaction(CacheManagerRef),
/// Do not use any cache.
Disabled,
}
impl CacheStrategy {
/// Calls [CacheManager::get_parquet_meta_data()].
pub async fn get_parquet_meta_data(
&self,
region_id: RegionId,
file_id: FileId,
) -> Option<Arc<ParquetMetaData>> {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager
.get_parquet_meta_data(region_id, file_id)
.await
}
CacheStrategy::Compaction(cache_manager) => {
cache_manager
.get_parquet_meta_data(region_id, file_id)
.await
}
CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::get_parquet_meta_data_from_mem_cache()].
pub fn get_parquet_meta_data_from_mem_cache(
&self,
region_id: RegionId,
file_id: FileId,
) -> Option<Arc<ParquetMetaData>> {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.get_parquet_meta_data_from_mem_cache(region_id, file_id)
}
CacheStrategy::Compaction(cache_manager) => {
cache_manager.get_parquet_meta_data_from_mem_cache(region_id, file_id)
}
CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::put_parquet_meta_data()].
pub fn put_parquet_meta_data(
&self,
region_id: RegionId,
file_id: FileId,
metadata: Arc<ParquetMetaData>,
) {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.put_parquet_meta_data(region_id, file_id, metadata);
}
CacheStrategy::Compaction(cache_manager) => {
cache_manager.put_parquet_meta_data(region_id, file_id, metadata);
}
CacheStrategy::Disabled => {}
}
}
/// Calls [CacheManager::remove_parquet_meta_data()].
pub fn remove_parquet_meta_data(&self, region_id: RegionId, file_id: FileId) {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.remove_parquet_meta_data(region_id, file_id);
}
CacheStrategy::Compaction(cache_manager) => {
cache_manager.remove_parquet_meta_data(region_id, file_id);
}
CacheStrategy::Disabled => {}
}
}
/// Calls [CacheManager::get_repeated_vector()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn get_repeated_vector(
&self,
data_type: &ConcreteDataType,
value: &Value,
) -> Option<VectorRef> {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.get_repeated_vector(data_type, value)
}
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::put_repeated_vector()].
/// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
pub fn put_repeated_vector(&self, value: Value, vector: VectorRef) {
if let CacheStrategy::EnableAll(cache_manager) = self {
cache_manager.put_repeated_vector(value, vector);
}
}
/// Calls [CacheManager::get_pages()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn get_pages(&self, page_key: &PageKey) -> Option<Arc<PageValue>> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.get_pages(page_key),
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::put_pages()].
/// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
pub fn put_pages(&self, page_key: PageKey, pages: Arc<PageValue>) {
if let CacheStrategy::EnableAll(cache_manager) = self {
cache_manager.put_pages(page_key, pages);
}
}
/// Calls [CacheManager::get_selector_result()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn get_selector_result(
&self,
selector_key: &SelectorResultKey,
) -> Option<Arc<SelectorResultValue>> {
match self {
CacheStrategy::EnableAll(cache_manager) => {
cache_manager.get_selector_result(selector_key)
}
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::put_selector_result()].
/// It does nothing if the strategy isn't [CacheStrategy::EnableAll].
pub fn put_selector_result(
&self,
selector_key: SelectorResultKey,
result: Arc<SelectorResultValue>,
) {
if let CacheStrategy::EnableAll(cache_manager) = self {
cache_manager.put_selector_result(selector_key, result);
}
}
/// Calls [CacheManager::write_cache()].
/// It returns None if the strategy is [CacheStrategy::Disabled].
pub fn write_cache(&self) -> Option<&WriteCacheRef> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.write_cache(),
CacheStrategy::Compaction(cache_manager) => cache_manager.write_cache(),
CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::index_cache()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn index_cache(&self) -> Option<&InvertedIndexCacheRef> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.index_cache(),
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::bloom_filter_index_cache()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn bloom_filter_index_cache(&self) -> Option<&BloomFilterIndexCacheRef> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.bloom_filter_index_cache(),
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
/// Calls [CacheManager::puffin_metadata_cache()].
/// It returns None if the strategy is [CacheStrategy::Compaction] or [CacheStrategy::Disabled].
pub fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> {
match self {
CacheStrategy::EnableAll(cache_manager) => cache_manager.puffin_metadata_cache(),
CacheStrategy::Compaction(_) | CacheStrategy::Disabled => None,
}
}
}
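A hedged sketch of how a caller might pick one of the strategies above based on the kind of read it is doing; the boolean parameter is illustrative, while the real code threads an operation type through the scan and compaction paths.
fn strategy_for(is_compaction: bool, cache_manager: CacheManagerRef) -> CacheStrategy {
    if is_compaction {
        // Keep the write cache usable but skip the query-oriented caches.
        CacheStrategy::Compaction(cache_manager)
    } else {
        CacheStrategy::EnableAll(cache_manager)
    }
}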
/// Manages cached data for the engine.
///
/// All caches are disabled by default.

View File

@@ -61,7 +61,7 @@ fn bloom_filter_index_content_weight((k, _): &((FileId, ColumnId), PageKey), v:
pub struct CachedBloomFilterIndexBlobReader<R> {
file_id: FileId,
column_id: ColumnId,
blob_size: u64,
file_size: u64,
inner: R,
cache: BloomFilterIndexCacheRef,
}
@@ -71,14 +71,14 @@ impl<R> CachedBloomFilterIndexBlobReader<R> {
pub fn new(
file_id: FileId,
column_id: ColumnId,
blob_size: u64,
file_size: u64,
inner: R,
cache: BloomFilterIndexCacheRef,
) -> Self {
Self {
file_id,
column_id,
blob_size,
file_size,
inner,
cache,
}
@@ -92,7 +92,7 @@ impl<R: BloomFilterReader + Send> BloomFilterReader for CachedBloomFilterIndexBl
self.cache
.get_or_load(
(self.file_id, self.column_id),
self.blob_size,
self.file_size,
offset,
size,
move |ranges| async move { inner.read_vec(&ranges).await },

View File

@@ -58,17 +58,17 @@ fn inverted_index_content_weight((k, _): &(FileId, PageKey), v: &Bytes) -> u32 {
/// Inverted index blob reader with cache.
pub struct CachedInvertedIndexBlobReader<R> {
file_id: FileId,
blob_size: u64,
file_size: u64,
inner: R,
cache: InvertedIndexCacheRef,
}
impl<R> CachedInvertedIndexBlobReader<R> {
/// Creates a new inverted index blob reader with cache.
pub fn new(file_id: FileId, blob_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self {
Self {
file_id,
blob_size,
file_size,
inner,
cache,
}
@@ -82,7 +82,7 @@ impl<R: InvertedIndexReader> InvertedIndexReader for CachedInvertedIndexBlobRead
self.cache
.get_or_load(
self.file_id,
self.blob_size,
self.file_size,
offset,
size,
move |ranges| async move { inner.read_vec(&ranges).await },
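A hedged, simplified version of the get-or-load pattern used by these cached index readers: consult an in-memory page map first, fall back to the inner reader, then populate the map. The real cache is shared, size-bounded, and keyed by file/column and page, which this sketch omits.
use std::collections::HashMap;
use std::future::Future;
async fn get_or_load<F, Fut>(
    cache: &mut HashMap<(u64, u32), Vec<u8>>,
    offset: u64,
    size: u32,
    load: F,
) -> std::io::Result<Vec<u8>>
where
    F: FnOnce(u64, u32) -> Fut,
    Fut: Future<Output = std::io::Result<Vec<u8>>>,
{
    if let Some(hit) = cache.get(&(offset, size)) {
        return Ok(hit.clone());
    }
    let bytes = load(offset, size).await?;
    cache.insert((offset, size), bytes.clone());
    Ok(bytes)
}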

View File

@@ -138,9 +138,7 @@ impl WriteCache {
indexer,
);
let sst_info = writer
.write_all(write_request.source, write_request.max_sequence, write_opts)
.await?;
let sst_info = writer.write_all(write_request.source, write_opts).await?;
timer.stop_and_record();
@@ -334,7 +332,7 @@ mod tests {
use super::*;
use crate::access_layer::OperationType;
use crate::cache::test_util::new_fs_store;
use crate::cache::{CacheManager, CacheStrategy};
use crate::cache::CacheManager;
use crate::region::options::IndexOptions;
use crate::sst::file::FileId;
use crate::sst::location::{index_file_path, sst_file_path};
@@ -377,7 +375,6 @@ mod tests {
metadata,
source,
storage: None,
max_sequence: None,
cache_manager: Default::default(),
index_options: IndexOptions::default(),
inverted_index_config: Default::default(),
@@ -471,7 +468,6 @@ mod tests {
metadata,
source,
storage: None,
max_sequence: None,
cache_manager: cache_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: Default::default(),
@@ -499,7 +495,7 @@ mod tests {
// Read metadata from write cache
let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone())
.cache(CacheStrategy::EnableAll(cache_manager.clone()));
.cache(Some(cache_manager.clone()));
let reader = builder.build().await.unwrap();
// Check parquet metadata

View File

@@ -27,7 +27,6 @@ use std::sync::Arc;
use std::time::Instant;
use api::v1::region::compact_request;
use api::v1::region::compact_request::Options;
use common_base::Plugins;
use common_meta::key::SchemaMetadataManagerRef;
use common_telemetry::{debug, error, info, warn};
@@ -41,19 +40,18 @@ use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{RegionId, TableId};
use table::predicate::Predicate;
use task::MAX_PARALLEL_COMPACTION;
use tokio::sync::mpsc::{self, Sender};
use crate::access_layer::AccessLayerRef;
use crate::cache::{CacheManagerRef, CacheStrategy};
use crate::cache::CacheManagerRef;
use crate::compaction::compactor::{CompactionRegion, CompactionVersion, DefaultCompactor};
use crate::compaction::picker::{new_picker, CompactionTask};
use crate::compaction::task::CompactionTaskImpl;
use crate::config::MitoConfig;
use crate::error::{
CompactRegionSnafu, Error, GetSchemaMetadataSnafu, ManualCompactionOverrideSnafu,
RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, RemoteCompactionSnafu, Result,
TimeRangePredicateOverflowSnafu, TimeoutSnafu,
CompactRegionSnafu, Error, GetSchemaMetadataSnafu, RegionClosedSnafu, RegionDroppedSnafu,
RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
TimeoutSnafu,
};
use crate::metrics::{COMPACTION_STAGE_ELAPSED, INFLIGHT_COMPACTION_COUNT};
use crate::read::projection::ProjectionMapper;
@@ -87,13 +85,19 @@ pub struct CompactionRequest {
pub(crate) manifest_ctx: ManifestContextRef,
pub(crate) listener: WorkerListener,
pub(crate) schema_metadata_manager: SchemaMetadataManagerRef,
pub(crate) max_parallelism: usize,
}
impl CompactionRequest {
pub(crate) fn region_id(&self) -> RegionId {
self.current_version.metadata.region_id
}
/// Push waiter to the request.
pub(crate) fn push_waiter(&mut self, mut waiter: OptionOutputTx) {
if let Some(waiter) = waiter.take_inner() {
self.waiters.push(waiter);
}
}
}
/// Compaction scheduler tracks and manages compaction tasks.
@@ -141,27 +145,10 @@ impl CompactionScheduler {
waiter: OptionOutputTx,
manifest_ctx: &ManifestContextRef,
schema_metadata_manager: SchemaMetadataManagerRef,
max_parallelism: usize,
) -> Result<()> {
if let Some(status) = self.region_status.get_mut(&region_id) {
match compact_options {
Options::Regular(_) => {
// Region is compacting. Add the waiter to pending list.
status.merge_waiter(waiter);
}
options @ Options::StrictWindow(_) => {
// Incoming compaction request is manually triggered.
status.set_pending_request(PendingCompaction {
options,
waiter,
max_parallelism,
});
info!(
"Region {} is compacting, manually compaction will be re-scheduled.",
region_id
);
}
}
// Region is compacting. Add the waiter to pending list.
status.merge_waiter(waiter);
return Ok(());
}
@@ -176,7 +163,6 @@ impl CompactionScheduler {
manifest_ctx,
self.listener.clone(),
schema_metadata_manager,
max_parallelism,
);
self.region_status.insert(region_id, status);
let result = self
@@ -198,35 +184,6 @@ impl CompactionScheduler {
return;
};
if let Some(pending_request) = std::mem::take(&mut status.pending_request) {
let PendingCompaction {
options,
waiter,
max_parallelism,
} = pending_request;
let request = status.new_compaction_request(
self.request_sender.clone(),
waiter,
self.engine_config.clone(),
self.cache_manager.clone(),
manifest_ctx,
self.listener.clone(),
schema_metadata_manager,
max_parallelism,
);
if let Err(e) = self.schedule_compaction_request(request, options).await {
error!(e; "Failed to continue pending manual compaction for region id: {}", region_id);
} else {
debug!(
"Successfully scheduled manual compaction for region id: {}",
region_id
);
}
return;
}
// We should always try to compact the region until picker returns None.
let request = status.new_compaction_request(
self.request_sender.clone(),
@@ -236,7 +193,6 @@ impl CompactionScheduler {
manifest_ctx,
self.listener.clone(),
schema_metadata_manager,
MAX_PARALLEL_COMPACTION,
);
// Try to schedule next compaction task for this region.
if let Err(e) = self
@@ -308,7 +264,6 @@ impl CompactionScheduler {
manifest_ctx,
listener,
schema_metadata_manager,
max_parallelism,
} = request;
let ttl = find_ttl(
@@ -339,7 +294,6 @@ impl CompactionScheduler {
manifest_ctx: manifest_ctx.clone(),
file_purger: None,
ttl: Some(ttl),
max_parallelism,
};
let picker_output = {
@@ -463,6 +417,27 @@ impl Drop for CompactionScheduler {
}
}
/// Pending compaction tasks.
struct PendingCompaction {
waiters: Vec<OutputTx>,
}
impl PendingCompaction {
/// Push waiter to the request.
fn push_waiter(&mut self, mut waiter: OptionOutputTx) {
if let Some(waiter) = waiter.take_inner() {
self.waiters.push(waiter);
}
}
/// Send compaction error to waiter.
fn on_failure(&mut self, region_id: RegionId, err: Arc<Error>) {
for waiter in self.waiters.drain(..) {
waiter.send(Err(err.clone()).context(CompactRegionSnafu { region_id }));
}
}
}
/// Finds TTL of table by first examine table options then database options.
async fn find_ttl(
table_id: TableId,
@@ -496,10 +471,10 @@ struct CompactionStatus {
version_control: VersionControlRef,
/// Access layer of the region.
access_layer: AccessLayerRef,
/// Pending waiters for compaction.
waiters: Vec<OutputTx>,
/// Pending compactions that are supposed to run as soon as the current compaction task finishes.
pending_request: Option<PendingCompaction>,
/// Compaction pending to schedule.
///
/// For simplicity, we merge all pending compaction requests into one.
pending_compaction: Option<PendingCompaction>,
}
impl CompactionStatus {
@@ -513,44 +488,23 @@ impl CompactionStatus {
region_id,
version_control,
access_layer,
waiters: Vec::new(),
pending_request: None,
pending_compaction: None,
}
}
/// Merge the waiter to the pending compaction.
fn merge_waiter(&mut self, mut waiter: OptionOutputTx) {
if let Some(waiter) = waiter.take_inner() {
self.waiters.push(waiter);
}
/// Merge the waiter to the pending compaction.
fn merge_waiter(&mut self, waiter: OptionOutputTx) {
let pending = self
.pending_compaction
.get_or_insert_with(|| PendingCompaction {
waiters: Vec::new(),
});
pending.push_waiter(waiter);
}
/// Set the pending compaction request, or replace the current value if one already exists.
fn set_pending_request(&mut self, pending: PendingCompaction) {
if let Some(mut prev) = self.pending_request.replace(pending) {
debug!(
"Replace pending compaction options with new request {:?} for region: {}",
prev.options, self.region_id
);
if let Some(waiter) = prev.waiter.take_inner() {
waiter.send(ManualCompactionOverrideSnafu.fail());
}
}
}
fn on_failure(mut self, err: Arc<Error>) {
for waiter in self.waiters.drain(..) {
waiter.send(Err(err.clone()).context(CompactRegionSnafu {
region_id: self.region_id,
}));
}
if let Some(pending_compaction) = self.pending_request {
pending_compaction
.waiter
.send(Err(err.clone()).context(CompactRegionSnafu {
region_id: self.region_id,
}));
fn on_failure(self, err: Arc<Error>) {
if let Some(mut pending) = self.pending_compaction {
pending.on_failure(self.region_id, err.clone());
}
}
@@ -561,36 +515,34 @@ impl CompactionStatus {
fn new_compaction_request(
&mut self,
request_sender: Sender<WorkerRequest>,
mut waiter: OptionOutputTx,
waiter: OptionOutputTx,
engine_config: Arc<MitoConfig>,
cache_manager: CacheManagerRef,
manifest_ctx: &ManifestContextRef,
listener: WorkerListener,
schema_metadata_manager: SchemaMetadataManagerRef,
max_parallelism: usize,
) -> CompactionRequest {
let current_version = CompactionVersion::from(self.version_control.current().version);
let start_time = Instant::now();
let mut waiters = Vec::with_capacity(self.waiters.len() + 1);
waiters.extend(std::mem::take(&mut self.waiters));
if let Some(waiter) = waiter.take_inner() {
waiters.push(waiter);
}
CompactionRequest {
let mut req = CompactionRequest {
engine_config,
current_version,
access_layer: self.access_layer.clone(),
request_sender: request_sender.clone(),
waiters,
waiters: Vec::new(),
start_time,
cache_manager,
manifest_ctx: manifest_ctx.clone(),
listener,
schema_metadata_manager,
max_parallelism,
};
if let Some(pending) = self.pending_compaction.take() {
req.waiters = pending.waiters;
}
req.push_waiter(waiter);
req
}
}
@@ -621,7 +573,6 @@ pub struct SerializedCompactionOutput {
struct CompactionSstReaderBuilder<'a> {
metadata: RegionMetadataRef,
sst_layer: AccessLayerRef,
cache: CacheManagerRef,
inputs: &'a [FileHandle],
append_mode: bool,
filter_deleted: bool,
@@ -635,8 +586,7 @@ impl<'a> CompactionSstReaderBuilder<'a> {
let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?)
.with_files(self.inputs.to_vec())
.with_append_mode(self.append_mode)
// We use special cache strategy for compaction.
.with_cache(CacheStrategy::Compaction(self.cache))
.with_cache(None)
.with_filter_deleted(self.filter_deleted)
// We ignore file not found error during compaction.
.with_ignore_file_not_found(true)
@@ -728,20 +678,8 @@ fn get_expired_ssts(
.collect()
}
/// Pending compaction request that is supposed to run after the current task finishes,
/// typically used for manual compactions.
struct PendingCompaction {
/// Compaction options. Currently, it can only be [StrictWindow].
pub(crate) options: compact_request::Options,
/// Waiters of pending requests.
pub(crate) waiter: OptionOutputTx,
/// Max parallelism for pending compaction.
pub(crate) max_parallelism: usize,
}
#[cfg(test)]
mod tests {
use api::v1::region::StrictWindow;
use tokio::sync::oneshot;
use super::*;
@@ -755,7 +693,7 @@ mod tests {
let (tx, _rx) = mpsc::channel(4);
let mut scheduler = env.mock_compaction_scheduler(tx);
let mut builder = VersionControlBuilder::new();
let (schema_metadata_manager, kv_backend) = mock_schema_metadata_manager();
let schema_metadata_manager = mock_schema_metadata_manager();
schema_metadata_manager
.register_region_table_info(
builder.region_id().table_id(),
@@ -763,7 +701,6 @@ mod tests {
"test_catalog",
"test_schema",
None,
kv_backend,
)
.await;
// Nothing to compact.
@@ -782,7 +719,6 @@ mod tests {
waiter,
&manifest_ctx,
schema_metadata_manager.clone(),
1,
)
.await
.unwrap();
@@ -803,7 +739,6 @@ mod tests {
waiter,
&manifest_ctx,
schema_metadata_manager,
1,
)
.await
.unwrap();
@@ -814,7 +749,6 @@ mod tests {
#[tokio::test]
async fn test_schedule_on_finished() {
common_telemetry::init_default_ut_logging();
let job_scheduler = Arc::new(VecScheduler::default());
let env = SchedulerEnv::new().await.scheduler(job_scheduler.clone());
let (tx, _rx) = mpsc::channel(4);
@@ -823,7 +757,7 @@ mod tests {
let purger = builder.file_purger();
let region_id = builder.region_id();
let (schema_metadata_manager, kv_backend) = mock_schema_metadata_manager();
let schema_metadata_manager = mock_schema_metadata_manager();
schema_metadata_manager
.register_region_table_info(
builder.region_id().table_id(),
@@ -831,7 +765,6 @@ mod tests {
"test_catalog",
"test_schema",
None,
kv_backend,
)
.await;
@@ -858,7 +791,6 @@ mod tests {
OptionOutputTx::none(),
&manifest_ctx,
schema_metadata_manager.clone(),
1,
)
.await
.unwrap();
@@ -880,119 +812,6 @@ mod tests {
purger.clone(),
);
// The task is pending.
let (tx, _rx) = oneshot::channel();
scheduler
.schedule_compaction(
region_id,
compact_request::Options::Regular(Default::default()),
&version_control,
&env.access_layer,
OptionOutputTx::new(Some(OutputTx::new(tx))),
&manifest_ctx,
schema_metadata_manager.clone(),
1,
)
.await
.unwrap();
assert_eq!(1, scheduler.region_status.len());
assert_eq!(1, job_scheduler.num_jobs());
assert!(!scheduler
.region_status
.get(&builder.region_id())
.unwrap()
.waiters
.is_empty());
// On compaction finished and schedule next compaction.
scheduler
.on_compaction_finished(region_id, &manifest_ctx, schema_metadata_manager.clone())
.await;
assert_eq!(1, scheduler.region_status.len());
assert_eq!(2, job_scheduler.num_jobs());
// 5 files for next compaction.
apply_edit(
&version_control,
&[(0, end), (20, end), (40, end), (60, end), (80, end)],
&[],
purger.clone(),
);
let (tx, _rx) = oneshot::channel();
// The task is pending.
scheduler
.schedule_compaction(
region_id,
compact_request::Options::Regular(Default::default()),
&version_control,
&env.access_layer,
OptionOutputTx::new(Some(OutputTx::new(tx))),
&manifest_ctx,
schema_metadata_manager,
1,
)
.await
.unwrap();
assert_eq!(2, job_scheduler.num_jobs());
assert!(!scheduler
.region_status
.get(&builder.region_id())
.unwrap()
.waiters
.is_empty());
}
#[tokio::test]
async fn test_manual_compaction_when_compaction_in_progress() {
common_telemetry::init_default_ut_logging();
let job_scheduler = Arc::new(VecScheduler::default());
let env = SchedulerEnv::new().await.scheduler(job_scheduler.clone());
let (tx, _rx) = mpsc::channel(4);
let mut scheduler = env.mock_compaction_scheduler(tx);
let mut builder = VersionControlBuilder::new();
let purger = builder.file_purger();
let region_id = builder.region_id();
let (schema_metadata_manager, kv_backend) = mock_schema_metadata_manager();
schema_metadata_manager
.register_region_table_info(
builder.region_id().table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
kv_backend,
)
.await;
// 5 files to compact.
let end = 1000 * 1000;
let version_control = Arc::new(
builder
.push_l0_file(0, end)
.push_l0_file(10, end)
.push_l0_file(50, end)
.push_l0_file(80, end)
.push_l0_file(90, end)
.build(),
);
let manifest_ctx = env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await;
let file_metas: Vec<_> = version_control.current().version.ssts.levels()[0]
.files
.values()
.map(|file| file.meta_ref().clone())
.collect();
// 5 files for next compaction and removes old files.
apply_edit(
&version_control,
&[(0, end), (20, end), (40, end), (60, end), (80, end)],
&file_metas,
purger.clone(),
);
scheduler
.schedule_compaction(
region_id,
@@ -1002,40 +821,17 @@ mod tests {
OptionOutputTx::none(),
&manifest_ctx,
schema_metadata_manager.clone(),
1,
)
.await
.unwrap();
// Should schedule 1 compaction.
assert_eq!(1, scheduler.region_status.len());
assert_eq!(1, job_scheduler.num_jobs());
assert!(scheduler
.region_status
.get(&region_id)
.get(&builder.region_id())
.unwrap()
.pending_request
.is_none());
// Schedule another manual compaction.
let (tx, _rx) = oneshot::channel();
scheduler
.schedule_compaction(
region_id,
compact_request::Options::StrictWindow(StrictWindow { window_seconds: 60 }),
&version_control,
&env.access_layer,
OptionOutputTx::new(Some(OutputTx::new(tx))),
&manifest_ctx,
schema_metadata_manager.clone(),
1,
)
.await
.unwrap();
assert_eq!(1, scheduler.region_status.len());
// Current job num should be 1 since compaction is in progress.
assert_eq!(1, job_scheduler.num_jobs());
let status = scheduler.region_status.get(&builder.region_id()).unwrap();
assert!(status.pending_request.is_some());
.pending_compaction
.is_some());
// On compaction finished and schedule next compaction.
scheduler
@@ -1043,8 +839,32 @@ mod tests {
.await;
assert_eq!(1, scheduler.region_status.len());
assert_eq!(2, job_scheduler.num_jobs());
let status = scheduler.region_status.get(&builder.region_id()).unwrap();
assert!(status.pending_request.is_none());
// 5 files for next compaction.
apply_edit(
&version_control,
&[(0, end), (20, end), (40, end), (60, end), (80, end)],
&[],
purger.clone(),
);
// The task is pending.
scheduler
.schedule_compaction(
region_id,
compact_request::Options::Regular(Default::default()),
&version_control,
&env.access_layer,
OptionOutputTx::none(),
&manifest_ctx,
schema_metadata_manager,
)
.await
.unwrap();
assert_eq!(2, job_scheduler.num_jobs());
assert!(scheduler
.region_status
.get(&builder.region_id())
.unwrap()
.pending_compaction
.is_some());
}
}

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::num::NonZero;
use std::sync::Arc;
use std::time::Duration;
@@ -91,12 +90,6 @@ pub struct CompactionRegion {
pub(crate) current_version: CompactionVersion,
pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
pub(crate) ttl: Option<TimeToLive>,
/// Controls the parallelism of this compaction task. Default is 1.
///
/// The parallelism is within this compaction task, not across different compaction tasks.
/// For example, different time windows of the same compaction task can be processed in parallel.
pub max_parallelism: usize,
}
/// OpenCompactionRegionRequest represents the request to open a compaction region.
@@ -105,7 +98,6 @@ pub struct OpenCompactionRegionRequest {
pub region_id: RegionId,
pub region_dir: String,
pub region_options: RegionOptions,
pub max_parallelism: usize,
}
/// Open a compaction region from a compaction request.
@@ -212,7 +204,6 @@ pub async fn open_compaction_region(
current_version,
file_purger: Some(file_purger),
ttl: Some(ttl),
max_parallelism: req.max_parallelism,
})
}
@@ -274,7 +265,6 @@ impl Compactor for DefaultCompactor {
let mut futs = Vec::with_capacity(picker_output.outputs.len());
let mut compacted_inputs =
Vec::with_capacity(picker_output.outputs.iter().map(|o| o.inputs.len()).sum());
let internal_parallelism = compaction_region.max_parallelism.max(1);
for output in picker_output.outputs.drain(..) {
compacted_inputs.extend(output.inputs.iter().map(|f| f.meta_ref().clone()));
@@ -313,17 +303,10 @@ impl Compactor for DefaultCompactor {
let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
let bloom_filter_index_config =
compaction_region.engine_config.bloom_filter_index.clone();
let max_sequence = output
.inputs
.iter()
.map(|f| f.meta_ref().sequence)
.max()
.flatten();
futs.push(async move {
let reader = CompactionSstReaderBuilder {
metadata: region_metadata.clone(),
sst_layer: sst_layer.clone(),
cache: cache_manager.clone(),
inputs: &output.inputs,
append_mode,
filter_deleted: output.filter_deleted,
@@ -341,7 +324,6 @@ impl Compactor for DefaultCompactor {
source: Source::Reader(reader),
cache_manager,
storage,
max_sequence: max_sequence.map(NonZero::get),
index_options,
inverted_index_config,
fulltext_index_config,
@@ -360,15 +342,15 @@ impl Compactor for DefaultCompactor {
index_file_size: sst_info.index_metadata.file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
sequence: max_sequence,
});
Ok(file_meta_opt)
});
}
let mut output_files = Vec::with_capacity(futs.len());
while !futs.is_empty() {
let mut task_chunk = Vec::with_capacity(internal_parallelism);
for _ in 0..internal_parallelism {
let mut task_chunk =
Vec::with_capacity(crate::compaction::task::MAX_PARALLEL_COMPACTION);
for _ in 0..crate::compaction::task::MAX_PARALLEL_COMPACTION {
if let Some(task) = futs.pop() {
task_chunk.push(common_runtime::spawn_compact(task));
}
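A hedged sketch of the chunked execution pattern in the loop above: spawn at most `max_parallel` compaction futures at a time and wait for each chunk before starting the next; error handling is omitted for brevity.
async fn run_chunked<F, T>(mut futs: Vec<F>, max_parallel: usize) -> Vec<T>
where
    F: std::future::Future<Output = T> + Send + 'static,
    T: Send + 'static,
{
    let mut results = Vec::with_capacity(futs.len());
    while !futs.is_empty() {
        let mut handles = Vec::with_capacity(max_parallel);
        for _ in 0..max_parallel {
            if let Some(fut) = futs.pop() {
                handles.push(tokio::spawn(fut));
            }
        }
        for handle in handles {
            if let Ok(value) = handle.await {
                results.push(value);
            }
        }
    }
    results
}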

View File

@@ -32,7 +32,7 @@ use crate::request::{
use crate::worker::WorkerListener;
/// Maximum number of compaction tasks in parallel.
pub const MAX_PARALLEL_COMPACTION: usize = 1;
pub const MAX_PARALLEL_COMPACTION: usize = 8;
pub(crate) struct CompactionTaskImpl {
pub compaction_region: CompactionRegion,

View File

@@ -39,7 +39,6 @@ pub fn new_file_handle(
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
sequence: None,
},
file_purger,
)
@@ -64,7 +63,6 @@ pub(crate) fn new_file_handles(file_specs: &[(i64, i64, u64)]) -> Vec<FileHandle
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
sequence: None,
},
file_purger.clone(),
)

View File

@@ -760,7 +760,6 @@ mod tests {
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
sequence: None,
},
Arc::new(NoopFilePurger),
)

View File

@@ -443,7 +443,7 @@ impl Default for InvertedIndexConfig {
intermediate_path: String::new(),
metadata_cache_size: ReadableSize::mb(64),
content_cache_size: ReadableSize::mb(128),
content_cache_page_size: ReadableSize::kb(64),
content_cache_page_size: ReadableSize::mb(8),
};
if let Some(sys_memory) = common_config::utils::get_sys_total_memory() {

View File

@@ -84,7 +84,6 @@ use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};
use tokio::sync::{oneshot, Semaphore};
use crate::cache::CacheStrategy;
use crate::config::MitoConfig;
use crate::error::{
InvalidRequestSnafu, JoinSnafu, RecvSnafu, RegionNotFoundSnafu, Result, SerdeJsonSnafu,
@@ -429,7 +428,7 @@ impl EngineInner {
version,
region.access_layer.clone(),
request,
CacheStrategy::EnableAll(cache_manager),
Some(cache_manager),
)
.with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
.with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())

View File

@@ -116,7 +116,6 @@ async fn test_alter_region() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -211,7 +210,6 @@ async fn test_put_after_alter() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -317,7 +315,6 @@ async fn test_alter_region_retry() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -377,7 +374,6 @@ async fn test_alter_on_flushing() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -481,7 +477,6 @@ async fn test_alter_column_fulltext_options() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -599,7 +594,6 @@ async fn test_alter_region_ttl_options() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
engine
@@ -650,7 +644,6 @@ async fn test_write_stall_on_altering() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -104,7 +104,6 @@ async fn test_append_mode_compaction() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -580,7 +580,7 @@ async fn test_region_usage() {
flush_region(&engine, region_id, None).await;
let region_stat = region.region_statistic();
assert!(region_stat.sst_size > 0); // Chief says this assert can ensure the size is counted.
assert_eq!(region_stat.sst_size, 2790);
assert_eq!(region_stat.num_rows, 10);
// region total usage

View File

@@ -12,20 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::ops::Range;
use std::sync::Arc;
use std::time::Duration;
use api::v1::{ColumnSchema, Rows};
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use datatypes::prelude::ScalarVector;
use datatypes::vectors::TimestampMillisecondVector;
use store_api::region_engine::{RegionEngine, RegionRole};
use store_api::region_request::AlterKind::SetRegionOptions;
use store_api::region_request::{
RegionAlterRequest, RegionCompactRequest, RegionDeleteRequest, RegionFlushRequest,
RegionOpenRequest, RegionRequest, SetRegionOption,
RegionCompactRequest, RegionDeleteRequest, RegionFlushRequest, RegionRequest,
};
use store_api::storage::{RegionId, ScanRequest};
use tokio::sync::Notify;
@@ -123,7 +119,6 @@ async fn test_compaction_region() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -195,7 +190,6 @@ async fn test_compaction_region_with_overlapping() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -251,7 +245,6 @@ async fn test_compaction_region_with_overlapping_delete_all() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -326,7 +319,6 @@ async fn test_readonly_during_compaction() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -382,307 +374,3 @@ async fn test_readonly_during_compaction() {
let vec = collect_stream_ts(stream).await;
assert_eq!((0..20).map(|v| v * 1000).collect::<Vec<_>>(), vec);
}
#[tokio::test]
async fn test_compaction_update_time_window() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
env.get_schema_metadata_manager()
.register_region_table_info(
region_id.table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.insert_option("compaction.twcs.max_active_window_runs", "2")
.insert_option("compaction.twcs.max_active_window_files", "2")
.insert_option("compaction.twcs.max_inactive_window_runs", "2")
.insert_option("compaction.twcs.max_inactive_window_files", "2")
.build();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Flush 3 SSTs for compaction.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 1200..2400).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 2400..3600).await; // window 3600
let result = engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
assert_eq!(result.affected_rows, 0);
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(0, scanner.num_memtables());
// We keep at most two files.
assert_eq!(
2,
scanner.num_files(),
"unexpected files: {:?}",
scanner.file_ids()
);
// Flush a new SST and the time window is applied.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
// Puts window 7200.
let rows = Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 3600, 4000, 0),
};
put_rows(&engine, region_id, rows).await;
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(1, scanner.num_memtables());
let stream = scanner.scan().await.unwrap();
let vec = collect_stream_ts(stream).await;
assert_eq!((0..4000).map(|v| v * 1000).collect::<Vec<_>>(), vec);
// Puts window 3600.
let rows = Rows {
schema: column_schemas.clone(),
rows: build_rows_for_key("a", 2400, 3600, 0),
};
put_rows(&engine, region_id, rows).await;
let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
assert_eq!(2, scanner.num_memtables());
let stream = scanner.scan().await.unwrap();
let vec = collect_stream_ts(stream).await;
assert_eq!((0..4000).map(|v| v * 1000).collect::<Vec<_>>(), vec);
}
#[tokio::test]
async fn test_change_region_compaction_window() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
env.get_schema_metadata_manager()
.register_region_table_info(
region_id.table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.insert_option("compaction.twcs.max_active_window_runs", "1")
.insert_option("compaction.twcs.max_active_window_files", "1")
.insert_option("compaction.twcs.max_inactive_window_runs", "1")
.insert_option("compaction.twcs.max_inactive_window_files", "1")
.build();
let region_dir = request.region_dir.clone();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Flush 2 SSTs for compaction.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 1200..2400).await; // window 3600
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Put window 7200
put_and_flush(&engine, region_id, &column_schemas, 4000..5000).await; // window 3600
// Check compaction window.
let region = engine.get_region(region_id).unwrap();
{
let version = region.version();
assert_eq!(
Some(Duration::from_secs(3600)),
version.compaction_time_window,
);
assert!(version.options.compaction.time_window().is_none());
}
// Change compaction window.
let request = RegionRequest::Alter(RegionAlterRequest {
schema_version: region.metadata().schema_version,
kind: SetRegionOptions {
options: vec![SetRegionOption::Twsc(
"compaction.twcs.time_window".to_string(),
"2h".to_string(),
)],
},
});
engine.handle_request(region_id, request).await.unwrap();
// Compaction again. It should compacts window 3600 and 7200
// into 7200.
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Check compaction window.
{
let region = engine.get_region(region_id).unwrap();
let version = region.version();
assert_eq!(
Some(Duration::from_secs(7200)),
version.compaction_time_window,
);
assert_eq!(
Some(Duration::from_secs(7200)),
version.options.compaction.time_window()
);
}
// Reopen region.
let engine = env.reopen_engine(engine, MitoConfig::default()).await;
engine
.handle_request(
region_id,
RegionRequest::Open(RegionOpenRequest {
engine: String::new(),
region_dir,
options: Default::default(),
skip_wal_replay: false,
}),
)
.await
.unwrap();
// Check compaction window.
{
let region = engine.get_region(region_id).unwrap();
let version = region.version();
assert_eq!(
Some(Duration::from_secs(7200)),
version.compaction_time_window,
);
// We open the region without options, so the time window should be None.
assert!(version.options.compaction.time_window().is_none());
}
}
#[tokio::test]
async fn test_open_overwrite_compaction_window() {
common_telemetry::init_default_ut_logging();
let mut env = TestEnv::new();
let engine = env.create_engine(MitoConfig::default()).await;
let region_id = RegionId::new(1, 1);
env.get_schema_metadata_manager()
.register_region_table_info(
region_id.table_id(),
"test_table",
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()
.insert_option("compaction.type", "twcs")
.insert_option("compaction.twcs.max_active_window_runs", "1")
.insert_option("compaction.twcs.max_active_window_files", "1")
.insert_option("compaction.twcs.max_inactive_window_runs", "1")
.insert_option("compaction.twcs.max_inactive_window_files", "1")
.build();
let region_dir = request.region_dir.clone();
let column_schemas = request
.column_metadatas
.iter()
.map(column_metadata_to_column_schema)
.collect::<Vec<_>>();
engine
.handle_request(region_id, RegionRequest::Create(request))
.await
.unwrap();
// Flush 2 SSTs for compaction.
put_and_flush(&engine, region_id, &column_schemas, 0..1200).await; // window 3600
put_and_flush(&engine, region_id, &column_schemas, 1200..2400).await; // window 3600
engine
.handle_request(
region_id,
RegionRequest::Compact(RegionCompactRequest::default()),
)
.await
.unwrap();
// Check compaction window.
{
let region = engine.get_region(region_id).unwrap();
let version = region.version();
assert_eq!(
Some(Duration::from_secs(3600)),
version.compaction_time_window,
);
assert!(version.options.compaction.time_window().is_none());
}
// Reopen region.
let options = HashMap::from([
("compaction.type".to_string(), "twcs".to_string()),
("compaction.twcs.time_window".to_string(), "2h".to_string()),
]);
let engine = env.reopen_engine(engine, MitoConfig::default()).await;
engine
.handle_request(
region_id,
RegionRequest::Open(RegionOpenRequest {
engine: String::new(),
region_dir,
options,
skip_wal_replay: false,
}),
)
.await
.unwrap();
// Check compaction window.
{
let region = engine.get_region(region_id).unwrap();
let version = region.version();
assert_eq!(
Some(Duration::from_secs(7200)),
version.compaction_time_window,
);
assert_eq!(
Some(Duration::from_secs(7200)),
version.options.compaction.time_window()
);
}
}
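Both tests assert the same precedence rule, which is also visible in the VersionBuilder::build diff further below: an explicit compaction.twcs.time_window region option wins, otherwise the window inferred from compaction is kept. A small self-contained sketch of that rule (the real builder also logs when an inferred window is overwritten):

use std::time::Duration;

fn effective_window(option: Option<Duration>, inferred: Option<Duration>) -> Option<Duration> {
    // Region options take precedence over the window inferred from previous compactions.
    option.or(inferred)
}

fn main() {
    let secs = |s| Some(Duration::from_secs(s));
    // test_open_overwrite_compaction_window: the "2h" open option overrides the inferred 1h window.
    assert_eq!(secs(7200), effective_window(secs(7200), secs(3600)));
    // Reopening without options keeps the inferred window.
    assert_eq!(secs(7200), effective_window(None, secs(7200)));
}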

View File

@@ -17,7 +17,6 @@ use std::time::Duration;
use api::v1::Rows;
use common_meta::key::SchemaMetadataManager;
use common_meta::kv_backend::KvBackendRef;
use object_store::util::join_path;
use store_api::region_engine::RegionEngine;
use store_api::region_request::{RegionDropRequest, RegionRequest};
@@ -50,7 +49,6 @@ async fn test_engine_drop_region() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -104,7 +102,6 @@ async fn test_engine_drop_region_for_custom_store() {
async fn setup(
engine: &MitoEngine,
schema_metadata_manager: &SchemaMetadataManager,
kv_backend: &KvBackendRef,
region_id: RegionId,
storage_name: &str,
) {
@@ -126,7 +123,6 @@ async fn test_engine_drop_region_for_custom_store() {
"test_catalog",
"test_schema",
None,
kv_backend.clone(),
)
.await;
@@ -149,26 +145,17 @@ async fn test_engine_drop_region_for_custom_store() {
.await;
let schema_metadata_manager = env.get_schema_metadata_manager();
let object_store_manager = env.get_object_store_manager().unwrap();
let kv_backend = env.get_kv_backend();
let global_region_id = RegionId::new(1, 1);
setup(
&engine,
&schema_metadata_manager,
&kv_backend,
global_region_id,
"default",
)
.await;
let custom_region_id = RegionId::new(2, 1);
setup(
&engine,
&schema_metadata_manager,
&kv_backend,
custom_region_id,
"Gcs",
)
.await;
setup(&engine, &schema_metadata_manager, custom_region_id, "Gcs").await;
let global_region = engine.get_region(global_region_id).unwrap();
let global_region_dir = global_region.access_layer.region_dir().to_string();

View File

@@ -72,7 +72,6 @@ async fn test_edit_region_schedule_compaction() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
engine

View File

@@ -40,7 +40,6 @@ async fn test_scan_without_filtering_deleted() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()

View File

@@ -52,7 +52,6 @@ async fn test_manual_flush() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -110,7 +109,6 @@ async fn test_flush_engine() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -180,7 +178,6 @@ async fn test_write_stall() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new().build();
@@ -254,7 +251,6 @@ async fn test_flush_empty() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new().build();
@@ -299,7 +295,6 @@ async fn test_flush_reopen_region(factory: Option<LogStoreFactory>) {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -420,7 +415,6 @@ async fn test_auto_flush_engine() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -490,7 +484,6 @@ async fn test_flush_workers() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -104,7 +104,6 @@ async fn test_merge_mode_compaction() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -252,7 +252,6 @@ async fn test_open_region_skip_wal_replay() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
@@ -442,7 +441,6 @@ async fn test_open_compaction_region() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new().build();
@@ -464,7 +462,6 @@ async fn test_open_compaction_region() {
region_id,
region_dir: region_dir.clone(),
region_options: RegionOptions::default(),
max_parallelism: 1,
};
let compaction_region = open_compaction_region(

View File

@@ -84,7 +84,6 @@ async fn test_parallel_scan() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -159,7 +159,6 @@ async fn test_prune_memtable() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -36,7 +36,6 @@ async fn test_last_row(append_mode: bool) {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;
let request = CreateRequestBuilder::new()

View File

@@ -159,7 +159,6 @@ async fn test_engine_truncate_after_flush() {
"test_catalog",
"test_schema",
None,
env.get_kv_backend(),
)
.await;

View File

@@ -925,23 +925,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Unexpected impure default value with region_id: {}, column: {}, default_value: {}",
region_id,
column,
default_value
))]
UnexpectedImpureDefault {
#[snafu(implicit)]
location: Location,
region_id: RegionId,
column: String,
default_value: String,
},
#[snafu(display("Manual compaction is overridden by following operations."))]
ManualCompactionOverride {},
}
pub type Result<T, E = Error> = std::result::Result<T, E>;
@@ -981,8 +964,7 @@ impl ErrorExt for Error {
| InvalidParquet { .. }
| OperateAbortedIndex { .. }
| UnexpectedReplay { .. }
| IndexEncodeNull { .. }
| UnexpectedImpureDefault { .. } => StatusCode::Unexpected,
| IndexEncodeNull { .. } => StatusCode::Unexpected,
RegionNotFound { .. } => StatusCode::RegionNotFound,
ObjectStoreNotFound { .. }
| InvalidScanIndex { .. }
@@ -1085,8 +1067,6 @@ impl ErrorExt for Error {
PushBloomFilterValue { source, .. } | BloomFilterFinish { source, .. } => {
source.status_code()
}
ManualCompactionOverride {} => StatusCode::Cancelled,
}
}
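The hunks above drop the UnexpectedImpureDefault and ManualCompactionOverride variants together with their status-code mappings. For readers unfamiliar with the pattern, a toy sketch with its own error type (not the repo's, and a plain string in place of StatusCode), assuming only the snafu derive:

use snafu::Snafu;

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Manual compaction is overridden by following operations"))]
    ManualCompactionOverride,
}

// Stand-in for ErrorExt::status_code; the real mapping returns StatusCode::Cancelled.
fn status_code(err: &Error) -> &'static str {
    match err {
        Error::ManualCompactionOverride => "Cancelled",
    }
}

fn main() {
    let err = Error::ManualCompactionOverride;
    assert_eq!("Cancelled", status_code(&err));
    assert_eq!(
        "Manual compaction is overridden by following operations",
        err.to_string()
    );
}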

View File

@@ -15,7 +15,6 @@
//! Flush related utilities and structs.
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
@@ -346,7 +345,6 @@ impl RegionFlushTask {
continue;
}
let max_sequence = mem.stats().max_sequence();
let file_id = FileId::random();
let iter = mem.iter(None, None)?;
let source = Source::Iter(iter);
@@ -359,7 +357,6 @@ impl RegionFlushTask {
source,
cache_manager: self.cache_manager.clone(),
storage: version.options.storage.clone(),
max_sequence: Some(max_sequence),
index_options: self.index_options.clone(),
inverted_index_config: self.engine_config.inverted_index.clone(),
fulltext_index_config: self.engine_config.fulltext_index.clone(),
@@ -385,7 +382,6 @@ impl RegionFlushTask {
index_file_size: sst_info.index_metadata.file_size,
num_rows: sst_info.num_rows as u64,
num_row_groups: sst_info.num_row_groups,
sequence: NonZeroU64::new(max_sequence),
};
file_metas.push(file_meta);
}
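The sequence field being dropped here (and from FileMeta further below) is an Option<NonZeroU64>, so a max sequence of zero is stored as None; a quick self-contained sketch of that encoding:

use std::num::NonZeroU64;

fn main() {
    // An SST whose max sequence is unknown (0) carries no sequence.
    assert_eq!(None, NonZeroU64::new(0));
    // Otherwise the max sequence of its rows is preserved.
    assert_eq!(42, NonZeroU64::new(42).unwrap().get());
}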

View File

@@ -225,7 +225,6 @@ async fn checkpoint_with_different_compression_types() {
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
sequence: None,
};
let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![file_meta],

View File

@@ -23,7 +23,7 @@ pub use bulk::part::BulkPart;
use common_time::Timestamp;
use serde::{Deserialize, Serialize};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::{ColumnId, SequenceNumber};
use store_api::storage::ColumnId;
use table::predicate::Predicate;
use crate::config::MitoConfig;
@@ -70,15 +70,13 @@ impl Default for MemtableConfig {
pub struct MemtableStats {
/// The estimated bytes allocated by this memtable from heap.
estimated_bytes: usize,
/// The inclusive time range that this memtable contains. It is None if
/// The time range that this memtable contains. It is None if
/// and only if the memtable is empty.
time_range: Option<(Timestamp, Timestamp)>,
/// Total rows in memtable
num_rows: usize,
/// Total number of ranges in the memtable.
num_ranges: usize,
/// The maximum sequence number in the memtable.
max_sequence: SequenceNumber,
}
impl MemtableStats {
@@ -108,11 +106,6 @@ impl MemtableStats {
pub fn num_ranges(&self) -> usize {
self.num_ranges
}
/// Returns the maximum sequence number in the memtable.
pub fn max_sequence(&self) -> SequenceNumber {
self.max_sequence
}
}
pub type BoxedBatchIterator = Box<dyn Iterator<Item = Result<Batch>> + Send>;

View File

@@ -63,25 +63,6 @@ impl KeyValues {
// Safety: rows is not None.
self.mutation.rows.as_ref().unwrap().rows.len()
}
/// Returns if this container is empty
pub fn is_empty(&self) -> bool {
self.mutation.rows.is_none()
}
/// Return the max sequence in this container.
///
/// When the mutation has no rows, the sequence is the same as the mutation sequence.
pub fn max_sequence(&self) -> SequenceNumber {
let mut sequence = self.mutation.sequence;
let num_rows = self.mutation.rows.as_ref().unwrap().rows.len() as u64;
sequence += num_rows;
if num_rows > 0 {
sequence -= 1;
}
sequence
}
}
/// Key value view of a mutation.
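The removed KeyValues::max_sequence assumes the rows of a mutation take consecutive sequence numbers starting at mutation.sequence; a self-contained restatement of that arithmetic, under that assumption:

fn max_sequence(first: u64, num_rows: u64) -> u64 {
    // Rows take sequences first, first + 1, ..., first + num_rows - 1.
    if num_rows > 0 {
        first + num_rows - 1
    } else {
        // An empty mutation keeps the mutation sequence itself.
        first
    }
}

fn main() {
    assert_eq!(12, max_sequence(10, 3));
    assert_eq!(10, max_sequence(10, 0));
}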

View File

@@ -24,7 +24,7 @@ mod shard_builder;
mod tree;
use std::fmt;
use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering};
use std::sync::Arc;
use common_base::readable_size::ReadableSize;
@@ -113,7 +113,6 @@ pub struct PartitionTreeMemtable {
alloc_tracker: AllocTracker,
max_timestamp: AtomicI64,
min_timestamp: AtomicI64,
max_sequence: AtomicU64,
/// Total written rows in memtable. This also includes deleted and duplicated rows.
num_rows: AtomicUsize,
}
@@ -132,10 +131,6 @@ impl Memtable for PartitionTreeMemtable {
}
fn write(&self, kvs: &KeyValues) -> Result<()> {
if kvs.is_empty() {
return Ok(());
}
// TODO(yingwen): Validate schema while inserting rows.
let mut metrics = WriteMetrics::default();
@@ -145,12 +140,6 @@ impl Memtable for PartitionTreeMemtable {
self.update_stats(&metrics);
// update max_sequence
if res.is_ok() {
let sequence = kvs.max_sequence();
self.max_sequence.fetch_max(sequence, Ordering::Relaxed);
}
self.num_rows.fetch_add(kvs.num_rows(), Ordering::Relaxed);
res
}
@@ -163,12 +152,6 @@ impl Memtable for PartitionTreeMemtable {
self.update_stats(&metrics);
// update max_sequence
if res.is_ok() {
self.max_sequence
.fetch_max(key_value.sequence(), Ordering::Relaxed);
}
self.num_rows.fetch_add(1, Ordering::Relaxed);
res
}
@@ -227,7 +210,6 @@ impl Memtable for PartitionTreeMemtable {
time_range: None,
num_rows: 0,
num_ranges: 0,
max_sequence: 0,
};
}
@@ -247,7 +229,6 @@ impl Memtable for PartitionTreeMemtable {
time_range: Some((min_timestamp, max_timestamp)),
num_rows: self.num_rows.load(Ordering::Relaxed),
num_ranges: 1,
max_sequence: self.max_sequence.load(Ordering::Relaxed),
}
}
@@ -286,7 +267,6 @@ impl PartitionTreeMemtable {
max_timestamp: AtomicI64::new(i64::MIN),
min_timestamp: AtomicI64::new(i64::MAX),
num_rows: AtomicUsize::new(0),
max_sequence: AtomicU64::new(0),
}
}
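The max_sequence counter removed from PartitionTreeMemtable here (and from TimeSeriesMemtable further below) is a running maximum kept with AtomicU64::fetch_max, which is safe to update from concurrent writers; a minimal self-contained sketch:

use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let max_sequence = AtomicU64::new(0);
    // Writers race to record their sequences; only larger values win.
    max_sequence.fetch_max(42, Ordering::Relaxed);
    max_sequence.fetch_max(7, Ordering::Relaxed);
    assert_eq!(42, max_sequence.load(Ordering::Relaxed));
}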

View File

@@ -168,11 +168,8 @@ impl TimePartitions {
Ok(())
}
/// Forks latest partition and updates the partition duration if `part_duration` is Some.
pub fn fork(&self, metadata: &RegionMetadataRef, part_duration: Option<Duration>) -> Self {
// Fall back to the existing partition duration.
let part_duration = part_duration.or(self.part_duration);
/// Forks latest partition.
pub fn fork(&self, metadata: &RegionMetadataRef) -> Self {
let mut inner = self.inner.lock().unwrap();
let latest_part = inner
.parts
@@ -181,39 +178,24 @@ impl TimePartitions {
.cloned();
let Some(old_part) = latest_part else {
// If there is no partition, then we create a new partition with the new duration.
return Self::new(
metadata.clone(),
self.builder.clone(),
inner.next_memtable_id,
part_duration,
self.part_duration,
);
};
let old_stats = old_part.memtable.stats();
// Use the max timestamp to compute the new time range for the memtable.
// If `part_duration` is None, the new range will be None.
let new_time_range =
old_stats
.time_range()
.zip(part_duration)
.and_then(|(range, bucket)| {
partition_start_timestamp(range.1, bucket)
.and_then(|start| PartTimeRange::from_start_duration(start, bucket))
});
// Forks the latest partition, but compute the time range based on the new duration.
let memtable = old_part.memtable.fork(inner.alloc_memtable_id(), metadata);
let new_part = TimePartition {
memtable,
time_range: new_time_range,
time_range: old_part.time_range,
};
Self {
inner: Mutex::new(PartitionsInner::with_partition(
new_part,
inner.next_memtable_id,
)),
part_duration,
part_duration: self.part_duration,
metadata: metadata.clone(),
builder: self.builder.clone(),
}
@@ -256,19 +238,6 @@ impl TimePartitions {
inner.next_memtable_id
}
/// Creates a new empty partition list from this list and a `part_duration`.
/// It falls back to the old partition duration if `part_duration` is `None`.
pub(crate) fn new_with_part_duration(&self, part_duration: Option<Duration>) -> Self {
debug_assert!(self.is_empty());
Self::new(
self.metadata.clone(),
self.builder.clone(),
self.next_memtable_id(),
part_duration.or(self.part_duration),
)
}
/// Returns all partitions.
fn list_partitions(&self) -> PartitionVec {
let inner = self.inner.lock().unwrap();
@@ -478,9 +447,9 @@ mod tests {
assert_eq!(1, partitions.num_partitions());
assert!(!partitions.is_empty());
assert!(!partitions.is_empty());
let mut memtables = Vec::new();
partitions.list_memtables(&mut memtables);
assert_eq!(0, memtables[0].id());
let iter = memtables[0].iter(None, None).unwrap();
let timestamps = collect_iter_timestamps(iter);
@@ -534,14 +503,16 @@ mod tests {
);
}
fn new_multi_partitions(metadata: &RegionMetadataRef) -> TimePartitions {
#[test]
fn test_write_multi_parts() {
let metadata = memtable_util::metadata_for_test();
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions =
TimePartitions::new(metadata.clone(), builder, 0, Some(Duration::from_secs(5)));
assert_eq!(0, partitions.num_partitions());
let kvs = memtable_util::build_key_values(
metadata,
&metadata,
"hello".to_string(),
0,
&[2000, 0],
@@ -553,7 +524,7 @@ mod tests {
assert!(!partitions.is_empty());
let kvs = memtable_util::build_key_values(
metadata,
&metadata,
"hello".to_string(),
0,
&[3000, 7000, 4000, 5000],
@@ -563,18 +534,9 @@ mod tests {
partitions.write(&kvs).unwrap();
assert_eq!(2, partitions.num_partitions());
partitions
}
#[test]
fn test_write_multi_parts() {
let metadata = memtable_util::metadata_for_test();
let partitions = new_multi_partitions(&metadata);
let parts = partitions.list_partitions();
let iter = parts[0].memtable.iter(None, None).unwrap();
let timestamps = collect_iter_timestamps(iter);
assert_eq!(0, parts[0].memtable.id());
assert_eq!(
Timestamp::new_millisecond(0),
parts[0].time_range.unwrap().min_timestamp
@@ -585,7 +547,6 @@ mod tests {
);
assert_eq!(&[0, 2000, 3000, 4000], &timestamps[..]);
let iter = parts[1].memtable.iter(None, None).unwrap();
assert_eq!(1, parts[1].memtable.id());
let timestamps = collect_iter_timestamps(iter);
assert_eq!(&[5000, 7000], &timestamps[..]);
assert_eq!(
@@ -597,85 +558,4 @@ mod tests {
parts[1].time_range.unwrap().max_timestamp
);
}
#[test]
fn test_new_with_part_duration() {
let metadata = memtable_util::metadata_for_test();
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
let new_parts = partitions.new_with_part_duration(Some(Duration::from_secs(5)));
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(1, new_parts.next_memtable_id());
// Won't update the duration if it's None.
let new_parts = new_parts.new_with_part_duration(None);
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
// Don't need to create new memtables.
assert_eq!(1, new_parts.next_memtable_id());
let new_parts = new_parts.new_with_part_duration(Some(Duration::from_secs(10)));
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
// Don't need to create new memtables.
assert_eq!(1, new_parts.next_memtable_id());
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions = TimePartitions::new(metadata.clone(), builder.clone(), 0, None);
// Need to build a new memtable as duration is still None.
let new_parts = partitions.new_with_part_duration(None);
assert!(new_parts.part_duration().is_none());
assert_eq!(2, new_parts.next_memtable_id());
}
#[test]
fn test_fork_empty() {
let metadata = memtable_util::metadata_for_test();
let builder = Arc::new(PartitionTreeMemtableBuilder::default());
let partitions = TimePartitions::new(metadata.clone(), builder, 0, None);
partitions.freeze().unwrap();
let new_parts = partitions.fork(&metadata, None);
assert!(new_parts.part_duration().is_none());
assert_eq!(1, new_parts.list_partitions()[0].memtable.id());
assert_eq!(2, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(5)));
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(2, new_parts.list_partitions()[0].memtable.id());
assert_eq!(3, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, None);
// Won't update the duration.
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(3, new_parts.list_partitions()[0].memtable.id());
assert_eq!(4, new_parts.next_memtable_id());
new_parts.freeze().unwrap();
let new_parts = new_parts.fork(&metadata, Some(Duration::from_secs(10)));
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
assert_eq!(4, new_parts.list_partitions()[0].memtable.id());
assert_eq!(5, new_parts.next_memtable_id());
}
#[test]
fn test_fork_non_empty_none() {
let metadata = memtable_util::metadata_for_test();
let partitions = new_multi_partitions(&metadata);
partitions.freeze().unwrap();
// Won't update the duration.
let new_parts = partitions.fork(&metadata, None);
assert!(new_parts.is_empty());
assert_eq!(Duration::from_secs(5), new_parts.part_duration().unwrap());
assert_eq!(2, new_parts.list_partitions()[0].memtable.id());
assert_eq!(3, new_parts.next_memtable_id());
// Although we don't fork a memtable multiple times, we still add a test for it.
let new_parts = partitions.fork(&metadata, Some(Duration::from_secs(10)));
assert!(new_parts.is_empty());
assert_eq!(Duration::from_secs(10), new_parts.part_duration().unwrap());
assert_eq!(3, new_parts.list_partitions()[0].memtable.id());
assert_eq!(4, new_parts.next_memtable_id());
}
}

View File

@@ -15,7 +15,7 @@
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, Bound, HashSet};
use std::fmt::{Debug, Formatter};
use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
@@ -100,7 +100,6 @@ pub struct TimeSeriesMemtable {
alloc_tracker: AllocTracker,
max_timestamp: AtomicI64,
min_timestamp: AtomicI64,
max_sequence: AtomicU64,
dedup: bool,
merge_mode: MergeMode,
/// Total written rows in memtable. This also includes deleted and duplicated rows.
@@ -135,7 +134,6 @@ impl TimeSeriesMemtable {
alloc_tracker: AllocTracker::new(write_buffer_manager),
max_timestamp: AtomicI64::new(i64::MIN),
min_timestamp: AtomicI64::new(i64::MAX),
max_sequence: AtomicU64::new(0),
dedup,
merge_mode,
num_rows: Default::default(),
@@ -188,10 +186,6 @@ impl Memtable for TimeSeriesMemtable {
}
fn write(&self, kvs: &KeyValues) -> Result<()> {
if kvs.is_empty() {
return Ok(());
}
let mut local_stats = WriteMetrics::default();
for kv in kvs.iter() {
@@ -205,10 +199,6 @@ impl Memtable for TimeSeriesMemtable {
// so that we can ensure writing to memtable will succeed.
self.update_stats(local_stats);
// update max_sequence
let sequence = kvs.max_sequence();
self.max_sequence.fetch_max(sequence, Ordering::Relaxed);
self.num_rows.fetch_add(kvs.num_rows(), Ordering::Relaxed);
Ok(())
}
@@ -219,13 +209,6 @@ impl Memtable for TimeSeriesMemtable {
metrics.value_bytes += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();
self.update_stats(metrics);
// update max_sequence
if res.is_ok() {
self.max_sequence
.fetch_max(key_value.sequence(), Ordering::Relaxed);
}
self.num_rows.fetch_add(1, Ordering::Relaxed);
res
}
@@ -311,7 +294,6 @@ impl Memtable for TimeSeriesMemtable {
time_range: None,
num_rows: 0,
num_ranges: 0,
max_sequence: 0,
};
}
let ts_type = self
@@ -329,7 +311,6 @@ impl Memtable for TimeSeriesMemtable {
time_range: Some((min_timestamp, max_timestamp)),
num_rows: self.num_rows.load(Ordering::Relaxed),
num_ranges: 1,
max_sequence: self.max_sequence.load(Ordering::Relaxed),
}
}

View File

@@ -15,7 +15,6 @@
//! Memtable version.
use std::sync::Arc;
use std::time::Duration;
use smallvec::SmallVec;
use store_api::metadata::RegionMetadataRef;
@@ -66,53 +65,27 @@ impl MemtableVersion {
/// Returns a new [MemtableVersion] which switches the old mutable memtable to immutable
/// memtable.
///
/// It will switch to use the `time_window` provided.
///
/// Returns `None` if the mutable memtable is empty.
pub(crate) fn freeze_mutable(
&self,
metadata: &RegionMetadataRef,
time_window: Option<Duration>,
) -> Result<Option<MemtableVersion>> {
if self.mutable.is_empty() {
// No need to freeze the mutable memtable, but we need to check the time window.
if self.mutable.part_duration() == time_window {
// If the time window is the same, we don't need to update it.
return Ok(None);
}
// Update the time window.
let mutable = self.mutable.new_with_part_duration(time_window);
common_telemetry::debug!(
"Freeze empty memtable, update partition duration from {:?} to {:?}",
self.mutable.part_duration(),
time_window
);
return Ok(Some(MemtableVersion {
mutable: Arc::new(mutable),
immutables: self.immutables.clone(),
}));
// No need to freeze the mutable memtable.
return Ok(None);
}
// Marks the mutable memtable as immutable so it can free the memory usage from our
// soft limit.
self.mutable.freeze()?;
// Fork the memtable.
if self.mutable.part_duration() != time_window {
common_telemetry::debug!(
"Fork memtable, update partition duration from {:?}, to {:?}",
self.mutable.part_duration(),
time_window
);
}
let mutable = Arc::new(self.mutable.fork(metadata, time_window));
let mutable = Arc::new(self.mutable.fork(metadata));
// Pushes the mutable memtable to immutable list.
let mut immutables =
SmallVec::with_capacity(self.immutables.len() + self.mutable.num_partitions());
immutables.extend(self.immutables.iter().cloned());
// Pushes the mutable memtable to immutable list.
self.mutable.list_memtables_to_small_vec(&mut immutables);
immutables.extend(self.immutables.iter().cloned());
Ok(Some(MemtableVersion {
mutable,
immutables,

View File

@@ -21,7 +21,7 @@ use datatypes::vectors::UInt32Vector;
use store_api::storage::TimeSeriesRowSelector;
use crate::cache::{
selector_result_cache_hit, selector_result_cache_miss, CacheStrategy, SelectorResultKey,
selector_result_cache_hit, selector_result_cache_miss, CacheManagerRef, SelectorResultKey,
SelectorResultValue,
};
use crate::error::Result;
@@ -86,7 +86,7 @@ impl RowGroupLastRowCachedReader {
pub(crate) fn new(
file_id: FileId,
row_group_idx: usize,
cache_strategy: CacheStrategy,
cache_manager: Option<CacheManagerRef>,
row_group_reader: RowGroupReader,
) -> Self {
let key = SelectorResultKey {
@@ -95,17 +95,20 @@ impl RowGroupLastRowCachedReader {
selector: TimeSeriesRowSelector::LastRow,
};
if let Some(value) = cache_strategy.get_selector_result(&key) {
let Some(cache_manager) = cache_manager else {
return Self::new_miss(key, row_group_reader, None);
};
if let Some(value) = cache_manager.get_selector_result(&key) {
let schema_matches =
value.projection == row_group_reader.read_format().projection_indices();
if schema_matches {
// Schema matches, use cache batches.
Self::new_hit(value)
} else {
Self::new_miss(key, row_group_reader, cache_strategy)
Self::new_miss(key, row_group_reader, Some(cache_manager))
}
} else {
Self::new_miss(key, row_group_reader, cache_strategy)
Self::new_miss(key, row_group_reader, Some(cache_manager))
}
}
@@ -127,13 +130,13 @@ impl RowGroupLastRowCachedReader {
fn new_miss(
key: SelectorResultKey,
row_group_reader: RowGroupReader,
cache_strategy: CacheStrategy,
cache_manager: Option<CacheManagerRef>,
) -> Self {
selector_result_cache_miss();
Self::Miss(RowGroupLastRowReader::new(
key,
row_group_reader,
cache_strategy,
cache_manager,
))
}
}
@@ -172,19 +175,23 @@ pub(crate) struct RowGroupLastRowReader {
reader: RowGroupReader,
selector: LastRowSelector,
yielded_batches: Vec<Batch>,
cache_strategy: CacheStrategy,
cache_manager: Option<CacheManagerRef>,
/// Index buffer to take a new batch from the last row.
take_index: UInt32Vector,
}
impl RowGroupLastRowReader {
fn new(key: SelectorResultKey, reader: RowGroupReader, cache_strategy: CacheStrategy) -> Self {
fn new(
key: SelectorResultKey,
reader: RowGroupReader,
cache_manager: Option<CacheManagerRef>,
) -> Self {
Self {
key,
reader,
selector: LastRowSelector::default(),
yielded_batches: vec![],
cache_strategy,
cache_manager,
take_index: UInt32Vector::from_vec(vec![0]),
}
}
@@ -214,15 +221,17 @@ impl RowGroupLastRowReader {
/// Updates row group's last row cache if cache manager is present.
fn maybe_update_cache(&mut self) {
if self.yielded_batches.is_empty() {
// we always expect that row groups yield batches.
return;
if let Some(cache) = &self.cache_manager {
if self.yielded_batches.is_empty() {
// we always expect that row groups yield batches.
return;
}
let value = Arc::new(SelectorResultValue {
result: std::mem::take(&mut self.yielded_batches),
projection: self.reader.read_format().projection_indices().to_vec(),
});
cache.put_selector_result(self.key, value)
}
let value = Arc::new(SelectorResultValue {
result: std::mem::take(&mut self.yielded_batches),
projection: self.reader.read_format().projection_indices().to_vec(),
});
self.cache_strategy.put_selector_result(self.key, value);
}
fn metrics(&self) -> &ReaderMetrics {

View File

@@ -30,7 +30,7 @@ use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef;
use store_api::storage::ColumnId;
use crate::cache::CacheStrategy;
use crate::cache::CacheManager;
use crate::error::{InvalidRequestSnafu, Result};
use crate::read::Batch;
use crate::row_converter::{McmpRowCodec, RowCodec, SortField};
@@ -171,7 +171,7 @@ impl ProjectionMapper {
pub(crate) fn convert(
&self,
batch: &Batch,
cache_strategy: &CacheStrategy,
cache_manager: Option<&CacheManager>,
) -> common_recordbatch::error::Result<RecordBatch> {
debug_assert_eq!(self.batch_fields.len(), batch.fields().len());
debug_assert!(self
@@ -204,12 +204,15 @@ impl ProjectionMapper {
match index {
BatchIndex::Tag(idx) => {
let value = &pk_values[*idx];
let vector = repeated_vector_with_cache(
&column_schema.data_type,
value,
num_rows,
cache_strategy,
)?;
let vector = match cache_manager {
Some(cache) => repeated_vector_with_cache(
&column_schema.data_type,
value,
num_rows,
cache,
)?,
None => new_repeated_vector(&column_schema.data_type, value, num_rows)?,
};
columns.push(vector);
}
BatchIndex::Timestamp => {
@@ -241,9 +244,9 @@ fn repeated_vector_with_cache(
data_type: &ConcreteDataType,
value: &Value,
num_rows: usize,
cache_strategy: &CacheStrategy,
cache_manager: &CacheManager,
) -> common_recordbatch::error::Result<VectorRef> {
if let Some(vector) = cache_strategy.get_repeated_vector(data_type, value) {
if let Some(vector) = cache_manager.get_repeated_vector(data_type, value) {
// Tries to get the vector from cache manager. If the vector doesn't
// have enough length, creates a new one.
match vector.len().cmp(&num_rows) {
@@ -257,7 +260,7 @@ fn repeated_vector_with_cache(
let vector = new_repeated_vector(data_type, value, num_rows)?;
// Updates cache.
if vector.len() <= MAX_VECTOR_LENGTH_TO_CACHE {
cache_strategy.put_repeated_vector(value.clone(), vector.clone());
cache_manager.put_repeated_vector(value.clone(), vector.clone());
}
Ok(vector)
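repeated_vector_with_cache follows a get-or-build-and-maybe-cache pattern: reuse a cached repeated vector when it is at least num_rows long, otherwise build a new one and cache it only while it stays under a length cap. A toy sketch with its own types and an assumed cap (the real cache is keyed by the value and its data type, and MAX_VECTOR_LENGTH_TO_CACHE may differ):

use std::collections::HashMap;

const MAX_LEN_TO_CACHE: usize = 16; // assumed cap for illustration only

fn repeated_with_cache(cache: &mut HashMap<i64, Vec<i64>>, value: i64, num_rows: usize) -> Vec<i64> {
    if let Some(cached) = cache.get(&value) {
        if cached.len() >= num_rows {
            // The cached vector is long enough; slice it down to the requested length.
            return cached[..num_rows].to_vec();
        }
    }
    // Build a new repeated vector and cache it if it is small enough.
    let vector = vec![value; num_rows];
    if vector.len() <= MAX_LEN_TO_CACHE {
        cache.insert(value, vector.clone());
    }
    vector
}

fn main() {
    let mut cache = HashMap::new();
    assert_eq!(vec![3, 3, 3], repeated_with_cache(&mut cache, 3, 3));
    // A later call with fewer rows is served by slicing the cached vector.
    assert_eq!(vec![3, 3], repeated_with_cache(&mut cache, 3, 2));
}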
@@ -281,15 +284,12 @@ fn new_repeated_vector(
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::v1::OpType;
use datatypes::arrow::array::{Int64Array, TimestampMillisecondArray, UInt64Array, UInt8Array};
use datatypes::arrow::util::pretty;
use datatypes::value::ValueRef;
use super::*;
use crate::cache::CacheManager;
use crate::read::BatchBuilder;
use crate::test_util::meta_util::TestRegionMetadataBuilder;
@@ -359,9 +359,8 @@ mod tests {
// With vector cache.
let cache = CacheManager::builder().vector_cache_size(1024).build();
let cache = CacheStrategy::EnableAll(Arc::new(cache));
let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3);
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
let expect = "\
+---------------------+----+----+----+----+
| ts | k0 | k1 | v0 | v1 |
@@ -381,7 +380,7 @@ mod tests {
assert!(cache
.get_repeated_vector(&ConcreteDataType::int64_datatype(), &Value::Int64(3))
.is_none());
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, Some(&cache)).unwrap();
assert_eq!(expect, print_record_batch(record_batch));
}
@@ -402,9 +401,7 @@ mod tests {
);
let batch = new_batch(0, &[1, 2], &[(4, 4)], 3);
let cache = CacheManager::builder().vector_cache_size(1024).build();
let cache = CacheStrategy::EnableAll(Arc::new(cache));
let record_batch = mapper.convert(&batch, &cache).unwrap();
let record_batch = mapper.convert(&batch, None).unwrap();
let expect = "\
+----+----+
| v1 | k0 |

View File

@@ -22,7 +22,7 @@ use parquet::arrow::arrow_reader::RowSelection;
use smallvec::{smallvec, SmallVec};
use store_api::region_engine::PartitionRange;
use crate::cache::CacheStrategy;
use crate::cache::CacheManager;
use crate::error::Result;
use crate::memtable::{MemtableRange, MemtableRanges, MemtableStats};
use crate::read::scan_region::ScanInput;
@@ -112,7 +112,7 @@ impl RangeMeta {
Self::push_unordered_file_ranges(
input.memtables.len(),
&input.files,
&input.cache_strategy,
input.cache_manager.as_deref(),
&mut ranges,
);
@@ -203,15 +203,16 @@ impl RangeMeta {
fn push_unordered_file_ranges(
num_memtables: usize,
files: &[FileHandle],
cache: &CacheStrategy,
cache: Option<&CacheManager>,
ranges: &mut Vec<RangeMeta>,
) {
// For append mode, we can parallelize reading row groups.
for (i, file) in files.iter().enumerate() {
let file_index = num_memtables + i;
// Get parquet meta from the cache.
let parquet_meta =
cache.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id());
let parquet_meta = cache.and_then(|c| {
c.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id())
});
if let Some(parquet_meta) = parquet_meta {
// Scans each row group.
for row_group_index in 0..file.meta_ref().num_row_groups {

View File

@@ -33,7 +33,7 @@ use tokio_stream::wrappers::ReceiverStream;
use crate::access_layer::AccessLayerRef;
use crate::cache::file_cache::FileCacheRef;
use crate::cache::CacheStrategy;
use crate::cache::CacheManagerRef;
use crate::config::DEFAULT_SCAN_CHANNEL_SIZE;
use crate::error::Result;
use crate::memtable::MemtableRange;
@@ -171,7 +171,7 @@ pub(crate) struct ScanRegion {
/// Scan request.
request: ScanRequest,
/// Cache.
cache_strategy: CacheStrategy,
cache_manager: Option<CacheManagerRef>,
/// Capacity of the channel to send data from parallel scan tasks to the main task.
parallel_scan_channel_size: usize,
/// Whether to ignore inverted index.
@@ -190,13 +190,13 @@ impl ScanRegion {
version: VersionRef,
access_layer: AccessLayerRef,
request: ScanRequest,
cache_strategy: CacheStrategy,
cache_manager: Option<CacheManagerRef>,
) -> ScanRegion {
ScanRegion {
version,
access_layer,
request,
cache_strategy,
cache_manager,
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
ignore_inverted_index: false,
ignore_fulltext_index: false,
@@ -357,7 +357,7 @@ impl ScanRegion {
.with_predicate(Some(predicate))
.with_memtables(memtables)
.with_files(files)
.with_cache(self.cache_strategy)
.with_cache(self.cache_manager)
.with_inverted_index_applier(inverted_index_applier)
.with_bloom_filter_index_applier(bloom_filter_applier)
.with_fulltext_index_applier(fulltext_index_applier)
@@ -421,14 +421,23 @@ impl ScanRegion {
}
let file_cache = || -> Option<FileCacheRef> {
let write_cache = self.cache_strategy.write_cache()?;
let cache_manager = self.cache_manager.as_ref()?;
let write_cache = cache_manager.write_cache()?;
let file_cache = write_cache.file_cache();
Some(file_cache)
}();
let index_cache = self.cache_strategy.index_cache().cloned();
let index_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.index_cache())
.cloned();
let puffin_metadata_cache = self.cache_strategy.puffin_metadata_cache().cloned();
let puffin_metadata_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.puffin_metadata_cache())
.cloned();
InvertedIndexApplierBuilder::new(
self.access_layer.region_dir().to_string(),
@@ -461,14 +470,23 @@ impl ScanRegion {
}
let file_cache = || -> Option<FileCacheRef> {
let write_cache = self.cache_strategy.write_cache()?;
let cache_manager = self.cache_manager.as_ref()?;
let write_cache = cache_manager.write_cache()?;
let file_cache = write_cache.file_cache();
Some(file_cache)
}();
let index_cache = self.cache_strategy.bloom_filter_index_cache().cloned();
let index_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.bloom_filter_index_cache())
.cloned();
let puffin_metadata_cache = self.cache_strategy.puffin_metadata_cache().cloned();
let puffin_metadata_cache = self
.cache_manager
.as_ref()
.and_then(|c| c.puffin_metadata_cache())
.cloned();
BloomFilterIndexApplierBuilder::new(
self.access_layer.region_dir().to_string(),
@@ -532,7 +550,7 @@ pub(crate) struct ScanInput {
/// Handles to SST files to scan.
pub(crate) files: Vec<FileHandle>,
/// Cache.
pub(crate) cache_strategy: CacheStrategy,
pub(crate) cache_manager: Option<CacheManagerRef>,
/// Ignores file not found error.
ignore_file_not_found: bool,
/// Capacity of the channel to send data from parallel scan tasks to the main task.
@@ -564,7 +582,7 @@ impl ScanInput {
predicate: None,
memtables: Vec::new(),
files: Vec::new(),
cache_strategy: CacheStrategy::Disabled,
cache_manager: None,
ignore_file_not_found: false,
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,
inverted_index_applier: None,
@@ -608,8 +626,8 @@ impl ScanInput {
/// Sets cache for this query.
#[must_use]
pub(crate) fn with_cache(mut self, cache: CacheStrategy) -> Self {
self.cache_strategy = cache;
pub(crate) fn with_cache(mut self, cache: Option<CacheManagerRef>) -> Self {
self.cache_manager = cache;
self
}
@@ -742,7 +760,7 @@ impl ScanInput {
.read_sst(file.clone())
.predicate(self.predicate.clone())
.projection(Some(self.mapper.column_ids().to_vec()))
.cache(self.cache_strategy.clone())
.cache(self.cache_manager.clone())
.inverted_index_applier(self.inverted_index_applier.clone())
.bloom_filter_index_applier(self.bloom_filter_index_applier.clone())
.fulltext_index_applier(self.fulltext_index_applier.clone())

View File

@@ -257,7 +257,7 @@ impl SeqScan {
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cache = &stream_ctx.input.cache_strategy;
let cache = stream_ctx.input.cache_manager.as_deref();
let mut metrics = ScannerMetrics::default();
let mut fetch_start = Instant::now();
#[cfg(debug_assertions)]

View File

@@ -148,7 +148,7 @@ impl UnorderedScan {
let stream = try_stream! {
part_metrics.on_first_poll();
let cache = &stream_ctx.input.cache_strategy;
let cache = stream_ctx.input.cache_manager.as_deref();
let range_builder_list = Arc::new(RangeBuilderList::new(
stream_ctx.input.num_memtables(),
stream_ctx.input.num_files(),

View File

@@ -26,7 +26,6 @@
use std::sync::{Arc, RwLock};
use std::time::Duration;
use common_telemetry::info;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::SequenceNumber;
@@ -81,12 +80,8 @@ impl VersionControl {
/// Freezes the mutable memtable if it is not empty.
pub(crate) fn freeze_mutable(&self) -> Result<()> {
let version = self.current().version;
let time_window = version.compaction_time_window;
let Some(new_memtables) = version
.memtables
.freeze_mutable(&version.metadata, time_window)?
else {
let Some(new_memtables) = version.memtables.freeze_mutable(&version.metadata)? else {
return Ok(());
};
@@ -254,10 +249,7 @@ pub(crate) struct Version {
///
/// Used to check if it is a flush task during the truncating table.
pub(crate) truncated_entry_id: Option<EntryId>,
/// Inferred compaction time window from flush.
///
/// If compaction options contain a time window, it will overwrite this value
/// when creating a new version from the [VersionBuilder].
/// Inferred compaction time window.
pub(crate) compaction_time_window: Option<Duration>,
/// Options of the region.
pub(crate) options: RegionOptions,
@@ -393,24 +385,7 @@ impl VersionBuilder {
}
/// Builds a new [Version] from the builder.
/// It overwrites the window size by compaction option.
pub(crate) fn build(self) -> Version {
let compaction_time_window = self
.options
.compaction
.time_window()
.or(self.compaction_time_window);
if self.compaction_time_window.is_some()
&& compaction_time_window != self.compaction_time_window
{
info!(
"VersionBuilder overwrites region compaction time window from {:?} to {:?}, region: {}",
self.compaction_time_window,
compaction_time_window,
self.metadata.region_id
);
}
Version {
metadata: self.metadata,
memtables: self.memtables,
@@ -418,7 +393,7 @@ impl VersionBuilder {
flushed_entry_id: self.flushed_entry_id,
flushed_sequence: self.flushed_sequence,
truncated_entry_id: self.truncated_entry_id,
compaction_time_window,
compaction_time_window: self.compaction_time_window,
options: self.options,
}
}

View File

@@ -42,7 +42,7 @@ use tokio::sync::oneshot::{self, Receiver, Sender};
use crate::error::{
CompactRegionSnafu, ConvertColumnDataTypeSnafu, CreateDefaultSnafu, Error, FillDefaultSnafu,
FlushRegionSnafu, InvalidRequestSnafu, Result, UnexpectedImpureDefaultSnafu,
FlushRegionSnafu, InvalidRequestSnafu, Result,
};
use crate::manifest::action::RegionEdit;
use crate::memtable::MemtableId;
@@ -333,14 +333,6 @@ impl WriteRequest {
}
OpType::Put => {
// For put requests, we use the default value from column schema.
if column.column_schema.is_default_impure() {
UnexpectedImpureDefaultSnafu {
region_id: self.region_id,
column: &column.column_schema.name,
default_value: format!("{:?}", column.column_schema.default_constraint()),
}
.fail()?
}
column
.column_schema
.create_default()
@@ -1047,57 +1039,6 @@ mod tests {
check_invalid_request(&err, r#"unknown columns: ["k1"]"#);
}
#[test]
fn test_fill_impure_columns_err() {
let rows = Rows {
schema: vec![new_column_schema(
"k0",
ColumnDataType::Int64,
SemanticType::Tag,
)],
rows: vec![Row {
values: vec![i64_value(1)],
}],
};
let metadata = {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 1));
builder
.push_column_metadata(ColumnMetadata {
column_schema: datatypes::schema::ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_default_constraint(Some(ColumnDefaultConstraint::Function(
"now()".to_string(),
)))
.unwrap(),
semantic_type: SemanticType::Timestamp,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
column_schema: datatypes::schema::ColumnSchema::new(
"k0",
ConcreteDataType::int64_datatype(),
true,
),
semantic_type: SemanticType::Tag,
column_id: 2,
})
.primary_key(vec![2]);
builder.build().unwrap()
};
let mut request = WriteRequest::new(RegionId::new(1, 1), OpType::Put, rows).unwrap();
let err = request.check_schema(&metadata).unwrap_err();
assert!(err.is_fill_default());
assert!(request
.fill_missing_columns(&metadata)
.unwrap_err()
.to_string()
.contains("Unexpected impure default value with region_id"));
}
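// The removed check in fill_missing_columns and this removed test both rely on the
// notion of an impure default: a column with a constant default can be back-filled on
// the write path, while one whose default is a function such as now() triggers the
// UnexpectedImpureDefault error removed in the earlier error.rs hunk. The sketch below
// is not the repo's code; it uses its own enum in place of ColumnDefaultConstraint
// purely to illustrate the distinction.
#[test]
fn sketch_impure_default() {
    enum DefaultConstraint {
        Value(i64),
        Function(String),
    }
    fn is_impure(constraint: &DefaultConstraint) -> bool {
        // Function defaults depend on evaluation time, so they are impure.
        matches!(constraint, DefaultConstraint::Function(_))
    }
    assert!(is_impure(&DefaultConstraint::Function("now()".to_string())));
    assert!(!is_impure(&DefaultConstraint::Value(0)));
}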
#[test]
fn test_fill_missing_columns() {
let rows = Rows {

View File

@@ -15,7 +15,6 @@
//! Structures to describe metadata of files.
use std::fmt;
use std::num::NonZeroU64;
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
@@ -135,11 +134,6 @@ pub struct FileMeta {
/// the default value `0` doesn't means the file doesn't contains any rows,
/// but instead means the number of rows is unknown.
pub num_row_groups: u64,
/// Sequence in this file.
///
/// This sequence is the only sequence in this file. And it's retrieved from the max
/// sequence of the rows on generating this file.
pub sequence: Option<NonZeroU64>,
}
/// Type of index.
@@ -349,7 +343,6 @@ mod tests {
index_file_size: 0,
num_rows: 0,
num_row_groups: 0,
sequence: None,
}
}

Some files were not shown because too many files have changed in this diff