Compare commits

...

102 Commits

Author SHA1 Message Date
Weny Xu
eab702cc02 feat: implement sync_region for metric engine (#5826)
* feat: implement `sync_region` for metric engine

* chore: apply suggestions from CR

* chore: upgrade proto
2025-04-03 12:46:20 +00:00
Zhenchi
dd63068df6 feat: add matches_term function (#5817)
* feat: add `matches_term` function

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* merge & fix

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix & skip char after boundary mismatch

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-04-03 09:09:41 +00:00
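
A minimal sketch of how the new `matches_term` function might be used; the table and column names are illustrative, and the `matches_term(column, term)` signature is inferred from the commit title rather than confirmed here:

```sql
-- Assumes matches_term(text_column, term) returns true only when `term`
-- occurs as a complete term (not a substring) in the text.
SELECT ts, message
FROM app_logs
WHERE matches_term(message, 'timeout');
```
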
Yuhan Wang
f73b61e767 feat(remote-wal): add remote wal prune procedure (#5714)
* feat: add remote wal prune procedure

* feat: add retry logic and remove rollback

* chore: simplify the logic

* fix: remove REMOTE_WAL_LOCK

* fix: use in-memory kv

* perf: O(n) judgement

* chore: add single write lock

* test: add unit test

* chore: remove unused function

* chore: update comments

* chore: apply comments

* chore: apply comments
2025-04-03 08:11:51 +00:00
Yingwen
2acecd3620 feat: support REPLACE INTO statement (#5820)
* feat: support replace into

* feat: support replace into
2025-04-03 03:22:43 +00:00
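
A hedged sketch of the statement shape this commit adds; table and column names are illustrative only:

```sql
-- Assumes REPLACE INTO accepts the same form as INSERT INTO and overwrites
-- rows that share the same time index and primary key values.
REPLACE INTO monitor (host, ts, cpu)
VALUES ('host1', '2025-04-03 03:00:00', 0.5);
```
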
Zhenchi
f797de3497 feat: add backend field to fulltext options (#5806)
* feat: add backend field to fulltext options

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* update proto

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix option conv

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix display

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-04-02 09:15:54 +00:00
dennis zhuang
d53afa849d fix: interval cast expression can't work in range query, #5805 (#5813)
* fix: interval cast expression can't work in range query, #5805

* fix: nested cast

* test: make vector test stable
2025-04-02 08:46:17 +00:00
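
A rough illustration of the kind of query the fix targets, assuming the problem in #5805 was a cast-to-INTERVAL expression used inside the RANGE/ALIGN clauses; the table, column, and exact clause syntax are assumptions:

```sql
-- Assumed shape: the range duration comes from a cast expression, not a plain literal.
SELECT ts, host, avg(cpu) RANGE ('1 hour'::INTERVAL)
FROM monitor
ALIGN ('1 hour'::INTERVAL);
```
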
discord9
3aebfc1716 test: looser condition (#5816) 2025-04-02 07:38:05 +00:00
Weny Xu
dbb79c9671 feat: introduce CollectLeaderRegionHandler (#5811)
* feat: introduce `CollectLeaderRegionHandler`

* feat: add to default handler group

* fix: correct unit test

* chore: rename
2025-04-02 04:47:00 +00:00
shuiyisong
054056fcbb refactor: remove prom store write dispatch (#5812)
* refactor: remove prom store remote write dispatch pattern

* chore: ref XIX-22
2025-04-02 04:35:28 +00:00
Zhenchi
aa486db8b7 refactor: allow bloom filter search to apply AND conjunction (#5770)
* refactor: change bloom filter search from any to all match

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* place back in list

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* nit

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-04-01 12:50:34 +00:00
Weny Xu
4ef9afd8d8 feat: introduce read preference (#5783)
* feat: introduce read preference

* feat: introduce `RegionQueryHandlerFactory`

* feat: extract ReadPreference from http header

* test: add more tests

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2025-04-01 09:17:01 +00:00
shuiyisong
f9221e9e66 perf: introduce simd_json for parsing ndjson (#5794)
* perf: introduce simd_json for parsing ndjson

* fix: some tests

* fix: some tests

* fix: es test case

* chore: use `as_bytes_mut()`

* chore: remove unnecessary `to_string`

* chore: add safety comment
2025-04-01 08:17:26 +00:00
Weny Xu
6c26fe9c80 fix: correct error status code (#5802) 2025-04-01 07:34:16 +00:00
fys
33c9fb737c refactor: remove mode option in configuration files (#5809)
* refactor: remove mode option in configuration files

* chore: remove mode in configuration file

* remove mode field in FlownodeOptions

* add comment for test

* update config.md

* remove mode field in standalone options

* fix: ci
2025-04-01 07:14:10 +00:00
Weny Xu
68ce796771 chore: expose modules (#5810) 2025-04-01 05:33:20 +00:00
Weny Xu
d701c18150 feat: introduce CustomizedRegionLeaseRenewer (#5762)
* feat: add manifest_version to `GrantedRegion`

* chore: upgrade proto

* chore: apply review suggestions

* chore: apply suggestions from CR

* feat: introduce `CustomizedRegionLeaseRenewerRef`

* chore: upgrade to `103948`
2025-03-31 13:25:05 +00:00
Weny Xu
d3a60d8821 feat: add limit for the number of running procedures (#5793)
* refactor: remove unused `messages`

* feat: introduce running procedure num limit

* feat: update config

* chore: apply suggestions from CR

* feat: impl `status_code` for `log-store` crate
2025-03-31 06:14:21 +00:00
discord9
5d688c6565 feat(flow): time window expr (#5785)
* feat: time window expr

* chore: comments

* refactor: per review

* chore: partially per review

* chore: per review

* chore: per review use query engine's session
2025-03-31 04:46:37 +00:00
Weny Xu
41aee1f1b7 feat: implement sync_region for mito engine (#5765)
* chore: upgrade proto to `2d52b`

* feat: add `SyncRegion` to `WorkerRequest`

* feat: impl `sync_region` for `Engine` trait

* test: add tests

* chore: fmt code

* chore: upgrade proto

* chore: unify `RegionLeaderState` and `RegionFollowerState`

* chore: check immutable memtable

* chore: fix clippy

* chore: apply suggestions from CR
2025-03-31 03:53:47 +00:00
yihong
c5b55fd8cf fix: close issue #3902 since upstream fixed (#5801)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-03-30 12:34:52 +00:00
Ruihang Xia
8051dbbc31 fix: typo variadic (#5800)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-29 07:09:36 +00:00
Ruihang Xia
2d3192984d refactor: remove deprecated find_unique method (#5790)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-28 19:32:11 +00:00
shuiyisong
bef45ed0e8 feat(pipeline): support table name suffix templating in pipeline (#5775)
* chore: add table name template in pipeline yaml

* chore: implement apply function and add simple test

* chore: add comment and integration test

* chore: minor update

* fix: typos

* chore: change to table suffix

* chore: update comment and test

* chore: change name to table_suffix
2025-03-28 18:12:46 +00:00
LFC
a9e990768d refactor: skip re-taking arrays in memtable if possible (#5779)
experiment: skip sorting and re-taking arrays if possible when scanning memtable
2025-03-28 09:58:55 +00:00
Weny Xu
7e1ba49d3d refactor: remove useless region follower legacy code (#5795) 2025-03-28 08:10:30 +00:00
Yingwen
737558ef53 fix: support __name__ matcher in label values (#5773) 2025-03-28 02:18:59 +00:00
Yingwen
dbc25dd8da feat: expose scanner metrics to df execution metrics (#5699)
* feat: add metrics list to scanner

* chore: add report metrics method

* feat: use df metrics in PartitionMetrics

* feat: pass execution metrics to scan partition

* refactor: remove PartitionMetricsList

* feat: better debug format for ScanMetricsSet

* feat: do not expose all metrics to execution metrics by default

* refactor: use struct destruction

* feat: add metrics list to scanner

* chore: Add custom Debug for ScanMetricsSet and partition metrics display

* test: update sqlness result
2025-03-27 23:40:39 +00:00
Ruihang Xia
76a58a07e1 feat: simple implementation of DictionaryVector (#5758)
* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl vector op

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* unit tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unwraps

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: enhance DictionaryVector operations and deprecate find_unique method

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: remove find_unique test

* chore: remove unused import

* fix test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-03-27 23:19:10 +00:00
Weny Xu
c2ba7fb16c refactor: remove useless region follower legacy code (#5787)
chore: remove region follower procedure
2025-03-27 11:50:29 +00:00
Lei, HUANG
09ef24fd75 refactor: remove useless partition legacy code (#5786)
* refactor: remove useless partition legacy code

* also remove error variants

* fix imports
2025-03-27 11:08:25 +00:00
Weny Xu
9b7b012620 feat: impl show region (#5782)
* fix: fix region follower procedure

* feat: add table related info to region peers table and follower regions

* feat: impl show region

* chore: apply suggestions from CR
2025-03-27 10:41:44 +00:00
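
A hedged sketch of the new statement, assuming a `SHOW REGION FROM <table>` form; the table name is illustrative:

```sql
-- Assumed syntax for listing a table's regions (and, with this change, region followers).
SHOW REGION FROM monitor;
```
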
fys
898e0bd828 chore: expose some methods (#5784) 2025-03-27 09:00:51 +00:00
shuiyisong
2b4ed43692 chore: accept table options in auto create table from hints (#5776)
chore: accept table options in auto create table from hint
2025-03-27 08:17:27 +00:00
Weny Xu
8f2ae4e136 feat: add AddRegionFollower and RemoveRegionFollower admin fn (#5780) 2025-03-27 06:30:50 +00:00
Weny Xu
0cd219a5d2 refactor: move list_flow_stats to ClusterInfo trait. (#5774)
refactor: minor refactor
2025-03-27 04:20:12 +00:00
fys
2b2ea5bf72 chore: upgrade some dependencies (#5777)
* chore: upgrade some dependencies

* chore: upgrade some dependencies

* fix: cr

* fix: ci

* fix: test

* fix: cargo fmt
2025-03-27 02:48:44 +00:00
discord9
e107bd5529 feat(flow): utils function for recording rule (#5768)
* chore: utils for rr

* chore: one more test

* chore: more test case

* test: even more tests

* chore: per review

* tests: add more&update testcase

* chore: update comment
2025-03-26 08:55:35 +00:00
Weny Xu
a31f0e255b feat: introduce RegionFollowerClient trait (#5771)
* chore: expose AskLeader

* feat: introduce `RegionFollowerClient` trait

* feat: build meta client with region follower client
2025-03-26 08:05:15 +00:00
Lei, HUANG
40b52f3b13 feat(mito): allow skipping wal while creating tables (#5740)
* chore: add Noop Wal option

* remove: WalOptionsAllocator::alloc method

* feat/no-op-wal:
 ### Add Noop WAL Option

 - **`engine.rs`, `opener.rs`, `wal.rs`, `entry_reader.rs`, `handle_write.rs`, `provider.rs`**:
   - Introduced a new `WalOptions::Noop` variant to handle scenarios where no write-ahead logging is required.
   - Implemented `NoopEntryReader` to provide a no-operation entry reader.
   - Updated logic to skip WAL operations for regions with `Noop` option.
   - Added `Provider::Noop` to handle `Noop` operations in the provider logic.

* feat/no-op-wal:
 ### Add `skip_wal` Option to Table Metadata

 - **Enhancements in `table_meta.rs`**:
   - Added a `skip_wal` parameter to the `create_wal_options` function to allow skipping WAL writes.
   - Updated the `create_table_route` function to utilize the `skip_wal` option from `table_info.meta.options`.

 - **Updates in `wal_options_allocator.rs`**:
   - Modified `alloc_batch` to handle the `skip_wal` flag, setting WAL options to `Noop` when true.
   - Added a test case `test_allocator_with_skip_wal` to verify the `skip_wal` functionality.

 - **Changes in `requests.rs`**:
   - Introduced `skip_wal` in `TableOptions` and added parsing logic.
   - Updated `TableOptions` display to include `skip_wal`.

 These changes introduce the ability to skip WAL writes for tables, enhancing flexibility in table metadata management.

* feat/no-op-wal:
 **Add WAL Option Handling and Table Option Validation**

 - **`handle_write.rs`**: Introduced a check for `WalOptions::Noop` in the `RegionWorkerLoop` to skip WAL writing for regions with this option.
 - **`requests.rs`**: Added `SKIP_WAL_KEY` to the list of valid table options for enhanced table configuration validation.

* feat/no-op-wal:
 ### Update WAL Options Allocation

 - **`key.rs`**: Modified the `allocate_region_wal_options` function to include an additional boolean parameter, enhancing the allocation logic.
 - **`wal_options_allocator.rs`**: Simplified the `test_allocator_with_skip_wal` test by removing unnecessary variable declarations and directly using `WalOptionsAllocator::RaftEngine`.

 These changes improve the flexibility and efficiency of WAL options allocation in the system.

* chore: reformat code

* feat/no-op-wal:
 **Enhancement:** Conditional Addition of `SKIP_WAL_KEY` in `requests.rs`

 - Updated `TableOptions` implementation in `requests.rs` to conditionally add `SKIP_WAL_KEY` to `key_vals` only when `self.skip_wal` is true, optimizing the key-value pair generation.

* feat/no-op-wal:
 Update `requests.rs` tests to reflect changes in `skip_wal` option

 - Modified test assertions in `requests.rs` to remove `skip_wal=false` from expected strings.
 - Added a new test case to verify `skip_wal=true` is correctly represented in `TableOptions`.

* feat/no-op-wal: Add Debug Logging and Improve Error Handling for WAL and Table Options

 • Introduced debug logging in wal.rs to skip obsolete regions, enhancing traceability.
 • Improved error handling in requests.rs by replacing warn with error propagation for invalid skip_wal values.
 • Added new test cases for skip_wal functionality, including SQL scripts and expected results, to ensure correct behavior and validation of the changes.
2025-03-26 07:53:52 +00:00
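
To make the new table option concrete, a minimal sketch of creating a table that skips WAL writes. The `skip_wal` option name comes from the commit messages above; the schema and the `WITH (...)` placement follow the usual table-option syntax and are otherwise assumptions:

```sql
-- Assumes skip_wal is accepted as a string-valued table option.
CREATE TABLE metrics_no_wal (
  host STRING,
  ts TIMESTAMP TIME INDEX,
  cpu DOUBLE,
  PRIMARY KEY (host)
) WITH (
  'skip_wal' = 'true'
);
```
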
shuiyisong
f13a43647a chore: remove Transformer trait (#5772)
* chore: remove transformer trait

* chore: remove unnecessary generic
2025-03-26 02:53:30 +00:00
Zhenchi
7bcb01d269 feat: utilize blob metadata properties (#5767)
* feat: utilize blob metadata properties

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* Update src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-03-26 02:47:20 +00:00
Ruihang Xia
e81213728b feat: add/correct some kafka-related metrics (#5757)
* feat: add/correct some kafka-related metrics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix dumb issues

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* per-partition produce latency

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-25 19:16:39 +00:00
Yingwen
d88482b996 feat: support explain analyze verbose (#5763)
* Add explain_verbose to QueryContext

* feat: fmt plan by display type

* feat: update proto to use ExplainOptions

* feat: display more info in verbose mode

* chore: fix clippy

* test: add sqlness test

* test: update sqlness result

* chore: update proto version

* chore: Simplify QueryContextBuilder::explain_options using get_or_insert_default
2025-03-25 03:48:36 +00:00
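
For illustration, the verbose form added here would be invoked roughly as follows (the query body is arbitrary):

```sql
-- EXPLAIN ANALYZE executes the query and reports runtime metrics; VERBOSE is
-- assumed to expose the additional detail described in the commit messages above.
EXPLAIN ANALYZE VERBOSE
SELECT host, avg(cpu) FROM monitor GROUP BY host;
```
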
discord9
3b547d9d13 feat(flow): frontend client for handle sql (#5761)
* feat: frontend client for handle sql

* refactor: per review

* chore: revert unnecessary change
2025-03-25 02:26:04 +00:00
Yuhan Wang
278553fc3f docs: rfc for wal purge (#5475)
* docs: add rfc for wal purge

* docs: fix typo

* docs: follow name format

* chore: all in heartbeat

* fix: unneeded sentence in rfc

* chore: apply comments
2025-03-24 12:07:50 +00:00
Yuhan Wang
a36901a653 chore: ut and some fix (#5752)
* chore: ut and some fix

* fix: remove NOWAIT

* refactor: use param for meta lease ttl

* chore: feature gate

* chore: add comments

* chore: apply comments

* fix: advice by claude 3.7 sonnet

* chore: apply comments
2025-03-24 09:05:06 +00:00
discord9
c4ac242c69 fix: properly give placeholder types (#5760)
* fix: properly give placeholder types

* chore: update sqlness
2025-03-24 08:41:32 +00:00
fys
9f9307de73 refactor: make frontend instance clear (#5754)
* refactor: the startup of frontend

* remove unnecessary error type

* fix: cr

* remove unnecessary trait FrontendInstance

* fix: cr

* fix: cr

* adjust the startup order of services
2025-03-24 06:08:02 +00:00
shuiyisong
c77ce958a3 chore: support custom time index selector for identity pipeline (#5750)
* chore: minor refactor

* chore: minor refactor

* chore: support custom ts for identity pipeline

* chore: fix clippy

* chore: minor refactor & update tests

* chore: use ref on identity pipeline param
2025-03-24 04:27:22 +00:00
discord9
5ad2d8b3b8 fix: handle nullable default value (#5747)
* fix: handle nullable default value

* chore: update sqlness
2025-03-24 02:38:26 +00:00
Ruihang Xia
2724c3c142 feat: support regex in simple filter (#5753)
* feat: support regex in simple filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/common/recordbatch/src/filter.rs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-03-24 02:10:42 +00:00
Weny Xu
4eb0771afe feat: introduce install_manifest_to for RegionManifestManager (#5742)
* feat: introduce `install_manifest_changes` for `RegionManifestManager`

* chore: rename function to `install_manifest_to`

* Apply suggestions from code review

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

* chore: add comments

* chore: add comments

* chore: update logic and add comments

* chore: add more check

* Update src/mito2/src/manifest/manager.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: jeremyhi <jiachun_feng@proton.me>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-03-21 05:19:23 +00:00
Yohan Wal
a0739a96e4 fix: wrap table name with `` (#5748)
* fix: wrap table name with quotes

* fix: minor fix
2025-03-20 09:38:54 +00:00
Ning Sun
77ccf1eac8 chore: add datanode write rows to grafana dashboard (#5745) 2025-03-20 03:39:40 +00:00
Yohan Wal
1dc4a196bf feat: add mysql election logic (#5694)
* feat: add mysql election

* feat: add mysql election

* chore: fix deps

* chore: fix deps

* fix: duplicate container

* fix: duplicate setup for sqlness

* fix: call once

* fix: do not use NOWAIT for mysql 5.7

* chore: apply comments

* fix: no parallel sqlness for mysql

* chore: comments and minor revert

* chore: apply comments

* chore: apply comments

* chore: add backticks to table name

* ci: use 2 metasrv to detect election bugs

* refactor: better election logic

* chore: apply comments

* chore: apply comments

* feat: version check before startup
2025-03-19 11:31:18 +00:00
shuiyisong
2431cd3bdf chore: merge error files under pipeline crate (#5738) 2025-03-19 09:55:51 +00:00
discord9
cd730e0486 fix: mysql prepare limit&offset param (#5734)
* fix: prepare limit&offset param

* test: sqlness

* chore: per review

* chore: per review
2025-03-19 07:49:26 +00:00
zyy17
a19441bed8 refactor: remove trace id from primary key in opentelemetry_traces table (#5733)
* refactor: remove trace id in primary key

* refactor: remove trace id in primary key in v0 model

* refactor: add span id in v1

* fix: integration test
2025-03-19 06:17:58 +00:00
dennis zhuang
162e3b8620 docs: adds news to readme (#5735) 2025-03-19 01:33:46 +00:00
Wenbin
83642dab87 feat: remove duplicated peer definition (#5728)
* remove duplicate peer

* fix
2025-03-18 11:30:25 +00:00
discord9
46070958c9 fix: mysql prepare bool value (#5732) 2025-03-18 10:50:45 +00:00
pikady
eea8b1c730 feat: add vec_kth_elem function (#5674)
* feat: add vec_kth_elem function

Signed-off-by: pikady <2652917633@qq.com>

* code format

Signed-off-by: pikady <2652917633@qq.com>

* add test sql

Signed-off-by: pikady <2652917633@qq.com>

* change indexing from 1-based to 0-based

Signed-off-by: pikady <2652917633@qq.com>

* improve code formatting and correct spelling errors

Signed-off-by: pikady <2652917633@qq.com>

* Update tests/cases/standalone/common/function/vector/vector.sql

I noticed the two lines are identical. Could you clarify the reason for the change? Thanks!

Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: pikady <2652917633@qq.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2025-03-18 07:25:53 +00:00
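
A small usage sketch of the new function, reflecting the 0-based indexing noted above; passing the vector as a string literal relies on an assumed implicit conversion:

```sql
-- Assumed signature: vec_kth_elem(vector, k) with k starting at 0,
-- so index 1 selects the second element (2.0) here.
SELECT vec_kth_elem('[1.0, 2.0, 3.0]', 1);
```
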
Ning Sun
1ab4ddab8d feat: update pipeline header name to x-greptime-pipeline-name (#5710)
* feat: update pipeline header name to x-greptime-pipeline-name

* refactor: update string_value_from_header
2025-03-18 02:39:54 +00:00
Ning Sun
9e63018198 feat: disable http timeout (#5721)
* feat: update to disable http timeout by default

* feat: make http timeout default to 0

* test: correct test case

* chore: generate new config doc

* test: correct tests
2025-03-18 01:18:56 +00:00
discord9
594bec8c36 feat: load manifest manually in mito engine (#5725)
* feat: load manifest and some

* chore: per review
2025-03-18 01:18:08 +00:00
localhost
1586732d20 chore: add some method for log query handler (#5685)
* chore: add some method for log query handler

* chore: make clippy happy

* chore: add some method for log query handler

* Update src/frontend/src/instance/logs.rs

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-03-17 18:36:43 +00:00
yihong
16fddd97a7 chore: revert commit "update flate2 version (#5706)" (#5715)
Revert "chore: update flate2 version (#5706)"

This reverts commit a5df3954f3.
2025-03-17 12:16:26 +00:00
Ning Sun
2260782c12 refactor: update jaeger api implementation for new trace modeling (#5655)
* refactor: update jaeger api implementation

* test: add tests for v1 data model

* feat: customize trace table name

* fix: update column requirements to use Column type instead of String

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: lint fix

* refactor: accumulate resource attributes for v1

* fix: add empty check for additional string

* feat: add table option to mark data model version

* fix: do not overwrite all tags

* feat: use table option to mark table data model version and process accordingly

* chore: update comments to reflect query changes

* feat: use header for jaeger table name

* feat: update index for service_name, drop index for span_name

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: zyy17 <zyylsxm@gmail.com>
2025-03-17 07:31:32 +00:00
Sicong Hu
09dacc8e9b feat: add vec_subvector function (#5683)
* feat: add vec_subvector function

* change datatype of arg1 and arg2 from u64 to i64

* add sqlness test

* improve description comments
2025-03-16 10:43:53 +00:00
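
Similarly, a hedged sketch of `vec_subvector`; the half-open `[start, end)` range and the string-literal vector input are assumptions, while the i64 bounds follow the commit notes:

```sql
-- Assumed signature: vec_subvector(vector, start, end), here returning [2.0, 3.0].
SELECT vec_subvector('[1.0, 2.0, 3.0, 4.0]', 1, 3);
```
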
Ruihang Xia
dec439db2b chore: bump version to 0.14.0 (#5711)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-16 09:58:19 +00:00
Ning Sun
dc76571166 feat: move default data path from /tmp to current directory (#5719) 2025-03-16 09:57:46 +00:00
shuiyisong
3e17f8c426 chore: use Bytes instead of string in bulk ingestion (#5717)
chore: use bytes instead of string in bulk log ingestion
2025-03-14 09:31:35 +00:00
yihong
a5df3954f3 chore: update flate2 version (#5706)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-03-14 02:15:27 +00:00
Ruihang Xia
32fd850c20 perf: support in list in simple filter (#5709)
* feat: support in list in simple filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-14 01:08:29 +00:00
shuiyisong
2bfdae4f8f feat: add simple extract processor (#5688)
* feat: add simple extract processor

* chore: add test

* chore: add license header

* chore: minor update
2025-03-13 09:19:58 +00:00
shuiyisong
fcb898e9a4 chore: support inverted index in pipeline (#5700)
chore: rebase main
2025-03-13 08:30:29 +00:00
Ning Sun
8fa2fdfc42 feat: make empty parent_span_id null for v1 (#5690) 2025-03-13 07:48:15 +00:00
shuiyisong
4dc1a1d60f chore: support tag in transform (#5701)
chore: support tag in transform to specify tag
2025-03-13 07:27:12 +00:00
Lei, HUANG
e375a18011 fix: conversion from TableMeta to TableMetaBuilder (#5693)
* refactor: use proc macro to generate conversion between TableMeta and TableMetaBuilder

* chore: format

* fix/partition-key-index:
 ### Update `TableMeta` and Add Partition and Alter Table Tests

 - **`metadata.rs`**: Modified `new_meta_builder` method in `TableMeta` to manually remove `value_indices` by setting it to `None` in the `TableMetaBuilder`.
 - **`partition_and_alter.result` & `partition_and_alter.sql`**: Added new test cases for creating, inserting, selecting, altering, and dropping a partitioned table `molestiAe`. These tests verify partitioning on the `sImiLiQUE` column and altering the table with a TTL
 setting.

fix/partition-key-index:
 ### Remove Obsolete TODO Comment in `metadata.rs`

 - Removed an outdated TODO comment regarding the `new_meta_builder` function in `src/table/src/metadata.rs`.

chore: check struct name in derive_meta_builder

refactor: Simplify TableMeta struct name check in macro

refactor: Improve ToMetaBuilder derive macro validation and error handling

refactor: Enforce ToMetaBuilder macro for table::metadata::TableMeta struct

* fix/partition-key-index:
 Update `partition_and_alter.sql` to modify TTL setting

 - Modified the TTL setting for the `molestiAe` table to '1d' in `partition_and_alter.sql`.

* fix: sqlness

* fix/partition-key-index:
 ### Update `TableMeta` and Test File Structure

 - **Enhancement**: Added a note in `metadata.rs` to always use `new_meta_builder` for creating `TableMetaBuilder`.
 - **Refactor**: Renamed test result and SQL files for better organization:
   - `partition_and_alter.result` to `alter/partition_and_alter.result`
   - `partition_and_alter.sql` to `alter/partition_and_alter.sql`

* refactor: Simplify `derive_meta_builder` by initializing fields with `Default::default()`

* fix/partition-key-index:
 ### Commit Summary

 - **Refactor `TableMetaBuilder` Initialization**:
   - Replaced `TableMetaBuilder::default()` with `TableMetaBuilder::empty()` across multiple files for initializing `TableMetaBuilder` instances.
   - Affected files include:
     - `src/catalog/src/system_schema.rs`
     - `src/common/meta/src/key/test_utils.rs`
     - `src/operator/src/req_convert/insert/fill_impure_default.rs`
     - `src/query/src/log_query/planner.rs`
     - `src/query/src/promql/planner.rs`
     - `src/query/src/range_select/plan_rewrite.rs`
     - `src/query/src/sql/show_create_table.rs`
     - `src/table/src/test_util/memtable.rs`
     - `src/table/src/test_util/table_info.rs`

 - **Enhance `TableMetaBuilder`**:
   - Added `custom_constructor` to `TableMeta` and implemented an `empty` method for `TableMetaBuilder`.
   - Modified `TableMetaBuilder` to include a `new_external_table` method with default values.
   - Updated `src/table/src/metadata.rs` to reflect these changes.

 - **Add Testing Feature**:
   - Introduced a conditional compilation for `test_util` in `src/table/src/lib.rs` to include testing utilities when the `testing` feature is enabled.

 - **Update `Cargo.toml`**:
   - Enabled the `testing` feature for the `table` module in `src/common/meta/Cargo.toml`.

 - **Modify `NumbersTable` Initialization**:
   - Replaced `TableMetaBuilder` with direct `TableMeta` struct initialization in `src/table/src/table/numbers.rs`.

 - **Test Result Update**:
   - Updated test results in `tests/cases/standalone/common/alter/partition_and_alter.result` to reflect changes in table meta handling.

* fix: rename default to empty

* docs: add doc for TableMetaBuilder::empty

* chore: Update src/table/src/metadata.rs

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-03-13 06:30:16 +00:00
shuiyisong
e0ff701e51 chore: support application/x-ndjson for log ingest (#5697)
chore: support ndjson content type
2025-03-13 04:29:22 +00:00
Yingwen
25645a3303 feat: expose virtual_host_style config for s3 storage (#5696)
* feat: expose enable_virtual_host_style for s3 storage

* docs: update examples

* test: fix config test
2025-03-12 13:46:56 +00:00
Ruihang Xia
b32ea7d84c feat: add Docker image tag information to step summary in dev-build workflow (#5692)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-03-12 13:45:19 +00:00
discord9
f164f6eaf3 fix: FlowInfoValue's compatibility (#5695) 2025-03-12 09:02:48 +00:00
Yohan Wal
af1920defc feat: add mysql kvbackend (#5528)
* feat: add mysql kvbackend txn support

* chore: error handling

* chore: follow review comments

* chore: follow review comments

* chore: follow review comments

* revert: mysql QAQ

* revert: revert changes to sqls

This reverts commit cf98c50dd9.

* chore: add comments
2025-03-12 06:52:56 +00:00
Lei, HUANG
7c97fae522 chore: check region wal provider on startup to avoid inconsistency (#5687) 2025-03-11 17:51:18 +00:00
AntiTopQuark
b8070adc3a feat: enhancement information_schema.flows (#5623)
* feat: enhancement information_schema.flows

* feat: enhancement information_schema.flows

* u

* u

* u

* u

* u

* u

* u

* u

* u

* update

* update

* update

* delete unused code

* u

* u

* Update src/flow/src/adapter/worker.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/common/meta/src/key/flow/flow_state.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/common/meta/src/key/flow/flow_info.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/common/meta/src/key/flow/flow_state.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* Update src/common/meta/src/key/flow/flow_info.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* u

* u

* u

* u

* u

* u

* chore: fix sqlness

* chore: update proto

* fix: remove date time

* fix: update result of information_schema test

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: discord9 <discord9@163.com>
2025-03-11 15:08:10 +00:00
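
For reference, the enhanced table can be inspected directly; no specific columns are assumed beyond what the enhancement exposes:

```sql
-- Inspect flow metadata and state through information_schema.
SELECT * FROM information_schema.flows;
```
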
yihong
11bfb17328 feat: support export command export data to s3 (#5585)
* feat: s3 first step

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: finish s3 export

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: drop useless comment

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: forget to create_database and copy_from

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comment use opendal Fs

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* refactor: make the export mess code clean

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-03-11 08:56:59 +00:00
jeremyhi
1d87bd2d43 feat: alter region follower (#5676)
* feat: add region follower manager

* feat: add region procedure

* refactor: make add/remove follower procedures look nice

* feat: add region follower procedure

* chore: undo some change, possibly made by AI

* feat: on prepare checking

* feat: on update metadata

* feat: on broadcast

* chore: unit test

* feat: add remove follower operation

* feat: add or remove region follower procedure

* chore: ut

* chore: rename

* chore: by comment

* chore: by comment

---------

Co-authored-by: jeremy <jeremy@greptime.local>
2025-03-11 08:44:50 +00:00
jeremyhi
ababeaf538 chore: make memorykv write happily (#5686)
chore: make memorykv write happily
2025-03-11 07:37:14 +00:00
Lin Yihai
2cbf51d0be refactor!: Remove Value::DateTime and ValueRef::DateTime. (#5616)
* refactor: Remove Value::DateTime and ValueRef::DateTime

* fix: don't panic if arrow cast field.

* fix: map `ColumnDataType::Datetime` to `ConcreteDataType::timestamp_microsecond_datatype`

* fix: Map `ValueData::DatetimeValue` correctly.

* refactor: Replace `datetime` with `timestamp_micro_second`
2025-03-11 07:03:27 +00:00
Yingwen
3059b04b19 feat: add a gauge for download tasks (#5681) 2025-03-11 06:55:13 +00:00
Yingwen
352b197be4 feat: add hint for logical region in RegionScanner (#5684)
* feat: add a flag to check logical region

* feat: sets logical region hint in metric engine

* refactor: rename to logical_region
2025-03-11 06:34:39 +00:00
Ning Sun
d0254f9705 feat: update promql-parser to 0.5 for duration literal (#5682) 2025-03-11 06:27:36 +00:00
Ning Sun
8a86903c73 feat: add description for each grafana panel (#5673)
* feat: add description for each grafana panel

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: unit of write stall

* feat: add jq script to summary the grafana dashboard

* fix: update description

* ci: add ci step to valid grafana and send summary as comment

* ci: update check

* ci: update ci

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-03-11 06:16:49 +00:00
Weny Xu
0bd322a078 perf(prom): optimize label values query (#5653)
perf: optimize label values query
2025-03-10 13:20:47 +00:00
discord9
3811e3f632 feat: also get index file&expose mito in metrics (#5680)
* feat: download index file too

* feat: expose mito in metrics

* chore: fmt
2025-03-10 13:07:08 +00:00
localhost
c14aa176b5 chore: impl ref and ref_mut for json like (#5679)
* chore: impl ref and ref_mut for json like

* chore: add code source
2025-03-10 10:43:15 +00:00
Lei, HUANG
a922dcd9df refactor(mito): move wal sync task to background (#5677)
chore/move-wal-sync-to-bg:
 ### Refactor Log Store Task Management

 - **Error Handling Enhancements**: Updated error handling for task management in `error.rs` by renaming `StartGcTask` and `StopGcTask` to `StartWalTask` and `StopWalTask`, respectively, and added a `name` field for more descriptive error messages.
 - **Task Management Improvements**: Introduced `SyncWalTaskFunction` in `log_store.rs` to handle periodic synchronization of WAL tasks, replacing the previous atomic-based sync logic.
 - **Backend Adjustments**: Modified `backend.rs` to use the new `StartWalTaskSnafu` for starting tasks, ensuring consistency with the updated error handling approach.
2025-03-10 08:22:35 +00:00
dennis zhuang
530ff53422 feat(promql): supports quantile and count_values (#5652)
* feat(promql): supports quantile

* fix: merge_batch

* chore: sqlness test

* test: unit tests

* feat: implements count_values

* fix: typo

* refactor: planner

* chore: apply review suggestions

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2025-03-10 06:41:40 +00:00
Ruihang Xia
73ca39f37e feat: time series distribution in scanner (#5675)
* define distribution

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: SeqScan support per series distribution

* probe distribution

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* reverse sort order

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more strict check

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* change null's ordering

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2025-03-10 05:43:17 +00:00
Yingwen
0acc6b0354 fix: correct stalled count (#5678) 2025-03-10 04:25:38 +00:00
Zhenchi
face361fcb feat: introduce roaring bitmap to optimize sparse value scenarios (#5603)
* feat: introduce roaring bitmap to optimize sparse value scenarios

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix taplo

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-03-10 04:24:08 +00:00
532 changed files with 25004 additions and 10625 deletions


@@ -52,7 +52,7 @@ runs:
uses: ./.github/actions/build-greptime-binary
with:
base-image: ubuntu
features: servers/dashboard,pg_kvbackend
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }}
@@ -70,7 +70,7 @@ runs:
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
with:
base-image: centos
features: servers/dashboard,pg_kvbackend
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }}


@@ -47,7 +47,6 @@ runs:
shell: pwsh
run: make test sqlness-test
env:
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
RUST_BACKTRACE: 1
SQLNESS_OPTS: "--preserve-state"


@@ -8,7 +8,7 @@ inputs:
default: 2
description: "Number of Datanode replicas"
meta-replicas:
default: 1
default: 2
description: "Number of Metasrv replicas"
image-registry:
default: "docker.io"


@@ -238,6 +238,13 @@ jobs:
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images.
- name: Echo Docker image tag to step summary
run: |
echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
- name: Set build result
id: set-build-result


@@ -111,7 +111,7 @@ jobs:
- name: Build greptime binaries
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
- name: Pack greptime binaries
shell: bash
run: |
@@ -270,7 +270,7 @@ jobs:
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
- name: Pack greptime binary
shell: bash
run: |
@@ -576,9 +576,12 @@ jobs:
- name: "Remote WAL"
opts: "-w kafka -k 127.0.0.1:9092"
kafka: true
- name: "Pg Kvbackend"
- name: "PostgreSQL KvBackend"
opts: "--setup-pg"
kafka: false
- name: "MySQL Kvbackend"
opts: "--setup-mysql"
kafka: false
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -687,7 +690,7 @@ jobs:
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Run nextest cases
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
@@ -704,6 +707,7 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
@@ -739,7 +743,7 @@ jobs:
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
@@ -755,6 +759,7 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"

.github/workflows/grafana.yml (new file, +52 lines)

@@ -0,0 +1,52 @@
name: Check Grafana Panels
on:
pull_request:
branches:
- main
paths:
- 'grafana/**' # Trigger only when files under the grafana/ directory change
jobs:
check-panels:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout repository
uses: actions/checkout@v4
# Install jq (required for the script)
- name: Install jq
run: sudo apt-get install -y jq
# Make the check.sh script executable
- name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY


@@ -107,7 +107,6 @@ jobs:
CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}


@@ -91,7 +91,7 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.13.0
NEXT_RELEASE_VERSION: v0.14.0
jobs:
allocate-runners:

.gitignore (+3 lines)

@@ -54,3 +54,6 @@ tests-fuzz/corpus/
# Nix
.direnv
.envrc
## default data home
greptimedb_data

Cargo.lock (987 lines changed; diff suppressed because it is too large)


@@ -29,6 +29,7 @@ members = [
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/session",
"src/common/substrait",
"src/common/telemetry",
"src/common/test-util",
@@ -67,7 +68,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.13.0"
version = "0.14.0"
edition = "2021"
license = "Apache-2.0"
@@ -88,7 +89,7 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
#
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.3"
aquamarine = "0.6"
arrow = { version = "53.0.0", features = ["prettyprint"] }
arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "53.0"
@@ -99,9 +100,9 @@ async-trait = "0.1"
# Remember to update axum-extra, axum-macros when updating axum
axum = "0.8"
axum-extra = "0.10"
axum-macros = "0.4"
axum-macros = "0.5"
backon = "1"
base64 = "0.21"
base64 = "0.22"
bigdecimal = "0.4.2"
bitflags = "2.4.1"
bytemuck = "1.12"
@@ -111,7 +112,7 @@ chrono-tz = "0.10.1"
clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "5.4"
dashmap = "6.1"
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
@@ -121,31 +122,31 @@ datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", r
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
deadpool = "0.10"
deadpool-postgres = "0.12"
derive_builder = "0.12"
deadpool = "0.12"
deadpool-postgres = "0.14"
derive_builder = "0.20"
dotenv = "0.15"
etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "fb8e20ce29afd81835e3ea3c1164c8ce10de2c65" }
hex = "0.4"
http = "1"
humantime = "2.1"
humantime-serde = "1.1"
hyper = "1.1"
hyper-util = "0.1"
itertools = "0.10"
itertools = "0.14"
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
lazy_static = "1.4"
local-ip-address = "0.6"
loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
mockall = "0.11.4"
mockall = "0.13"
moka = "0.12"
nalgebra = "0.33"
notify = "6.1"
notify = "8.0"
num_cpus = "1.16"
once_cell = "1.18"
opentelemetry-proto = { version = "0.27", features = [
@@ -160,13 +161,11 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
"ser",
], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
promql-parser = { version = "0.5", features = ["ser"] }
prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
ratelimit = "0.9"
rand = "0.9"
ratelimit = "0.10"
regex = "1.8"
regex-automata = "0.4"
reqwest = { version = "0.12", default-features = false, features = [
@@ -178,7 +177,7 @@ reqwest = { version = "0.12", default-features = false, features = [
rskafka = { git = "https://github.com/influxdata/rskafka.git", rev = "75535b5ad9bae4a5dbb582c82e44dfd81ec10105", features = [
"transport-tls",
] }
rstest = "0.21"
rstest = "0.25"
rstest_reuse = "0.7"
rust_decimal = "1.33"
rustc-hash = "2.0"
@@ -186,17 +185,24 @@ rustls = { version = "0.23.20", default-features = false } # override by patch,
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
shadow-rs = "0.38"
shadow-rs = "1.1"
simd-json = "0.15"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
sqlx = { version = "0.8", features = [
"runtime-tokio-rustls",
"mysql",
"postgres",
"chrono",
] }
sysinfo = "0.33"
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
"visitor",
"serde",
] } # on branch v0.44.x
strum = { version = "0.25", features = ["derive"] }
strum = { version = "0.27", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7"
@@ -243,6 +249,7 @@ common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" }
common-recordbatch = { path = "src/common/recordbatch" }
common-runtime = { path = "src/common/runtime" }
common-session = { path = "src/common/session" }
common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" }


@@ -6,7 +6,7 @@
</picture>
</p>
<h2 align="center">Unified & Cost-Effective Time Series Database for Metrics, Logs, and Events</h2>
<h2 align="center">Unified & Cost-Effective Observerability Database for Metrics, Logs, and Events</h2>
<div align="center">
<h3 align="center">
@@ -62,15 +62,19 @@
## Introduction
**GreptimeDB** is an open-source unified & cost-effective time-series database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
**GreptimeDB** is an open-source unified & cost-effective observerability database for **Metrics**, **Logs**, and **Events** (also **Traces** in plan). You can gain real-time insights from Edge to Cloud at Any Scale.
## News
**[GreptimeDB archives 1 billion cold run #1 in JSONBench!](https://greptime.com/blogs/2025-03-18-jsonbench-greptimedb-performance)**
## Why GreptimeDB
Our core developers have been building time-series data platforms for years. Based on our best practices, GreptimeDB was born to give you:
Our core developers have been building observerability data platforms for years. Based on our best practices, GreptimeDB was born to give you:
* **Unified Processing of Metrics, Logs, and Events**
GreptimeDB unifies time series data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
GreptimeDB unifies observerability data processing by treating all data - whether metrics, logs, or events - as timestamped events with context. Users can analyze this data using either [SQL](https://docs.greptime.com/user-guide/query-data/sql) or [PromQL](https://docs.greptime.com/user-guide/query-data/promql) and leverage stream processing ([Flow](https://docs.greptime.com/user-guide/flow-computation/overview)) to enable continuous aggregation. [Read more](https://docs.greptime.com/user-guide/concepts/data-model).
* **Cloud-native Distributed Database**
@@ -112,7 +116,7 @@ Start a GreptimeDB container with:
```shell
docker run -p 127.0.0.1:4000-4003:4000-4003 \
-v "$(pwd)/greptimedb:/tmp/greptimedb" \
-v "$(pwd)/greptimedb:./greptimedb_data" \
--name greptime --rm \
greptime/greptimedb:latest standalone start \
--http-addr 0.0.0.0:4000 \


@@ -12,7 +12,6 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
@@ -24,7 +23,7 @@
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
@@ -98,10 +97,11 @@
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, the procedure will be rejected. |
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}`. An empty string means disabling. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
@@ -181,7 +181,7 @@
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -222,7 +222,7 @@
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
@@ -279,7 +279,7 @@
| `datanode.client.connect_timeout` | String | `10s` | -- |
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -308,7 +308,7 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `data_home` | String | `./greptimedb_data/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
| `store_addrs` | Array | -- | Store server addresses, defaulting to the etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
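The two `store_addrs` formats described above would look roughly like this in TOML (connection details are placeholders):

```toml
# etcd as the metadata store
store_addrs = ["127.0.0.1:2379"]

# PostgreSQL as the metadata store
# store_addrs = ["password=password dbname=postgres user=postgres host=localhost port=5432"]
```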
@@ -328,6 +328,7 @@
| `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
| `procedure.max_metadata_value_size` | String | `1500KiB` | Automatically split large values.<br/>GreptimeDB procedures use etcd as the default metadata storage backend.<br/>The maximum size of any etcd request is 1.5 MiB.<br/>1500KiB = 1536KiB (1.5MiB) - 36KiB (reserved size of key).<br/>Comment out `max_metadata_value_size` to disable splitting large values (no limit). |
| `procedure.max_running_procedures` | Integer | `128` | Max running procedures.<br/>The maximum number of procedures that can be running at the same time.<br/>If the number of running procedures exceeds this limit, new procedures will be rejected. |
| `failure_detector` | -- | -- | -- |
| `failure_detector.threshold` | Float | `8.0` | The threshold value used by the failure detector to determine failure conditions. |
| `failure_detector.min_std_deviation` | String | `100ms` | The minimum standard deviation of the heartbeat intervals, used to calculate acceptable variations. |
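A hedged sketch of the `[procedure]` and `[failure_detector]` sections using the defaults listed above:

```toml
[procedure]
max_retry_times = 12
retry_delay = "500ms"
# Comment this out to disable splitting large values (no limit).
max_metadata_value_size = "1500KiB"
max_running_procedures = 128

[failure_detector]
threshold = 8.0
min_std_deviation = "100ms"
```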
@@ -352,7 +353,7 @@
| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -381,7 +382,6 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. |
| `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. |
| `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.<br/>It will block the datanode start if it can't receive leases in the heartbeat from metasrv. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
@@ -390,7 +390,7 @@
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
@@ -442,7 +442,7 @@
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries when reading the WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3'. It is configured by default when using object storage, and configuring it is recommended for better performance.<br/>A local file directory; defaults to `{data_home}`. An empty string disables the cache. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set a larger value. |
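For the Kafka-only `wal.*` options described above, a hedged example (other Kafka settings omitted):

```toml
[wal]
provider = "kafka"
dump_index_interval = "60s"
# Skip missing Kafka entries instead of failing replay; may cause data loss.
overwrite_entry_start_id = false
```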
@@ -522,7 +522,7 @@
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -551,7 +551,6 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `mode` | String | `distributed` | The running mode of the flownode. It can be `standalone` or `distributed`. |
| `node_id` | Integer | Unset | The flownode identifier and should be unique in the cluster. |
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
@@ -563,7 +562,7 @@
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.timeout` | String | `0s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `meta_client` | -- | -- | The metasrv client options. |
| `meta_client.metasrv_addrs` | Array | -- | The addresses of the metasrv. |
@@ -579,7 +578,7 @@
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |

View File

@@ -1,6 +1,3 @@
## The running mode of the datanode. It can be `standalone` or `distributed`.
mode = "standalone"
## The datanode identifier and should be unique in the cluster.
## @toml2docs:none-default
node_id = 42
@@ -27,7 +24,7 @@ max_concurrent_queries = 0
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "30s"
timeout = "0s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
@@ -119,7 +116,7 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "/tmp/greptimedb/wal"
dir = "./greptimedb_data/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
@@ -231,6 +228,7 @@ overwrite_entry_start_id = false
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage.
# [storage]
@@ -264,7 +262,7 @@ overwrite_entry_start_id = false
## The data storage options.
[storage]
## The working home directory.
data_home = "/tmp/greptimedb/"
data_home = "./greptimedb_data/"
## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
@@ -617,7 +615,7 @@ experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"
dir = "./greptimedb_data/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -1,6 +1,3 @@
## The running mode of the flownode. It can be `standalone` or `distributed`.
mode = "distributed"
## The flownode identifier and should be unique in the cluster.
## @toml2docs:none-default
node_id = 14
@@ -30,7 +27,7 @@ max_send_message_size = "512MB"
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "30s"
timeout = "0s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
@@ -76,7 +73,7 @@ retry_interval = "3s"
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"
dir = "./greptimedb_data/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default
@@ -121,4 +118,3 @@ sample_ratio = 1.0
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -26,7 +26,7 @@ retry_interval = "3s"
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "30s"
timeout = "0s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
@@ -189,7 +189,7 @@ tcp_nodelay = true
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"
dir = "./greptimedb_data/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -1,5 +1,5 @@
## The working home directory.
data_home = "/tmp/metasrv/"
data_home = "./greptimedb_data/metasrv/"
## The bind address of metasrv.
bind_addr = "127.0.0.1:3002"
@@ -79,6 +79,11 @@ retry_delay = "500ms"
## Comment out `max_metadata_value_size` to disable splitting large values (no limit).
max_metadata_value_size = "1500KiB"
## Max running procedures.
## The maximum number of procedures that can be running at the same time.
## If the number of running procedures exceeds this limit, new procedures will be rejected.
max_running_procedures = 128
# Failure detectors options.
[failure_detector]
@@ -177,7 +182,7 @@ backoff_deadline = "5mins"
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"
dir = "./greptimedb_data/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -1,6 +1,3 @@
## The running mode of the datanode. It can be `standalone` or `distributed`.
mode = "standalone"
## The default timezone of the server.
## @toml2docs:none-default
default_timezone = "UTC"
@@ -34,7 +31,7 @@ max_concurrent_queries = 0
## The address to bind the HTTP server.
addr = "127.0.0.1:4000"
## HTTP request timeout. Set to 0 to disable timeout.
timeout = "30s"
timeout = "0s"
## HTTP request body limit.
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
@@ -164,7 +161,7 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "/tmp/greptimedb/wal"
dir = "./greptimedb_data/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
@@ -302,6 +299,10 @@ purge_interval = "1m"
max_retry_times = 3
## Initial retry delay of procedures, increases exponentially
retry_delay = "500ms"
## Max running procedures.
## The maximum number of procedures that can be running at the same time.
## If the number of running procedures exceeds this limit, new procedures will be rejected.
max_running_procedures = 128
## flow engine options.
[flow]
@@ -318,6 +319,7 @@ retry_delay = "500ms"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage.
# [storage]
@@ -351,7 +353,7 @@ retry_delay = "500ms"
## The data storage options.
[storage]
## The working home directory.
data_home = "/tmp/greptimedb/"
data_home = "./greptimedb_data/"
## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
@@ -704,7 +706,7 @@ experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "/tmp/greptimedb/logs"
dir = "./greptimedb_data/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -25,7 +25,7 @@ services:
- --initial-cluster-state=new
- *etcd_initial_cluster_token
volumes:
- /tmp/greptimedb-cluster-docker-compose/etcd0:/var/lib/etcd
- ./greptimedb-cluster-docker-compose/etcd0:/var/lib/etcd
healthcheck:
test: [ "CMD", "etcdctl", "--endpoints=http://etcd0:2379", "endpoint", "health" ]
interval: 5s
@@ -68,12 +68,13 @@ services:
- datanode
- start
- --node-id=0
- --data-home=/greptimedb_data
- --rpc-bind-addr=0.0.0.0:3001
- --rpc-server-addr=datanode0:3001
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:5000
volumes:
- /tmp/greptimedb-cluster-docker-compose/datanode0:/tmp/greptimedb
- ./greptimedb-cluster-docker-compose/datanode0:/greptimedb_data
healthcheck:
test: [ "CMD", "curl", "-fv", "http://datanode0:5000/health" ]
interval: 5s

View File

@@ -3,7 +3,7 @@
This document introduces how to write fuzz tests in GreptimeDB.
## What is a fuzz test
A fuzz test is a tool that leverages deterministic random generation to assist in finding bugs. The goal of fuzz tests is to identify inputs generated by the fuzzer that cause system panics, crashes, or unexpected behaviors. We use [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz) to run our fuzz test targets.
## Why we need them
- Find bugs by leveraging random generation
@@ -13,7 +13,7 @@ Fuzz test is tool that leverage deterministic random generation to assist in fin
All fuzz test-related resources are located in the `/tests-fuzz` directory.
There are two types of resources: (1) fundamental components and (2) test targets.
### Fundamental components
They are located in the `/tests-fuzz/src` directory. The fundamental components define how to generate SQL statements (including dialects for different protocols) and how to validate execution results (e.g., column attribute validation).
### Test targets
@@ -21,25 +21,25 @@ They are located in the `/tests-fuzz/targets` directory, with each file represen
Figure 1 illustrates how the fundamental components of the fuzz test provide the ability to generate random SQL. The pipeline utilizes a Random Number Generator (Rng) to generate an Intermediate Representation (IR), then employs a DialectTranslator to produce the dialect for a specific protocol. Finally, the fuzz tests send the generated SQL via that protocol and verify that the execution results meet expectations.
```
Rng
 |
 v
ExprGenerator
 |
 v
Intermediate representation (IR)
 |
 +----------------------+----------------------+
 |                      |                      |
 v                      v                      v
MySQLTranslator   PostgreSQLTranslator   OtherDialectTranslator
 |                      |                      |
 v                      v                      v
SQL(MySQL Dialect)    .....                  .....
 |
 v
@@ -133,4 +133,4 @@ fuzz_target!(|input: FuzzInput| {
cargo fuzz run <fuzz-target> --fuzz-dir tests-fuzz
```
For more details, please refer to this [document](/tests-fuzz/README.md).
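As a concrete, if simplified, illustration of the Rng, IR, and DialectTranslator pipeline described above, a minimal self-contained sketch might look like the following. All type names here are illustrative and do not claim to match the actual components under `tests-fuzz/src`.

```rust
// A self-contained sketch of the Rng -> IR -> dialect pipeline above.
// `Ir`, `ExprGenerator`, and `MySqlTranslator` are illustrative names only.

struct Ir {
    table: &'static str,
    column: &'static str,
}

struct ExprGenerator;

impl ExprGenerator {
    // The real generator draws from a seeded RNG so runs are reproducible;
    // here the "randomness" is reduced to the seed's parity.
    fn generate(&self, seed: u64) -> Ir {
        let column = if seed % 2 == 0 { "host" } else { "cpu" };
        Ir { table: "metrics", column }
    }
}

struct MySqlTranslator;

impl MySqlTranslator {
    // A dialect translator renders the IR as SQL for a specific protocol.
    fn translate(&self, ir: &Ir) -> String {
        format!("SELECT {} FROM {}", ir.column, ir.table)
    }
}

fn main() {
    let ir = ExprGenerator.generate(42);
    // A fuzz target would send this SQL over the chosen protocol and
    // validate the execution result against expectations.
    println!("{}", MySqlTranslator.translate(&ir));
}
```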

View File

@@ -0,0 +1,77 @@
---
Feature Name: Remote WAL Purge
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/5474
Date: 2025-02-06
Author: "Yuhan Wang <profsyb@gmail.com>"
---
# Summary
This RFC proposes a method for purging remote WAL in the database.
# Motivation
Currently, only local WAL entries are purged when flushing; the remote WAL is never purged.
# Details
```mermaid
sequenceDiagram
Region0->>Kafka: Last entry id of the topic in use
Region0->>WALPruner: Heartbeat with last entry id
WALPruner->>+WALPruner: Time Loop
WALPruner->>+ProcedureManager: Submit purge procedure
ProcedureManager->>Region0: Flush request
ProcedureManager->>Kafka: Prune WAL entries
Region0->>Region0: Flush
```
## Steps
### Before purge
Before purging remote WAL, metasrv needs to know:
1. `last_entry_id` of each region.
2. `kafka_topic_last_entry_id`, which is the last entry id of the topic in use. It can be lazily updated and is needed when a region has an empty memtable.
3. Kafka topics that each region uses.
The states are maintained through:
1. Heartbeat: the datanode sends `last_entry_id` to metasrv in its heartbeat. For regions with an empty memtable, `last_entry_id` should equal `kafka_topic_last_entry_id`.
2. Metasrv maintains a topic-region map to know which region uses which topic.
`kafka_topic_last_entry_id` is maintained by the region itself. The region updates the value after `k` heartbeats if the memtable is still empty.
### Purge procedure
We can better handle locks by reusing the existing procedure framework. The flow is quite similar to the region migration procedure.
After a period of time, metasrv submits a purge procedure to the ProcedureManager. The purge applies to all topics.
The procedure is divided into following stages:
1. Preparation:
- Retrieve the `last_entry_id` of each region from the kvbackend.
- Choose regions with a relatively small `last_entry_id` as candidate regions; these are the regions that need a flush request.
2. Communication:
- Send flush requests to candidate regions.
3. Purge:
- Choose the entry id to prune up to for each topic: the smallest `last_entry_id - 1` among all regions using that topic (see the sketch after this list).
- Delete legacy entries in Kafka.
- Store the `last_purged_entry_id` in kvbackend. It should be locked to prevent other regions from replaying the purged entries.
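A minimal Rust sketch of the prune-point computation in the Purge stage, assuming illustrative in-memory maps for the topic-region mapping and the reported `last_entry_id`s (the real procedure reads these from heartbeats and the kvbackend, and its names may differ):

```rust
use std::collections::HashMap;

type RegionId = u64;
type EntryId = u64;

/// Illustrative only: compute, per topic, the highest entry id that is safe to prune.
/// `topic_regions` maps a topic to the regions using it; `last_entry_ids` holds the
/// `last_entry_id` each region reported via heartbeat.
fn prunable_entry_ids(
    topic_regions: &HashMap<String, Vec<RegionId>>,
    last_entry_ids: &HashMap<RegionId, EntryId>,
) -> HashMap<String, EntryId> {
    let mut result = HashMap::new();
    for (topic, regions) in topic_regions {
        // Prune only up to the smallest `last_entry_id - 1` among the topic's regions,
        // so no region loses entries it might still need to replay.
        if let Some(min_last) = regions.iter().filter_map(|r| last_entry_ids.get(r)).min() {
            if *min_last > 0 {
                result.insert(topic.clone(), *min_last - 1);
            }
        }
    }
    result
}

fn main() {
    let topics = HashMap::from([("topic_0".to_string(), vec![1, 2])]);
    let entries = HashMap::from([(1, 42), (2, 30)]);
    // Entries up to id 29 can be pruned for `topic_0`.
    println!("{:?}", prunable_entry_ids(&topics, &entries));
}
```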
### After purge
After purge, there may be some regions that have `last_entry_id` smaller than the entry we just deleted. It's legal since we only delete the entries that are not needed anymore.
When restarting a region, it should query the `last_purged_entry_id` from metasrv and replay from `min(last_entry_id, last_purged_entry_id)`.
### Error handling
No persisted states are needed since all states are maintained in kvbackend.
Retry when retrieving metadata from the kvbackend fails.
# Alternatives
The purge could be triggered by the accumulated size of the WAL entries instead of a fixed period of time, which may be more efficient.

grafana/check.sh (new executable file, 19 lines)
View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large

grafana/summary.sh (new executable file, 11 lines)
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
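# Emit one markdown table row per stat/timeseries panel: title, description, and each target's `expr` or `rawSql`.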
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'

View File

@@ -19,9 +19,7 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
use common_decimal::Decimal128;
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
};
use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::scalars::ScalarVector;
use datatypes::types::{
@@ -29,8 +27,8 @@ use datatypes::types::{
};
use datatypes::value::{OrderedF32, OrderedF64, Value};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
@@ -118,7 +116,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ColumnDataType::Json => ConcreteDataType::json_datatype(),
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
ColumnDataType::TimestampMillisecond => {
ConcreteDataType::timestamp_millisecond_datatype()
@@ -271,7 +269,6 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
ConcreteDataType::String(_) => ColumnDataType::String,
ConcreteDataType::Date(_) => ColumnDataType::Date,
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
ConcreteDataType::Timestamp(t) => match t {
TimestampType::Second(_) => ColumnDataType::TimestampSecond,
TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
@@ -476,7 +473,6 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
Value::Binary(val) => values.binary_values.push(val.to_vec()),
Value::Date(val) => values.date_values.push(val.val()),
Value::DateTime(val) => values.datetime_values.push(val.val()),
Value::Timestamp(val) => match val.unit() {
TimeUnit::Second => values.timestamp_second_values.push(val.value()),
TimeUnit::Millisecond => values.timestamp_millisecond_values.push(val.value()),
@@ -577,12 +573,11 @@ pub fn pb_value_to_value_ref<'a>(
ValueData::BinaryValue(bytes) => ValueRef::Binary(bytes.as_slice()),
ValueData::StringValue(string) => ValueRef::String(string.as_str()),
ValueData::DateValue(d) => ValueRef::Date(Date::from(*d)),
ValueData::DatetimeValue(d) => ValueRef::DateTime(DateTime::new(*d)),
ValueData::TimestampSecondValue(t) => ValueRef::Timestamp(Timestamp::new_second(*t)),
ValueData::TimestampMillisecondValue(t) => {
ValueRef::Timestamp(Timestamp::new_millisecond(*t))
}
ValueData::TimestampMicrosecondValue(t) => {
ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t) => {
ValueRef::Timestamp(Timestamp::new_microsecond(*t))
}
ValueData::TimestampNanosecondValue(t) => {
@@ -651,7 +646,6 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
ConcreteDataType::DateTime(_) => Arc::new(DateTimeVector::from_vec(values.datetime_values)),
ConcreteDataType::Timestamp(unit) => match unit {
TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
values.timestamp_second_values,
@@ -787,11 +781,6 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::DateTime(_) => values
.datetime_values
.into_iter()
.map(|v| Value::DateTime(v.into()))
.collect(),
ConcreteDataType::Date(_) => values
.date_values
.into_iter()
@@ -947,9 +936,6 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
Value::Date(v) => v1::Value {
value_data: Some(ValueData::DateValue(v.val())),
},
Value::DateTime(v) => v1::Value {
value_data: Some(ValueData::DatetimeValue(v.val())),
},
Value::Timestamp(v) => match v.unit() {
TimeUnit::Second => v1::Value {
value_data: Some(ValueData::TimestampSecondValue(v.value())),
@@ -1066,7 +1052,6 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
Value::Date(v) => Some(ValueData::DateValue(v.val())),
Value::DateTime(v) => Some(ValueData::DatetimeValue(v.val())),
Value::Timestamp(v) => Some(match v.unit() {
TimeUnit::Second => ValueData::TimestampSecondValue(v.value()),
TimeUnit::Millisecond => ValueData::TimestampMillisecondValue(v.value()),
@@ -1248,7 +1233,7 @@ mod tests {
ColumnDataTypeWrapper::date_datatype().into()
);
assert_eq!(
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ColumnDataTypeWrapper::datetime_datatype().into()
);
assert_eq!(
@@ -1339,10 +1324,6 @@ mod tests {
ColumnDataTypeWrapper::date_datatype(),
ConcreteDataType::date_datatype().try_into().unwrap()
);
assert_eq!(
ColumnDataTypeWrapper::datetime_datatype(),
ConcreteDataType::datetime_datatype().try_into().unwrap()
);
assert_eq!(
ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype()
@@ -1830,17 +1811,6 @@ mod tests {
]
);
test_convert_values!(
datetime,
vec![1.into(), 2.into(), 3.into()],
datetime,
vec![
Value::DateTime(1.into()),
Value::DateTime(2.into()),
Value::DateTime(3.into())
]
);
#[test]
fn test_vectors_to_rows_for_different_types() {
let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);

View File

@@ -15,10 +15,13 @@
use std::collections::HashMap;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexOptions,
SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions,
SkippingIndexOptions, SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
SKIPPING_INDEX_KEY,
};
use greptime_proto::v1::{
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
};
use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -132,14 +135,31 @@ pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<C
Ok((!options.options.is_empty()).then_some(options))
}
/// Tries to construct a `ColumnOptions` for inverted index.
pub fn options_from_inverted() -> ColumnOptions {
let mut options = ColumnOptions::default();
options
.options
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
options
}
/// Tries to construct a `FulltextAnalyzer` from the given analyzer.
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer {
match analyzer {
Analyzer::English => FulltextAnalyzer::English,
Analyzer::Chinese => FulltextAnalyzer::Chinese,
}
}
/// Tries to construct a `FulltextBackend` from the given backend.
pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend {
match backend {
PbFulltextBackend::Bloom => FulltextBackend::Bloom,
PbFulltextBackend::Tantivy => FulltextBackend::Tantivy,
}
}
/// Tries to construct a `SkippingIndexType` from the given skipping index type.
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
match skipping_index_type {
@@ -151,7 +171,7 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp
mod tests {
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::FulltextAnalyzer;
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
use super::*;
use crate::v1::ColumnDataType;
@@ -210,13 +230,14 @@ mod tests {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
})
.unwrap();
schema.set_inverted_index(true);
let options = options_from_column_schema(&schema).unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
);
assert_eq!(
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
@@ -230,11 +251,12 @@ mod tests {
enable: true,
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
backend: FulltextBackend::Bloom,
};
let options = options_from_fulltext(&fulltext).unwrap().unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
);
}

View File

@@ -77,7 +77,7 @@ trait SystemSchemaProviderInner {
fn system_table(&self, name: &str) -> Option<SystemTableRef>;
fn table_info(catalog_name: String, table: &SystemTableRef) -> TableInfoRef {
let table_meta = TableMetaBuilder::default()
let table_meta = TableMetaBuilder::empty()
.schema(table.schema())
.primary_key_indices(vec![])
.next_column_id(0)

View File

@@ -19,7 +19,7 @@ mod information_memory_table;
pub mod key_column_usage;
mod partitions;
mod procedure_info;
mod region_peers;
pub mod region_peers;
mod region_statistics;
mod runtime_metrics;
pub mod schemata;

View File

@@ -56,6 +56,8 @@ pub const TABLE_CATALOG: &str = "table_catalog";
pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name";
pub const COLUMN_NAME: &str = "column_name";
pub const REGION_ID: &str = "region_id";
pub const PEER_ID: &str = "peer_id";
const ORDINAL_POSITION: &str = "ordinal_position";
const CHARACTER_MAXIMUM_LENGTH: &str = "character_maximum_length";
const CHARACTER_OCTET_LENGTH: &str = "character_octet_length";
@@ -365,10 +367,6 @@ impl InformationSchemaColumnsBuilder {
self.numeric_scales.push(None);
match &column_schema.data_type {
ConcreteDataType::DateTime(datetime_type) => {
self.datetime_precisions
.push(Some(datetime_type.precision() as i64));
}
ConcreteDataType::Timestamp(ts_type) => {
self.datetime_precisions
.push(Some(ts_type.precision() as i64));

View File

@@ -28,16 +28,19 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datatypes::prelude::ConcreteDataType as CDT;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{
Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use crate::error::{
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu, Result,
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu,
Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::{Predicates, FLOWS};
use crate::system_schema::information_schema::InformationTable;
@@ -59,6 +62,10 @@ pub const SOURCE_TABLE_IDS: &str = "source_table_ids";
pub const SINK_TABLE_NAME: &str = "sink_table_name";
pub const FLOWNODE_IDS: &str = "flownode_ids";
pub const OPTIONS: &str = "options";
pub const CREATED_TIME: &str = "created_time";
pub const UPDATED_TIME: &str = "updated_time";
pub const LAST_EXECUTION_TIME: &str = "last_execution_time";
pub const SOURCE_TABLE_NAMES: &str = "source_table_names";
/// The `information_schema.flows` to provides information about flows in databases.
#[derive(Debug)]
@@ -99,6 +106,14 @@ impl InformationSchemaFlows {
(SINK_TABLE_NAME, CDT::string_datatype(), false),
(FLOWNODE_IDS, CDT::string_datatype(), true),
(OPTIONS, CDT::string_datatype(), true),
(CREATED_TIME, CDT::timestamp_millisecond_datatype(), false),
(UPDATED_TIME, CDT::timestamp_millisecond_datatype(), false),
(
LAST_EXECUTION_TIME,
CDT::timestamp_millisecond_datatype(),
true,
),
(SOURCE_TABLE_NAMES, CDT::string_datatype(), true),
]
.into_iter()
.map(|(name, ty, nullable)| ColumnSchema::new(name, ty, nullable))
@@ -170,6 +185,10 @@ struct InformationSchemaFlowsBuilder {
sink_table_names: StringVectorBuilder,
flownode_id_groups: StringVectorBuilder,
option_groups: StringVectorBuilder,
created_time: TimestampMillisecondVectorBuilder,
updated_time: TimestampMillisecondVectorBuilder,
last_execution_time: TimestampMillisecondVectorBuilder,
source_table_names: StringVectorBuilder,
}
impl InformationSchemaFlowsBuilder {
@@ -196,6 +215,10 @@ impl InformationSchemaFlowsBuilder {
sink_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
flownode_id_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
option_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
created_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
updated_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
last_execution_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
source_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -235,13 +258,14 @@ impl InformationSchemaFlowsBuilder {
catalog_name: catalog_name.to_string(),
flow_name: flow_name.to_string(),
})?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)
.await?;
}
self.finish()
}
fn add_flow(
async fn add_flow(
&mut self,
predicates: &Predicates,
flow_id: FlowId,
@@ -290,6 +314,36 @@ impl InformationSchemaFlowsBuilder {
input: format!("{:?}", flow_info.options()),
},
)?));
self.created_time
.push(Some(flow_info.created_time().timestamp_millis().into()));
self.updated_time
.push(Some(flow_info.updated_time().timestamp_millis().into()));
self.last_execution_time
.push(flow_stat.as_ref().and_then(|state| {
state
.last_exec_time_map
.get(&flow_id)
.map(|v| TimestampMillisecond::new(*v))
}));
let mut source_table_names = vec![];
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
source_table_names.extend(
catalog_manager
.tables_by_ids(&catalog_name, &schema_name, flow_info.source_table_ids())
.await?
.into_iter()
.map(|table| table.table_info().full_table_name()),
);
}
let source_table_names = source_table_names.join(",");
self.source_table_names.push(Some(&source_table_names));
Ok(())
}
@@ -307,6 +361,10 @@ impl InformationSchemaFlowsBuilder {
Arc::new(self.sink_table_names.finish()),
Arc::new(self.flownode_id_groups.finish()),
Arc::new(self.option_groups.finish()),
Arc::new(self.created_time.finish()),
Arc::new(self.updated_time.finish()),
Arc::new(self.last_execution_time.finish()),
Arc::new(self.source_table_names.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}

View File

@@ -20,7 +20,7 @@ use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
use super::table_names::*;
use crate::system_schema::utils::tables::{
bigint_column, datetime_column, string_column, string_columns,
bigint_column, string_column, string_columns, timestamp_micro_column,
};
const NO_VALUE: &str = "NO";
@@ -163,17 +163,17 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("EVENT_BODY"),
string_column("EVENT_DEFINITION"),
string_column("EVENT_TYPE"),
datetime_column("EXECUTE_AT"),
timestamp_micro_column("EXECUTE_AT"),
bigint_column("INTERVAL_VALUE"),
string_column("INTERVAL_FIELD"),
string_column("SQL_MODE"),
datetime_column("STARTS"),
datetime_column("ENDS"),
timestamp_micro_column("STARTS"),
timestamp_micro_column("ENDS"),
string_column("STATUS"),
string_column("ON_COMPLETION"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
datetime_column("LAST_EXECUTED"),
timestamp_micro_column("CREATED"),
timestamp_micro_column("LAST_ALTERED"),
timestamp_micro_column("LAST_EXECUTED"),
string_column("EVENT_COMMENT"),
bigint_column("ORIGINATOR"),
string_column("CHARACTER_SET_CLIENT"),
@@ -204,10 +204,10 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
bigint_column("INITIAL_SIZE"),
bigint_column("MAXIMUM_SIZE"),
bigint_column("AUTOEXTEND_SIZE"),
datetime_column("CREATION_TIME"),
datetime_column("LAST_UPDATE_TIME"),
datetime_column("LAST_ACCESS_TIME"),
datetime_column("RECOVER_TIME"),
timestamp_micro_column("CREATION_TIME"),
timestamp_micro_column("LAST_UPDATE_TIME"),
timestamp_micro_column("LAST_ACCESS_TIME"),
timestamp_micro_column("RECOVER_TIME"),
bigint_column("TRANSACTION_COUNTER"),
string_column("VERSION"),
string_column("ROW_FORMAT"),
@@ -217,9 +217,9 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
bigint_column("MAX_DATA_LENGTH"),
bigint_column("INDEX_LENGTH"),
bigint_column("DATA_FREE"),
datetime_column("CREATE_TIME"),
datetime_column("UPDATE_TIME"),
datetime_column("CHECK_TIME"),
timestamp_micro_column("CREATE_TIME"),
timestamp_micro_column("UPDATE_TIME"),
timestamp_micro_column("CHECK_TIME"),
string_column("CHECKSUM"),
string_column("STATUS"),
string_column("EXTRA"),
@@ -330,8 +330,8 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("SQL_DATA_ACCESS"),
string_column("SQL_PATH"),
string_column("SECURITY_TYPE"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
timestamp_micro_column("CREATED"),
timestamp_micro_column("LAST_ALTERED"),
string_column("SQL_MODE"),
string_column("ROUTINE_COMMENT"),
string_column("DEFINER"),
@@ -383,7 +383,7 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("ACTION_REFERENCE_NEW_TABLE"),
string_column("ACTION_REFERENCE_OLD_ROW"),
string_column("ACTION_REFERENCE_NEW_ROW"),
datetime_column("CREATED"),
timestamp_micro_column("CREATED"),
string_column("SQL_MODE"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),

View File

@@ -20,17 +20,18 @@ use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
use common_error::ext::BoxedError;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::datetime::DateTime;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMicrosecond;
use datatypes::value::Value;
use datatypes::vectors::{
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
UInt64VectorBuilder,
};
use futures::{StreamExt, TryStreamExt};
use partition::manager::PartitionInfo;
@@ -127,9 +128,21 @@ impl InformationSchemaPartitions {
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(
"create_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
"update_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
"check_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new(
"partition_comment",
@@ -200,7 +213,7 @@ struct InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder,
partition_ordinal_positions: Int64VectorBuilder,
partition_expressions: StringVectorBuilder,
create_times: DateTimeVectorBuilder,
create_times: TimestampMicrosecondVectorBuilder,
partition_ids: UInt64VectorBuilder,
}
@@ -220,7 +233,7 @@ impl InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -324,7 +337,7 @@ impl InformationSchemaPartitionsBuilder {
};
self.partition_expressions.push(expressions.as_deref());
self.create_times.push(Some(DateTime::from(
self.create_times.push(Some(TimestampMicrosecond::from(
table_info.meta.created_on.timestamp_millis(),
)));
self.partition_ids.push(Some(partition.id.as_u64()));
@@ -342,8 +355,8 @@ impl InformationSchemaPartitionsBuilder {
Arc::new(Int64Vector::from(vec![None])),
rows_num,
));
let null_datetime_vector = Arc::new(ConstantVector::new(
Arc::new(DateTimeVector::from(vec![None])),
let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
Arc::new(TimestampMicrosecondVector::from(vec![None])),
rows_num,
));
let partition_methods = Arc::new(ConstantVector::new(
@@ -373,8 +386,8 @@ impl InformationSchemaPartitionsBuilder {
null_i64_vector.clone(),
Arc::new(self.create_times.finish()),
// TODO(dennis): supports update_time
null_datetime_vector.clone(),
null_datetime_vector,
null_timestampmicrosecond_vector.clone(),
null_timestampmicrosecond_vector,
null_i64_vector,
null_string_vector.clone(),
null_string_vector.clone(),

View File

@@ -21,6 +21,7 @@ use common_error::ext::BoxedError;
use common_meta::rpc::router::RegionRoute;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::common::HashMap;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
@@ -43,16 +44,22 @@ use crate::kvbackend::KvBackendCatalogManager;
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const PEER_ID: &str = "peer_id";
pub const TABLE_CATALOG: &str = "table_catalog";
pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name";
pub const REGION_ID: &str = "region_id";
pub const PEER_ID: &str = "peer_id";
const PEER_ADDR: &str = "peer_addr";
const IS_LEADER: &str = "is_leader";
pub const IS_LEADER: &str = "is_leader";
const STATUS: &str = "status";
const DOWN_SECONDS: &str = "down_seconds";
const INIT_CAPACITY: usize = 42;
/// The `REGION_PEERS` table provides information about the region distribution and routes, including the following fields:
///
/// - `table_catalog`: the table catalog name
/// - `table_schema`: the table schema name
/// - `table_name`: the table name
/// - `region_id`: the region id
/// - `peer_id`: the region storage datanode peer id
/// - `peer_addr`: the region storage datanode gRPC peer address
@@ -77,6 +84,9 @@ impl InformationSchemaRegionPeers {
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(PEER_ID, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
@@ -134,6 +144,9 @@ struct InformationSchemaRegionPeersBuilder {
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
table_catalogs: StringVectorBuilder,
table_schemas: StringVectorBuilder,
table_names: StringVectorBuilder,
region_ids: UInt64VectorBuilder,
peer_ids: UInt64VectorBuilder,
peer_addrs: StringVectorBuilder,
@@ -152,6 +165,9 @@ impl InformationSchemaRegionPeersBuilder {
schema,
catalog_name,
catalog_manager,
table_catalogs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_schemas: StringVectorBuilder::with_capacity(INIT_CAPACITY),
table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -177,24 +193,28 @@ impl InformationSchemaRegionPeersBuilder {
let predicates = Predicates::from_scan_request(&request);
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let table_id_stream = catalog_manager
let table_stream = catalog_manager
.tables(&catalog_name, &schema_name, None)
.try_filter_map(|t| async move {
let table_info = t.table_info();
if table_info.table_type == TableType::Temporary {
Ok(None)
} else {
Ok(Some(table_info.ident.table_id))
Ok(Some((
table_info.ident.table_id,
table_info.name.to_string(),
)))
}
});
const BATCH_SIZE: usize = 128;
// Split table ids into chunks
let mut table_id_chunks = pin!(table_id_stream.ready_chunks(BATCH_SIZE));
// Split tables into chunks
let mut table_chunks = pin!(table_stream.ready_chunks(BATCH_SIZE));
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids.into_iter().collect::<Result<Vec<_>>>()?;
while let Some(tables) = table_chunks.next().await {
let tables = tables.into_iter().collect::<Result<HashMap<_, _>>>()?;
let table_ids = tables.keys().cloned().collect::<Vec<_>>();
let table_routes = if let Some(partition_manager) = &partition_manager {
partition_manager
@@ -206,7 +226,16 @@ impl InformationSchemaRegionPeersBuilder {
};
for (table_id, routes) in table_routes {
self.add_region_peers(&predicates, table_id, &routes);
// Safety: table_id is guaranteed to be in the map
let table_name = tables.get(&table_id).unwrap();
self.add_region_peers(
&catalog_name,
&schema_name,
table_name,
&predicates,
table_id,
&routes,
);
}
}
}
@@ -216,6 +245,9 @@ impl InformationSchemaRegionPeersBuilder {
fn add_region_peers(
&mut self,
table_catalog: &str,
table_schema: &str,
table_name: &str,
predicates: &Predicates,
table_id: TableId,
routes: &[RegionRoute],
@@ -231,13 +263,20 @@ impl InformationSchemaRegionPeersBuilder {
Some("ALIVE".to_string())
};
let row = [(REGION_ID, &Value::from(region_id))];
let row = [
(TABLE_CATALOG, &Value::from(table_catalog)),
(TABLE_SCHEMA, &Value::from(table_schema)),
(TABLE_NAME, &Value::from(table_name)),
(REGION_ID, &Value::from(region_id)),
];
if !predicates.eval(&row) {
return;
}
// TODO(dennis): adds followers.
self.table_catalogs.push(Some(table_catalog));
self.table_schemas.push(Some(table_schema));
self.table_names.push(Some(table_name));
self.region_ids.push(Some(region_id));
self.peer_ids.push(peer_id);
self.peer_addrs.push(peer_addr.as_deref());
@@ -245,11 +284,26 @@ impl InformationSchemaRegionPeersBuilder {
self.statuses.push(state.as_deref());
self.down_seconds
.push(route.leader_down_millis().map(|m| m / 1000));
for follower in &route.follower_peers {
self.table_catalogs.push(Some(table_catalog));
self.table_schemas.push(Some(table_schema));
self.table_names.push(Some(table_name));
self.region_ids.push(Some(region_id));
self.peer_ids.push(Some(follower.id));
self.peer_addrs.push(Some(follower.addr.as_str()));
self.is_leaders.push(Some("No"));
self.statuses.push(None);
self.down_seconds.push(None);
}
}
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.table_catalogs.finish()),
Arc::new(self.table_schemas.finish()),
Arc::new(self.table_names.finish()),
Arc::new(self.region_ids.finish()),
Arc::new(self.peer_ids.finish()),
Arc::new(self.peer_addrs.finish()),

View File

@@ -30,7 +30,8 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
UInt64VectorBuilder,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
@@ -105,9 +106,21 @@ impl InformationSchemaTables {
ColumnSchema::new(TABLE_ROWS, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(DATA_FREE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(CREATE_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(UPDATE_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(CHECK_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(
CREATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
UPDATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
CHECK_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(CHECKSUM, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(CREATE_OPTIONS, ConcreteDataType::string_datatype(), true),
@@ -182,9 +195,9 @@ struct InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder,
data_free: UInt64VectorBuilder,
auto_increment: UInt64VectorBuilder,
create_time: DateTimeVectorBuilder,
update_time: DateTimeVectorBuilder,
check_time: DateTimeVectorBuilder,
create_time: TimestampMicrosecondVectorBuilder,
update_time: TimestampMicrosecondVectorBuilder,
check_time: TimestampMicrosecondVectorBuilder,
table_collation: StringVectorBuilder,
checksum: UInt64VectorBuilder,
create_options: StringVectorBuilder,
@@ -219,9 +232,9 @@ impl InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),

View File

@@ -51,10 +51,10 @@ pub fn bigint_column(name: &str) -> ColumnSchema {
)
}
pub fn datetime_column(name: &str) -> ColumnSchema {
pub fn timestamp_micro_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
false,
)
}

View File

@@ -6,6 +6,7 @@ license.workspace = true
[features]
pg_kvbackend = ["common-meta/pg_kvbackend"]
mysql_kvbackend = ["common-meta/mysql_kvbackend"]
[lints]
workspace = true
@@ -43,6 +44,10 @@ futures.workspace = true
humantime.workspace = true
meta-client.workspace = true
nu-ansi-term = "0.46"
opendal = { version = "0.51.1", features = [
"services-fs",
"services-s3",
] }
query.workspace = true
rand.workspace = true
reqwest.workspace = true

View File

@@ -23,6 +23,8 @@ use common_error::ext::BoxedError;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
#[cfg(feature = "mysql_kvbackend")]
use common_meta::kv_backend::rds::MySqlStore;
#[cfg(feature = "pg_kvbackend")]
use common_meta::kv_backend::rds::PgStore;
use common_meta::peer::Peer;
@@ -63,6 +65,9 @@ pub struct BenchTableMetadataCommand {
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
postgres_addr: Option<String>,
#[cfg(feature = "mysql_kvbackend")]
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
count: u32,
}
@@ -86,6 +91,16 @@ impl BenchTableMetadataCommand {
kv_backend
};
#[cfg(feature = "mysql_kvbackend")]
let kv_backend = if let Some(mysql_addr) = &self.mysql_addr {
info!("Using mysql as kv backend");
MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
.await
.unwrap()
} else {
kv_backend
};
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
let tool = BenchTableMetadata {
@@ -162,7 +177,7 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
let mut region_routes = Vec::with_capacity(100);
let mut rng = rand::thread_rng();
let mut rng = rand::rng();
for region_id in regions.into_iter().map(u64::from) {
region_routes.push(RegionRoute {
@@ -173,7 +188,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer {
id: rng.gen_range(0..10),
id: rng.random_range(0..10),
addr: String::new(),
}),
follower_peers: vec![],

View File

@@ -276,6 +276,24 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("OpenDAL operator failed"))]
OpenDal {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: opendal::Error,
},
#[snafu(display("S3 config need be set"))]
S3ConfigNotSet {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Output directory not set"))]
OutputDirNotSet {
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -319,6 +337,9 @@ impl ErrorExt for Error {
| Error::BuildClient { .. } => StatusCode::Unexpected,
Error::Other { source, .. } => source.status_code(),
Error::OpenDal { .. } => StatusCode::Internal,
Error::S3ConfigNotSet { .. } => StatusCode::InvalidArguments,
Error::OutputDirNotSet { .. } => StatusCode::InvalidArguments,
Error::BuildRuntime { source, .. } => source.status_code(),

View File

@@ -21,15 +21,18 @@ use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use common_error::ext::BoxedError;
use common_telemetry::{debug, error, info};
use opendal::layers::LoggingLayer;
use opendal::{services, Operator};
use serde_json::Value;
use snafu::{OptionExt, ResultExt};
use tokio::fs::File;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::Semaphore;
use tokio::time::Instant;
use crate::database::{parse_proxy_opts, DatabaseClient};
use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
use crate::error::{
EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
SchemaNotFoundSnafu,
};
use crate::{database, Tool};
type TableReference = (String, String, String);
@@ -52,8 +55,9 @@ pub struct ExportCommand {
addr: String,
/// Directory to put the exported data. E.g.: /tmp/greptimedb-export
/// for local export.
#[clap(long)]
output_dir: String,
output_dir: Option<String>,
/// The name of the catalog to export.
#[clap(long, default_value = "greptime-*")]
@@ -101,10 +105,51 @@ pub struct ExportCommand {
/// Disable proxy server, if set, will not use any proxy.
#[clap(long)]
no_proxy: bool,
/// Whether to export data to s3.
#[clap(long)]
s3: bool,
/// The s3 bucket name
/// if s3 is set, this is required
#[clap(long)]
s3_bucket: Option<String>,
/// The s3 endpoint
/// if s3 is set, this is required
#[clap(long)]
s3_endpoint: Option<String>,
/// The s3 access key
/// if s3 is set, this is required
#[clap(long)]
s3_access_key: Option<String>,
/// The s3 secret key
/// if s3 is set, this is required
#[clap(long)]
s3_secret_key: Option<String>,
/// The s3 region
/// if s3 is set, this is required
#[clap(long)]
s3_region: Option<String>,
}
impl ExportCommand {
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
if self.s3
&& (self.s3_bucket.is_none()
|| self.s3_endpoint.is_none()
|| self.s3_access_key.is_none()
|| self.s3_secret_key.is_none()
|| self.s3_region.is_none())
{
return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
}
if !self.s3 && self.output_dir.is_none() {
return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
}
let (catalog, schema) =
database::split_database(&self.database).map_err(BoxedError::new)?;
let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
@@ -126,24 +171,43 @@ impl ExportCommand {
target: self.target.clone(),
start_time: self.start_time.clone(),
end_time: self.end_time.clone(),
s3: self.s3,
s3_bucket: self.s3_bucket.clone(),
s3_endpoint: self.s3_endpoint.clone(),
s3_access_key: self.s3_access_key.clone(),
s3_secret_key: self.s3_secret_key.clone(),
s3_region: self.s3_region.clone(),
}))
}
}
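For illustration only, a hypothetical invocation of the export tool with the new S3 flags, assuming the existing greptime cli export entry point and clap's default kebab-case flag names (the address, bucket, region, endpoint, and credentials below are placeholders):
greptime cli export --addr 127.0.0.1:4000 --s3 --s3-bucket my-bucket --s3-region us-east-1 --s3-endpoint https://s3.amazonaws.com --s3-access-key <access-key> --s3-secret-key <secret-key>
Without --s3, --output-dir stays required, matching the validation in build() above.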
#[derive(Clone)]
pub struct Export {
catalog: String,
schema: Option<String>,
database_client: DatabaseClient,
output_dir: String,
output_dir: Option<String>,
parallelism: usize,
target: ExportTarget,
start_time: Option<String>,
end_time: Option<String>,
s3: bool,
s3_bucket: Option<String>,
s3_endpoint: Option<String>,
s3_access_key: Option<String>,
s3_secret_key: Option<String>,
s3_region: Option<String>,
}
impl Export {
fn catalog_path(&self) -> PathBuf {
PathBuf::from(&self.output_dir).join(&self.catalog)
if self.s3 {
PathBuf::from(&self.catalog)
} else if let Some(dir) = &self.output_dir {
PathBuf::from(dir).join(&self.catalog)
} else {
unreachable!("catalog_path: output_dir must be set when not using s3")
}
}
async fn get_db_names(&self) -> Result<Vec<String>> {
@@ -300,19 +364,23 @@ impl Export {
let timer = Instant::now();
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let operator = self.build_operator().await?;
for schema in db_names {
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
let file = db_dir.join("create_database.sql");
let mut file = File::create(file).await.context(FileIoSnafu)?;
let create_database = self
.show_create("DATABASE", &self.catalog, &schema, None)
.await?;
file.write_all(create_database.as_bytes())
.await
.context(FileIoSnafu)?;
let file_path = self.get_file_path(&schema, "create_database.sql");
self.write_to_storage(&operator, &file_path, create_database.into_bytes())
.await?;
info!(
"Exported {}.{} database creation SQL to {}",
self.catalog,
schema,
self.format_output_path(&file_path)
);
}
let elapsed = timer.elapsed();
@@ -326,149 +394,267 @@ impl Export {
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let operator = Arc::new(self.build_operator().await?);
let mut tasks = Vec::with_capacity(db_names.len());
for schema in db_names {
let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let operator = operator.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let (metric_physical_tables, remaining_tables, views) =
self.get_table_list(&self.catalog, &schema).await?;
let table_count =
metric_physical_tables.len() + remaining_tables.len() + views.len();
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
let file = db_dir.join("create_tables.sql");
let mut file = File::create(file).await.context(FileIoSnafu)?;
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
let create_table = self.show_create("TABLE", &c, &s, Some(&t)).await?;
file.write_all(create_table.as_bytes())
.await
.context(FileIoSnafu)?;
}
for (c, s, v) in views {
let create_view = self.show_create("VIEW", &c, &s, Some(&v)).await?;
file.write_all(create_view.as_bytes())
.await
.context(FileIoSnafu)?;
let (metric_physical_tables, remaining_tables, views) = export_self
.get_table_list(&export_self.catalog, &schema)
.await?;
// Create directory if needed for file system storage
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
}
let file_path = export_self.get_file_path(&schema, "create_tables.sql");
let mut content = Vec::new();
// Add table creation SQL
for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
content.extend_from_slice(create_table.as_bytes());
}
// Add view creation SQL
for (c, s, v) in &views {
let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
content.extend_from_slice(create_view.as_bytes());
}
// Write to storage
export_self
.write_to_storage(&operator, &file_path, content)
.await?;
info!(
"Finished exporting {}.{schema} with {table_count} table schemas to path: {}",
self.catalog,
db_dir.to_string_lossy()
"Finished exporting {}.{schema} with {} table schemas to path: {}",
export_self.catalog,
metric_physical_tables.len() + remaining_tables.len() + views.len(),
export_self.format_output_path(&file_path)
);
Ok::<(), Error>(())
});
}
let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export schema job failed");
false
}
})
.count();
let success = self.execute_tasks(tasks).await;
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");
Ok(())
}
async fn build_operator(&self) -> Result<Operator> {
if self.s3 {
self.build_s3_operator().await
} else {
self.build_fs_operator().await
}
}
async fn build_s3_operator(&self) -> Result<Operator> {
let mut builder = services::S3::default().root("").bucket(
self.s3_bucket
.as_ref()
.expect("s3_bucket must be provided when s3 is enabled"),
);
if let Some(endpoint) = self.s3_endpoint.as_ref() {
builder = builder.endpoint(endpoint);
}
if let Some(region) = self.s3_region.as_ref() {
builder = builder.region(region);
}
if let Some(key_id) = self.s3_access_key.as_ref() {
builder = builder.access_key_id(key_id);
}
if let Some(secret_key) = self.s3_secret_key.as_ref() {
builder = builder.secret_access_key(secret_key);
}
let op = Operator::new(builder)
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn build_fs_operator(&self) -> Result<Operator> {
let root = self
.output_dir
.as_ref()
.context(OutputDirNotSetSnafu)?
.clone();
let op = Operator::new(services::Fs::default().root(&root))
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn export_database_data(&self) -> Result<()> {
let timer = Instant::now();
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let mut tasks = Vec::with_capacity(db_count);
let operator = Arc::new(self.build_operator().await?);
let with_options = build_with_options(&self.start_time, &self.end_time);
for schema in db_names {
let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let with_options_clone = with_options.clone();
let operator = operator.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
let with_options = match (&self.start_time, &self.end_time) {
(Some(start_time), Some(end_time)) => {
format!(
"WITH (FORMAT='parquet', start_time='{}', end_time='{}')",
start_time, end_time
)
}
(Some(start_time), None) => {
format!("WITH (FORMAT='parquet', start_time='{}')", start_time)
}
(None, Some(end_time)) => {
format!("WITH (FORMAT='parquet', end_time='{}')", end_time)
}
(None, None) => "WITH (FORMAT='parquet')".to_string(),
};
// Create directory if not using S3
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
}
let (path, connection_part) = export_self.get_storage_params(&schema);
// Execute COPY DATABASE TO command
let sql = format!(
r#"COPY DATABASE "{}"."{}" TO '{}' {};"#,
self.catalog,
schema,
db_dir.to_str().unwrap(),
with_options
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
info!("Executing sql: {sql}");
export_self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{} data to {}",
export_self.catalog, schema, path
);
info!("Executing sql: {sql}");
// Create copy_from.sql file
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
self.database_client.sql_in_public(&sql).await?;
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
export_self
.write_to_storage(
&operator,
&copy_from_path,
copy_database_from_sql.into_bytes(),
)
.await?;
info!(
"Finished exporting {}.{schema} data into path: {}",
self.catalog,
db_dir.to_string_lossy()
);
// The exported copy_from.sql
let copy_from_file = db_dir.join("copy_from.sql");
let mut writer =
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH (FORMAT='parquet');"#,
self.catalog,
"Finished exporting {}.{} copy_from.sql to {}",
export_self.catalog,
schema,
db_dir.to_str().unwrap()
export_self.format_output_path(&copy_from_path)
);
writer
.write(copy_database_from_sql.as_bytes())
.await
.context(FileIoSnafu)?;
writer.flush().await.context(FileIoSnafu)?;
info!("Finished exporting {}.{schema} copy_from.sql", self.catalog);
Ok::<(), Error>(())
})
});
}
let success = futures::future::join_all(tasks)
let success = self.execute_tasks(tasks).await;
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
}
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
format!("{}/{}/{}", self.catalog, schema, file_name)
}
fn format_output_path(&self, file_path: &str) -> String {
if self.s3 {
format!(
"s3://{}/{}",
self.s3_bucket.as_ref().unwrap_or(&String::new()),
file_path
)
} else {
format!(
"{}/{}",
self.output_dir.as_ref().unwrap_or(&String::new()),
file_path
)
}
}
async fn write_to_storage(
&self,
op: &Operator,
file_path: &str,
content: Vec<u8>,
) -> Result<()> {
op.write(file_path, content).await.context(OpenDalSnafu)
}
fn get_storage_params(&self, schema: &str) -> (String, String) {
if self.s3 {
let s3_path = format!(
"s3://{}/{}/{}/",
// Safety: s3_bucket is required when s3 is enabled
self.s3_bucket.as_ref().unwrap(),
self.catalog,
schema
);
// endpoint is optional
let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() {
format!(", ENDPOINT='{}'", endpoint)
} else {
String::new()
};
// Safety: All s3 options are required
let connection_options = format!(
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
self.s3_access_key.as_ref().unwrap(),
self.s3_secret_key.as_ref().unwrap(),
self.s3_region.as_ref().unwrap(),
endpoint_option
);
(s3_path, format!(" CONNECTION ({})", connection_options))
} else {
(
self.catalog_path()
.join(format!("{schema}/"))
.to_string_lossy()
.to_string(),
String::new(),
)
}
}
async fn execute_tasks(
&self,
tasks: Vec<impl std::future::Future<Output = Result<()>>>,
) -> usize {
futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export database job failed");
error!(e; "export job failed");
false
}
})
.count();
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
.count()
}
}
@@ -493,3 +679,15 @@ impl Tool for Export {
}
}
}
/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
let mut options = vec!["format = 'parquet'".to_string()];
if let Some(start) = start_time {
options.push(format!("start_time = '{}'", start));
}
if let Some(end) = end_time {
options.push(format!("end_time = '{}'", end));
}
options.join(", ")
}
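A minimal sketch (not part of the diff) of what build_with_options produces and how it feeds the COPY statement assembled in export_database_data above; the catalog, schema, bucket, time bound, and credentials are made up:
// Sketch only: values are hypothetical.
let with_options = build_with_options(&Some("2025-01-01 00:00:00".to_string()), &None);
assert_eq!(
    with_options,
    "format = 'parquet', start_time = '2025-01-01 00:00:00'"
);
// With s3 enabled, get_storage_params() adds an s3:// path plus a CONNECTION clause,
// so the statement sent to the database has this shape:
// COPY DATABASE "greptime"."public" TO 's3://my-bucket/greptime/public/'
//     WITH (format = 'parquet', start_time = '2025-01-01 00:00:00')
//     CONNECTION (ACCESS_KEY_ID='...', SECRET_ACCESS_KEY='...', REGION='us-east-1');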

View File

@@ -16,7 +16,6 @@
mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;
@@ -34,7 +33,6 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;
pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use enum_dispatch::enum_dispatch;
use rand::seq::SliceRandom;
use rand::seq::IndexedRandom;
#[enum_dispatch]
pub trait LoadBalance {
@@ -37,7 +37,7 @@ pub struct Random;
impl LoadBalance for Random {
fn get_peer<'a>(&self, peers: &'a [String]) -> Option<&'a String> {
peers.choose(&mut rand::thread_rng())
peers.choose(&mut rand::rng())
}
}
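The two call-site changes above, together with the gen_range change in the bench tool earlier in this diff, follow the rand 0.9 renames. A minimal sketch of the new API, assuming the rand 0.9 crate:
use rand::seq::IndexedRandom; // replaces SliceRandom for choose on slices
use rand::Rng;

fn pick<'a>(peers: &'a [String]) -> Option<&'a String> {
    let mut rng = rand::rng();              // replaces rand::thread_rng()
    let _id: u64 = rng.random_range(0..10); // replaces rng.gen_range(0..10)
    peers.choose(&mut rng)
}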

View File

@@ -30,7 +30,7 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::service::DatanodeServiceBuilder;
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
@@ -223,15 +223,14 @@ impl StartCommand {
.get_or_insert_with(MetaClientOptions::default)
.metasrv_addrs
.clone_from(metasrv_addrs);
opts.mode = Mode::Distributed;
}
if let (Mode::Distributed, None) = (&opts.mode, &opts.node_id) {
return MissingConfigSnafu {
msg: "Missing node id option",
ensure!(
opts.node_id.is_some(),
MissingConfigSnafu {
msg: "Missing node id option"
}
.fail();
}
);
if let Some(data_home) = &self.data_home {
opts.storage.data_home.clone_from(data_home);
@@ -295,10 +294,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Datanode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
MetaClientType::Datanode { member_id },
meta_config,
None,
)
.await
.context(MetaClientInitSnafu)?;
let meta_backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),
@@ -311,7 +313,7 @@ impl StartCommand {
.build(),
);
let mut datanode = DatanodeBuilder::new(opts.clone(), plugins)
let mut datanode = DatanodeBuilder::new(opts.clone(), plugins, Mode::Distributed)
.with_meta_client(meta_client)
.with_kv_backend(meta_backend)
.with_cache_registry(layered_cache_registry)
@@ -333,6 +335,7 @@ impl StartCommand {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::io::Write;
use std::time::Duration;
@@ -340,7 +343,6 @@ mod tests {
use common_test_util::temp_dir::create_named_temp_file;
use datanode::config::{FileConfig, GcsConfig, ObjectStoreConfig, S3Config};
use servers::heartbeat_options::HeartbeatOptions;
use servers::Mode;
use super::*;
use crate::options::GlobalOptions;
@@ -406,7 +408,7 @@ mod tests {
sync_write = false
[storage]
data_home = "/tmp/greptimedb/"
data_home = "./greptimedb_data/"
type = "File"
[[storage.providers]]
@@ -420,7 +422,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -467,7 +469,7 @@ mod tests {
assert_eq!(10000, ddl_timeout.as_millis());
assert_eq!(3000, timeout.as_millis());
assert!(tcp_nodelay);
assert_eq!("/tmp/greptimedb/", options.storage.data_home);
assert_eq!("./greptimedb_data/", options.storage.data_home);
assert!(matches!(
&options.storage.store,
ObjectStoreConfig::File(FileConfig { .. })
@@ -483,27 +485,14 @@ mod tests {
));
assert_eq!("debug", options.logging.level.unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
options.logging.dir
);
}
#[test]
fn test_try_from_cmd() {
let opt = StartCommand::default()
.load_options(&GlobalOptions::default())
.unwrap()
.component;
assert_eq!(Mode::Standalone, opt.mode);
let opt = (StartCommand {
node_id: Some(42),
metasrv_addrs: Some(vec!["127.0.0.1:3002".to_string()]),
..Default::default()
})
.load_options(&GlobalOptions::default())
.unwrap()
.component;
assert_eq!(Mode::Distributed, opt.mode);
assert!((StartCommand {
metasrv_addrs: Some(vec!["127.0.0.1:3002".to_string()]),
..Default::default()
@@ -522,11 +511,23 @@ mod tests {
#[test]
fn test_load_log_options_from_cli() {
let cmd = StartCommand::default();
let mut cmd = StartCommand::default();
let result = cmd.load_options(&GlobalOptions {
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
tokio_console_addr: None,
});
// Missing node_id.
assert_matches!(result, Err(crate::error::Error::MissingConfig { .. }));
cmd.node_id = Some(42);
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -536,7 +537,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}
@@ -565,11 +566,11 @@ mod tests {
[storage]
type = "File"
data_home = "/tmp/greptimedb/"
data_home = "./greptimedb_data/"
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();

View File

@@ -100,6 +100,13 @@ pub enum Error {
source: flow::Error,
},
#[snafu(display("Servers error"))]
Servers {
#[snafu(implicit)]
location: Location,
source: servers::error::Error,
},
#[snafu(display("Failed to start frontend"))]
StartFrontend {
#[snafu(implicit)]
@@ -365,6 +372,7 @@ impl ErrorExt for Error {
Error::ShutdownFrontend { source, .. } => source.status_code(),
Error::StartMetaServer { source, .. } => source.status_code(),
Error::ShutdownMetaServer { source, .. } => source.status_code(),
Error::Servers { source, .. } => source.status_code(),
Error::BuildMetaServer { source, .. } => source.status_code(),
Error::UnsupportedSelectorType { source, .. } => source.status_code(),
Error::BuildCli { source, .. } => source.status_code(),

View File

@@ -34,8 +34,7 @@ use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
@@ -203,7 +202,6 @@ impl StartCommand {
.get_or_insert_with(MetaClientOptions::default)
.metasrv_addrs
.clone_from(metasrv_addrs);
opts.mode = Mode::Distributed;
}
if let Some(http_addr) = &self.http_addr {
@@ -214,12 +212,12 @@ impl StartCommand {
opts.http.timeout = Duration::from_secs(http_timeout);
}
if let (Mode::Distributed, None) = (&opts.mode, &opts.node_id) {
return MissingConfigSnafu {
msg: "Missing node id option",
ensure!(
opts.node_id.is_some(),
MissingConfigSnafu {
msg: "Missing node id option"
}
.fail();
}
);
Ok(())
}
@@ -249,10 +247,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Flownode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
MetaClientType::Flownode { member_id },
meta_config,
None,
)
.await
.context(MetaClientInitSnafu)?;
let cache_max_capacity = meta_config.metadata_cache_max_capacity;
let cache_ttl = meta_config.metadata_cache_ttl;

View File

@@ -32,28 +32,25 @@ use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_time::timezone::set_default_timezone;
use common_version::{short_version, version};
use frontend::frontend::Frontend;
use frontend::heartbeat::HeartbeatTask;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use query::stats::StatementStatistics;
use servers::export_metrics::ExportMetricsTask;
use servers::tls::{TlsMode, TlsOption};
use snafu::{OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
self, InitTimezoneSnafu, LoadLayeredConfigSnafu, MetaClientInitSnafu, MissingConfigSnafu,
Result, StartFrontendSnafu,
};
use crate::error::{self, Result};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
pub struct Instance {
frontend: FeInstance,
frontend: Frontend,
// Keep the logging guard to prevent the worker from being dropped.
_guard: Vec<WorkerGuard>,
}
@@ -61,20 +58,17 @@ pub struct Instance {
pub const APP_NAME: &str = "greptime-frontend";
impl Instance {
pub fn new(frontend: FeInstance, guard: Vec<WorkerGuard>) -> Self {
Self {
frontend,
_guard: guard,
}
pub fn new(frontend: Frontend, _guard: Vec<WorkerGuard>) -> Self {
Self { frontend, _guard }
}
pub fn mut_inner(&mut self) -> &mut FeInstance {
&mut self.frontend
}
pub fn inner(&self) -> &FeInstance {
pub fn inner(&self) -> &Frontend {
&self.frontend
}
pub fn mut_inner(&mut self) -> &mut Frontend {
&mut self.frontend
}
}
#[async_trait]
@@ -84,11 +78,15 @@ impl App for Instance {
}
async fn start(&mut self) -> Result<()> {
plugins::start_frontend_plugins(self.frontend.plugins().clone())
let plugins = self.frontend.instance.plugins().clone();
plugins::start_frontend_plugins(plugins)
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
self.frontend.start().await.context(StartFrontendSnafu)
self.frontend
.start()
.await
.context(error::StartFrontendSnafu)
}
async fn stop(&self) -> Result<()> {
@@ -178,7 +176,7 @@ impl StartCommand {
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?;
.context(error::LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts)?;
@@ -283,22 +281,28 @@ impl StartCommand {
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
set_default_timezone(opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;
set_default_timezone(opts.default_timezone.as_deref()).context(error::InitTimezoneSnafu)?;
let meta_client_options = opts.meta_client.as_ref().context(MissingConfigSnafu {
msg: "'meta_client'",
})?;
let meta_client_options = opts
.meta_client
.as_ref()
.context(error::MissingConfigSnafu {
msg: "'meta_client'",
})?;
let cache_max_capacity = meta_client_options.metadata_cache_max_capacity;
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;
let meta_client =
meta_client::create_meta_client(MetaClientType::Frontend, meta_client_options)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
MetaClientType::Frontend,
meta_client_options,
Some(&plugins),
)
.await
.context(error::MetaClientInitSnafu)?;
// TODO(discord9): add a helper function to ease the creation of the cache registry and such
let cached_meta_backend =
@@ -345,6 +349,7 @@ impl StartCommand {
opts.heartbeat.clone(),
Arc::new(executor),
);
let heartbeat_task = Some(heartbeat_task);
// Frontend-to-datanode requests need no timeout.
// Some queries are expected to take a long time.
@@ -356,7 +361,7 @@ impl StartCommand {
};
let client = NodeClients::new(channel_config);
let mut instance = FrontendBuilder::new(
let instance = FrontendBuilder::new(
opts.clone(),
cached_meta_backend.clone(),
layered_cache_registry.clone(),
@@ -367,20 +372,27 @@ impl StartCommand {
)
.with_plugin(plugins.clone())
.with_local_cache_invalidator(layered_cache_registry)
.with_heartbeat_task(heartbeat_task)
.try_build()
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
let instance = Arc::new(instance);
let servers = Services::new(opts, Arc::new(instance.clone()), plugins)
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
.context(error::ServersSnafu)?;
let servers = Services::new(opts, instance.clone(), plugins)
.build()
.await
.context(StartFrontendSnafu)?;
instance
.build_servers(servers)
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
Ok(Instance::new(instance, guard))
let frontend = Frontend {
instance,
servers,
heartbeat_task,
export_metrics_task,
};
Ok(Instance::new(frontend, guard))
}
}
@@ -440,7 +452,7 @@ mod tests {
[http]
addr = "127.0.0.1:4000"
timeout = "30s"
timeout = "0s"
body_limit = "2GB"
[opentsdb]
@@ -448,7 +460,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -461,12 +473,15 @@ mod tests {
let fe_opts = command.load_options(&Default::default()).unwrap().component;
assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
assert_eq!(Duration::from_secs(30), fe_opts.http.timeout);
assert_eq!(Duration::from_secs(0), fe_opts.http.timeout);
assert_eq!(ReadableSize::gb(2), fe_opts.http.body_limit);
assert_eq!("debug", fe_opts.logging.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), fe_opts.logging.dir);
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
fe_opts.logging.dir
);
assert!(!fe_opts.opentsdb.enable);
}
@@ -505,7 +520,7 @@ mod tests {
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -515,7 +530,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}

View File

@@ -337,7 +337,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
[failure_detector]
threshold = 8.0
@@ -358,7 +358,10 @@ mod tests {
assert_eq!(vec!["127.0.0.1:2379".to_string()], options.store_addrs);
assert_eq!(SelectorType::LeaseBased, options.selector);
assert_eq!("debug", options.logging.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
options.logging.dir
);
assert_eq!(8.0, options.failure_detector.threshold);
assert_eq!(
100.0,
@@ -396,7 +399,7 @@ mod tests {
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -406,7 +409,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}
@@ -424,7 +427,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();

View File

@@ -42,6 +42,7 @@ use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::region_registry::LeaderRegionRegistry;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
@@ -55,9 +56,9 @@ use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use frontend::frontend::FrontendOptions;
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::instance::{Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
InfluxdbOptions, JaegerOptions, MysqlOptions, OpentsdbOptions, PostgresOptions,
@@ -67,7 +68,7 @@ use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
use query::stats::StatementStatistics;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::export_metrics::{ExportMetricsOption, ExportMetricsTask};
use servers::grpc::GrpcOptions;
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
@@ -76,15 +77,9 @@ use snafu::ResultExt;
use tokio::sync::{broadcast, RwLock};
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
BuildCacheRegistrySnafu, BuildWalOptionsAllocatorSnafu, CreateDirSnafu, IllegalConfigSnafu,
InitDdlManagerSnafu, InitMetadataSnafu, InitTimezoneSnafu, LoadLayeredConfigSnafu, OtherSnafu,
Result, ShutdownDatanodeSnafu, ShutdownFlownodeSnafu, ShutdownFrontendSnafu,
StartDatanodeSnafu, StartFlownodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu,
};
use crate::error::Result;
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
use crate::{error, log_versions, App};
pub const APP_NAME: &str = "greptime-standalone";
@@ -132,7 +127,6 @@ impl SubCommand {
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
#[serde(default)]
pub struct StandaloneOptions {
pub mode: Mode,
pub enable_telemetry: bool,
pub default_timezone: Option<String>,
pub http: HttpOptions,
@@ -162,7 +156,6 @@ pub struct StandaloneOptions {
impl Default for StandaloneOptions {
fn default() -> Self {
Self {
mode: Mode::Standalone,
enable_telemetry: true,
default_timezone: None,
http: HttpOptions::default(),
@@ -243,7 +236,6 @@ impl StandaloneOptions {
grpc: cloned_opts.grpc,
init_regions_in_background: cloned_opts.init_regions_in_background,
init_regions_parallelism: cloned_opts.init_regions_parallelism,
mode: Mode::Standalone,
..Default::default()
}
}
@@ -251,13 +243,12 @@ impl StandaloneOptions {
pub struct Instance {
datanode: Datanode,
frontend: FeInstance,
frontend: Frontend,
// TODO(discord9): wrap it in a flownode instance instead
flow_worker_manager: Arc<FlowWorkerManager>,
flow_shutdown: broadcast::Sender<()>,
procedure_manager: ProcedureManagerRef,
wal_options_allocator: WalOptionsAllocatorRef,
// Keep the logging guard to prevent the worker from being dropped.
_guard: Vec<WorkerGuard>,
}
@@ -281,21 +272,26 @@ impl App for Instance {
self.procedure_manager
.start()
.await
.context(StartProcedureManagerSnafu)?;
.context(error::StartProcedureManagerSnafu)?;
self.wal_options_allocator
.start()
.await
.context(StartWalOptionsAllocatorSnafu)?;
.context(error::StartWalOptionsAllocatorSnafu)?;
plugins::start_frontend_plugins(self.frontend.plugins().clone())
plugins::start_frontend_plugins(self.frontend.instance.plugins().clone())
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
self.frontend
.start()
.await
.context(error::StartFrontendSnafu)?;
self.frontend.start().await.context(StartFrontendSnafu)?;
self.flow_worker_manager
.clone()
.run_background(Some(self.flow_shutdown.subscribe()));
Ok(())
}
@@ -303,17 +299,18 @@ impl App for Instance {
self.frontend
.shutdown()
.await
.context(ShutdownFrontendSnafu)?;
.context(error::ShutdownFrontendSnafu)?;
self.procedure_manager
.stop()
.await
.context(StopProcedureManagerSnafu)?;
.context(error::StopProcedureManagerSnafu)?;
self.datanode
.shutdown()
.await
.context(ShutdownDatanodeSnafu)?;
.context(error::ShutdownDatanodeSnafu)?;
self.flow_shutdown
.send(())
.map_err(|_e| {
@@ -322,7 +319,8 @@ impl App for Instance {
}
.build()
})
.context(ShutdownFlownodeSnafu)?;
.context(error::ShutdownFlownodeSnafu)?;
info!("Datanode instance stopped.");
Ok(())
@@ -368,7 +366,7 @@ impl StartCommand {
self.config_file.as_deref(),
self.env_prefix.as_ref(),
)
.context(LoadLayeredConfigSnafu)?;
.context(error::LoadLayeredConfigSnafu)?;
self.merge_with_cli_options(global_options, &mut opts.component)?;
@@ -381,9 +379,6 @@ impl StartCommand {
global_options: &GlobalOptions,
opts: &mut StandaloneOptions,
) -> Result<()> {
// Should always be standalone mode.
opts.mode = Mode::Standalone;
if let Some(dir) = &global_options.log_dir {
opts.logging.dir.clone_from(dir);
}
@@ -415,7 +410,7 @@ impl StartCommand {
// frontend grpc addr conflicts with the datanode default grpc addr
let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
if addr.eq(&datanode_grpc_addr) {
return IllegalConfigSnafu {
return error::IllegalConfigSnafu {
msg: format!(
"gRPC listen address conflicts with datanode reserved gRPC addr: {datanode_grpc_addr}",
),
@@ -474,18 +469,19 @@ impl StartCommand {
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
.await
.context(StartDatanodeSnafu)?;
.context(error::StartDatanodeSnafu)?;
set_default_timezone(fe_opts.default_timezone.as_deref()).context(InitTimezoneSnafu)?;
set_default_timezone(fe_opts.default_timezone.as_deref())
.context(error::InitTimezoneSnafu)?;
let data_home = &dn_opts.storage.data_home;
// Ensure the data_home directory exists.
fs::create_dir_all(path::Path::new(data_home))
.context(CreateDirSnafu { dir: data_home })?;
.context(error::CreateDirSnafu { dir: data_home })?;
let metadata_dir = metadata_store_dir(data_home);
let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
@@ -494,7 +490,7 @@ impl StartCommand {
opts.procedure,
)
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
// Builds cache registry
let layered_cache_builder = LayeredCacheRegistryBuilder::default();
@@ -503,16 +499,16 @@ impl StartCommand {
with_default_composite_cache_registry(
layered_cache_builder.add_cache_registry(fundamental_cache_registry),
)
.context(BuildCacheRegistrySnafu)?
.context(error::BuildCacheRegistrySnafu)?
.build(),
);
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone(), Mode::Standalone)
.with_kv_backend(kv_backend.clone())
.with_cache_registry(layered_cache_registry.clone())
.build()
.await
.context(StartDatanodeSnafu)?;
.context(error::StartDatanodeSnafu)?;
let information_extension = Arc::new(StandaloneInformationExtension::new(
datanode.region_server(),
@@ -545,7 +541,7 @@ impl StartCommand {
.build()
.await
.map_err(BoxedError::new)
.context(OtherSnafu)?,
.context(error::OtherSnafu)?,
);
// set the ref to query for the local flow state
@@ -576,7 +572,7 @@ impl StartCommand {
let kafka_options = opts.wal.clone().into();
let wal_options_allocator = build_wal_options_allocator(&kafka_options, kv_backend.clone())
.await
.context(BuildWalOptionsAllocatorSnafu)?;
.context(error::BuildWalOptionsAllocatorSnafu)?;
let wal_options_allocator = Arc::new(wal_options_allocator);
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
table_id_sequence,
@@ -597,8 +593,8 @@ impl StartCommand {
)
.await?;
let mut frontend = FrontendBuilder::new(
fe_opts,
let fe_instance = FrontendBuilder::new(
fe_opts.clone(),
kv_backend.clone(),
layered_cache_registry.clone(),
catalog_manager.clone(),
@@ -609,7 +605,8 @@ impl StartCommand {
.with_plugin(plugins.clone())
.try_build()
.await
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
let fe_instance = Arc::new(fe_instance);
let flow_worker_manager = flownode.flow_worker_manager();
// flow server needs to be able to use the frontend to write insert requests back
@@ -622,18 +619,25 @@ impl StartCommand {
node_manager,
)
.await
.context(StartFlownodeSnafu)?;
.context(error::StartFlownodeSnafu)?;
flow_worker_manager.set_frontend_invoker(invoker).await;
let (tx, _rx) = broadcast::channel(1);
let servers = Services::new(opts, Arc::new(frontend.clone()), plugins)
let export_metrics_task = ExportMetricsTask::try_new(&opts.export_metrics, Some(&plugins))
.context(error::ServersSnafu)?;
let servers = Services::new(opts, fe_instance.clone(), plugins)
.build()
.await
.context(StartFrontendSnafu)?;
frontend
.build_servers(servers)
.context(StartFrontendSnafu)?;
.context(error::StartFrontendSnafu)?;
let frontend = Frontend {
instance: fe_instance,
servers,
heartbeat_task: None,
export_metrics_task,
};
Ok(Instance {
datanode,
@@ -661,6 +665,7 @@ impl StartCommand {
node_manager,
cache_invalidator,
memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
table_metadata_manager,
table_metadata_allocator,
flow_metadata_manager,
@@ -670,7 +675,7 @@ impl StartCommand {
procedure_manager,
true,
)
.context(InitDdlManagerSnafu)?,
.context(error::InitDdlManagerSnafu)?,
);
Ok(procedure_executor)
@@ -684,7 +689,7 @@ impl StartCommand {
table_metadata_manager
.init()
.await
.context(InitMetadataSnafu)?;
.context(error::InitMetadataSnafu)?;
Ok(table_metadata_manager)
}
@@ -778,6 +783,7 @@ impl InformationExtension for StandaloneInformationExtension {
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(),
}
})
.collect::<Vec<_>>();
@@ -852,7 +858,7 @@ mod tests {
[wal]
provider = "raft_engine"
dir = "/tmp/greptimedb/test/wal"
dir = "./greptimedb_data/test/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -860,7 +866,7 @@ mod tests {
sync_write = false
[storage]
data_home = "/tmp/greptimedb/"
data_home = "./greptimedb_data/"
type = "File"
[[storage.providers]]
@@ -892,7 +898,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
let cmd = StartCommand {
@@ -922,7 +928,10 @@ mod tests {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
unreachable!()
};
assert_eq!("/tmp/greptimedb/test/wal", raft_engine_config.dir.unwrap());
assert_eq!(
"./greptimedb_data/test/wal",
raft_engine_config.dir.unwrap()
);
assert!(matches!(
&dn_opts.storage.store,
@@ -946,7 +955,7 @@ mod tests {
}
assert_eq!("debug", logging_opts.level.as_ref().unwrap());
assert_eq!("/tmp/greptimedb/test/logs".to_string(), logging_opts.dir);
assert_eq!("./greptimedb_data/test/logs".to_string(), logging_opts.dir);
}
#[test]
@@ -958,7 +967,7 @@ mod tests {
let opts = cmd
.load_options(&GlobalOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -967,7 +976,7 @@ mod tests {
.unwrap()
.component;
assert_eq!("/tmp/greptimedb/test/logs", opts.logging.dir);
assert_eq!("./greptimedb_data/test/logs", opts.logging.dir);
assert_eq!("debug", opts.logging.level.unwrap());
}
@@ -1051,7 +1060,6 @@ mod tests {
let options =
StandaloneOptions::load_layered_options(None, "GREPTIMEDB_STANDALONE").unwrap();
let default_options = StandaloneOptions::default();
assert_eq!(options.mode, default_options.mode);
assert_eq!(options.enable_telemetry, default_options.enable_telemetry);
assert_eq!(options.http, default_options.http);
assert_eq!(options.grpc, default_options.grpc);

View File

@@ -56,13 +56,13 @@ fn test_load_datanode_example_config() {
metadata_cache_tti: Duration::from_secs(300),
}),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
dir: Some("./greptimedb_data/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
}),
storage: StorageConfig {
data_home: "/tmp/greptimedb/".to_string(),
data_home: "./greptimedb_data/".to_string(),
..Default::default()
},
region_engine: vec![
@@ -159,10 +159,10 @@ fn test_load_metasrv_example_config() {
let expected = GreptimeOptions::<MetasrvOptions> {
component: MetasrvOptions {
selector: SelectorType::default(),
data_home: "/tmp/metasrv/".to_string(),
data_home: "./greptimedb_data/metasrv/".to_string(),
server_addr: "127.0.0.1:3002".to_string(),
logging: LoggingOptions {
dir: "/tmp/greptimedb/logs".to_string(),
dir: "./greptimedb_data/logs".to_string(),
level: Some("info".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
@@ -202,7 +202,7 @@ fn test_load_standalone_example_config() {
component: StandaloneOptions {
default_timezone: Some("UTC".to_string()),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("/tmp/greptimedb/wal".to_string()),
dir: Some("./greptimedb_data/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
@@ -219,7 +219,7 @@ fn test_load_standalone_example_config() {
}),
],
storage: StorageConfig {
data_home: "/tmp/greptimedb/".to_string(),
data_home: "./greptimedb_data/".to_string(),
..Default::default()
},
logging: LoggingOptions {

View File

@@ -135,5 +135,6 @@ pub fn is_readonly_schema(schema: &str) -> bool {
pub const TRACE_ID_COLUMN: &str = "trace_id";
pub const SPAN_ID_COLUMN: &str = "span_id";
pub const SPAN_NAME_COLUMN: &str = "span_name";
pub const SERVICE_NAME_COLUMN: &str = "service_name";
pub const PARENT_SPAN_ID_COLUMN: &str = "parent_span_id";
// ---- End of special table and fields ----

View File

@@ -161,7 +161,7 @@ mod tests {
[wal]
provider = "raft_engine"
dir = "/tmp/greptimedb/wal"
dir = "./greptimedb_data/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -170,7 +170,7 @@ mod tests {
[logging]
level = "debug"
dir = "/tmp/greptimedb/test/logs"
dir = "./greptimedb_data/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -246,7 +246,7 @@ mod tests {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
unreachable!()
};
assert_eq!(raft_engine_config.dir.unwrap(), "/tmp/greptimedb/wal");
assert_eq!(raft_engine_config.dir.unwrap(), "./greptimedb_data/wal");
// Should be default values.
assert_eq!(opts.node_id, None);

View File

@@ -17,6 +17,7 @@ api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
bincode = "1.3"
chrono.workspace = true
common-base.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -38,6 +39,7 @@ geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
memchr = "2.7"
nalgebra.workspace = true
num = "0.4"
num-traits = "0.2"

View File

@@ -12,15 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod add_region_follower;
mod flush_compact_region;
mod flush_compact_table;
mod migrate_region;
mod remove_region_follower;
use std::sync::Arc;
use add_region_follower::AddRegionFollowerFunction;
use flush_compact_region::{CompactRegionFunction, FlushRegionFunction};
use flush_compact_table::{CompactTableFunction, FlushTableFunction};
use migrate_region::MigrateRegionFunction;
use remove_region_follower::RemoveRegionFollowerFunction;
use crate::flush_flow::FlushFlowFunction;
use crate::function_registry::FunctionRegistry;
@@ -32,6 +36,8 @@ impl AdminFunction {
/// Register all table functions to [`FunctionRegistry`].
pub fn register(registry: &FunctionRegistry) {
registry.register_async(Arc::new(MigrateRegionFunction));
registry.register_async(Arc::new(AddRegionFollowerFunction));
registry.register_async(Arc::new(RemoveRegionFollowerFunction));
registry.register_async(Arc::new(FlushRegionFunction));
registry.register_async(Arc::new(CompactRegionFunction));
registry.register_async(Arc::new(FlushTableFunction));

View File

@@ -0,0 +1,129 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_macro::admin_fn;
use common_meta::rpc::procedure::AddRegionFollowerRequest;
use common_query::error::{
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, TypeSignature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
use snafu::ensure;
use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64;
/// A function to add a follower to a region.
/// Only available in cluster mode.
///
/// - `add_region_follower(region_id, peer_id)`.
///
/// The parameters:
/// - `region_id`: the region id
/// - `peer_id`: the peer id
#[admin_fn(
name = AddRegionFollowerFunction,
display_name = add_region_follower,
sig_fn = signature,
ret = uint64
)]
pub(crate) async fn add_region_follower(
procedure_service_handler: &ProcedureServiceHandlerRef,
_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
ensure!(
params.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly 2, have: {}",
params.len()
),
}
);
let Some(region_id) = cast_u64(&params[0])? else {
return UnsupportedInputDataTypeSnafu {
function: "add_region_follower",
datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
}
.fail();
};
let Some(peer_id) = cast_u64(&params[1])? else {
return UnsupportedInputDataTypeSnafu {
function: "add_region_follower",
datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
}
.fail();
};
procedure_service_handler
.add_region_follower(AddRegionFollowerRequest { region_id, peer_id })
.await?;
Ok(Value::from(0u64))
}
fn signature() -> Signature {
Signature::one_of(
vec![
// add_region_follower(region_id, peer_id)
TypeSignature::Uniform(2, ConcreteDataType::numerics()),
],
Volatility::Immutable,
)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{UInt64Vector, VectorRef};
use super::*;
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_add_region_follower_misc() {
let f = AddRegionFollowerFunction;
assert_eq!("add_region_follower", f.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 1));
}
#[tokio::test]
async fn test_add_region_follower() {
let f = AddRegionFollowerFunction;
let args = vec![1, 1];
let args = args
.into_iter()
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([0u64]));
assert_eq!(result, expect);
}
}

View File

@@ -0,0 +1,129 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_macro::admin_fn;
use common_meta::rpc::procedure::RemoveRegionFollowerRequest;
use common_query::error::{
InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result,
UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, TypeSignature, Volatility};
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{Value, ValueRef};
use session::context::QueryContextRef;
use snafu::ensure;
use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64;
/// A function to remove a follower from a region.
/// Only available in cluster mode.
///
/// - `remove_region_follower(region_id, peer_id)`.
///
/// The parameters:
/// - `region_id`: the region id
/// - `peer_id`: the peer id
#[admin_fn(
name = RemoveRegionFollowerFunction,
display_name = remove_region_follower,
sig_fn = signature,
ret = uint64
)]
pub(crate) async fn remove_region_follower(
procedure_service_handler: &ProcedureServiceHandlerRef,
_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
ensure!(
params.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly 2, have: {}",
params.len()
),
}
);
let Some(region_id) = cast_u64(&params[0])? else {
return UnsupportedInputDataTypeSnafu {
function: "add_region_follower",
datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
}
.fail();
};
let Some(peer_id) = cast_u64(&params[1])? else {
return UnsupportedInputDataTypeSnafu {
function: "add_region_follower",
datatypes: params.iter().map(|v| v.data_type()).collect::<Vec<_>>(),
}
.fail();
};
procedure_service_handler
.remove_region_follower(RemoveRegionFollowerRequest { region_id, peer_id })
.await?;
Ok(Value::from(0u64))
}
fn signature() -> Signature {
Signature::one_of(
vec![
// remove_region_follower(region_id, peer_id)
TypeSignature::Uniform(2, ConcreteDataType::numerics()),
],
Volatility::Immutable,
)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{UInt64Vector, VectorRef};
use super::*;
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_remove_region_follower_misc() {
let f = RemoveRegionFollowerFunction;
assert_eq!("remove_region_follower", f.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
f.return_type(&[]).unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 1));
}
#[tokio::test]
async fn test_remove_region_follower() {
let f = RemoveRegionFollowerFunction;
let args = vec![1, 1];
let args = args
.into_iter()
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([0u64]));
assert_eq!(result, expect);
}
}
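Both new admin functions are registered next to migrate_region in the admin registry change earlier in this diff. Assuming they are surfaced the same way as the existing async admin functions, they would be invoked from SQL roughly as ADMIN add_region_follower(<region_id>, <peer_id>) and ADMIN remove_region_follower(<region_id>, <peer_id>); the exact statement form is whatever the function registry already exposes.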

View File

@@ -27,6 +27,7 @@ use crate::scalars::hll_count::HllCalcFunction;
use crate::scalars::ip::IpFunctions;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::matches_term::MatchesTermFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
@@ -116,6 +117,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
// Full text search function
MatchesFunction::register(&function_registry);
MatchesTermFunction::register(&function_registry);
// System and administration functions
SystemFunction::register(&function_registry);

View File

@@ -16,7 +16,10 @@ use std::sync::Arc;
use async_trait::async_trait;
use common_base::AffectedRows;
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
RemoveRegionFollowerRequest,
};
use common_query::error::Result;
use common_query::Output;
use session::context::QueryContextRef;
@@ -63,6 +66,12 @@ pub trait ProcedureServiceHandler: Send + Sync {
/// Query the procedure's state by its id
async fn query_procedure_state(&self, pid: &str) -> Result<ProcedureStateResponse>;
/// Add a region follower to a region.
async fn add_region_follower(&self, request: AddRegionFollowerRequest) -> Result<()>;
/// Remove a region follower from a region.
async fn remove_region_follower(&self, request: RemoveRegionFollowerRequest) -> Result<()>;
}
/// This flow service handler is only used for flushing flows for now.

View File

@@ -19,6 +19,7 @@ pub mod expression;
pub mod geo;
pub mod json;
pub mod matches;
pub mod matches_term;
pub mod math;
pub mod vector;

View File

@@ -43,7 +43,6 @@ impl Function for DateFormatFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -105,22 +104,6 @@ impl Function for DateFormatFunction {
results.push(result.as_deref());
}
}
ConcreteDataType::DateTime(_) => {
for i in 0..size {
let datetime = left.get(i).as_datetime();
let format = formats.get(i).as_string();
let result = match (datetime, format) {
(Some(datetime), Some(fmt)) => datetime
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
@@ -147,7 +130,7 @@ mod tests {
use common_query::prelude::{TypeSignature, Volatility};
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::value::Value;
use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};
use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};
use super::{DateFormatFunction, *};
@@ -169,16 +152,11 @@ mod tests {
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 6));
} if sigs.len() == 5));
}
#[test]
@@ -262,45 +240,4 @@ mod tests {
}
}
}
#[test]
fn test_datetime_date_format() {
let f = DateFormatFunction;
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:00:00.123"),
None,
Some("1970-01-01 00:00:00.042"),
None,
];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}

View File

@@ -118,11 +118,6 @@ mod tests {
ConcreteDataType::date_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::datetime_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(
matches!(f.signature(),
Signature {

View File

@@ -0,0 +1,375 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::{fmt, iter};
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVector, BooleanVectorBuilder, MutableVector, VectorRef};
use memchr::memmem;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
/// Exact term/phrase matching function for text columns.
///
/// This function checks if a text column contains exact term/phrase matches
/// with non-alphanumeric boundaries. Designed for:
/// - Whole-word matching (e.g. "cat" in "cat!" but not in "category")
/// - Phrase matching (e.g. "hello world" in "note:hello world!")
///
/// # Signature
/// `matches_term(text: String, term: String) -> Boolean`
///
/// # Arguments
/// * `text` - String column to search
/// * `term` - Search term/phrase
///
/// # Returns
/// BooleanVector where each element indicates if the corresponding text
/// contains an exact match of the term, following these rules:
/// 1. Exact substring match found (case-sensitive)
/// 2. Match boundaries are either:
/// - Start/end of text
/// - Any non-alphanumeric character (including spaces, hyphens, punctuation, etc.)
///
/// # Examples
/// ```
/// -- SQL examples --
/// -- Match phrase with space --
/// SELECT matches_term(column, 'hello world') FROM table;
/// -- Text: "warning:hello world!" => true
/// -- Text: "hello-world" => false (hyphen instead of space)
/// -- Text: "hello world2023" => false (ending with numbers)
///
/// -- Match multiple words with boundaries --
/// SELECT matches_term(column, 'critical error') FROM logs;
/// -- Match in: "ERROR:critical error!"
/// -- No match: "critical_errors"
///
/// -- Empty string handling --
/// SELECT matches_term(column, '') FROM table;
/// -- Text: "" => true
/// -- Text: "any" => false
///
/// -- Case sensitivity --
/// SELECT matches_term(column, 'Cat') FROM table;
/// -- Text: "Cat" => true
/// -- Text: "cat" => false
/// ```
pub struct MatchesTermFunction;
impl MatchesTermFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(MatchesTermFunction));
}
}
impl fmt::Display for MatchesTermFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "MATCHES_TERM")
}
}
impl Function for MatchesTermFunction {
fn name(&self) -> &str {
"matches_term"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> common_query::prelude::Signature {
common_query::prelude::Signature::exact(
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly 2, have: {}",
columns.len()
),
}
);
let text_column = &columns[0];
if text_column.is_empty() {
return Ok(Arc::new(BooleanVector::from(Vec::<bool>::with_capacity(0))));
}
let term_column = &columns[1];
let compiled_finder = if term_column.is_const() {
let term = term_column.get_ref(0).as_string().unwrap();
match term {
None => {
return Ok(Arc::new(BooleanVector::from_iter(
iter::repeat(None).take(text_column.len()),
)));
}
Some(term) => Some(MatchesTermFinder::new(term)),
}
} else {
None
};
let len = text_column.len();
let mut result = BooleanVectorBuilder::with_capacity(len);
for i in 0..len {
let text = text_column.get_ref(i).as_string().unwrap();
let Some(text) = text else {
result.push_null();
continue;
};
let contains = match &compiled_finder {
Some(finder) => finder.find(text),
None => {
let term = match term_column.get_ref(i).as_string().unwrap() {
None => {
result.push_null();
continue;
}
Some(term) => term,
};
MatchesTermFinder::new(term).find(text)
}
};
result.push(Some(contains));
}
Ok(result.to_vector())
}
}
/// A compiled finder for `matches_term` function that holds the compiled term
/// and its metadata for efficient matching.
///
/// A term is considered matched when:
/// 1. The exact sequence appears in the text
/// 2. It is either:
/// - At the start/end of text with adjacent non-alphanumeric character
/// - Surrounded by non-alphanumeric characters
///
/// # Examples
/// ```
/// let finder = MatchesTermFinder::new("cat");
/// assert!(finder.find("cat!")); // Term at end with punctuation
/// assert!(finder.find("dog,cat")); // Term preceded by comma
/// assert!(!finder.find("category")); // Partial match rejected
///
/// let finder = MatchesTermFinder::new("world");
/// assert!(finder.find("hello-world")); // Hyphen boundary
/// ```
#[derive(Clone, Debug)]
pub struct MatchesTermFinder {
finder: memmem::Finder<'static>,
term: String,
starts_with_non_alnum: bool,
ends_with_non_alnum: bool,
}
impl MatchesTermFinder {
/// Create a new `MatchesTermFinder` for the given term.
pub fn new(term: &str) -> Self {
let starts_with_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric());
let ends_with_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric());
Self {
finder: memmem::Finder::new(term).into_owned(),
term: term.to_string(),
starts_with_non_alnum,
ends_with_non_alnum,
}
}
/// Find the term in the text.
pub fn find(&self, text: &str) -> bool {
if self.term.is_empty() {
return text.is_empty();
}
if text.len() < self.term.len() {
return false;
}
let mut pos = 0;
while let Some(found_pos) = self.finder.find(text[pos..].as_bytes()) {
let actual_pos = pos + found_pos;
let prev_ok = self.starts_with_non_alnum
|| text[..actual_pos]
.chars()
.last()
.map(|c| !c.is_alphanumeric())
.unwrap_or(true);
if prev_ok {
let next_pos = actual_pos + self.finder.needle().len();
let next_ok = self.ends_with_non_alnum
|| text[next_pos..]
.chars()
.next()
.map(|c| !c.is_alphanumeric())
.unwrap_or(true);
if next_ok {
return true;
}
}
if let Some(next_char) = text[actual_pos..].chars().next() {
pos = actual_pos + next_char.len_utf8();
} else {
break;
}
}
false
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn matches_term_example() {
let finder = MatchesTermFinder::new("hello world");
assert!(finder.find("warning:hello world!"));
assert!(!finder.find("hello-world"));
assert!(!finder.find("hello world2023"));
let finder = MatchesTermFinder::new("critical error");
assert!(finder.find("ERROR:critical error!"));
assert!(!finder.find("critical_errors"));
let finder = MatchesTermFinder::new("");
assert!(finder.find(""));
assert!(!finder.find("any"));
let finder = MatchesTermFinder::new("Cat");
assert!(finder.find("Cat"));
assert!(!finder.find("cat"));
}
#[test]
fn matches_term_with_punctuation() {
assert!(MatchesTermFinder::new("cat").find("cat!"));
assert!(MatchesTermFinder::new("dog").find("!dog"));
}
#[test]
fn matches_phrase_with_boundaries() {
assert!(MatchesTermFinder::new("hello-world").find("hello-world"));
assert!(MatchesTermFinder::new("'foo bar'").find("test: 'foo bar'"));
}
#[test]
fn matches_at_text_boundaries() {
assert!(MatchesTermFinder::new("start").find("start..."));
assert!(MatchesTermFinder::new("end").find("...end"));
}
// Negative cases
#[test]
fn rejects_partial_matches() {
assert!(!MatchesTermFinder::new("cat").find("category"));
assert!(!MatchesTermFinder::new("boot").find("rebooted"));
}
#[test]
fn rejects_missing_term() {
assert!(!MatchesTermFinder::new("foo").find("hello world"));
}
// Edge cases
#[test]
fn handles_empty_inputs() {
assert!(!MatchesTermFinder::new("test").find(""));
assert!(!MatchesTermFinder::new("").find("text"));
}
#[test]
fn different_unicode_boundaries() {
assert!(MatchesTermFinder::new("café").find("café>"));
assert!(!MatchesTermFinder::new("café").find("口café>"));
assert!(!MatchesTermFinder::new("café").find("café口"));
assert!(!MatchesTermFinder::new("café").find("cafémore"));
assert!(MatchesTermFinder::new("русский").find("русский!"));
assert!(MatchesTermFinder::new("русский").find("русский!"));
}
#[test]
fn case_sensitive_matching() {
assert!(!MatchesTermFinder::new("cat").find("Cat"));
assert!(MatchesTermFinder::new("CaT").find("CaT"));
}
#[test]
fn numbers_in_term() {
assert!(MatchesTermFinder::new("v1.0").find("v1.0!"));
assert!(!MatchesTermFinder::new("v1.0").find("v1.0a"));
}
#[test]
fn adjacent_alphanumeric_fails() {
assert!(!MatchesTermFinder::new("cat").find("cat5"));
assert!(!MatchesTermFinder::new("dog").find("dogcat"));
}
#[test]
fn empty_term_text() {
assert!(!MatchesTermFinder::new("").find("text"));
assert!(MatchesTermFinder::new("").find(""));
assert!(!MatchesTermFinder::new("text").find(""));
}
#[test]
fn leading_non_alphanumeric() {
assert!(MatchesTermFinder::new("/cat").find("dog/cat"));
assert!(MatchesTermFinder::new("dog/").find("dog/cat"));
assert!(MatchesTermFinder::new("dog/cat").find("dog/cat"));
}
#[test]
fn continues_searching_after_boundary_mismatch() {
assert!(!MatchesTermFinder::new("log").find("bloglog!"));
assert!(MatchesTermFinder::new("log").find("bloglog log"));
assert!(MatchesTermFinder::new("log").find("alogblog_log!"));
assert!(MatchesTermFinder::new("error").find("errorlog_error_case"));
assert!(MatchesTermFinder::new("test").find("atestbtestc_test_end"));
assert!(MatchesTermFinder::new("data").find("database_data_store"));
assert!(!MatchesTermFinder::new("data").find("database_datastore"));
assert!(MatchesTermFinder::new("log.txt").find("catalog.txt_log.txt!"));
assert!(!MatchesTermFinder::new("log.txt").find("catalog.txtlog.txt!"));
assert!(MatchesTermFinder::new("data-set").find("bigdata-set_data-set!"));
assert!(MatchesTermFinder::new("中文").find("这是中文测试,中文!"));
assert!(MatchesTermFinder::new("error").find("错误errorerror日志_error!"));
}
}
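A minimal standalone sketch of the same boundary rule, kept deliberately close to the finder above but built only on `str::find` (the helper name is invented for illustration; the memchr optimization and null handling are omitted):

```rust
/// Sketch of the `matches_term` rule: a hit counts only when the characters
/// adjacent to it are absent or non-alphanumeric, unless the term itself
/// starts/ends with a non-alphanumeric character.
fn matches_term_sketch(text: &str, term: &str) -> bool {
    if term.is_empty() {
        return text.is_empty();
    }
    let term_starts_non_alnum = term.chars().next().is_some_and(|c| !c.is_alphanumeric());
    let term_ends_non_alnum = term.chars().last().is_some_and(|c| !c.is_alphanumeric());
    let mut start = 0;
    while let Some(found) = text[start..].find(term) {
        let pos = start + found;
        let prev_ok = term_starts_non_alnum
            || text[..pos]
                .chars()
                .last()
                .map(|c| !c.is_alphanumeric())
                .unwrap_or(true);
        let next_ok = term_ends_non_alnum
            || text[pos + term.len()..]
                .chars()
                .next()
                .map(|c| !c.is_alphanumeric())
                .unwrap_or(true);
        if prev_ok && next_ok {
            return true;
        }
        // Boundary mismatch: advance by one character and keep searching.
        start = pos + text[pos..].chars().next().map_or(1, |c| c.len_utf8());
    }
    false
}

fn main() {
    assert!(matches_term_sketch("warning:hello world!", "hello world"));
    assert!(!matches_term_sketch("category", "cat"));
    assert!(matches_term_sketch("dog/cat", "/cat")); // term starts with punctuation
    assert!(matches_term_sketch("bloglog log", "log")); // keeps searching after a mismatch
}
```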

View File

@@ -23,7 +23,7 @@ use datatypes::arrow::array::AsArray;
use datatypes::arrow::compute::cast;
use datatypes::arrow::compute::kernels::zip;
use datatypes::arrow::datatypes::{
DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
DataType as ArrowDataType, Date32Type, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
};
use datatypes::prelude::ConcreteDataType;
@@ -69,9 +69,8 @@ impl Function for GreatestFunction {
);
match &input_types[0] {
ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
ConcreteDataType::String(_) => Ok(ConcreteDataType::timestamp_millisecond_datatype()),
ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
@@ -87,7 +86,6 @@ impl Function for GreatestFunction {
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
@@ -109,20 +107,24 @@ impl Function for GreatestFunction {
);
match columns[0].data_type() {
ConcreteDataType::String(_) => {
// Treats string as millisecond timestamp type.
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
.context(ArrowComputeSnafu)?;
let column1 = column1.as_primitive::<Date64Type>();
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
.context(ArrowComputeSnafu)?;
let column2 = column2.as_primitive::<Date64Type>();
let column1 = cast(
&columns[0].to_arrow_array(),
&ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
)
.context(ArrowComputeSnafu)?;
let column1 = column1.as_primitive::<TimestampMillisecondType>();
let column2 = cast(
&columns[1].to_arrow_array(),
&ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
)
.context(ArrowComputeSnafu)?;
let column2 = column2.as_primitive::<TimestampMillisecondType>();
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
let result =
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
}
ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
ConcreteDataType::Timestamp(ts_type) => match ts_type {
TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
TimestampType::Millisecond(_) => {
@@ -155,15 +157,15 @@ mod tests {
use std::sync::Arc;
use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Timestamp};
use common_time::{Date, Timestamp};
use datatypes::types::{
DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType,
DateType, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
TimestampSecondType,
};
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
DateVector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use paste::paste;
@@ -178,7 +180,7 @@ mod tests {
ConcreteDataType::string_datatype()
])
.unwrap(),
ConcreteDataType::DateTime(DateTimeType)
ConcreteDataType::timestamp_millisecond_datatype()
);
let columns = vec![
Arc::new(StringVector::from(vec![
@@ -194,15 +196,18 @@ mod tests {
let result = function
.eval(&FunctionContext::default(), &columns)
.unwrap();
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
let result = result
.as_any()
.downcast_ref::<TimestampMillisecondVector>()
.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
result.get(0),
Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
Value::Timestamp(Timestamp::from_str("2001-02-01 00:00:00", None).unwrap())
);
assert_eq!(
result.get(1),
Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
Value::Timestamp(Timestamp::from_str("2012-12-23 00:00:00", None).unwrap())
);
}
@@ -245,30 +250,33 @@ mod tests {
assert_eq!(
function
.return_type(&[
ConcreteDataType::datetime_datatype(),
ConcreteDataType::datetime_datatype()
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype()
])
.unwrap(),
ConcreteDataType::DateTime(DateTimeType)
ConcreteDataType::timestamp_millisecond_datatype()
);
let columns = vec![
Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
Arc::new(TimestampMillisecondVector::from_slice(vec![-1, 2])) as _,
Arc::new(TimestampMillisecondVector::from_slice(vec![0, 1])) as _,
];
let result = function
.eval(&FunctionContext::default(), &columns)
.unwrap();
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
let result = result
.as_any()
.downcast_ref::<TimestampMillisecondVector>()
.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
result.get(0),
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00", None).unwrap())
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00", None).unwrap())
);
assert_eq!(
result.get(1),
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00.002", None).unwrap())
Value::Timestamp(Timestamp::from_str("1970-01-01 00:00:00.002", None).unwrap())
);
}
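With the `DateTime` branch gone, string arguments are cast to millisecond timestamps before the element-wise comparison. A plain-Rust sketch of that comparison semantics (values as epoch milliseconds; the real code runs Arrow's `gt` and `zip` kernels over `TimestampMillisecond` arrays, and nulls and casting are omitted here):

```rust
/// Element-wise "greatest" over two columns of epoch-millisecond timestamps.
fn greatest_millis(a: &[i64], b: &[i64]) -> Vec<i64> {
    a.iter().zip(b).map(|(x, y)| (*x).max(*y)).collect()
}

fn main() {
    // Mirrors the test above: [-1, 2] vs [0, 1] picks the later value per row,
    // i.e. 1970-01-01 00:00:00 (0 ms) and 1970-01-01 00:00:00.002 (2 ms).
    assert_eq!(greatest_millis(&[-1, 2], &[0, 1]), vec![0, 2]);
}
```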

View File

@@ -17,7 +17,7 @@ use std::sync::Arc;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use common_time::{Date, DateTime, Timestamp};
use common_time::{Date, Timestamp};
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{Int64Vector, VectorRef};
use snafu::ensure;
@@ -32,10 +32,6 @@ const NAME: &str = "to_unixtime";
fn convert_to_seconds(arg: &str, func_ctx: &FunctionContext) -> Option<i64> {
let timezone = &func_ctx.query_ctx.timezone();
if let Ok(dt) = DateTime::from_str(arg, Some(timezone)) {
return Some(dt.val() / 1000);
}
if let Ok(ts) = Timestamp::from_str(arg, Some(timezone)) {
return Some(ts.split().0);
}
@@ -59,12 +55,6 @@ fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
.collect::<Vec<Option<i64>>>()
}
fn convert_datetimes_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| vector.get(i).as_datetime().map(|dt| dt.val() / 1000))
.collect::<Vec<Option<i64>>>()
}
impl Function for ToUnixtimeFunction {
fn name(&self) -> &str {
NAME
@@ -82,7 +72,6 @@ impl Function for ToUnixtimeFunction {
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -119,10 +108,6 @@ impl Function for ToUnixtimeFunction {
let seconds = convert_dates_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::DateTime(_) => {
let seconds = convert_datetimes_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::Timestamp(_) => {
let seconds = convert_timestamps_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
@@ -148,7 +133,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
DateVector, StringVector, TimestampMillisecondVector, TimestampSecondVector,
};
use super::{ToUnixtimeFunction, *};
@@ -171,7 +156,6 @@ mod tests {
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -253,31 +237,6 @@ mod tests {
}
}
#[test]
fn test_datetime_to_unixtime() {
let f = ToUnixtimeFunction;
let times = vec![Some(123000), None, Some(42000), None];
let results = [Some(123), None, Some(42), None];
let date_vector = DateTimeVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(date_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
}
#[test]
fn test_timestamp_to_unixtime() {
let f = ToUnixtimeFunction;

View File

@@ -24,9 +24,11 @@ pub(crate) mod sum;
mod vector_add;
mod vector_dim;
mod vector_div;
mod vector_kth_elem;
mod vector_mul;
mod vector_norm;
mod vector_sub;
mod vector_subvector;
use std::sync::Arc;
@@ -56,6 +58,8 @@ impl VectorFunction {
registry.register(Arc::new(vector_div::VectorDivFunction));
registry.register(Arc::new(vector_norm::VectorNormFunction));
registry.register(Arc::new(vector_dim::VectorDimFunction));
registry.register(Arc::new(vector_kth_elem::VectorKthElemFunction));
registry.register(Arc::new(vector_subvector::VectorSubvectorFunction));
registry.register(Arc::new(elem_sum::ElemSumFunction));
registry.register(Arc::new(elem_product::ElemProductFunction));
}

View File

@@ -0,0 +1,211 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{Float32VectorBuilder, MutableVector, VectorRef};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::helper;
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const};
const NAME: &str = "vec_kth_elem";
/// Returns the k-th (0-based) element of the vector.
///
/// # Example
///
/// ```sql
/// SELECT vec_kth_elem("[2, 4, 6]",1) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | 4 |
/// +---------+
///
/// ```
///
#[derive(Debug, Clone, Default)]
pub struct VectorKthElemFunction;
impl Function for VectorKthElemFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(
&self,
_input_types: &[ConcreteDataType],
) -> common_query::error::Result<ConcreteDataType> {
Ok(ConcreteDataType::float32_datatype())
}
fn signature(&self) -> Signature {
helper::one_of_sigs2(
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::binary_datatype(),
],
vec![ConcreteDataType::int64_datatype()],
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
let len = arg0.len();
let mut result = Float32VectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
};
let arg0_const = as_veclit_if_const(arg0)?;
for i in 0..len {
let arg0 = match arg0_const.as_ref() {
Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
None => as_veclit(arg0.get_ref(i))?,
};
let Some(arg0) = arg0 else {
result.push_null();
continue;
};
let arg1 = arg1.get(i).as_f64_lossy();
let Some(arg1) = arg1 else {
result.push_null();
continue;
};
ensure!(
arg1 >= 0.0 && arg1.fract() == 0.0,
InvalidFuncArgsSnafu {
err_msg: format!(
"Invalid argument: k must be a non-negative integer, but got k = {}.",
arg1
),
}
);
let k = arg1 as usize;
ensure!(
k < arg0.len(),
InvalidFuncArgsSnafu {
err_msg: format!(
"Out of range: k must be in the range [0, {}], but got k = {}.",
arg0.len() - 1,
k
),
}
);
let value = arg0[k];
result.push(Some(value));
}
Ok(result.to_vector())
}
}
impl Display for VectorKthElemFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::error;
use datatypes::vectors::{Int64Vector, StringVector};
use super::*;
#[test]
fn test_vec_kth_elem() {
let func = VectorKthElemFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0,2.0,3.0]".to_string()),
Some("[4.0,5.0,6.0]".to_string()),
Some("[7.0,8.0,9.0]".to_string()),
None,
]));
let input1 = Arc::new(Int64Vector::from(vec![Some(0), Some(2), None, Some(1)]));
let result = func
.eval(&FunctionContext::default(), &[input0, input1])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(result.get_ref(0).as_f32().unwrap(), Some(1.0));
assert_eq!(result.get_ref(1).as_f32().unwrap(), Some(6.0));
assert!(result.get_ref(2).is_null());
assert!(result.get_ref(3).is_null());
let input0 = Arc::new(StringVector::from(vec![Some("[1.0,2.0,3.0]".to_string())]));
let input1 = Arc::new(Int64Vector::from(vec![Some(3)]));
let err = func
.eval(&FunctionContext::default(), &[input0, input1])
.unwrap_err();
match err {
error::Error::InvalidFuncArgs { err_msg, .. } => {
assert_eq!(
err_msg,
format!("Out of range: k must be in the range [0, 2], but got k = 3.")
)
}
_ => unreachable!(),
}
let input0 = Arc::new(StringVector::from(vec![Some("[1.0,2.0,3.0]".to_string())]));
let input1 = Arc::new(Int64Vector::from(vec![Some(-1)]));
let err = func
.eval(&FunctionContext::default(), &[input0, input1])
.unwrap_err();
match err {
error::Error::InvalidFuncArgs { err_msg, .. } => {
assert_eq!(
err_msg,
format!("Invalid argument: k must be a non-negative integer, but got k = -1.")
)
}
_ => unreachable!(),
}
}
}
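The argument checks above (k must be a non-negative integer within `[0, len - 1]`) boil down to the following standalone sketch; the helper and the string-only parsing are invented for illustration, while the engine also accepts the binary vector encoding:

```rust
/// Sketch of `vec_kth_elem` semantics on a vector literal parsed from a string.
fn kth_elem(vec_literal: &str, k: i64) -> Result<f32, String> {
    let values: Vec<f32> = vec_literal
        .trim_matches(|c: char| c == '[' || c == ']')
        .split(',')
        .filter(|s| !s.trim().is_empty())
        .map(|s| s.trim().parse::<f32>().map_err(|e| e.to_string()))
        .collect::<Result<_, _>>()?;
    if k < 0 {
        return Err(format!(
            "Invalid argument: k must be a non-negative integer, but got k = {k}."
        ));
    }
    let k = k as usize;
    if k >= values.len() {
        return Err(format!(
            "Out of range: k must be in the range [0, {}], but got k = {k}.",
            values.len().saturating_sub(1)
        ));
    }
    Ok(values[k])
}

fn main() {
    assert_eq!(kth_elem("[2, 4, 6]", 1), Ok(4.0));
    assert!(kth_elem("[2, 4, 6]", 3).is_err());
    assert!(kth_elem("[2, 4, 6]", -1).is_err());
}
```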

View File

@@ -0,0 +1,240 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::borrow::Cow;
use std::fmt::Display;
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
const NAME: &str = "vec_subvector";
/// Returns a subvector from the start (inclusive) to the end (exclusive) index.
///
/// # Example
///
/// ```sql
/// SELECT vec_to_string(vec_subvector("[1, 2, 3, 4, 5]", 1, 3)) as result;
///
/// +---------+
/// | result |
/// +---------+
/// | [2, 3] |
/// +---------+
///
/// ```
///
#[derive(Debug, Clone, Default)]
pub struct VectorSubvectorFunction;
impl Function for VectorSubvectorFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::binary_datatype())
}
fn signature(&self) -> Signature {
Signature::one_of(
vec![
TypeSignature::Exact(vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::int64_datatype(),
]),
TypeSignature::Exact(vec![
ConcreteDataType::binary_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::int64_datatype(),
]),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 3,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly three, have: {}",
columns.len()
)
}
);
let arg0 = &columns[0];
let arg1 = &columns[1];
let arg2 = &columns[2];
ensure!(
arg0.len() == arg1.len() && arg1.len() == arg2.len(),
InvalidFuncArgsSnafu {
err_msg: format!(
"The lengths of the vector are not aligned, args 0: {}, args 1: {}, args 2: {}",
arg0.len(),
arg1.len(),
arg2.len()
)
}
);
let len = arg0.len();
let mut result = BinaryVectorBuilder::with_capacity(len);
if len == 0 {
return Ok(result.to_vector());
}
let arg0_const = as_veclit_if_const(arg0)?;
for i in 0..len {
let arg0 = match arg0_const.as_ref() {
Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
None => as_veclit(arg0.get_ref(i))?,
};
let arg1 = arg1.get(i).as_i64();
let arg2 = arg2.get(i).as_i64();
let (Some(arg0), Some(arg1), Some(arg2)) = (arg0, arg1, arg2) else {
result.push_null();
continue;
};
ensure!(
0 <= arg1 && arg1 <= arg2 && arg2 as usize <= arg0.len(),
InvalidFuncArgsSnafu {
err_msg: format!(
"Invalid start and end indices: start={}, end={}, vec_len={}",
arg1,
arg2,
arg0.len()
)
}
);
let subvector = &arg0[arg1 as usize..arg2 as usize];
let binlit = veclit_to_binlit(subvector);
result.push(Some(&binlit));
}
Ok(result.to_vector())
}
}
impl Display for VectorSubvectorFunction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::error::Error;
use datatypes::vectors::{Int64Vector, StringVector};
use super::*;
use crate::function::FunctionContext;
#[test]
fn test_subvector() {
let func = VectorSubvectorFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0, 2.0, 3.0, 4.0, 5.0]".to_string()),
Some("[6.0, 7.0, 8.0, 9.0, 10.0]".to_string()),
None,
Some("[11.0, 12.0, 13.0]".to_string()),
]));
let input1 = Arc::new(Int64Vector::from(vec![Some(1), Some(0), Some(0), Some(1)]));
let input2 = Arc::new(Int64Vector::from(vec![Some(3), Some(5), Some(2), Some(3)]));
let result = func
.eval(&FunctionContext::default(), &[input0, input1, input2])
.unwrap();
let result = result.as_ref();
assert_eq!(result.len(), 4);
assert_eq!(
result.get_ref(0).as_binary().unwrap(),
Some(veclit_to_binlit(&[2.0, 3.0]).as_slice())
);
assert_eq!(
result.get_ref(1).as_binary().unwrap(),
Some(veclit_to_binlit(&[6.0, 7.0, 8.0, 9.0, 10.0]).as_slice())
);
assert!(result.get_ref(2).is_null());
assert_eq!(
result.get_ref(3).as_binary().unwrap(),
Some(veclit_to_binlit(&[12.0, 13.0]).as_slice())
);
}
#[test]
fn test_subvector_error() {
let func = VectorSubvectorFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0, 2.0, 3.0]".to_string()),
Some("[4.0, 5.0, 6.0]".to_string()),
]));
let input1 = Arc::new(Int64Vector::from(vec![Some(1), Some(2)]));
let input2 = Arc::new(Int64Vector::from(vec![Some(3)]));
let result = func.eval(&FunctionContext::default(), &[input0, input1, input2]);
match result {
Err(Error::InvalidFuncArgs { err_msg, .. }) => {
assert_eq!(
err_msg,
"The lengths of the vector are not aligned, args 0: 2, args 1: 2, args 2: 1"
)
}
_ => unreachable!(),
}
}
#[test]
fn test_subvector_invalid_indices() {
let func = VectorSubvectorFunction;
let input0 = Arc::new(StringVector::from(vec![
Some("[1.0, 2.0, 3.0]".to_string()),
Some("[4.0, 5.0, 6.0]".to_string()),
]));
let input1 = Arc::new(Int64Vector::from(vec![Some(1), Some(3)]));
let input2 = Arc::new(Int64Vector::from(vec![Some(3), Some(4)]));
let result = func.eval(&FunctionContext::default(), &[input0, input1, input2]);
match result {
Err(Error::InvalidFuncArgs { err_msg, .. }) => {
assert_eq!(
err_msg,
"Invalid start and end indices: start=3, end=4, vec_len=3"
)
}
_ => unreachable!(),
}
}
}
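The index rule enforced above is `0 <= start <= end <= len`, with `end` exclusive. A standalone sketch over a plain `&[f32]` (helper name invented for illustration; vector-literal parsing and null handling are left out):

```rust
/// Sketch of `vec_subvector` index semantics: start inclusive, end exclusive.
fn subvector(values: &[f32], start: i64, end: i64) -> Result<Vec<f32>, String> {
    if !(0 <= start && start <= end && end as usize <= values.len()) {
        return Err(format!(
            "Invalid start and end indices: start={start}, end={end}, vec_len={}",
            values.len()
        ));
    }
    Ok(values[start as usize..end as usize].to_vec())
}

fn main() {
    assert_eq!(subvector(&[1.0, 2.0, 3.0, 4.0, 5.0], 1, 3), Ok(vec![2.0, 3.0]));
    assert!(subvector(&[1.0, 2.0, 3.0], 3, 4).is_err());
}
```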

View File

@@ -35,7 +35,10 @@ impl FunctionState {
use api::v1::meta::ProcedureStatus;
use async_trait::async_trait;
use common_base::AffectedRows;
use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse};
use common_meta::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, ProcedureStateResponse,
RemoveRegionFollowerRequest,
};
use common_query::error::Result;
use common_query::Output;
use session::context::QueryContextRef;
@@ -66,6 +69,17 @@ impl FunctionState {
..Default::default()
})
}
async fn add_region_follower(&self, _request: AddRegionFollowerRequest) -> Result<()> {
Ok(())
}
async fn remove_region_follower(
&self,
_request: RemoveRegionFollowerRequest,
) -> Result<()> {
Ok(())
}
}
#[async_trait]

View File

@@ -22,7 +22,9 @@ mod version;
use std::sync::Arc;
use build::BuildFunction;
use database::{CurrentSchemaFunction, DatabaseFunction, SessionUserFunction};
use database::{
CurrentSchemaFunction, DatabaseFunction, ReadPreferenceFunction, SessionUserFunction,
};
use pg_catalog::PGCatalogFunction;
use procedure_state::ProcedureStateFunction;
use timezone::TimezoneFunction;
@@ -39,6 +41,7 @@ impl SystemFunction {
registry.register(Arc::new(CurrentSchemaFunction));
registry.register(Arc::new(DatabaseFunction));
registry.register(Arc::new(SessionUserFunction));
registry.register(Arc::new(ReadPreferenceFunction));
registry.register(Arc::new(TimezoneFunction));
registry.register_async(Arc::new(ProcedureStateFunction));
PGCatalogFunction::register(registry);

View File

@@ -30,9 +30,12 @@ pub struct DatabaseFunction;
pub struct CurrentSchemaFunction;
pub struct SessionUserFunction;
pub struct ReadPreferenceFunction;
const DATABASE_FUNCTION_NAME: &str = "database";
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
const SESSION_USER_FUNCTION_NAME: &str = "session_user";
const READ_PREFERENCE_FUNCTION_NAME: &str = "read_preference";
impl Function for DatabaseFunction {
fn name(&self) -> &str {
@@ -94,6 +97,26 @@ impl Function for SessionUserFunction {
}
}
impl Function for ReadPreferenceFunction {
fn name(&self) -> &str {
READ_PREFERENCE_FUNCTION_NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::nullary(Volatility::Immutable)
}
fn eval(&self, func_ctx: &FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let read_preference = func_ctx.query_ctx.read_preference();
Ok(Arc::new(StringVector::from_slice(&[read_preference.as_ref()])) as _)
}
}
impl fmt::Display for DatabaseFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "DATABASE")
@@ -112,6 +135,12 @@ impl fmt::Display for SessionUserFunction {
}
}
impl fmt::Display for ReadPreferenceFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "READ_PREFERENCE")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;

View File

@@ -15,11 +15,13 @@
use api::helper::ColumnDataTypeWrapper;
use api::v1::add_column_location::LocationType;
use api::v1::alter_table_expr::Kind;
use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
use api::v1::column_def::{
as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type,
};
use api::v1::{
column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr,
DropColumns, ModifyColumnTypes, RenameTable, SemanticType,
SkippingIndexType as PbSkippingIndexType,
DropColumns, FulltextBackend as PbFulltextBackend, ModifyColumnTypes, RenameTable,
SemanticType, SkippingIndexType as PbSkippingIndexType,
};
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions};
@@ -126,11 +128,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
column_name: f.column_name.clone(),
options: FulltextOptions {
enable: f.enable,
analyzer: as_fulltext_option(
analyzer: as_fulltext_option_analyzer(
Analyzer::try_from(f.analyzer)
.context(InvalidSetFulltextOptionRequestSnafu)?,
),
case_sensitive: f.case_sensitive,
backend: as_fulltext_option_backend(
PbFulltextBackend::try_from(f.backend)
.context(InvalidSetFulltextOptionRequestSnafu)?,
),
},
},
},

View File

@@ -25,7 +25,7 @@ async fn do_bench_channel_manager() {
let m_clone = m.clone();
let join = tokio::spawn(async move {
for _ in 0..10000 {
let idx = rand::random::<usize>() % 100;
let idx = rand::random::<u32>() % 100;
let ret = m_clone.get(format!("{idx}"));
let _ = ret.unwrap();
}

View File

@@ -17,8 +17,8 @@ use api::v1::column::Values;
use common_base::BitVec;
use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
Int16Vector, Int32Vector, Int64Vector, Int8Vector, IntervalDayTimeVector,
IntervalMonthDayNanoVector, IntervalYearMonthVector, StringVector, TimeMicrosecondVector,
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
@@ -141,12 +141,6 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
(ConcreteDataType::Date(_), DateVector, date_values, |x| {
x.val()
}),
(
ConcreteDataType::DateTime(_),
DateTimeVector,
datetime_values,
|x| { x.val() }
),
(
ConcreteDataType::Timestamp(TimestampType::Second(_)),
TimestampSecondVector,

View File

@@ -18,11 +18,13 @@ mod print_caller;
mod range_fn;
mod stack_trace_debug;
mod utils;
use aggr_func::{impl_aggr_func_type_store, impl_as_aggr_func_creator};
use print_caller::process_print_caller;
use proc_macro::TokenStream;
use quote::quote;
use range_fn::process_range_fn;
use syn::{parse_macro_input, DeriveInput};
use syn::{parse_macro_input, Data, DeriveInput, Fields};
use crate::admin_fn::process_admin_fn;
@@ -136,3 +138,51 @@ pub fn print_caller(args: TokenStream, input: TokenStream) -> TokenStream {
pub fn stack_trace_debug(args: TokenStream, input: TokenStream) -> TokenStream {
stack_trace_debug::stack_trace_style_impl(args.into(), input.into()).into()
}
/// Generates implementation for `From<&TableMeta> for TableMetaBuilder`
#[proc_macro_derive(ToMetaBuilder)]
pub fn derive_meta_builder(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);
let Data::Struct(data_struct) = input.data else {
panic!("ToMetaBuilder can only be derived for structs");
};
let Fields::Named(fields) = data_struct.fields else {
panic!("ToMetaBuilder can only be derived for structs with named fields");
};
// Check that this is being applied to TableMeta struct
if input.ident != "TableMeta" {
panic!("ToMetaBuilder can only be derived for TableMeta struct");
}
let field_init = fields.named.iter().map(|field| {
let field_name = field.ident.as_ref().unwrap();
quote! {
#field_name: Default::default(),
}
});
let field_assignments = fields.named.iter().map(|field| {
let field_name = field.ident.as_ref().unwrap();
quote! {
builder.#field_name(meta.#field_name.clone());
}
});
let gen = quote! {
impl From<&TableMeta> for TableMetaBuilder {
fn from(meta: &TableMeta) -> Self {
let mut builder = Self {
#(#field_init)*
};
#(#field_assignments)*
builder
}
}
};
gen.into()
}
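To make the macro's output concrete, this is roughly what the derive would expand to for a hypothetical two-field `TableMeta`; both structs here are invented, trimmed-down stand-ins for the real types in the `table` crate:

```rust
// Hypothetical, simplified stand-ins for the real structs.
struct TableMeta {
    engine: String,
    region_numbers: Vec<u32>,
}

struct TableMetaBuilder {
    engine: Option<String>,
    region_numbers: Option<Vec<u32>>,
}

impl TableMetaBuilder {
    fn engine(&mut self, v: String) -> &mut Self {
        self.engine = Some(v);
        self
    }
    fn region_numbers(&mut self, v: Vec<u32>) -> &mut Self {
        self.region_numbers = Some(v);
        self
    }
}

// What `#[derive(ToMetaBuilder)]` would emit for this struct: a default-initialized
// builder whose setters are then called once per field.
impl From<&TableMeta> for TableMetaBuilder {
    fn from(meta: &TableMeta) -> Self {
        let mut builder = Self {
            engine: Default::default(),
            region_numbers: Default::default(),
        };
        builder.engine(meta.engine.clone());
        builder.region_numbers(meta.region_numbers.clone());
        builder
    }
}

fn main() {
    let meta = TableMeta {
        engine: "mito".to_string(),
        region_numbers: vec![0, 1],
    };
    let builder = TableMetaBuilder::from(&meta);
    assert_eq!(builder.engine.as_deref(), Some("mito"));
    assert_eq!(builder.region_numbers, Some(vec![0, 1]));
}
```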

View File

@@ -7,6 +7,7 @@ license.workspace = true
[features]
testing = []
pg_kvbackend = ["dep:tokio-postgres", "dep:backon", "dep:deadpool-postgres", "dep:deadpool"]
mysql_kvbackend = ["dep:sqlx", "dep:backon"]
[lints]
workspace = true
@@ -57,9 +58,10 @@ serde_json.workspace = true
serde_with.workspace = true
session.workspace = true
snafu.workspace = true
sqlx = { workspace = true, optional = true }
store-api.workspace = true
strum.workspace = true
table.workspace = true
table = { workspace = true, features = ["testing"] }
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true

View File

@@ -192,6 +192,8 @@ mod tests {
expire_after: Some(300),
comment: "comment".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
},
(1..=3)
.map(|i| {

View File

@@ -27,6 +27,7 @@ use crate::error::{
DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
InvalidRoleSnafu, ParseNumSnafu, Result,
};
use crate::key::flow::flow_state::FlowStat;
use crate::peer::Peer;
const CLUSTER_NODE_INFO_PREFIX: &str = "__meta_cluster_node_info";
@@ -52,6 +53,9 @@ pub trait ClusterInfo {
/// List all region stats in the cluster.
async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
/// List all flow stats in the cluster.
async fn list_flow_stats(&self) -> std::result::Result<Option<FlowStat>, Self::Error>;
// TODO(jeremy): Other info, like region status, etc.
}

View File

@@ -92,6 +92,22 @@ pub struct RegionStat {
pub sst_size: u64,
/// The size of the SST index files in bytes.
pub index_size: u64,
/// The manifest info of the region.
pub region_manifest: RegionManifestInfo,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RegionManifestInfo {
Mito {
manifest_version: u64,
flushed_entry_id: u64,
},
Metric {
data_manifest_version: u64,
data_flushed_entry_id: u64,
metadata_manifest_version: u64,
metadata_flushed_entry_id: u64,
},
}
impl Stat {
@@ -165,6 +181,31 @@ impl TryFrom<&HeartbeatRequest> for Stat {
}
}
impl From<store_api::region_engine::RegionManifestInfo> for RegionManifestInfo {
fn from(value: store_api::region_engine::RegionManifestInfo) -> Self {
match value {
store_api::region_engine::RegionManifestInfo::Mito {
manifest_version,
flushed_entry_id,
} => RegionManifestInfo::Mito {
manifest_version,
flushed_entry_id,
},
store_api::region_engine::RegionManifestInfo::Metric {
data_manifest_version,
data_flushed_entry_id,
metadata_manifest_version,
metadata_flushed_entry_id,
} => RegionManifestInfo::Metric {
data_manifest_version,
data_flushed_entry_id,
metadata_manifest_version,
metadata_flushed_entry_id,
},
}
}
}
impl From<&api::v1::meta::RegionStat> for RegionStat {
fn from(value: &api::v1::meta::RegionStat) -> Self {
let region_stat = value
@@ -185,6 +226,7 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
region_manifest: region_stat.manifest.into(),
}
}
}

View File

@@ -22,14 +22,18 @@ use store_api::storage::{RegionId, RegionNumber, TableId};
use crate::cache_invalidator::CacheInvalidatorRef;
use crate::ddl::flow_meta::FlowMetadataAllocatorRef;
use crate::ddl::table_meta::TableMetadataAllocatorRef;
use crate::error::Result;
use crate::error::{Result, UnsupportedSnafu};
use crate::key::flow::FlowMetadataManagerRef;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::TableMetadataManagerRef;
use crate::node_manager::NodeManagerRef;
use crate::region_keeper::MemoryRegionKeeperRef;
use crate::region_registry::LeaderRegionRegistryRef;
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::rpc::procedure::{
AddRegionFollowerRequest, MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse,
RemoveRegionFollowerRequest,
};
use crate::DatanodeId;
pub mod alter_database;
@@ -70,6 +74,30 @@ pub trait ProcedureExecutor: Send + Sync {
request: SubmitDdlTaskRequest,
) -> Result<SubmitDdlTaskResponse>;
/// Add a region follower
async fn add_region_follower(
&self,
_ctx: &ExecutorContext,
_request: AddRegionFollowerRequest,
) -> Result<()> {
UnsupportedSnafu {
operation: "add_region_follower",
}
.fail()
}
/// Remove a region follower
async fn remove_region_follower(
&self,
_ctx: &ExecutorContext,
_request: RemoveRegionFollowerRequest,
) -> Result<()> {
UnsupportedSnafu {
operation: "remove_region_follower",
}
.fail()
}
/// Submit a region migration task
async fn migrate_region(
&self,
@@ -137,6 +165,8 @@ pub struct DdlContext {
pub cache_invalidator: CacheInvalidatorRef,
/// Keep tracking operating regions.
pub memory_region_keeper: MemoryRegionKeeperRef,
/// The leader region registry.
pub leader_region_registry: LeaderRegionRegistryRef,
/// Table metadata manager.
pub table_metadata_manager: TableMetadataManagerRef,
/// Allocator for table metadata.

View File

@@ -425,7 +425,14 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
let flow_type = value.flow_type.unwrap_or_default().to_string();
options.insert("flow_type".to_string(), flow_type);
let flow_info = FlowInfoValue {
let mut create_time = chrono::Utc::now();
if let Some(prev_flow_value) = value.prev_flow_info_value.as_ref()
&& value.task.or_replace
{
create_time = prev_flow_value.get_inner_ref().created_time;
}
let flow_info: FlowInfoValue = FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),
sink_table_name,
flownode_ids,
@@ -435,6 +442,8 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
expire_after,
comment,
options,
created_time: create_time,
updated_time: chrono::Utc::now(),
};
(flow_info, flow_routes)

View File

@@ -35,7 +35,9 @@ use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::rpc::router::{
find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
/// [Control] indicated to the caller whether to go to the next step.
#[derive(Debug)]
@@ -250,6 +252,11 @@ impl DropTableExecutor {
.into_iter()
.collect::<Result<Vec<_>>>()?;
// Deletes the leader region from registry.
let region_ids = operating_leader_regions(region_routes);
ctx.leader_region_registry
.batch_delete(region_ids.into_iter().map(|(region_id, _)| region_id));
Ok(())
}
}

View File

@@ -98,13 +98,14 @@ impl TableMetadataAllocator {
fn create_wal_options(
&self,
table_route: &PhysicalTableRouteValue,
skip_wal: bool,
) -> Result<HashMap<RegionNumber, String>> {
let region_numbers = table_route
.region_routes
.iter()
.map(|route| route.region.id.region_number())
.collect();
allocate_region_wal_options(region_numbers, &self.wal_options_allocator)
allocate_region_wal_options(region_numbers, &self.wal_options_allocator, skip_wal)
}
async fn create_table_route(
@@ -158,7 +159,9 @@ impl TableMetadataAllocator {
pub async fn create(&self, task: &CreateTableTask) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(&task.create_table.table_id).await?;
let table_route = self.create_table_route(table_id, task).await?;
let region_wal_options = self.create_wal_options(&table_route)?;
let region_wal_options =
self.create_wal_options(&table_route, task.table_info.meta.options.skip_wal)?;
debug!(
"Allocated region wal options {:?} for table {}",

View File

@@ -850,6 +850,7 @@ mod tests {
use crate::node_manager::{DatanodeRef, FlownodeRef, NodeManager};
use crate::peer::Peer;
use crate::region_keeper::MemoryRegionKeeper;
use crate::region_registry::LeaderRegionRegistry;
use crate::sequence::SequenceBuilder;
use crate::state_store::KvStateStore;
use crate::wal_options_allocator::WalOptionsAllocator;
@@ -893,6 +894,7 @@ mod tests {
flow_metadata_manager,
flow_metadata_allocator,
memory_region_keeper: Arc::new(MemoryRegionKeeper::default()),
leader_region_registry: Arc::new(LeaderRegionRegistry::default()),
region_failure_detector_controller: Arc::new(NoopRegionFailureDetectorControl),
},
procedure_manager.clone(),

View File

@@ -685,7 +685,36 @@ pub enum Error {
operation: String,
},
#[cfg(feature = "pg_kvbackend")]
#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to execute via MySql, sql: {}", sql))]
MySqlExecution {
sql: String,
#[snafu(source)]
error: sqlx::Error,
#[snafu(implicit)]
location: Location,
},
#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to create connection pool for MySql"))]
CreateMySqlPool {
#[snafu(source)]
error: sqlx::Error,
#[snafu(implicit)]
location: Location,
},
#[cfg(feature = "mysql_kvbackend")]
#[snafu(display("Failed to {} MySql transaction", operation))]
MySqlTransaction {
#[snafu(source)]
error: sqlx::Error,
#[snafu(implicit)]
location: Location,
operation: String,
},
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
#[snafu(display("Rds transaction retry failed"))]
RdsTransactionRetryFailed {
#[snafu(implicit)]
@@ -823,8 +852,13 @@ impl ErrorExt for Error {
PostgresExecution { .. }
| CreatePostgresPool { .. }
| GetPostgresConnection { .. }
| PostgresTransaction { .. }
| RdsTransactionRetryFailed { .. } => StatusCode::Internal,
| PostgresTransaction { .. } => StatusCode::Internal,
#[cfg(feature = "mysql_kvbackend")]
MySqlExecution { .. } | CreateMySqlPool { .. } | MySqlTransaction { .. } => {
StatusCode::Internal
}
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
RdsTransactionRetryFailed { .. } => StatusCode::Internal,
Error::DatanodeTableInfoNotFound { .. } => StatusCode::Internal,
}
}
@@ -835,16 +869,29 @@ impl ErrorExt for Error {
}
impl Error {
#[cfg(feature = "pg_kvbackend")]
#[cfg(any(feature = "pg_kvbackend", feature = "mysql_kvbackend"))]
/// Check if the error is a serialization error.
pub fn is_serialization_error(&self) -> bool {
match self {
#[cfg(feature = "pg_kvbackend")]
Error::PostgresTransaction { error, .. } => {
error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
}
#[cfg(feature = "pg_kvbackend")]
Error::PostgresExecution { error, .. } => {
error.code() == Some(&tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE)
}
#[cfg(feature = "mysql_kvbackend")]
Error::MySqlExecution {
error: sqlx::Error::Database(database_error),
..
} => {
matches!(
database_error.message(),
"Deadlock found when trying to get lock; try restarting transaction"
| "can't serialize access for this transaction"
)
}
_ => false,
}
}
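The MySQL arm classifies an error as retryable by comparing the server's message verbatim. A standalone sketch of that check (helper name invented; only the two message strings from the match above are assumed):

```rust
/// Returns true if a MySQL error message indicates a retryable
/// deadlock/serialization failure.
fn is_retryable_mysql_message(message: &str) -> bool {
    matches!(
        message,
        "Deadlock found when trying to get lock; try restarting transaction"
            | "can't serialize access for this transaction"
    )
}

fn main() {
    assert!(is_retryable_mysql_message(
        "Deadlock found when trying to get lock; try restarting transaction"
    ));
    assert!(!is_retryable_mysql_message("Unknown column 'k' in 'field list'"));
}
```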

View File

@@ -512,6 +512,10 @@ impl TableMetadataManager {
&self.table_route_manager
}
pub fn topic_region_manager(&self) -> &TopicRegionManager {
&self.topic_region_manager
}
#[cfg(feature = "testing")]
pub fn kv_backend(&self) -> &KvBackendRef {
&self.kv_backend
@@ -1471,7 +1475,8 @@ mod tests {
new_test_table_info(region_routes.iter().map(|r| r.region.id.region_number())).into();
let wal_allocator = WalOptionsAllocator::RaftEngine;
let regions = (0..16).collect();
let region_wal_options = allocate_region_wal_options(regions, &wal_allocator).unwrap();
let region_wal_options =
allocate_region_wal_options(regions, &wal_allocator, false).unwrap();
create_physical_table_metadata(
&table_metadata_manager,
table_info.clone(),

View File

@@ -461,6 +461,8 @@ mod tests {
expire_after: Some(300),
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
}
}
@@ -632,6 +634,8 @@ mod tests {
expire_after: Some(300),
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
};
let err = flow_metadata_manager
.create_flow_metadata(flow_id, flow_value, flow_routes.clone())
@@ -869,6 +873,8 @@ mod tests {
expire_after: Some(300),
comment: "hi".to_string(),
options: Default::default(),
created_time: chrono::Utc::now(),
updated_time: chrono::Utc::now(),
};
let err = flow_metadata_manager
.update_flow_metadata(

View File

@@ -15,6 +15,7 @@
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -131,6 +132,12 @@ pub struct FlowInfoValue {
pub(crate) comment: String,
/// The options.
pub(crate) options: HashMap<String, String>,
/// The created time.
#[serde(default)]
pub(crate) created_time: DateTime<Utc>,
/// The updated time.
#[serde(default)]
pub(crate) updated_time: DateTime<Utc>,
}
impl FlowInfoValue {
@@ -171,6 +178,14 @@ impl FlowInfoValue {
pub fn options(&self) -> &HashMap<String, String> {
&self.options
}
pub fn created_time(&self) -> &DateTime<Utc> {
&self.created_time
}
pub fn updated_time(&self) -> &DateTime<Utc> {
&self.updated_time
}
}
pub type FlowInfoManagerRef = Arc<FlowInfoManager>;

View File

@@ -97,11 +97,19 @@ impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey {
pub struct FlowStateValue {
/// For each key, the bytes of the state in memory
pub state_size: BTreeMap<FlowId, usize>,
/// For each key, the last execution time of the flow, as a Unix timestamp in milliseconds.
pub last_exec_time_map: BTreeMap<FlowId, i64>,
}
impl FlowStateValue {
pub fn new(state_size: BTreeMap<FlowId, usize>) -> Self {
Self { state_size }
pub fn new(
state_size: BTreeMap<FlowId, usize>,
last_exec_time_map: BTreeMap<FlowId, i64>,
) -> Self {
Self {
state_size,
last_exec_time_map,
}
}
}
@@ -143,12 +151,15 @@ impl FlowStateManager {
pub struct FlowStat {
/// For each key, the bytes of the state in memory
pub state_size: BTreeMap<u32, usize>,
/// For each key, the last execution time of the flow, as a Unix timestamp in milliseconds.
pub last_exec_time_map: BTreeMap<FlowId, i64>,
}
impl From<FlowStateValue> for FlowStat {
fn from(value: FlowStateValue) -> Self {
Self {
state_size: value.state_size,
last_exec_time_map: value.last_exec_time_map,
}
}
}
@@ -157,6 +168,7 @@ impl From<FlowStat> for FlowStateValue {
fn from(value: FlowStat) -> Self {
Self {
state_size: value.state_size,
last_exec_time_map: value.last_exec_time_map,
}
}
}

View File

@@ -40,7 +40,7 @@ pub fn new_test_table_info_with_name<I: IntoIterator<Item = u32>>(
.build()
.unwrap();
let meta = TableMetaBuilder::default()
let meta = TableMetaBuilder::empty()
.schema(Arc::new(schema))
.primary_key_indices(vec![0])
.engine("engine")

View File

@@ -224,6 +224,7 @@ impl TopicRegionManager {
Some((region_id, kafka.topic.as_str()))
}
Some(WalOptions::RaftEngine) => None,
Some(WalOptions::Noop) => None,
None => None,
},
)

View File

@@ -31,7 +31,7 @@ use crate::rpc::KeyValue;
pub mod chroot;
pub mod etcd;
pub mod memory;
#[cfg(feature = "pg_kvbackend")]
#[cfg(any(feature = "mysql_kvbackend", feature = "pg_kvbackend"))]
pub mod rds;
pub mod test;
pub mod txn;

View File

@@ -14,13 +14,11 @@
use std::any::Any;
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use std::marker::PhantomData;
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use serde::Serializer;
use super::{KvBackendRef, ResettableKvBackend};
use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse};
@@ -38,19 +36,6 @@ pub struct MemoryKvBackend<T> {
_phantom: PhantomData<T>,
}
impl<T> Display for MemoryKvBackend<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let kvs = self.kvs.read().unwrap();
for (k, v) in kvs.iter() {
f.serialize_str(&String::from_utf8_lossy(k))?;
f.serialize_str(" -> ")?;
f.serialize_str(&String::from_utf8_lossy(v))?;
f.serialize_str("\n")?;
}
Ok(())
}
}
impl<T> Default for MemoryKvBackend<T> {
fn default() -> Self {
Self {

View File

@@ -33,10 +33,16 @@ use crate::rpc::store::{
};
use crate::rpc::KeyValue;
#[cfg(feature = "pg_kvbackend")]
mod postgres;
#[cfg(feature = "pg_kvbackend")]
pub use postgres::PgStore;
#[cfg(feature = "mysql_kvbackend")]
mod mysql;
#[cfg(feature = "mysql_kvbackend")]
pub use mysql::MySqlStore;
const RDS_STORE_TXN_RETRY_COUNT: usize = 3;
/// Query executor for rds. It can execute queries or generate a transaction executor.
@@ -106,6 +112,14 @@ impl<T: Executor> ExecutorImpl<'_, T> {
}
}
#[warn(dead_code)] // Used in #[cfg(feature = "mysql_kvbackend")]
async fn execute(&mut self, query: &str, params: &Vec<&Vec<u8>>) -> Result<()> {
match self {
Self::Default(executor) => executor.execute(query, params).await,
Self::Txn(executor) => executor.execute(query, params).await,
}
}
async fn commit(self) -> Result<()> {
match self {
Self::Txn(executor) => executor.commit().await,

View File

@@ -0,0 +1,653 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_telemetry::debug;
use snafu::ResultExt;
use sqlx::mysql::MySqlRow;
use sqlx::pool::Pool;
use sqlx::{MySql, MySqlPool, Row, Transaction as MySqlTransaction};
use crate::error::{CreateMySqlPoolSnafu, MySqlExecutionSnafu, MySqlTransactionSnafu, Result};
use crate::kv_backend::rds::{
Executor, ExecutorFactory, ExecutorImpl, KvQueryExecutor, RdsStore, Transaction,
RDS_STORE_TXN_RETRY_COUNT,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, DeleteRangeRequest, DeleteRangeResponse, RangeRequest, RangeResponse,
};
use crate::rpc::KeyValue;
type MySqlClient = Arc<Pool<MySql>>;
pub struct MySqlTxnClient(MySqlTransaction<'static, MySql>);
fn key_value_from_row(row: MySqlRow) -> KeyValue {
// Safety: key and value are the first two columns in the row
KeyValue {
key: row.get_unchecked(0),
value: row.get_unchecked(1),
}
}
const EMPTY: &[u8] = &[0];
/// Type of range template.
#[derive(Debug, Clone, Copy)]
enum RangeTemplateType {
Point,
Range,
Full,
LeftBounded,
Prefix,
}
/// Builds params for the given range template type.
impl RangeTemplateType {
fn build_params(&self, mut key: Vec<u8>, range_end: Vec<u8>) -> Vec<Vec<u8>> {
match self {
RangeTemplateType::Point => vec![key],
RangeTemplateType::Range => vec![key, range_end],
RangeTemplateType::Full => vec![],
RangeTemplateType::LeftBounded => vec![key],
RangeTemplateType::Prefix => {
key.push(b'%');
vec![key]
}
}
}
}
/// Templates for range request.
#[derive(Debug, Clone)]
struct RangeTemplate {
point: String,
range: String,
full: String,
left_bounded: String,
prefix: String,
}
impl RangeTemplate {
/// Gets the template for the given type.
fn get(&self, typ: RangeTemplateType) -> &str {
match typ {
RangeTemplateType::Point => &self.point,
RangeTemplateType::Range => &self.range,
RangeTemplateType::Full => &self.full,
RangeTemplateType::LeftBounded => &self.left_bounded,
RangeTemplateType::Prefix => &self.prefix,
}
}
/// Adds limit to the template.
fn with_limit(template: &str, limit: i64) -> String {
if limit == 0 {
return format!("{};", template);
}
format!("{} LIMIT {};", template, limit)
}
}
fn is_prefix_range(start: &[u8], end: &[u8]) -> bool {
if start.len() != end.len() {
return false;
}
let l = start.len();
let same_prefix = start[0..l - 1] == end[0..l - 1];
if let (Some(rhs), Some(lhs)) = (start.last(), end.last()) {
return same_prefix && (*rhs + 1) == *lhs;
}
false
}
/// Determine the template type for range request.
fn range_template(key: &[u8], range_end: &[u8]) -> RangeTemplateType {
match (key, range_end) {
(_, &[]) => RangeTemplateType::Point,
(EMPTY, EMPTY) => RangeTemplateType::Full,
(_, EMPTY) => RangeTemplateType::LeftBounded,
(start, end) => {
if is_prefix_range(start, end) {
RangeTemplateType::Prefix
} else {
RangeTemplateType::Range
}
}
}
}
/// Generates `IN` clause placeholders for MySQL.
fn mysql_generate_in_placeholders(from: usize, to: usize) -> Vec<String> {
(from..=to).map(|_| "?".to_string()).collect()
}
/// Factory for building sql templates.
struct MySqlTemplateFactory<'a> {
table_name: &'a str,
}
impl<'a> MySqlTemplateFactory<'a> {
/// Creates a new [`MySqlTemplateFactory`] with the given table name.
fn new(table_name: &'a str) -> Self {
Self { table_name }
}
/// Builds the template set for the given table name.
fn build(&self) -> MySqlTemplateSet {
let table_name = self.table_name;
// Some queries don't end with `;` because we may need to append a `LIMIT` clause.
MySqlTemplateSet {
table_name: table_name.to_string(),
create_table_statement: format!(
// Cannot be more than 3072 bytes in PRIMARY KEY
"CREATE TABLE IF NOT EXISTS `{table_name}`(k VARBINARY(3072) PRIMARY KEY, v BLOB);",
),
range_template: RangeTemplate {
point: format!("SELECT k, v FROM `{table_name}` WHERE k = ?"),
range: format!("SELECT k, v FROM `{table_name}` WHERE k >= ? AND k < ? ORDER BY k"),
full: format!("SELECT k, v FROM `{table_name}` ? ORDER BY k"),
left_bounded: format!("SELECT k, v FROM `{table_name}` WHERE k >= ? ORDER BY k"),
prefix: format!("SELECT k, v FROM `{table_name}` WHERE k LIKE ? ORDER BY k"),
},
delete_template: RangeTemplate {
point: format!("DELETE FROM `{table_name}` WHERE k = ?;"),
range: format!("DELETE FROM `{table_name}` WHERE k >= ? AND k < ?;"),
full: format!("DELETE FROM `{table_name}`"),
left_bounded: format!("DELETE FROM `{table_name}` WHERE k >= ?;"),
prefix: format!("DELETE FROM `{table_name}` WHERE k LIKE ?;"),
},
}
}
}
/// Templates for the given table name.
#[derive(Debug, Clone)]
pub struct MySqlTemplateSet {
table_name: String,
create_table_statement: String,
range_template: RangeTemplate,
delete_template: RangeTemplate,
}
impl MySqlTemplateSet {
/// Generates the sql for batch get.
fn generate_batch_get_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
format!(
"SELECT k, v FROM `{table_name}` WHERE k in ({});",
in_clause
)
}
/// Generates the sql for batch delete.
fn generate_batch_delete_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = mysql_generate_in_placeholders(1, key_len).join(", ");
format!("DELETE FROM `{table_name}` WHERE k in ({});", in_clause)
}
/// Generates the sql for batch upsert.
/// For MySQL, it also generates a select query to get the previous values.
fn generate_batch_upsert_query(&self, kv_len: usize) -> (String, String) {
let table_name = &self.table_name;
let in_placeholders: Vec<String> = (1..=kv_len).map(|_| "?".to_string()).collect();
let in_clause = in_placeholders.join(", ");
let mut values_placeholders = Vec::new();
for _ in 0..kv_len {
values_placeholders.push("(?, ?)".to_string());
}
let values_clause = values_placeholders.join(", ");
(
format!(r#"SELECT k, v FROM `{table_name}` WHERE k IN ({in_clause})"#,),
format!(
r#"INSERT INTO `{table_name}` (k, v) VALUES {values_clause} ON DUPLICATE KEY UPDATE v = VALUES(v);"#,
),
)
}
}
#[async_trait::async_trait]
impl Executor for MySqlClient {
type Transaction<'a>
= MySqlTxnClient
where
Self: 'a;
fn name() -> &'static str {
"MySql"
}
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
let rows = query
.fetch_all(&**self)
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
query
.execute(&**self)
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(())
}
async fn txn_executor<'a>(&'a mut self) -> Result<Self::Transaction<'a>> {
// sqlx has no isolation level support for now, so we have to set it manually.
// TODO(CookiePie): Waiting for https://github.com/launchbadge/sqlx/pull/3614 and remove this.
sqlx::query("SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE")
.execute(&**self)
.await
.context(MySqlExecutionSnafu {
sql: "SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE",
})?;
let txn = self
.begin()
.await
.context(MySqlExecutionSnafu { sql: "begin" })?;
Ok(MySqlTxnClient(txn))
}
}
#[async_trait::async_trait]
impl Transaction<'_> for MySqlTxnClient {
async fn query(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<Vec<KeyValue>> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
let rows = query
.fetch_all(&mut *(self.0))
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(rows.into_iter().map(key_value_from_row).collect())
}
async fn execute(&mut self, raw_query: &str, params: &[&Vec<u8>]) -> Result<()> {
let query = sqlx::query(raw_query);
let query = params.iter().fold(query, |query, param| query.bind(param));
// As said in https://docs.rs/sqlx/latest/sqlx/trait.Executor.html, we need a `&mut *transaction`. Weird.
query
.execute(&mut *(self.0))
.await
.context(MySqlExecutionSnafu { sql: raw_query })?;
Ok(())
}
/// Caution: sqlx may get stuck on the query if two transactions conflict with each other.
/// It is unclear whether this is a feature or depends on the database. Be careful.
async fn commit(self) -> Result<()> {
self.0.commit().await.context(MySqlTransactionSnafu {
operation: "commit",
})?;
Ok(())
}
}
pub struct MySqlExecutorFactory {
pool: Arc<Pool<MySql>>,
}
#[async_trait::async_trait]
impl ExecutorFactory<MySqlClient> for MySqlExecutorFactory {
async fn default_executor(&self) -> Result<MySqlClient> {
Ok(self.pool.clone())
}
async fn txn_executor<'a>(
&self,
default_executor: &'a mut MySqlClient,
) -> Result<MySqlTxnClient> {
default_executor.txn_executor().await
}
}
/// A MySQL-backed key-value store.
/// It uses [sqlx::Pool<MySql>] as the connection pool for [RdsStore].
pub type MySqlStore = RdsStore<MySqlClient, MySqlExecutorFactory, MySqlTemplateSet>;
#[async_trait::async_trait]
impl KvQueryExecutor<MySqlClient> for MySqlStore {
async fn range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: RangeRequest,
) -> Result<RangeResponse> {
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.range_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().collect::<Vec<_>>();
// Always add 1 to limit to check if there is more data
let query =
RangeTemplate::with_limit(template, if req.limit == 0 { 0 } else { req.limit + 1 });
let limit = req.limit as usize;
debug!("query: {:?}, params: {:?}", query, params);
let mut kvs = query_executor.query(&query, &params_ref).await?;
if req.keys_only {
kvs.iter_mut().for_each(|kv| kv.value = vec![]);
}
// If limit is 0, we always return all data
if limit == 0 || kvs.len() <= limit {
return Ok(RangeResponse { kvs, more: false });
}
// If the number of rows exceeds the limit, we remove the extra row and set more to true
let removed = kvs.pop();
debug_assert!(removed.is_some());
Ok(RangeResponse { kvs, more: true })
}
async fn batch_put_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchPutRequest,
) -> Result<BatchPutResponse> {
let mut in_params = Vec::with_capacity(req.kvs.len() * 3);
let mut values_params = Vec::with_capacity(req.kvs.len() * 2);
for kv in &req.kvs {
let processed_key = &kv.key;
in_params.push(processed_key);
let processed_value = &kv.value;
values_params.push(processed_key);
values_params.push(processed_value);
}
let in_params = in_params.iter().map(|x| x as _).collect::<Vec<_>>();
let values_params = values_params.iter().map(|x| x as _).collect::<Vec<_>>();
let (select, update) = self
.sql_template_set
.generate_batch_upsert_query(req.kvs.len());
// Fast path: if we don't need previous kvs, we can just upsert the keys.
if !req.prev_kv {
query_executor.execute(&update, &values_params).await?;
return Ok(BatchPutResponse::default());
}
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.batch_put_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
let prev_kvs = query_executor.query(&select, &in_params).await?;
query_executor.execute(&update, &values_params).await?;
Ok(BatchPutResponse { prev_kvs })
}
async fn batch_get_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchGetRequest,
) -> Result<BatchGetResponse> {
if req.keys.is_empty() {
return Ok(BatchGetResponse { kvs: vec![] });
}
let query = self
.sql_template_set
.generate_batch_get_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
let kvs = query_executor.query(&query, &params).await?;
Ok(BatchGetResponse { kvs })
}
async fn delete_range_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: DeleteRangeRequest,
) -> Result<DeleteRangeResponse> {
// Since we need to know the number of deleted keys, we have no fast path here.
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.delete_range_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
let range_get_req = RangeRequest {
key: req.key.clone(),
range_end: req.range_end.clone(),
limit: 0,
keys_only: false,
};
let prev_kvs = self
.range_with_query_executor(query_executor, range_get_req)
.await?
.kvs;
let template_type = range_template(&req.key, &req.range_end);
let template = self.sql_template_set.delete_template.get(template_type);
let params = template_type.build_params(req.key, req.range_end);
let params_ref = params.iter().map(|x| x as _).collect::<Vec<_>>();
query_executor.execute(template, &params_ref).await?;
let mut resp = DeleteRangeResponse::new(prev_kvs.len() as i64);
if req.prev_kv {
resp.with_prev_kvs(prev_kvs);
}
Ok(resp)
}
async fn batch_delete_with_query_executor(
&self,
query_executor: &mut ExecutorImpl<'_, MySqlClient>,
req: BatchDeleteRequest,
) -> Result<BatchDeleteResponse> {
if req.keys.is_empty() {
return Ok(BatchDeleteResponse::default());
}
let query = self
.sql_template_set
.generate_batch_delete_query(req.keys.len());
let params = req.keys.iter().map(|x| x as _).collect::<Vec<_>>();
// Fast path: if we don't need previous kvs, we can just delete the keys.
if !req.prev_kv {
query_executor.execute(&query, &params).await?;
return Ok(BatchDeleteResponse::default());
}
// Should use transaction to ensure atomicity.
if let ExecutorImpl::Default(query_executor) = query_executor {
let txn = query_executor.txn_executor().await?;
let mut txn = ExecutorImpl::Txn(txn);
let res = self.batch_delete_with_query_executor(&mut txn, req).await;
txn.commit().await?;
return res;
}
// Should get previous kvs first
let batch_get_req = BatchGetRequest {
keys: req.keys.clone(),
};
let prev_kvs = self
.batch_get_with_query_executor(query_executor, batch_get_req)
.await?
.kvs;
// Pure `DELETE` has no return value, so we need to use `execute` instead of `query`.
query_executor.execute(&query, &params).await?;
if req.prev_kv {
Ok(BatchDeleteResponse { prev_kvs })
} else {
Ok(BatchDeleteResponse::default())
}
}
}
impl MySqlStore {
/// Create [MySqlStore] impl of [KvBackendRef] from url.
pub async fn with_url(url: &str, table_name: &str, max_txn_ops: usize) -> Result<KvBackendRef> {
let pool = MySqlPool::connect(url)
.await
.context(CreateMySqlPoolSnafu)?;
Self::with_mysql_pool(pool, table_name, max_txn_ops).await
}
/// Create [MySqlStore] impl of [KvBackendRef] from [sqlx::Pool<MySql>].
pub async fn with_mysql_pool(
pool: Pool<MySql>,
table_name: &str,
max_txn_ops: usize,
) -> Result<KvBackendRef> {
// This step ensures the mysql metadata backend is ready to use.
// We check if greptime_metakv table exists, and we will create a new table
// if it does not exist.
let sql_template_set = MySqlTemplateFactory::new(table_name).build();
sqlx::query(&sql_template_set.create_table_statement)
.execute(&pool)
.await
.context(MySqlExecutionSnafu {
sql: sql_template_set.create_table_statement.to_string(),
})?;
Ok(Arc::new(MySqlStore {
max_txn_ops,
sql_template_set,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: MySqlExecutorFactory {
pool: Arc::new(pool),
},
_phantom: PhantomData,
}))
}
}
#[cfg(test)]
mod tests {
use common_telemetry::init_default_ut_logging;
use super::*;
use crate::kv_backend::test::{
prepare_kv_with_prefix, test_kv_batch_delete_with_prefix, test_kv_batch_get_with_prefix,
test_kv_compare_and_put_with_prefix, test_kv_delete_range_with_prefix,
test_kv_put_with_prefix, test_kv_range_2_with_prefix, test_kv_range_with_prefix,
test_txn_compare_equal, test_txn_compare_greater, test_txn_compare_less,
test_txn_compare_not_equal, test_txn_one_compare_op, text_txn_multi_compare_op,
unprepare_kv,
};
async fn build_mysql_kv_backend(table_name: &str) -> Option<MySqlStore> {
init_default_ut_logging();
let endpoints = std::env::var("GT_MYSQL_ENDPOINTS").unwrap_or_default();
if endpoints.is_empty() {
return None;
}
let pool = MySqlPool::connect(&endpoints).await.unwrap();
let sql_templates = MySqlTemplateFactory::new(table_name).build();
sqlx::query(&sql_templates.create_table_statement)
.execute(&pool)
.await
.unwrap();
Some(MySqlStore {
max_txn_ops: 128,
sql_template_set: sql_templates,
txn_retry_count: RDS_STORE_TXN_RETRY_COUNT,
executor_factory: MySqlExecutorFactory {
pool: Arc::new(pool),
},
_phantom: PhantomData,
})
}
#[tokio::test]
async fn test_mysql_put() {
let kv_backend = build_mysql_kv_backend("put_test").await.unwrap();
let prefix = b"put/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_put_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_range() {
let kv_backend = build_mysql_kv_backend("range_test").await.unwrap();
let prefix = b"range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_range_2() {
let kv_backend = build_mysql_kv_backend("range2_test").await.unwrap();
let prefix = b"range2/";
test_kv_range_2_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_get() {
let kv_backend = build_mysql_kv_backend("batch_get_test").await.unwrap();
let prefix = b"batch_get/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_get_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_delete() {
let kv_backend = build_mysql_kv_backend("batch_delete_test").await.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_batch_delete_with_prefix() {
let kv_backend = build_mysql_kv_backend("batch_delete_with_prefix_test")
.await
.unwrap();
let prefix = b"batch_delete/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_batch_delete_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_delete_range() {
let kv_backend = build_mysql_kv_backend("delete_range_test").await.unwrap();
let prefix = b"delete_range/";
prepare_kv_with_prefix(&kv_backend, prefix.to_vec()).await;
test_kv_delete_range_with_prefix(&kv_backend, prefix.to_vec()).await;
unprepare_kv(&kv_backend, prefix).await;
}
#[tokio::test]
async fn test_mysql_compare_and_put() {
let kv_backend = build_mysql_kv_backend("compare_and_put_test")
.await
.unwrap();
let prefix = b"compare_and_put/";
let kv_backend = Arc::new(kv_backend);
test_kv_compare_and_put_with_prefix(kv_backend.clone(), prefix.to_vec()).await;
}
#[tokio::test]
async fn test_mysql_txn() {
let kv_backend = build_mysql_kv_backend("txn_test").await.unwrap();
test_txn_one_compare_op(&kv_backend).await;
text_txn_multi_compare_op(&kv_backend).await;
test_txn_compare_equal(&kv_backend).await;
test_txn_compare_greater(&kv_backend).await;
test_txn_compare_less(&kv_backend).await;
test_txn_compare_not_equal(&kv_backend).await;
}
}
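
As a sketch of how the range templates are selected, a hypothetical extra unit test inside this module could pin the mapping down (`range_template`, `RangeTemplateType`, and `EMPTY` are the private items defined above; only the test itself is new):

#[test]
fn test_range_template_type_selection() {
    // An empty range_end means a point get.
    assert!(matches!(range_template(b"a/key", b""), RangeTemplateType::Point));
    // EMPTY (a single zero byte) on both sides selects a full scan.
    assert!(matches!(range_template(&[0u8], &[0u8]), RangeTemplateType::Full));
    // EMPTY only as range_end selects a left-bounded scan.
    assert!(matches!(range_template(b"a/", &[0u8]), RangeTemplateType::LeftBounded));
    // A range_end equal to the key with its last byte incremented is a prefix scan.
    assert!(matches!(range_template(b"a/", b"a0"), RangeTemplateType::Prefix));
    // Anything else falls back to a plain range scan.
    assert!(matches!(range_template(b"a/", b"b/"), RangeTemplateType::Range));
}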

View File

@@ -153,24 +153,29 @@ impl<'a> PgSqlTemplateFactory<'a> {
/// Builds the template set for the given table name.
fn build(&self) -> PgSqlTemplateSet {
let table_name = self.table_name;
// Some queries don't end with `;` because we may need to append a `LIMIT` clause.
PgSqlTemplateSet {
table_name: table_name.to_string(),
create_table_statement: format!(
"CREATE TABLE IF NOT EXISTS {table_name}(k bytea PRIMARY KEY, v bytea)",
"CREATE TABLE IF NOT EXISTS \"{table_name}\"(k bytea PRIMARY KEY, v bytea)",
),
range_template: RangeTemplate {
point: format!("SELECT k, v FROM {table_name} WHERE k = $1"),
range: format!("SELECT k, v FROM {table_name} WHERE k >= $1 AND k < $2 ORDER BY k"),
full: format!("SELECT k, v FROM {table_name} $1 ORDER BY k"),
left_bounded: format!("SELECT k, v FROM {table_name} WHERE k >= $1 ORDER BY k"),
prefix: format!("SELECT k, v FROM {table_name} WHERE k LIKE $1 ORDER BY k"),
point: format!("SELECT k, v FROM \"{table_name}\" WHERE k = $1"),
range: format!(
"SELECT k, v FROM \"{table_name}\" WHERE k >= $1 AND k < $2 ORDER BY k"
),
full: format!("SELECT k, v FROM \"{table_name}\" $1 ORDER BY k"),
left_bounded: format!("SELECT k, v FROM \"{table_name}\" WHERE k >= $1 ORDER BY k"),
prefix: format!("SELECT k, v FROM \"{table_name}\" WHERE k LIKE $1 ORDER BY k"),
},
delete_template: RangeTemplate {
point: format!("DELETE FROM {table_name} WHERE k = $1 RETURNING k,v;"),
range: format!("DELETE FROM {table_name} WHERE k >= $1 AND k < $2 RETURNING k,v;"),
full: format!("DELETE FROM {table_name} RETURNING k,v"),
left_bounded: format!("DELETE FROM {table_name} WHERE k >= $1 RETURNING k,v;"),
prefix: format!("DELETE FROM {table_name} WHERE k LIKE $1 RETURNING k,v;"),
point: format!("DELETE FROM \"{table_name}\" WHERE k = $1 RETURNING k,v;"),
range: format!(
"DELETE FROM \"{table_name}\" WHERE k >= $1 AND k < $2 RETURNING k,v;"
),
full: format!("DELETE FROM \"{table_name}\" RETURNING k,v"),
left_bounded: format!("DELETE FROM \"{table_name}\" WHERE k >= $1 RETURNING k,v;"),
prefix: format!("DELETE FROM \"{table_name}\" WHERE k LIKE $1 RETURNING k,v;"),
},
}
}
@@ -190,7 +195,10 @@ impl PgSqlTemplateSet {
fn generate_batch_get_query(&self, key_len: usize) -> String {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!("SELECT k, v FROM {table_name} WHERE k in ({});", in_clause)
format!(
"SELECT k, v FROM \"{table_name}\" WHERE k in ({});",
in_clause
)
}
/// Generates the sql for batch delete.
@@ -198,7 +206,7 @@ impl PgSqlTemplateSet {
let table_name = &self.table_name;
let in_clause = pg_generate_in_placeholders(1, key_len).join(", ");
format!(
"DELETE FROM {table_name} WHERE k in ({}) RETURNING k,v;",
"DELETE FROM \"{table_name}\" WHERE k in ({}) RETURNING k,v;",
in_clause
)
}
@@ -219,9 +227,9 @@ impl PgSqlTemplateSet {
format!(
r#"
WITH prev AS (
SELECT k,v FROM {table_name} WHERE k IN ({in_clause})
SELECT k,v FROM "{table_name}" WHERE k IN ({in_clause})
), update AS (
INSERT INTO {table_name} (k, v) VALUES
INSERT INTO "{table_name}" (k, v) VALUES
{values_clause}
ON CONFLICT (
k

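A hedged sketch of a check one might add in this module for the quoting change above; the test name is hypothetical, and `PgSqlTemplateFactory::new` plus the `$1, $2` output of `pg_generate_in_placeholders` are assumed from the surrounding code rather than shown in this hunk:

#[test]
fn test_pg_templates_quote_table_name() {
    let templates = PgSqlTemplateFactory::new("greptime_metakv").build();
    // The table name is now wrapped in double quotes in the generated SQL.
    assert_eq!(
        templates.generate_batch_get_query(2),
        r#"SELECT k, v FROM "greptime_metakv" WHERE k in ($1, $2);"#
    );
}
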
View File

@@ -39,6 +39,7 @@ pub mod node_manager;
pub mod peer;
pub mod range_stream;
pub mod region_keeper;
pub mod region_registry;
pub mod rpc;
pub mod sequence;
pub mod state_store;

View File

@@ -27,6 +27,7 @@ const TABLE_NAME_LOCK_PREFIX: &str = "__table_name_lock";
const FLOW_NAME_LOCK_PREFIX: &str = "__flow_name_lock";
const REGION_LOCK_PREFIX: &str = "__region_lock";
const FLOW_LOCK_PREFIX: &str = "__flow_lock";
const REMOTE_WAL_LOCK_PREFIX: &str = "__remote_wal_lock";
/// [CatalogLock] acquires the lock on the tenant level.
pub enum CatalogLock<'a> {
@@ -231,6 +232,31 @@ impl From<FlowLock> for StringKey {
}
}
/// [RemoteWalLock] acquires the lock on the remote wal topic level.
pub enum RemoteWalLock {
Read(String),
Write(String),
}
impl Display for RemoteWalLock {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let key = match self {
RemoteWalLock::Read(s) => s,
RemoteWalLock::Write(s) => s,
};
write!(f, "{}/{}", REMOTE_WAL_LOCK_PREFIX, key)
}
}
impl From<RemoteWalLock> for StringKey {
fn from(value: RemoteWalLock) -> Self {
match value {
RemoteWalLock::Write(_) => StringKey::Exclusive(value.to_string()),
RemoteWalLock::Read(_) => StringKey::Share(value.to_string()),
}
}
}
#[cfg(test)]
mod tests {
use common_procedure::StringKey;
@@ -308,5 +334,16 @@ mod tests {
string_key,
StringKey::Exclusive(format!("{}/{}", FLOW_LOCK_PREFIX, flow_id))
);
// The remote wal lock
let string_key: StringKey = RemoteWalLock::Read("foo".to_string()).into();
assert_eq!(
string_key,
StringKey::Share(format!("{}/{}", REMOTE_WAL_LOCK_PREFIX, "foo"))
);
let string_key: StringKey = RemoteWalLock::Write("foo".to_string()).into();
assert_eq!(
string_key,
StringKey::Exclusive(format!("{}/{}", REMOTE_WAL_LOCK_PREFIX, "foo"))
);
}
}

View File

@@ -12,63 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{Display, Formatter};
use std::sync::Arc;
use api::v1::meta::Peer as PbPeer;
use serde::{Deserialize, Serialize};
pub use api::v1::meta::Peer;
use crate::error::Error;
use crate::{DatanodeId, FlownodeId};
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct Peer {
/// Node identifier. Unique in a cluster.
pub id: u64,
pub addr: String,
}
impl From<PbPeer> for Peer {
fn from(p: PbPeer) -> Self {
Self {
id: p.id,
addr: p.addr,
}
}
}
impl From<Peer> for PbPeer {
fn from(p: Peer) -> Self {
Self {
id: p.id,
addr: p.addr,
}
}
}
impl Peer {
pub fn new(id: u64, addr: impl Into<String>) -> Self {
Self {
id,
addr: addr.into(),
}
}
#[cfg(any(test, feature = "testing"))]
pub fn empty(id: u64) -> Self {
Self {
id,
addr: String::new(),
}
}
}
impl Display for Peer {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "peer-{}({})", self.id, self.addr)
}
}
/// Can query a peer given a node id.
#[async_trait::async_trait]
pub trait PeerLookupService {

View File

@@ -0,0 +1,186 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use common_telemetry::warn;
use store_api::storage::RegionId;
use crate::datanode::RegionManifestInfo;
/// Represents information about a leader region in the cluster.
/// Contains the datanode id where the leader is located,
/// and the current manifest version.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct LeaderRegion {
pub datanode_id: u64,
pub manifest: LeaderRegionManifestInfo,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LeaderRegionManifestInfo {
Mito {
manifest_version: u64,
flushed_entry_id: u64,
},
Metric {
data_manifest_version: u64,
data_flushed_entry_id: u64,
metadata_manifest_version: u64,
metadata_flushed_entry_id: u64,
},
}
impl From<RegionManifestInfo> for LeaderRegionManifestInfo {
fn from(value: RegionManifestInfo) -> Self {
match value {
RegionManifestInfo::Mito {
manifest_version,
flushed_entry_id,
} => LeaderRegionManifestInfo::Mito {
manifest_version,
flushed_entry_id,
},
RegionManifestInfo::Metric {
data_manifest_version,
data_flushed_entry_id,
metadata_manifest_version,
metadata_flushed_entry_id,
} => LeaderRegionManifestInfo::Metric {
data_manifest_version,
data_flushed_entry_id,
metadata_manifest_version,
metadata_flushed_entry_id,
},
}
}
}
impl LeaderRegionManifestInfo {
/// Returns the manifest version of the leader region.
pub fn manifest_version(&self) -> u64 {
match self {
LeaderRegionManifestInfo::Mito {
manifest_version, ..
} => *manifest_version,
LeaderRegionManifestInfo::Metric {
data_manifest_version,
..
} => *data_manifest_version,
}
}
/// Returns the flushed entry id of the leader region.
pub fn flushed_entry_id(&self) -> u64 {
match self {
LeaderRegionManifestInfo::Mito {
flushed_entry_id, ..
} => *flushed_entry_id,
LeaderRegionManifestInfo::Metric {
data_flushed_entry_id,
..
} => *data_flushed_entry_id,
}
}
/// Returns the minimum flushed entry id of the leader region.
/// It is used to determine the minimum flushed entry id that can be pruned in remote wal.
pub fn min_flushed_entry_id(&self) -> u64 {
match self {
LeaderRegionManifestInfo::Mito {
flushed_entry_id, ..
} => *flushed_entry_id,
LeaderRegionManifestInfo::Metric {
data_flushed_entry_id,
metadata_flushed_entry_id,
..
} => (*data_flushed_entry_id).min(*metadata_flushed_entry_id),
}
}
}
pub type LeaderRegionRegistryRef = Arc<LeaderRegionRegistry>;
/// Registry that maintains a mapping of all leader regions in the cluster.
/// Tracks which datanode is hosting the leader for each region and the corresponding
/// manifest version.
#[derive(Default)]
pub struct LeaderRegionRegistry {
inner: RwLock<HashMap<RegionId, LeaderRegion>>,
}
impl LeaderRegionRegistry {
/// Creates a new empty leader region registry.
pub fn new() -> Self {
Self {
inner: RwLock::new(HashMap::new()),
}
}
/// Gets the leader region for the given region ids.
pub fn batch_get<I: Iterator<Item = RegionId>>(
&self,
region_ids: I,
) -> HashMap<RegionId, LeaderRegion> {
let inner = self.inner.read().unwrap();
region_ids
.into_iter()
.flat_map(|region_id| {
inner
.get(&region_id)
.map(|leader_region| (region_id, *leader_region))
})
.collect::<HashMap<_, _>>()
}
/// Puts the leader regions into the registry.
pub fn batch_put(&self, key_values: Vec<(RegionId, LeaderRegion)>) {
let mut inner = self.inner.write().unwrap();
for (region_id, leader_region) in key_values {
match inner.entry(region_id) {
Entry::Vacant(entry) => {
entry.insert(leader_region);
}
Entry::Occupied(mut entry) => {
let manifest_version = entry.get().manifest.manifest_version();
if manifest_version > leader_region.manifest.manifest_version() {
warn!(
"Received a leader region with a smaller manifest version than the existing one, ignore it. region: {}, existing_manifest_version: {}, new_manifest_version: {}",
region_id,
manifest_version,
leader_region.manifest.manifest_version()
);
} else {
entry.insert(leader_region);
}
}
}
}
}
pub fn batch_delete<I: Iterator<Item = RegionId>>(&self, region_ids: I) {
let mut inner = self.inner.write().unwrap();
for region_id in region_ids {
inner.remove(&region_id);
}
}
/// Resets the registry to an empty state.
pub fn reset(&self) {
let mut inner = self.inner.write().unwrap();
inner.clear();
}
}
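
A minimal usage sketch for the registry above, with made-up ids and versions; it assumes `RegionId::new(table_id, region_number)` from `store_api::storage`:

use std::sync::Arc;
use store_api::storage::RegionId;

fn example_registry_usage() {
    let registry = Arc::new(LeaderRegionRegistry::new());
    let region_id = RegionId::new(1024, 1); // hypothetical table id / region number

    registry.batch_put(vec![(
        region_id,
        LeaderRegion {
            datanode_id: 42,
            manifest: LeaderRegionManifestInfo::Mito {
                manifest_version: 7,
                flushed_entry_id: 100,
            },
        },
    )]);

    // batch_put keeps the entry with the larger manifest version, so a stale
    // update would be ignored and batch_get still returns version 7 here.
    let current = registry.batch_get(std::iter::once(region_id));
    assert_eq!(current[&region_id].manifest.manifest_version(), 7);
}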

View File

@@ -1240,6 +1240,7 @@ impl From<QueryContext> for PbQueryContext {
extensions,
channel: channel as u32,
snapshot_seqs: None,
explain: None,
}
}
}

View File

@@ -34,6 +34,24 @@ pub struct MigrateRegionRequest {
pub timeout: Duration,
}
/// A request to add region follower.
#[derive(Debug, Clone)]
pub struct AddRegionFollowerRequest {
/// The region id to add a follower to.
pub region_id: u64,
/// The peer id of the follower to add.
pub peer_id: u64,
}
/// A request to remove region follower.
#[derive(Debug, Clone)]
pub struct RemoveRegionFollowerRequest {
/// The region id to remove a follower from.
pub region_id: u64,
/// The peer id of the follower to remove.
pub peer_id: u64,
}
/// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`].
pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result<ProcedureId> {
ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| {

View File

@@ -290,13 +290,13 @@ mod tests {
num_per_range: u32,
max_bytes: u32,
) {
let num_cases = rand::thread_rng().gen_range(1..=8);
let num_cases = rand::rng().random_range(1..=8);
common_telemetry::info!("num_cases: {}", num_cases);
let mut cases = Vec::with_capacity(num_cases);
for i in 0..num_cases {
let size = rand::thread_rng().gen_range(size_limit..=max_bytes);
let size = rand::rng().random_range(size_limit..=max_bytes);
let mut large_value = vec![0u8; size as usize];
rand::thread_rng().fill_bytes(&mut large_value);
rand::rng().fill_bytes(&mut large_value);
// Starts from `a`.
let prefix = format!("{}/", std::char::from_u32(97 + i as u32).unwrap());
@@ -354,8 +354,8 @@ mod tests {
#[tokio::test]
async fn test_meta_state_store_split_value() {
let size_limit = rand::thread_rng().gen_range(128..=512);
let page_size = rand::thread_rng().gen_range(1..10);
let size_limit = rand::rng().random_range(128..=512);
let page_size = rand::rng().random_range(1..10);
let kv_backend = Arc::new(MemoryKvBackend::new());
test_meta_state_store_split_value_with_size_limit(kv_backend, size_limit, page_size, 8192)
.await;
@@ -388,7 +388,7 @@ mod tests {
// However, some KvBackends, the `ChrootKvBackend`, will add the prefix to `key`;
// we don't know the exact size of the key.
let size_limit = 1536 * 1024 - key_size;
let page_size = rand::thread_rng().gen_range(1..10);
let page_size = rand::rng().random_range(1..10);
test_meta_state_store_split_value_with_size_limit(
kv_backend,
size_limit,

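The hunks above follow the rand 0.9 API renames (`thread_rng()` → `rng()`, `gen_range` → `random_range`). A standalone sketch of the new calls, assuming rand 0.9 in the dependency tree:

use rand::{Rng, RngCore};

fn main() {
    // rand::rng() replaces rand::thread_rng() in rand 0.9.
    let num_cases = rand::rng().random_range(1..=8);
    let size: usize = rand::rng().random_range(128..=512);
    let mut large_value = vec![0u8; size];
    rand::rng().fill_bytes(&mut large_value);
    println!("num_cases: {num_cases}, value len: {}", large_value.len());
}
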
Some files were not shown because too many files have changed in this diff.