Compare commits

..

170 Commits

Author SHA1 Message Date
evenyag
6247de2d50 chore: Revert "feat: prune in each partition"
This reverts commit 3f9bf48161.
2024-11-08 23:57:18 +08:00
evenyag
a2eb46132f feat: tokio dump 2024-11-08 23:08:47 +08:00
evenyag
3f9bf48161 feat: prune in each partition 2024-11-08 21:31:03 +08:00
evenyag
9bd2e006b5 feat: file output thread id 2024-11-08 20:35:40 +08:00
evenyag
031421ca91 feat: add thread id to log 2024-11-08 19:09:08 +08:00
evenyag
999f3a40c2 chore: Revert "chore: Revert "feat: yield after get ranges""
This reverts commit 770a850437.
2024-11-08 17:12:54 +08:00
evenyag
50d28e0a00 feat: add timeout to build ranges 2024-11-08 16:23:03 +08:00
evenyag
770a850437 chore: Revert "feat: yield after get ranges"
This reverts commit 65e53b5bc4.
2024-11-08 15:35:23 +08:00
evenyag
65e53b5bc4 feat: yield after get ranges 2024-11-08 01:28:39 +08:00
evenyag
9a6c7aa4d6 chore: log label 2024-11-08 01:08:02 +08:00
evenyag
4f446b95d8 chore: logs 2024-11-08 01:01:27 +08:00
evenyag
9ad4200f55 feat: only log for unordered scan 2024-11-08 00:58:29 +08:00
evenyag
53d456651f chore: range builder logs 2024-11-08 00:11:54 +08:00
evenyag
f11c5acb0f feat: logs for debug prune cost 2024-11-07 22:10:46 +08:00
evenyag
8536a1ec6e chore: logs to debug hang 2024-11-07 20:36:12 +08:00
evenyag
fce8c968da feat: gauge for scan partition 2024-11-07 16:55:40 +08:00
evenyag
98a6ac973c feat: log on merge scan region start/end 2024-11-07 16:48:03 +08:00
evenyag
8f79e421c3 chore: Revert "feat: remove too large files"
This reverts commit a22667bf3c.
2024-11-07 16:20:39 +08:00
evenyag
e8b326382f chore: fix compile 2024-11-07 00:28:19 +08:00
evenyag
56781e7fbc fix: skip expired files 2024-11-07 00:25:52 +08:00
evenyag
7d342b3d95 feat: small max file size 2024-11-06 23:31:16 +08:00
evenyag
a22667bf3c feat: remove too large files 2024-11-06 22:08:43 +08:00
evenyag
29b9b7db0c feat: support compression method 2024-11-06 18:58:20 +08:00
evenyag
a66909a562 chore: fix compile 2024-11-06 16:29:18 +08:00
evenyag
8137b8ff3d chore: more logs 2024-11-06 15:25:49 +08:00
Ruihang Xia
7c5cd2922a fix split logic
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-11-05 23:43:35 +08:00
evenyag
a1d0dcf2c3 chore: more logs 2024-11-05 20:25:05 +08:00
evenyag
c391171f99 feat: more logs 2024-11-05 20:18:35 +08:00
evenyag
f44862aaac feat: update log 2024-11-05 17:47:32 +08:00
evenyag
8bf795d88c chore: more logs 2024-11-05 16:22:54 +08:00
evenyag
3bbf4e0232 feat: log range meta 2024-11-05 16:01:55 +08:00
evenyag
83da3950da chore: debug 2024-11-05 15:33:42 +08:00
evenyag
957b5effd5 chore: fix compile 2024-11-05 15:32:35 +08:00
evenyag
f59e28006a feat: assert precision 2024-11-05 15:24:40 +08:00
evenyag
3e5bbdf71e feat: enable batch checker 2024-11-05 15:24:40 +08:00
Ruihang Xia
b8ac19c480 log on wrong range index
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-11-05 14:57:44 +08:00
evenyag
92b274a856 chore: log compute cost 2024-11-05 14:57:54 +08:00
evenyag
6bdac25f0a chore: more logs 2024-11-05 13:02:16 +08:00
evenyag
a9f3c4b17c chore: page reader metrics 2024-11-04 20:08:56 +08:00
evenyag
e003eaab36 chore: more log 2024-11-04 20:06:20 +08:00
evenyag
6e590da412 chore: remove compaction skip log 2024-11-04 19:40:42 +08:00
evenyag
ff5fa40b85 feat: skip wal 2024-11-04 19:40:41 +08:00
Ruihang Xia
d4aa4159d4 feat: support windowed sort with where condition
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-11-04 19:34:03 +08:00
evenyag
960f6d821b feat: spawn block write wal 2024-11-04 17:35:12 +08:00
Ruihang Xia
9c5d044238 Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-11-01 17:45:28 +08:00
Ruihang Xia
be72d3bedb feat: simple limit impl in PartSort (#4922)
* feat: simple limit impl in PartSort

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: update time_index method to return a non-optional String

Co-authored-by: Yingwen <realevenyag@gmail.com>
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use builtin limit

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more info to analyze display

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-11-01 09:25:03 +00:00
discord9
1ff29d8fde chore: short desc markdown about change log level (#4921)
* chore: tiny doc about change log level

* chore: per review

* chore
2024-11-01 07:10:57 +00:00
Yingwen
39ab1a6415 feat: get row group time range from cached metadata (#4869)
* feat: get part range min-max from cache for unordered scan

* feat: seq scan push row groups if num_row_groups > 0

* test: test split

* feat: update comment

* test: fix split test

* refactor: rename get meta data method
2024-11-01 06:35:03 +00:00
Ruihang Xia
70c354eed6 fix: the way to retrieve time index column
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-11-01 12:10:12 +08:00
Ruihang Xia
23bf663d58 feat: handle sort that wont preserving partition
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-31 22:13:36 +08:00
Ruihang Xia
817648eac5 Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-31 15:38:12 +08:00
Weny Xu
758ad0a8c5 refactor: simplify WeightedChoose (#4916)
* refactor: simplify WeightedChoose

* chore: remove unused errors
2024-10-31 06:22:30 +00:00
Ruihang Xia
8b60c27c2e feat: enhance windowed-sort optimizer rule (#4910)
* add RegionScanner::metadata

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* skip PartSort when there is no tag column

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* handle desc

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: should keep part sort on DESC

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-31 06:15:45 +00:00
Yingwen
ea6df9ba49 fix: prune batches from memtable by time range (#4913)
* feat: add an iter to prune by time range

* feat: filter rows from mem range
2024-10-31 05:13:35 +00:00
Ning Sun
69420793e2 feat: implement parse_query api (#4860)
* feat: implement parse_query api

* chore: switch to upstream

* fix: add post method for parse_query

* chore: bump promql-parser

* test: use latest promql ast serialization
2024-10-30 12:16:22 +00:00
Yingwen
0da112b335 chore: provide more info in check batch message (#4906)
* chore: provide more info in check message

* chore: set timeout to 240s

---------

Co-authored-by: WenyXu <wenymedia@gmail.com>
2024-10-30 11:56:10 +00:00
dennis zhuang
dcc08f6b3e feat: adds the number of rows and index files size to region_statistics table (#4909)
* feat: adds index size to region statistics

* feat: adds the number of rows for region statistics

* test: adds sqlness test for region_statistics

* fix: test
2024-10-30 11:12:58 +00:00
dennis zhuang
a34035a1f2 fix: set transaction variables not working in mysql protocol (#4912) 2024-10-30 10:59:13 +00:00
LFC
fd8eba36a8 refactor: make use of the "pre_execute" in sql execution interceptor (#4875)
* feat: dynamic definition of plugin options

* rebase

* revert

* fix ci
2024-10-30 09:16:46 +00:00
Ruihang Xia
9712295177 fix(config): update tracing section headers in example TOML files (#4898)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-30 08:31:31 +00:00
Lei, HUANG
d275cdd570 feat: Support altering table TTL (#4848)
* feat/alter-ttl:
 Update greptime-proto source and add ChangeTableOptions handling

 - Change greptime-proto source repository and revision in Cargo.lock and Cargo.toml
 - Implement handling for ChangeTableOptions in grpc-expr and meta modules
 - Add support for parsing and applying region option changes in mito2
 - Introduce new error type for invalid change table option requests
 - Add humantime dependency to store-api
 - Fix SQL syntax in tests for changing column types

* chore: remove write buffer size option handling since we don't support specifying write_buffer_size for single table or region

* persist ttl to manifest

* chore: add sqlness

* fix: sqlness

* fix: typo and toml format

* fix: tests

* update: change alter syntax

* feat/alter-ttl: Add Clone trait to RegionFlushRequest and remove redundant Default derive in region_request.rs.

* feat/alter-ttl: Refactor code to replace 'ChangeTableOption' with 'ChangeRegionOption' and handle TTL as a region option

 • Rename ChangeTableOption to ChangeRegionOption across various files.
 • Update AlterKind::ChangeTableOptions to AlterKind::ChangeRegionOptions.
 • Modify TTL handling to treat '0d' as None for TTL in table options.
 • Adjust related function names and comments to reflect the change from table to region options.
 • Include test case updates to verify the new TTL handling behavior.

* chore: update format

* refactor: update region options in DatanodeTableValue

* feat/alter-ttl:
 Remove TTL handling from RegionManifest and related structures

 - Eliminate TTL fields from `RegionManifest`, `RegionChange`, and associated handling logic.
 - Update tests and checksums to reflect removal of TTL.
 - Refactor `RegionOpener` and `handle_alter` to adjust to TTL removal.
 - Simplify `RegionChangeResult` by replacing `change` with `new_meta`.

* chore: fmt

* remove useless delete op

* feat/alter-ttl: Updated Cargo.lock and gRPC expression Cargo.toml to include store-api dependency. Refactored alter.rs to use ChangeOption from store-api instead of ChangeTableOptionRequest.
Adjusted error handling in error.rs to use MetadataError. Modified handle_alter.rs to handle TTL changes with ChangeOption. Simplified region_request.rs by replacing
ChangeRegionOption with ChangeOption and removing redundant code. Removed UnsupportedTableOptionChange error in table/src/error.rs. Updated metadata.rs to use ChangeOption for table
options. Removed ChangeTableOptionRequest enum and related conversion code from requests.rs.

* feat/alter-ttl: Update greptime-proto dependency to revision 53ab9a9553

* chore: format code

* chore: update greptime-proto
2024-10-30 04:39:48 +00:00
Weny Xu
83eb777d21 test: add fuzz test for metric region migration (#4862)
* test: add fuzz tests for migrate metric regions

* test: insert values before migrating metric region

* feat: correct table num

* chore: apply suggestions from CR
2024-10-29 15:47:48 +00:00
Yohan Wal
8ed5bc5305 refactor: json conversion (#4893)
* refactor: json type update

* test: update test

* fix: convert when needed

* revert: leave sqlness tests unchanged

* fix: fmt

* refactor: just refactor

* Apply suggestions from code review

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* refactor: parse jsonb first

* test: add bad cases

* Update src/datatypes/src/vectors/binary.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fix: fmt

* fix: fix clippy/check

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-10-29 15:46:24 +00:00
Weny Xu
9ded314905 feat: add json datatype for grpc protocol (#4897)
* chore: update greptime-proto

* feat: add json datatype for grpc protocol
2024-10-29 12:37:53 +00:00
discord9
702a55a235 chore: update proto depend (#4899) 2024-10-29 09:32:28 +00:00
discord9
f3e5a5a7aa ci: install numpy in CI (#4895)
chore: install numpy in CI
2024-10-29 07:57:40 +00:00
Zhenchi
9c79baca4b feat(index): support building inverted index for the field column on Mito (#4887)
feat(index): support building inverted index for the field column

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2024-10-29 07:57:17 +00:00
Ruihang Xia
03f2fa219d feat: optimizer rule for windowed sort (#4874)
* basic impl

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement physical rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: install windowed sort physical rule and optimize partition ranges

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add logs and sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: introduce PartSortExec for partitioned sorting

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tune exec nodes' properties and metrics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* debug: add more info on very wrong

* debug: also print overlap ranges

* feat: add check when emit PartSort Stream

* dbg: info on overlap working range

* feat: check batch range is inside part range

* set distinguish partition range param

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: more logs

* update sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tune optimizer

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix lints

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix windowed sort rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: early terminate sort stream

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: remove min/max check

* chore: remove unused windowed_sort module, uuid feature and refactor region_scanner to synchronous

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: print more fuzz log

* chore: more log

* fix: part sort should skip empty part

* chore: remove insert logs

* tests: empty PartitionRange

* refactor: testcase

* docs: update comment&tests: all empty

* ci: enlarge etcd cpu limit

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: discord9 <discord9@163.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
2024-10-29 07:46:05 +00:00
Lei, HUANG
0ee455a980 fix: pyo3 ut (#4894) 2024-10-29 04:47:57 +00:00
Lei, HUANG
eab9e3a48d chore: remove struct size assertion (#4885)
chore/remove-struct-size-assertion: Remove unit tests for parquet_meta_size function in cache_size.rs
2024-10-28 08:50:10 +00:00
Yingwen
1008af5324 feat!: Divide flush and compaction job pool (#4871)
* feat: divide flush/compact job pool

* feat!: divide bg jobs config

* docs: update config examples

* test: fix tests
2024-10-25 23:36:16 +00:00
discord9
2485f66077 chore: graceful exit on bind fail (#4882) 2024-10-25 09:29:39 +00:00
Weny Xu
4f3afb13b6 fix: fix broken import (#4880) 2024-10-25 07:09:51 +00:00
shuiyisong
32a0023010 chore: add schema urls to otlp logs (#4876)
* chore: add schema urls to otlp logs table

* chore: update meter-macros version to remove anymap warning

* chore: change span id and trace id to field
2024-10-25 03:45:24 +00:00
Kaifeng Zheng
4e9c251041 feat: add json_path_match udf (#4864)
* add json_path_match udf

* sql tests for json_path_match

* fix clippy & comment

* fix null value behavior

* added null tests

* adjust function's behavior on nulls

* update test cases

* fix null check of json
2024-10-25 03:13:34 +00:00
Lei, HUANG
e328c7067c chore: udapte Rust toolchain to 2024-10-19 (#4857)
* update rust toolchain

* change toolchain to 2024-10-17

* fix: clippy

* fix: ut

* bump shadow-rs

* fix: use nightly-2024-10-19

* fix: clippy

* chore/udapte-toolchain-2024-10-17: Update DEV_BUILDER_IMAGE_TAG to 2024-10-19-a5c00e85-20241024184445 in Makefile
2024-10-25 00:23:32 +00:00
Weny Xu
8b307e4548 feat: introduce the PluginOptions (#4835)
* feat: introduce the `PluginOptions`

* chore: apply suggestions from CR
2024-10-24 12:02:10 +00:00
discord9
ff38abde2e chore: better column schema check for flow (#4855)
* chore: better column schema check for flow

* chore: better msg

* tests: clean up after tests

* chore: better msg

* chore: per review

* tests: sqlness
2024-10-24 09:43:32 +00:00
jeremyhi
aa9a265984 chore: make pusher log easy to understand (#4841)
* chore: make pusher log easy to understand

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: by comment

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-10-24 07:44:16 +00:00
pa
9d3ee6384a feat: Limit CPU in runtime (#3685) (#4782)
feat: add throttle runtime (#3685)
2024-10-24 07:30:24 +00:00
localhost
fcde0a4874 feat: Add functionality to the Opentelemetry write interface to extract fields from attr to top-level data. (#4859)
* chore: add otlp select

* chore: change otlp select

* chore: remove json path

* chore: format toml

* chore: change opentelemetry extract keys header name

* chore: add some doc and remove useless code and lib

* chore: make clippy happy

* chore: fix by pr comment

* chore: fix by pr comment

* chore: opentelemetry logs select key change some type default semantic type
2024-10-24 05:55:57 +00:00
Weny Xu
5d42e63ab0 fix!: replace timeout_millis and connect_timeout_millis with Duration in DatanodeClientOptions (#4867)
* fix: correct options struct

* fix: fix unit test
2024-10-23 08:20:34 +00:00
discord9
0c01532a37 feat: Sort within each PartitionRange (#4847)
* feat: PartSort

* chore: rm unused

* chore: typo

* chore: mem pool df

* chore: add location to arrow error

* refactor: test_util

* refactor: per review

* chore: rm unused

* chore: more cases

* chore: test&buffer clear

* fix: remove fetch

* chore: fmt

* chore: per review

* chore: rm unused
2024-10-23 07:01:55 +00:00
ZonaHe
6d503b047a feat: update dashboard to v0.6.0 (#4861)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2024-10-22 02:34:09 +00:00
Yingwen
5d28f7a912 feat: yields empty batch after reading a range (#4845)
* feat: add empty batch to end of range stream

* feat: add batch validation

* fix: validate batch order

* fix: not yield empty batch in compaction

* fix: empty record batch

* feat: add a flag to enable empty batch
2024-10-21 13:52:47 +00:00
Lei, HUANG
a50eea76a6 chore: bump greptime-meter (#4858)
chore/bump-greptime-meter: Add meter-core package and update meter-core dependency across various packages to
new git revision.
2024-10-21 08:18:30 +00:00
Yingwen
2ee1ce2ba1 docs: change cpu/mem panel to time-series (#4844)
* docs: change cpu/mem panel to time-series

* docs: update version
2024-10-18 08:42:01 +00:00
Weny Xu
c02b5dae93 chore: bump version to 0.9.5 (#4853) 2024-10-18 08:07:13 +00:00
Weny Xu
081c6d9e74 fix: flush metric metadata region (#4852)
* fix: flush metric metadata region

* chore: apply suggestions from CR
2024-10-18 07:21:35 +00:00
Weny Xu
ca6e02980e fix: overwrite entry_id if entry id is less than start_offset (#4842)
* fix: overwrite entry_id if entry id is less than start_offset

* feat: add `overwrite_entry_start_id` to options

* chore: update config.md
2024-10-18 06:31:02 +00:00
Weny Xu
74bdba4613 fix: fix metadata forward compatibility issue (#4846) 2024-10-18 06:26:41 +00:00
Weny Xu
2e0e82ddc8 chore: update greptime-proto to b4d3011 (#4850) 2024-10-18 04:10:22 +00:00
Yingwen
e0c4157ad8 feat: Seq scanner scans data by time range (#4809)
* feat: seq scan by partition

* feat: part metrics

* chore: remove unused codes

* chore: fmt stream

* feat: build ranges returns smallvec

* feat: move scan mem/file ranges to util and reuse

* feat: log metrics

* chore: correct some metrics

* feat: get explain info from ranges

* test: group test and remove unused codes

* chore: fix clippy

* feat: change PartitionRange end to exclusive

* test: add tests
2024-10-17 11:05:12 +00:00
discord9
613e07afb4 feat: window sort physical plan (#4814)
* WIP

* feat: range split& tests

* WIP: split range

* add sort exprs

* chore: typo

* WIP

* feat: find successive runs

* WIP

* READY FOR REVIEW PART ONE: more tests

* refactor: break into smaller functions

* feat: precompute working range(need testing)

* tests: on working range

* tests: on working range

* feat: support rev working range

* feat(to be tested): core logic of merge sort

* fix: poll results

* fix: find_slice_from_range&test

* chore: remove some unused util func&fields

* chore: typos

* chore: impl exec plan for WindowedSortExec

* test(WIP): window sort stream

* test: window sort stream

* chore: remove unused

* fix: fetch

* fix: WIP intersection remaining

* test: fix and test!

* chore: remove outdated comments

* chore: rename test

* chore: remove dbg line

* chore: sorted runs

* feat: handling unexpected data

* chore: unused

* chore: remove a print in test

* chore: per review

* docs: wrong comment

* chore: more test cases
2024-10-16 11:50:25 +00:00
Weny Xu
0ce93f0b88 chore: add more metrics for region migration (#4838) 2024-10-16 09:36:57 +00:00
Ning Sun
c231eee7c1 fix: respect feature flags for geo function (#4836) 2024-10-16 07:46:31 +00:00
Yiran
176f2df5b3 fix: dead links (#4837) 2024-10-16 07:43:14 +00:00
localhost
4622412dfe feat: add API to write OpenTelemetry logs to GreptimeDB (#4755)
* chore: otlp logs api

* feat: add API to write OpenTelemetry logs to GreptimeDB

* chore: fix test data schema error

* chore: modify the underlying data structure of the pipeline value map type from hashmap to btremap to keep key order

* chore: fix by pr comment

* chore: resolve conflicts and add some test

* chore: remove useless error

* chore: change otlp header name

* chore: fmt code

* chore: fix integration test for otlp log write api

* chore: fix by pr comment

* chore: set otlp body with fulltext default
2024-10-16 04:36:08 +00:00
jeremyhi
59ec90299b refactor: metasrv cannot be cloned (#4834)
* refactor: metasrv cannot be cloned

* chore: remove MetasrvInstance's clone
2024-10-15 11:36:48 +00:00
discord9
16b8cdc3d5 chore: bump version v0.9.4 (#4833) 2024-10-15 10:48:03 +00:00
Weny Xu
3197b8b535 feat: introduce default customizers (#4831)
* feat: introduce `DefaultHeartbeatHandlerGroupBuilderCustomizer` and `DefaultLeadershipChangeNotifierCustomizer`

* chore: code styling
2024-10-15 09:48:13 +00:00
zyy17
972c2441af chore: bump promql-parser to v0.4.1 and use to_string() for EvalStmt (#4832)
chore: bump promql-parser to v0.4.1 and use to_string() for EvalStmt
2024-10-15 08:50:37 +00:00
Ning Sun
bb8b54b5d3 feat: add some s2 geo functions (#4823)
* feat: add first batch of s2 functions

* refactor: update reusable code from main

* test: add sqlness tests for s2

* feat: add tostring function for s2

* Update src/common/function/src/scalars/geo/s2.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* Apply suggestions from code review

* one more change

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-15 06:47:29 +00:00
Weny Xu
b5233e500b feat: defer HeartbeatHandlerGroup construction and enhance LeadershipChangeNotifier (#4826)
* feat: enhance `HeartbeatHandlerGroup`

* chore: apply suggestions from CR

* chore: minor refactoring

* chore: code styling

* chore: apply suggestions from CR
2024-10-15 03:35:31 +00:00
Ruihang Xia
b61a388d04 refactor: replace info logs with debug logs in region server (#4829)
* refactor: replace info logs with debug logs in region server

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: update error handling for closing and opening nonexistent regions

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 12:46:07 +00:00
Ruihang Xia
06e565d25a feat: cache logical region's metadata (#4827)
* feat: cache logical region's metadata

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: implement logical region locking for metadata operations

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: correct typo in comment for MetadataRegion struct

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 08:44:13 +00:00
Yingwen
3b2ce31a19 feat: enable prof features by default (#4815)
* feat: enable prof by default

* docs: don't need to build with features

* feat: add common-pprof as optional dep for pprof feature

* build: remove optional

* feat: use dump_text
2024-10-14 03:32:47 +00:00
Ruihang Xia
a889ea88ca fix: case sensitive for __field__ matcher (#4822)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 03:18:59 +00:00
Yingwen
2f2b4b306c feat!: implement interval type by multiple structs (#4772)
* define structs and methods

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: re-implement interval types in time crate

* feat: use new

* feat: interval value

* feat: query crate interval

* feat: pg and mysql interval

* chore: remove unused imports

* chore: remove commented codes

* feat: make flow compile but may not work

* feat: flow datetime

* test: fix some tests

* test: fix some flow tests(WIP)

* chore: some fix test&docs

* fix: change interval order

* chore: remove unused codes

* chore: fix cilppy

* chore: now signature change

* chore: remove todo

* feat: update error message

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: discord9 <discord9@163.com>
2024-10-14 03:09:03 +00:00
jeremyhi
856c0280f5 feat: remove the distributed lock (#4825)
* feat: remove the distributed lock as we do not need it any more

* chore: delete todo comment

* chore: remove unused error
2024-10-12 09:04:22 +00:00
Ning Sun
aaa9b32908 feat: add more h3 functions (#4770)
* feat: add more h3 grid functions

* feat: add more traversal functions

* refactor: update some function definitions

* style: format

* refactor: avoid creating slice in nested loop

* feat: ensure column number and length

* refactor: fix lint warnings

* refactor: merge main

* Apply suggestions from code review

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

* Update src/common/function/src/scalars/geo/h3.rs

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

* style: format

---------

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-10-11 17:57:54 +00:00
Weny Xu
4bb1f4f184 feat: introduce LeadershipChangeNotifier and LeadershipChangeListener (#4817)
* feat: introduce `LeadershipChangeNotifier`

* refactor: use `LeadershipChangeNotifier`

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: adjust log styling
2024-10-11 12:48:53 +00:00
Weny Xu
0f907ef99e fix: correct table name formatting (#4819) 2024-10-11 11:32:15 +00:00
Ruihang Xia
a61c0bd1d8 fix: error in admin function is not formatted properly (#4820)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-11 10:02:45 +00:00
Lei, HUANG
7dd0e3ab37 fix: Panic in UNION ALL queries (#4796)
* fix/union_all_panic:
 Improve MetricCollector by incrementing level and fix underflow issue; add tests for UNION ALL queries

* chore: remove useless documentation

* fix/union_all_panic: Add order by clause to UNION ALL select queries in tests
2024-10-11 08:23:01 +00:00
Yingwen
d168bde226 feat!: move v1/prof API to debug/prof (#4810)
* feat!: move v1/prof to debug/prof

* docs: update readme

* docs: move prof docs to docs dir

* chore: update message

* feat!: remove v1/prof

* docs: update mem prof docs
2024-10-11 04:16:37 +00:00
jeremyhi
4b34f610aa feat: information extension (#4811)
* feat: information extension

* Update manager.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: by comment

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-10-11 03:13:49 +00:00
Weny Xu
695ff1e037 feat: expose RegionMigrationManagerRef (#4812)
* chore: expose `RegionMigrationProcedureTask`

* fix: fix typos

* chore: expose `tracker`
2024-10-11 02:40:51 +00:00
Yohan Wal
288fdc3145 feat: json_path_exists udf (#4807)
* feat: json_path_exists udf

* chore: fix comments

* fix: caution when copy&paste QAQ
2024-10-10 14:15:34 +00:00
discord9
a8ed3db0aa feat: Merge sort Logical plan (#4768)
* feat(WIP): MergeSort

* wip

* feat: MergeSort LogicalPlan

* update sqlness result

* Apply suggestions from code review

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* refactor: per review advice

* refactor: more per review

* chore: per review

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-09 09:37:27 +00:00
Kaifeng Zheng
0dd11f53f5 feat: add json format output for http interface (#4797)
* feat: json output format for http

* feat: add json result test case

* fix: typo and refactor a piece of code

* fix: cargo check

* move affected_rows to top level
2024-10-09 07:11:57 +00:00
Ning Sun
19918928c5 feat: add function to aggregate path into a geojson path (#4798)
* feat: add geojson function to aggregate paths

* test: add sqlness results

* test: add sqlness

* refactor: corrected to aggregation function

* chore: update comments

* fix: make linter happy again

* refactor: rename to remove `geo` from `geojson` function name

The return type is not geojson at all. It's just compatible with geojson's
coordinates part and superset's deckgl path plugin.
2024-10-09 02:38:44 +00:00
shuiyisong
5f0a83b2b1 fix: ts conversion during transform phase (#4790)
* fix: allow ts conversion during transform phase

* chore: replace `unimplemented` with snafu
2024-10-08 17:54:44 +00:00
localhost
71a66d15f7 chore: add json write (#4744)
* chore: add json write

* chore: add test for write json log api

* chore: enhancement of Error Handling

* chore: fix by pr comment

* chore: fix by pr comment

* chore: enhancement of error content and add some doc
2024-10-08 12:11:09 +00:00
Weny Xu
2cdd103874 feat: introduce HeartbeatHandlerGroupBuilderCustomizer (#4803)
* feat: introduce `HeartbeatHandlerGroupBuilderFinalizer`

* chore: rename to `HeartbeatHandlerGroupBuilderCustomizer`
2024-10-08 09:02:06 +00:00
Ning Sun
4dea4cac47 refactor: change sqlness ports to avoid conflict with local instance (#4794) 2024-10-08 07:33:24 +00:00
Kaifeng Zheng
a283e13da7 feat: set max log files to 720 by default, info log only (#4787)
* feat: set max log files to 720 by default, info log only

* expose max_log_files in tomls

* include dir info when panicing, limit max_log_files of err_log to 30, and that of slow_queries to opt.max_log_files

* fix clippy

* update config.md

* update expected config str

* limit err_log max files size to `max_log_files` too, include err info when panicing, put `max_l_f` in right position

* fix typos

* chore: config

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-04 18:05:40 +00:00
JohnsonLee
47a3277d12 feat: customize channel information for sqlness tests (#4729)
* feat: add pg and mysql server_address options

* feat: start pg and mysql server(standalone)

* feat: start pg and mysql in distribute

* feat: finally get there, specify postgres sqlness

* feat: support mysql sqlness

* fix: license

* fix: remove unused import

* fix: toml

* fix: clippy

* refactor: BeginProtocolInterceptorFactory to ProtocolInterceptorFactory

* fix: sqlness pg connect

* fix: clippy

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: rustfmt

* fix: reconnect pg and mysql when restart

* test: add mysql related sqlness

* fix: wait for start while restarting

* fix: clippy

* fix: cargo lock conflict

fix: Cargo.lock conflict

* fix: usage of '@@tx_isolation' in sqlness

* fix: typos

* feat: retry with backoff when create client

* fix: use millisecond rather than microseconds in backoff

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-10-04 09:26:57 +00:00
Yingwen
caf5f2c7a5 docs: add TM to logos (#4789) 2024-10-01 03:35:04 +00:00
Weny Xu
c1e8084af6 feat: add add_handler_after, add_handler_before, replace_handler (#4788)
* feat: add `add_handler_after`, `add_handler_before`, `replace_handler`

* chore: apply suggestions from CR

* test: add more tests

* feat: use `Vec` instead of `LinkedList`

* Update src/meta-srv/src/lib.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-09-30 08:53:59 +00:00
Weny Xu
6e776d5f98 feat: support to reject write after flushing (#4759)
* refactor: use `RegionRoleState` instead of `RegionState`

* feat: introducing `RegionLeaderState::Downgrading`

* refactor: introduce `set_region_role_state_gracefully`

* refactor: use `set_region_role` instead of `set_writable`

* feat: support to reject write after flushing

* fix: fix unit tests

* test: add unit test for `should_reject_write`

* chore: add comments

* chore: refine comments

* fix: fix unit test

* test: enable fuzz tests for Local WAL

* chore: add logs

* chore: rename `RegionStatus` to `RegionState`

* feat: introduce `DowngradingLeader`

* chore: rename

* refactor: refactor `set_role_state` tests

* test: ensure downgrading region will reject write

* chore: enhance logs

* chore: refine name

* chore: refine comment

* test: add tests for `set_role_role_state`

* fix: fix unit tests

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-09-30 08:28:51 +00:00
zyy17
e39a9e6feb feat: add StatementStatistics for slow query logging implementation (#4719)
* feat: log slow query

* feat: log slow query for sql

* refactor: add slow query logging options

* ci: fix errors

* feat: add StatementStatistics

* chore: revert modification of servers crate

* docs: update config docs

* fix: clippy errors
2024-09-30 03:26:50 +00:00
Weny Xu
77af4fd981 refactor: introduce HeartbeatHandlerGroupBuilder (#4785) 2024-09-30 02:56:53 +00:00
Yingwen
cd55202136 feat: unordered scanner scans data by time ranges (#4757)
* feat: define range meta

* feat: group ranges

* feat: split range

* feat: build ranges from the scan input

* feat: get partition range from range meta

* feat: build file range

* feat: unordered scan read by ranges

* feat: wip for mem ranges

* feat: build ranges

* feat: remove unused codes

* chore: update comments

* feat: update metrics

* chore: address review comments

* chore: debug assertion
2024-09-29 07:14:48 +00:00
Lei, HUANG
50cb59587d chore: replace anymap with anymap2 (#4781) 2024-09-29 06:27:35 +00:00
Lei, HUANG
0a82b12d08 fix(sqlness): sqlness isolation (#4780)
* fix: isolate logs

* fix: copy cases

* fix: clippy
2024-09-29 05:54:01 +00:00
LFC
d9f2f0ccf0 feat: add a new status code for "external" errors (#4775)
* feat: add a new status code for "external" errors

* Update src/auth/src/error.rs

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* support mysql cli cleartext auth

* resolve PR comments

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-09-29 03:38:50 +00:00
Ning Sun
cedbbcf2b8 fix: update pgwire for potential issue with connection establish (#4783) 2024-09-28 19:27:31 +00:00
Ning Sun
d6be44bc7f fix: dead loop on detecting postgres ssl handshake (#4778) 2024-09-28 01:39:29 +00:00
JohnsonLee
3a46c1b235 fix: use information_schema returns Unknown database (#4774)
* fix: use information_schema returns Unknown database 'information_schema'

* test: make sure 'use information_schma' successful
2024-09-27 23:15:48 +00:00
Lei, HUANG
934bc13967 feat(mito): limit compaction output file size (#4754)
* Commit Message

 Clarify documentation for CompactionOutput struct

 Updated the documentation for the `CompactionOutput` struct to specify that the output time range is only relevant for windowed compaction.

* Add max_output_file_size to TwcsPicker and TwcsOptions

 - Introduced `max_output_file_size` to `TwcsPicker` struct and its logic to enforce output file size limits during compaction.
 - Updated `TwcsOptions` to include `max_output_file_size` and adjusted related tests.
 - Modified `new_picker` function to initialize `TwcsPicker` with the new `max_output_file_size` field.

* feat/limit-compaction-output-size:
 Refactor compaction picker and TWCS to support append mode and improve options handling

 - Update compaction picker to accept a reference to options and append mode flag
 - Modify TWCS picker logic to consider append mode when filtering deleted rows
 - Remove VersionControl usage in compactor and simplify return type
 - Adjust enforce_max_output_size logic in TWCS picker to handle max output file size
 - Add append mode flag to TwcsPicker struct
 - Fix incorrect condition in TWCS picker for enforcing max output size
 - Update region options tests to reflect new max output file size format (1GB and 7MB)
 - Simplify InvalidTableOptionSnafu error handling in create_parser
 - Add `compaction.twcs.max_output_file_size` to mito engine option keys

* resolve some comments
2024-09-27 11:17:36 +00:00
Weny Xu
4045298cb2 feat: add region_statistics table (#4771)
* refactor: introduce `region_statistic`

* refactor: move DatanodeStat related structs to common_meta

* chore: add comments

* feat: implement `list_region_stats` for `ClusterInfo` trait

* feat: add `region_statistics` table

* feat: add table_id and region_number fields

* chore: rename unused snafu

* chore: udpate sqlness results

* chore: avoid to print source in error msg

* chore: move `procedure_info` under `greptime` catalog

* chore: apply suggestions from CR

* Update src/common/meta/src/datanode.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2024-09-27 09:54:52 +00:00
Lanqing Yang
cc4106cbd2 feat: protect datanode with concurrency limit. (#4699)
Adding parallelism in region server to protect datanode from query overload.
2024-09-27 06:12:57 +00:00
localhost
627a326273 refactor: Change the error type in the pipeline crate from String to Error (#4763)
* chore: in process

* chore: change pipeline crate error type

* chore: improve event error

* chore: fix by pr comment

* chore: use snafu context replace ok_or_else

* refactor: update snafu usage

---------

Co-authored-by: Ning Sun <sunng@protonmail.com>
2024-09-25 19:32:34 +00:00
discord9
0274e752ae chore: make sure aws-lc-sys wouldn't be built (#4767)
* chore: make sure aws-lc-sys wouldn't be built

* fix: bash

* refactor: multiple line bash
2024-09-25 17:11:37 +00:00
Lei, HUANG
cd4bf239d0 chore: relax table name constraint (#4766)
chore/relax-table-name-constraint: Updated NAME_PATTERN to allow '@' and '#' characters and adjusted tests for new table name validation rules.
2024-09-25 02:45:18 +00:00
Ning Sun
e3c0b5482f feat: returning warning instead of error on unsupported SET statement (#4761)
* feat: add capability to send warning to pgclient

* fix: refactor query context to carry query scope data

* feat: return a warning for unsupported postgres statement
2024-09-24 08:45:55 +00:00
Weny Xu
d1b252736d refactor: unify the styling in create_or_alter_tables_on_demand (#4756)
* refactor: refactor `create_or_alter_tables_on_demand`

* chore: apply suggestions from CR
2024-09-24 03:48:16 +00:00
discord9
54f6e13d13 fix: Release CI & make rustls use ring (#4750)
* feat: new dev-build

* fix: copy

* chore: rm dbg run

* fix: binstall install script

* chore: typos

* fix: update useable image

* fix: properly uninstall gcc-9

* chore: another try

* chore: print current cc version

* chore: another try to release

* fix: use gcc-10 by `update-alternatives`

* chore: update dev-build image

* remove gcc-9 again and install make/cmake

* use new image

* alias gcc-10/g++-10 to every variant

* again.....

* fix....

* again release ....

* chore: remove auto remove

* feat: rustls default to ring

* chore: update Cargo.lock

* chore: update Cargo.lock

* Apply suggestions from code review

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-24 03:11:12 +00:00
Ruihang Xia
5c64f0ce09 refactor!: simplify NativeType trait and remove percentile UDAF (#4758)
* refactor!: simplify NativeType trait and remove percentile UDAF

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove NativeType

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* recover a mis-deleted case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-23 10:55:20 +00:00
Ruihang Xia
2feddca1cb feat: include order by to commutativity rule set (#4753)
* feat: include order by to commutativity rule set

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tune sqlness replace interceptor

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-23 08:35:06 +00:00
Ning Sun
0f99218386 feat: list/array/timezone support for postgres output (#4727)
* feat: list/array support for postgres output

* fix: implement time zone support for postgrsql

* feat: add a geohash function that returns array

* fix: typo

* fix: lint warnings

* test: add sqlness test

* refactor: check resolution range before convert value

* fix: test result for sqlness

* feat: upgrade pgwire apis
2024-09-22 02:39:38 +00:00
Weny Xu
163cea81c2 feat: migrate local WAL regions (#4715)
* feat: allow to flush region before migrating

* fix: fix unit tests

* feat: allow to set `flush_timeout`

* feat: skip to replay memtable

* fix: fix unit tests

* test: add more tests

* refactor: simplify timeout logical

* test: add unit tests

* test: add unit tests

* chore: update comments

* fix: fix unit tests

* fix: fmt and clippy

* feat: change default timeout to 30s

* fix: throw `ExceededDeadline` error

* test: add tests for `downgrade_region_with_retry`

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: update proto to `3633474`

* refactor: refactor `upgrade_region_with_retry`

* chore: apply suggestions from CR
2024-09-20 08:27:20 +00:00
taobo
0c9b8eb0d2 feat: improve observability for procedure (#4675)
* feat: improve observability for procedure

* fix: test error

* test: add sqlness test for information_schema.procedure_info

* fix: sqlness test error

* fix: cr comment

* chore: update proto version

* fix: apply cr comment

* update version

* fix: cr comment

* optimize procedure type output format

* upgrade dep version

* fix: clippy error

* fix: `procedure` borrowed error

* fix: optimize code
2024-09-20 06:07:53 +00:00
Ning Sun
75c6fad1a3 feat: add more h3 scalar functions (#4707)
* feat: add more h3 scalar functions

* chore: comment up
2024-09-20 04:19:50 +00:00
Yingwen
e12ffbeb2f feat: flush other workers if still need flush (#4746) 2024-09-20 02:55:31 +00:00
discord9
c4e52ebf91 feat: use new image for gcc-10 (#4748)
feat: use new image
2024-09-20 02:34:45 +00:00
Yingwen
f02410c39b fix: disable field pruning in last non null mode (#4740)
* fix: don't prune fields in last non null mode

* test: add sqlness test for field pruning

* test: add flush

* refine implementation

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-20 00:35:37 +00:00
Ruihang Xia
f5cf25b0db refactor: remove DfPlan wrapper (#4733)
* refactor: remove DfPlan wrapper

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix test assertion

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-19 12:29:33 +00:00
liyang
1acda74c26 fix: cannot input tag for the dev-builder image (#4743) 2024-09-19 11:14:34 +00:00
Yohan Wal
95787825f1 build(deps): use original jsonb repo (#4742) 2024-09-19 09:44:44 +00:00
Weny Xu
49004391d3 chore(fuzz): print table name for debugging (#4738)
* chore(fuzz): print table name for debugging

* chore: apply suggestions
2024-09-19 09:40:10 +00:00
discord9
d0f5b2ad7d fix: use gcc-10 in release dev build (#4741) 2024-09-19 09:34:06 +00:00
Ruihang Xia
03b29439e2 Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-11 11:09:07 +08:00
Ruihang Xia
712f4ca0ef try sort partial commutative
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-09 21:08:59 +08:00
Ruihang Xia
60bacff57e ignore unmatched left and right greater
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-08 11:12:21 +08:00
Ruihang Xia
6208772ba4 Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-08 11:02:04 +08:00
Ruihang Xia
67184c0498 Merge branch 'main' into transform-count-min-max
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-05 14:30:47 +08:00
Ruihang Xia
1dd908fdf7 handle group by
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-05 12:50:13 +08:00
Ruihang Xia
8179b4798e feat: support transforming min/max/count aggr fn
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-04 22:17:31 +08:00
588 changed files with 28422 additions and 9211 deletions

View File

@@ -50,7 +50,7 @@ runs:
BUILDX_MULTI_PLATFORM_BUILD=all \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image
shell: bash
@@ -61,7 +61,7 @@ runs:
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-android image # Only build image for amd64 platform.
shell: bash
@@ -71,6 +71,6 @@ runs:
BASE_IMAGE=android \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} && \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -40,7 +40,7 @@ runs:
- name: Install PyArrow Package
shell: pwsh
run: pip install pyarrow
run: pip install pyarrow numpy
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2

View File

@@ -18,7 +18,7 @@ runs:
--set replicaCount=${{ inputs.etcd-replicas }} \
--set resources.requests.cpu=50m \
--set resources.requests.memory=128Mi \
--set resources.limits.cpu=1000m \
--set resources.limits.cpu=1500m \
--set resources.limits.memory=2Gi \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \

View File

@@ -269,6 +269,13 @@ jobs:
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin
- name: Check aws-lc-sys will not build
shell: bash
run: |
if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
exit 1
fi
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
@@ -429,12 +436,25 @@ jobs:
timeout-minutes: 60
strategy:
matrix:
target: ["fuzz_migrate_mito_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
target: ["fuzz_migrate_mito_regions", "fuzz_migrate_metric_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
mode:
- name: "Remote WAL"
minio: true
kafka: true
values: "with-remote-wal.yaml"
include:
- target: "fuzz_migrate_mito_regions"
mode:
name: "Local WAL"
minio: true
kafka: false
values: "with-minio.yaml"
- target: "fuzz_migrate_metric_regions"
mode:
name: "Local WAL"
minio: true
kafka: false
values: "with-minio.yaml"
steps:
- name: Remove unused software
run: |
@@ -523,7 +543,7 @@ jobs:
with:
image-registry: localhost:5001
values-filename: ${{ matrix.mode.values }}
enable-region-failover: true
enable-region-failover: ${{ matrix.mode.kafka }}
- name: Port forward (mysql)
run: |
kubectl port-forward service/my-greptimedb-frontend 4002:4002 -n my-greptimedb&
@@ -674,7 +694,7 @@ jobs:
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
run: pip install pyarrow numpy
- name: Setup etcd server
working-directory: tests-integration/fixtures/etcd
run: docker compose -f docker-compose-standalone.yml up -d --wait

View File

@@ -92,7 +92,7 @@ jobs:
with:
python-version: "3.10"
- name: Install PyArrow Package
run: pip install pyarrow
run: pip install pyarrow numpy
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:

2649
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,7 @@ members = [
"src/common/mem-prof",
"src/common/meta",
"src/common/plugins",
"src/common/pprof",
"src/common/procedure",
"src/common/procedure-test",
"src/common/query",
@@ -64,7 +65,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.9.3"
version = "0.9.5"
edition = "2021"
license = "Apache-2.0"
@@ -120,13 +121,13 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "255f87a3318ace3f88a67f76995a0e14910983f4" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
moka = "0.12"
notify = "6.1"
@@ -137,15 +138,18 @@ opentelemetry-proto = { version = "0.5", features = [
"metrics",
"trace",
"with-serde",
"logs",
] }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.4" }
promql-parser = { version = "0.4.3", features = ["ser"] }
prost = "0.12"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
ratelimit = "0.9"
regex = "1.8"
regex-automata = { version = "0.4" }
reqwest = { version = "0.12", default-features = false, features = [
@@ -165,7 +169,7 @@ schemars = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
shadow-rs = "0.31"
shadow-rs = "0.35"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
@@ -176,13 +180,16 @@ sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "5
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
tokio = { version = "1.36", features = ["full"] }
tokio = { version = "1.40", features = ["full"] }
tokio-postgres = "0.7"
tokio-stream = { version = "0.1" }
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.11", features = ["tls", "gzip", "zstd"] }
tower = { version = "0.4" }
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
zstd = "0.13"
@@ -208,6 +215,7 @@ common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-plugins = { path = "src/common/plugins" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" }
@@ -245,18 +253,29 @@ store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" }
[patch.crates-io]
# change all rustls dependencies to use our fork to default to `ring` to make it "just work"
hyper-rustls = { git = "https://github.com/GreptimeTeam/hyper-rustls" }
rustls = { git = "https://github.com/GreptimeTeam/rustls" }
tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
[profile.release]
debug = 1
# debug = 1
split-debuginfo = "off"
[profile.nightly]
inherits = "release"
strip = "debuginfo"
split-debuginfo = "off"
# strip = "debuginfo"
lto = "thin"
debug = false
# debug = false
incremental = false
[profile.ci]

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-b4b105ad-20240827021230
DEV_BUILDER_IMAGE_TAG ?= 2024-10-19-a5c00e85-20241024184445
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu

View File

@@ -17,6 +17,7 @@
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -82,6 +83,7 @@
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
@@ -114,7 +116,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
@@ -160,8 +164,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -246,8 +255,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -311,8 +325,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -335,6 +354,7 @@
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
@@ -392,6 +412,7 @@
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -418,7 +439,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
@@ -462,8 +485,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -508,7 +536,12 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |

View File

@@ -19,6 +19,9 @@ enable_telemetry = true
## Parallelism of initializing regions.
init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"
@@ -210,6 +213,17 @@ create_index = true
## **It's only used when the provider is `kafka`**.
dump_index_interval = "60s"
## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
@@ -402,8 +416,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false
## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2
## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8
## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -577,12 +600,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]
@@ -607,7 +646,7 @@ url = ""
headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -78,15 +78,31 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -185,12 +185,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]
@@ -215,7 +231,7 @@ url = ""
headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -172,12 +172,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]
@@ -202,7 +218,7 @@ url = ""
headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -15,6 +15,9 @@ init_regions_in_background = false
## Parallelism of initializing regions.
init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
@@ -234,6 +237,17 @@ backoff_base = 2
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"
## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
@@ -440,8 +454,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false
## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2
## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8
## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -621,12 +644,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]
@@ -651,7 +690,7 @@ url = ""
headers = { }
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
#+ [tracing]
## The tokio console address.
## @toml2docs:none-default
tokio_console_addr = "127.0.0.1"
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -0,0 +1,50 @@
#!/bin/bash
set -euxo pipefail
cd "$(mktemp -d)"
# Fix version to v1.6.6, this is different than the latest version in original install script in
# https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh
base_url="https://github.com/cargo-bins/cargo-binstall/releases/download/v1.6.6/cargo-binstall-"
os="$(uname -s)"
if [ "$os" == "Darwin" ]; then
url="${base_url}universal-apple-darwin.zip"
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
unzip cargo-binstall-universal-apple-darwin.zip
elif [ "$os" == "Linux" ]; then
machine="$(uname -m)"
if [ "$machine" == "armv7l" ]; then
machine="armv7"
fi
target="${machine}-unknown-linux-musl"
if [ "$machine" == "armv7" ]; then
target="${target}eabihf"
fi
url="${base_url}${target}.tgz"
curl -L --proto '=https' --tlsv1.2 -sSf "$url" | tar -xvzf -
elif [ "${OS-}" = "Windows_NT" ]; then
machine="$(uname -m)"
target="${machine}-pc-windows-msvc"
url="${base_url}${target}.zip"
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
unzip "cargo-binstall-${target}.zip"
else
echo "Unsupported OS ${os}"
exit 1
fi
./cargo-binstall -y --force cargo-binstall
CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
if ! [[ ":$PATH:" == *":$CARGO_HOME/bin:"* ]]; then
if [ -n "${CI:-}" ] && [ -n "${GITHUB_PATH:-}" ]; then
echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
else
echo
printf "\033[0;31mYour path is missing %s, you might want to add it.\033[0m\n" "$CARGO_HOME/bin"
echo
fi
fi

View File

@@ -32,7 +32,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
RUN cargo install cargo-binstall --version 1.6.6 --locked
# compile from source take too long, so we use the precompiled binary instead
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -24,6 +24,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
python3.10 \
python3.10-dev
# https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106
# `aws-lc-sys` require gcc >= 10.3.0 to work, hence alias to use gcc-10
RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \
apt-get install -y gcc-10 g++-10 cpp-10 make cmake && \
ln -sf /usr/bin/gcc-10 /usr/bin/gcc && ln -sf /usr/bin/g++-10 /usr/bin/g++ && \
ln -sf /usr/bin/gcc-10 /usr/bin/cc && \
ln -sf /usr/bin/g++-10 /usr/bin/cpp && ln -sf /usr/bin/g++-10 /usr/bin/c++ && \
cc --version && gcc --version && g++ --version && cpp --version && c++ --version
# Remove Python 3.8 and install pip.
RUN apt-get -y purge python3.8 && \
apt-get -y autoremove && \
@@ -57,7 +66,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
RUN cargo install cargo-binstall --version 1.6.6 --locked
# compile from source take too long, so we use the precompiled binary instead
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que
## Addition
- You can tune GreptimeDB's configuration to get better performance.
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options).
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).

View File

@@ -0,0 +1,16 @@
# Change Log Level on the Fly
## HTTP API
example:
```bash
curl --data "trace;flow=debug" 127.0.0.1:4000/debug/log_level
```
And database will reply with something like:
```bash
Log Level changed from Some("info") to "trace;flow=debug"%
```
The data is a string in the format of `global_level;module1=level1;module2=level2;...` that follow the same rule of `RUST_LOG`.
The module is the module name of the log, and the level is the log level. The log level can be one of the following: `trace`, `debug`, `info`, `warn`, `error`, `off`(case insensitive).

View File

@@ -1,15 +1,9 @@
# Profiling CPU
## Build GreptimeDB with `pprof` feature
```bash
cargo build --features=pprof
```
## HTTP API
Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
```bash
curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out
curl -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
```
Then you can use `pprof` command with the protobuf file.
@@ -19,10 +13,10 @@ go tool pprof -top /tmp/pprof.out
Sample at 99 Hertz, for 60 seconds, output report in flamegraph format.
```bash
curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
curl -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
```
Sample at 49 Hertz, for 10 seconds, output report in text format.
```bash
curl -s '0:4000/v1/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
curl -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
```

View File

@@ -12,16 +12,10 @@ brew install jemalloc
sudo apt install libjemalloc-dev
```
### [flamegraph](https://github.com/brendangregg/FlameGraph)
### [flamegraph](https://github.com/brendangregg/FlameGraph)
```bash
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with `mem-prof` feature.
```bash
cargo build --features=mem-prof
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
## Profiling
@@ -35,7 +29,7 @@ MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone sta
Dump memory profiling data through HTTP API:
```bash
curl localhost:4000/v1/prof/mem > greptime.hprof
curl localhost:4000/debug/prof/mem > greptime.hprof
```
You can periodically dump profiling data and compare them to find the delta memory usage.
@@ -45,6 +39,9 @@ You can periodically dump profiling data and compare them to find the delta memo
To create flamegraph according to dumped profiling data:
```bash
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
```
sudo apt install -y libjemalloc-dev
jeprof <path_to_greptime_binary> <profile_data> --collapse | ./flamegraph.pl > mem-prof.svg
jeprof <path_to_greptime_binary> --base <baseline_prof> <profile_data> --collapse | ./flamegraph.pl > output.svg
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -409,7 +409,39 @@
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"fieldMinMax": false,
"mappings": [],
@@ -438,18 +470,16 @@
},
"id": 27,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"text": {},
"textMode": "auto",
"wideLayout": true
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
@@ -467,7 +497,7 @@
}
],
"title": "CPU",
"type": "stat"
"type": "timeseries"
},
{
"datasource": {
@@ -477,7 +507,39 @@
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"fieldMinMax": false,
@@ -503,18 +565,16 @@
},
"id": 28,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"text": {},
"textMode": "auto",
"wideLayout": true
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
@@ -532,7 +592,7 @@
}
],
"title": "Memory",
"type": "stat"
"type": "timeseries"
},
{
"collapsed": false,
@@ -3335,6 +3395,6 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "e7097237-669b-4f8d-b751-13067afbfb68",
"version": 15,
"version": 16,
"weekStart": ""
}

View File

@@ -1,3 +1,2 @@
[toolchain]
channel = "nightly-2024-06-06"
channel = "nightly-2024-10-19"

View File

@@ -17,10 +17,11 @@ use std::sync::Arc;
use common_base::BitVec;
use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
use common_decimal::Decimal128;
use common_time::interval::IntervalUnit;
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Interval, Timestamp};
use common_time::{
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::scalars::ScalarVector;
use datatypes::types::{
@@ -115,6 +116,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ConcreteDataType::binary_datatype()
}
}
ColumnDataType::Json => ConcreteDataType::json_datatype(),
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
@@ -416,6 +418,10 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
decimal128_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::Json => Values {
string_values: Vec::with_capacity(capacity),
..Default::default()
},
}
}
@@ -456,13 +462,11 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
TimeUnit::Microsecond => values.time_microsecond_values.push(val.value()),
TimeUnit::Nanosecond => values.time_nanosecond_values.push(val.value()),
},
Value::Interval(val) => match val.unit() {
IntervalUnit::YearMonth => values.interval_year_month_values.push(val.to_i32()),
IntervalUnit::DayTime => values.interval_day_time_values.push(val.to_i64()),
IntervalUnit::MonthDayNano => values
.interval_month_day_nano_values
.push(convert_i128_to_interval(val.to_i128())),
},
Value::IntervalYearMonth(val) => values.interval_year_month_values.push(val.to_i32()),
Value::IntervalDayTime(val) => values.interval_day_time_values.push(val.to_i64()),
Value::IntervalMonthDayNano(val) => values
.interval_month_day_nano_values
.push(convert_month_day_nano_to_pb(val)),
Value::Decimal128(val) => values.decimal128_values.push(convert_to_pb_decimal128(val)),
Value::List(_) | Value::Duration(_) => unreachable!(),
});
@@ -507,14 +511,12 @@ fn ddl_request_type(request: &DdlRequest) -> &'static str {
}
}
/// Converts an i128 value to google protobuf type [IntervalMonthDayNano].
pub fn convert_i128_to_interval(v: i128) -> v1::IntervalMonthDayNano {
let interval = Interval::from_i128(v);
let (months, days, nanoseconds) = interval.to_month_day_nano();
/// Converts an interval to google protobuf type [IntervalMonthDayNano].
pub fn convert_month_day_nano_to_pb(v: IntervalMonthDayNano) -> v1::IntervalMonthDayNano {
v1::IntervalMonthDayNano {
months,
days,
nanoseconds,
months: v.months,
days: v.days,
nanoseconds: v.nanoseconds,
}
}
@@ -562,11 +564,15 @@ pub fn pb_value_to_value_ref<'a>(
ValueData::TimeMillisecondValue(t) => ValueRef::Time(Time::new_millisecond(*t)),
ValueData::TimeMicrosecondValue(t) => ValueRef::Time(Time::new_microsecond(*t)),
ValueData::TimeNanosecondValue(t) => ValueRef::Time(Time::new_nanosecond(*t)),
ValueData::IntervalYearMonthValue(v) => ValueRef::Interval(Interval::from_i32(*v)),
ValueData::IntervalDayTimeValue(v) => ValueRef::Interval(Interval::from_i64(*v)),
ValueData::IntervalYearMonthValue(v) => {
ValueRef::IntervalYearMonth(IntervalYearMonth::from_i32(*v))
}
ValueData::IntervalDayTimeValue(v) => {
ValueRef::IntervalDayTime(IntervalDayTime::from_i64(*v))
}
ValueData::IntervalMonthDayNanoValue(v) => {
let interval = Interval::from_month_day_nano(v.months, v.days, v.nanoseconds);
ValueRef::Interval(interval)
let interval = IntervalMonthDayNano::new(v.months, v.days, v.nanoseconds);
ValueRef::IntervalMonthDayNano(interval)
}
ValueData::Decimal128Value(v) => {
// get precision and scale from datatype_extension
@@ -657,7 +663,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
IntervalType::MonthDayNano(_) => {
Arc::new(IntervalMonthDayNanoVector::from_iter_values(
values.interval_month_day_nano_values.iter().map(|x| {
Interval::from_month_day_nano(x.months, x.days, x.nanoseconds).to_i128()
IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).to_i128()
}),
))
}
@@ -802,18 +808,18 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values
.interval_year_month_values
.into_iter()
.map(|v| Value::Interval(Interval::from_i32(v)))
.map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::DayTime(_)) => values
.interval_day_time_values
.into_iter()
.map(|v| Value::Interval(Interval::from_i64(v)))
.map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values
.interval_month_day_nano_values
.into_iter()
.map(|v| {
Value::Interval(Interval::from_month_day_nano(
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(
v.months,
v.days,
v.nanoseconds,
@@ -941,18 +947,16 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
value_data: Some(ValueData::TimeNanosecondValue(v.value())),
},
},
Value::Interval(v) => match v.unit() {
IntervalUnit::YearMonth => v1::Value {
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
},
IntervalUnit::DayTime => v1::Value {
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
},
IntervalUnit::MonthDayNano => v1::Value {
value_data: Some(ValueData::IntervalMonthDayNanoValue(
convert_i128_to_interval(v.to_i128()),
)),
},
Value::IntervalYearMonth(v) => v1::Value {
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
},
Value::IntervalDayTime(v) => v1::Value {
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
},
Value::IntervalMonthDayNano(v) => v1::Value {
value_data: Some(ValueData::IntervalMonthDayNanoValue(
convert_month_day_nano_to_pb(v),
)),
},
Value::Decimal128(v) => v1::Value {
value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
@@ -1044,13 +1048,11 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
TimeUnit::Microsecond => ValueData::TimeMicrosecondValue(v.value()),
TimeUnit::Nanosecond => ValueData::TimeNanosecondValue(v.value()),
}),
Value::Interval(v) => Some(match v.unit() {
IntervalUnit::YearMonth => ValueData::IntervalYearMonthValue(v.to_i32()),
IntervalUnit::DayTime => ValueData::IntervalDayTimeValue(v.to_i64()),
IntervalUnit::MonthDayNano => {
ValueData::IntervalMonthDayNanoValue(convert_i128_to_interval(v.to_i128()))
}
}),
Value::IntervalYearMonth(v) => Some(ValueData::IntervalYearMonthValue(v.to_i32())),
Value::IntervalDayTime(v) => Some(ValueData::IntervalDayTimeValue(v.to_i64())),
Value::IntervalMonthDayNano(v) => Some(ValueData::IntervalMonthDayNanoValue(
convert_month_day_nano_to_pb(v),
)),
Value::Decimal128(v) => Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
Value::List(_) | Value::Duration(_) => unreachable!(),
},
@@ -1061,6 +1063,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
mod tests {
use std::sync::Arc;
use common_time::interval::IntervalUnit;
use datatypes::types::{
Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType,
@@ -1506,11 +1509,11 @@ mod tests {
#[test]
fn test_convert_i128_to_interval() {
let i128_val = 3000;
let interval = convert_i128_to_interval(i128_val);
let i128_val = 3;
let interval = convert_month_day_nano_to_pb(IntervalMonthDayNano::from_i128(i128_val));
assert_eq!(interval.months, 0);
assert_eq!(interval.days, 0);
assert_eq!(interval.nanoseconds, 3000);
assert_eq!(interval.nanoseconds, 3);
}
#[test]
@@ -1590,9 +1593,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_year_month(1_i32)),
Value::Interval(Interval::from_year_month(2_i32)),
Value::Interval(Interval::from_year_month(3_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)),
];
assert_eq!(expect, actual);
@@ -1605,9 +1608,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_i64(1_i64)),
Value::Interval(Interval::from_i64(2_i64)),
Value::Interval(Interval::from_i64(3_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)),
];
assert_eq!(expect, actual);
@@ -1636,9 +1639,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_month_day_nano(1, 2, 3)),
Value::Interval(Interval::from_month_day_nano(5, 6, 7)),
Value::Interval(Interval::from_month_day_nano(9, 10, 11)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)),
];
assert_eq!(expect, actual);
}

View File

@@ -75,6 +75,16 @@ pub enum Password<'a> {
PgMD5(HashedPassword<'a>, Salt<'a>),
}
impl Password<'_> {
pub fn r#type(&self) -> &str {
match self {
Password::PlainText(_) => "plain_text",
Password::MysqlNativePassword(_, _) => "mysql_native_password",
Password::PgMD5(_, _) => "pg_md5",
}
}
}
pub fn auth_mysql(
auth_data: HashedPassword,
salt: Salt,

View File

@@ -89,7 +89,7 @@ impl ErrorExt for Error {
Error::FileWatch { .. } => StatusCode::InvalidArguments,
Error::InternalState { .. } => StatusCode::Unexpected,
Error::Io { .. } => StatusCode::StorageUnavailable,
Error::AuthBackend { .. } => StatusCode::Internal,
Error::AuthBackend { source, .. } => source.status_code(),
Error::UserNotFound { .. } => StatusCode::UserNotFound,
Error::UnsupportedPasswordType { .. } => StatusCode::UnsupportedPasswordType,

View File

@@ -57,6 +57,11 @@ pub trait UserProvider: Send + Sync {
self.authorize(catalog, schema, &user_info).await?;
Ok(user_info)
}
/// Returns whether this user provider implementation is backed by an external system.
fn external(&self) -> bool {
false
}
}
fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {

View File

@@ -33,7 +33,7 @@ impl StaticUserProvider {
value: value.to_string(),
msg: "StaticUserProviderOption must be in format `<option>:<value>`",
})?;
return match mode {
match mode {
"file" => {
let users = load_credential_from_file(content)?
.context(InvalidConfigSnafu {
@@ -58,7 +58,7 @@ impl StaticUserProvider {
msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
}
.fail(),
};
}
}
}

View File

@@ -22,6 +22,7 @@ common-config.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true

View File

@@ -50,13 +50,20 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Failed to list nodes in cluster: {source}"))]
#[snafu(display("Failed to list nodes in cluster"))]
ListNodes {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to region stats in cluster"))]
ListRegionStats {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to list flows in catalog {catalog}"))]
ListFlows {
#[snafu(implicit)]
@@ -82,6 +89,32 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to get information extension client"))]
GetInformationExtension {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to list procedures"))]
ListProcedures {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Procedure id not found"))]
ProcedureIdNotFound {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("convert proto data error"))]
ConvertProtoData {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to re-compile script due to internal error"))]
CompileScriptInternal {
#[snafu(implicit)]
@@ -266,7 +299,9 @@ impl ErrorExt for Error {
| Error::FindRegionRoutes { .. }
| Error::CacheNotFound { .. }
| Error::CastManager { .. }
| Error::Json { .. } => StatusCode::Unexpected,
| Error::Json { .. }
| Error::GetInformationExtension { .. }
| Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,
Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
@@ -283,7 +318,10 @@ impl ErrorExt for Error {
| Error::ListNodes { source, .. }
| Error::ListSchemas { source, .. }
| Error::ListTables { source, .. }
| Error::ListFlows { source, .. } => source.status_code(),
| Error::ListFlows { source, .. }
| Error::ListProcedures { source, .. }
| Error::ListRegionStats { source, .. }
| Error::ConvertProtoData { source, .. } => source.status_code(),
Error::CreateTable { source, .. } => source.status_code(),

View File

@@ -21,7 +21,6 @@ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
PG_CATALOG_NAME,
};
use common_config::Mode;
use common_error::ext::BoxedError;
use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
use common_meta::key::catalog_name::CatalogNameKey;
@@ -31,9 +30,9 @@ use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_procedure::ProcedureManagerRef;
use futures_util::stream::BoxStream;
use futures_util::{StreamExt, TryStreamExt};
use meta_client::client::MetaClient;
use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
@@ -49,7 +48,7 @@ use crate::error::{
CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
use crate::kvbackend::TableCacheRef;
use crate::system_schema::pg_catalog::PGCatalogProvider;
use crate::system_schema::SystemSchemaProvider;
@@ -62,27 +61,31 @@ use crate::CatalogManager;
/// comes from `SystemCatalog`, which is static and read-only.
#[derive(Clone)]
pub struct KvBackendCatalogManager {
mode: Mode,
meta_client: Option<Arc<MetaClient>>,
/// Provides the extension methods for the `information_schema` tables
information_extension: InformationExtensionRef,
/// Manages partition rules.
partition_manager: PartitionRuleManagerRef,
/// Manages table metadata.
table_metadata_manager: TableMetadataManagerRef,
/// A sub-CatalogManager that handles system tables
system_catalog: SystemCatalog,
/// Cache registry for all caches.
cache_registry: LayeredCacheRegistryRef,
/// Only available in `Standalone` mode.
procedure_manager: Option<ProcedureManagerRef>,
}
const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
impl KvBackendCatalogManager {
pub fn new(
mode: Mode,
meta_client: Option<Arc<MetaClient>>,
information_extension: InformationExtensionRef,
backend: KvBackendRef,
cache_registry: LayeredCacheRegistryRef,
procedure_manager: Option<ProcedureManagerRef>,
) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
mode,
meta_client,
information_extension,
partition_manager: Arc::new(PartitionRuleManager::new(
backend.clone(),
cache_registry
@@ -106,23 +109,19 @@ impl KvBackendCatalogManager {
backend,
},
cache_registry,
procedure_manager,
})
}
/// Returns the server running mode.
pub fn running_mode(&self) -> &Mode {
&self.mode
}
pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
self.cache_registry.get().context(CacheNotFoundSnafu {
name: "view_info_cache",
})
}
/// Returns the `[MetaClient]`.
pub fn meta_client(&self) -> Option<Arc<MetaClient>> {
self.meta_client.clone()
/// Returns the [`InformationExtension`].
pub fn information_extension(&self) -> InformationExtensionRef {
self.information_extension.clone()
}
pub fn partition_manager(&self) -> PartitionRuleManagerRef {
@@ -132,6 +131,10 @@ impl KvBackendCatalogManager {
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
&self.table_metadata_manager
}
pub fn procedure_manager(&self) -> Option<ProcedureManagerRef> {
self.procedure_manager.clone()
}
}
#[async_trait::async_trait]

View File

@@ -18,7 +18,9 @@ pub mod flows;
mod information_memory_table;
pub mod key_column_usage;
mod partitions;
mod procedure_info;
mod region_peers;
mod region_statistics;
mod runtime_metrics;
pub mod schemata;
mod table_constraints;
@@ -30,7 +32,11 @@ use std::collections::HashMap;
use std::sync::{Arc, Weak};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
use common_error::ext::ErrorExt;
use common_meta::cluster::NodeInfo;
use common_meta::datanode::RegionStat;
use common_meta::key::flow::FlowMetadataManager;
use common_procedure::ProcedureInfo;
use common_recordbatch::SendableRecordBatchStream;
use datatypes::schema::SchemaRef;
use lazy_static::lazy_static;
@@ -43,7 +49,7 @@ use views::InformationSchemaViews;
use self::columns::InformationSchemaColumns;
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
use crate::error::Result;
use crate::error::{Error, Result};
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
use crate::system_schema::information_schema::flows::InformationSchemaFlows;
use crate::system_schema::information_schema::information_memory_table::get_schema_columns;
@@ -188,6 +194,16 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
self.catalog_name.clone(),
self.flow_metadata_manager.clone(),
)) as _),
PROCEDURE_INFO => Some(
Arc::new(procedure_info::InformationSchemaProcedureInfo::new(
self.catalog_manager.clone(),
)) as _,
),
REGION_STATISTICS => Some(Arc::new(
region_statistics::InformationSchemaRegionStatistics::new(
self.catalog_manager.clone(),
),
) as _),
_ => None,
}
}
@@ -235,6 +251,14 @@ impl InformationSchemaProvider {
CLUSTER_INFO.to_string(),
self.build_table(CLUSTER_INFO).unwrap(),
);
tables.insert(
PROCEDURE_INFO.to_string(),
self.build_table(PROCEDURE_INFO).unwrap(),
);
tables.insert(
REGION_STATISTICS.to_string(),
self.build_table(REGION_STATISTICS).unwrap(),
);
}
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
@@ -250,7 +274,6 @@ impl InformationSchemaProvider {
self.build_table(TABLE_CONSTRAINTS).unwrap(),
);
tables.insert(FLOWS.to_string(), self.build_table(FLOWS).unwrap());
// Add memory tables
for name in MEMORY_TABLES.iter() {
tables.insert((*name).to_string(), self.build_table(name).expect(name));
@@ -299,3 +322,39 @@ where
InformationTable::to_stream(self, request)
}
}
pub type InformationExtensionRef = Arc<dyn InformationExtension<Error = Error> + Send + Sync>;
/// The `InformationExtension` trait provides the extension methods for the `information_schema` tables.
#[async_trait::async_trait]
pub trait InformationExtension {
type Error: ErrorExt;
/// Gets the nodes information.
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
/// Gets the procedures information.
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error>;
/// Gets the region statistics.
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
}
pub struct NoopInformationExtension;
#[async_trait::async_trait]
impl InformationExtension for NoopInformationExtension {
type Error = Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
Ok(vec![])
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
Ok(vec![])
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
Ok(vec![])
}
}

View File

@@ -17,13 +17,10 @@ use std::time::Duration;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID;
use common_config::Mode;
use common_error::ext::BoxedError;
use common_meta::cluster::{ClusterInfo, NodeInfo, NodeStatus};
use common_meta::peer::Peer;
use common_meta::cluster::NodeInfo;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::warn;
use common_time::timestamp::Timestamp;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
@@ -40,7 +37,7 @@ use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::CLUSTER_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, ListNodesSnafu, Result};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;
@@ -70,7 +67,6 @@ const INIT_CAPACITY: usize = 42;
pub(super) struct InformationSchemaClusterInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
start_time_ms: u64,
}
impl InformationSchemaClusterInfo {
@@ -78,7 +74,6 @@ impl InformationSchemaClusterInfo {
Self {
schema: Self::schema(),
catalog_manager,
start_time_ms: common_time::util::current_time_millis() as u64,
}
}
@@ -100,11 +95,7 @@ impl InformationSchemaClusterInfo {
}
fn builder(&self) -> InformationSchemaClusterInfoBuilder {
InformationSchemaClusterInfoBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
self.start_time_ms,
)
InformationSchemaClusterInfoBuilder::new(self.schema.clone(), self.catalog_manager.clone())
}
}
@@ -144,7 +135,6 @@ impl InformationTable for InformationSchemaClusterInfo {
struct InformationSchemaClusterInfoBuilder {
schema: SchemaRef,
start_time_ms: u64,
catalog_manager: Weak<dyn CatalogManager>,
peer_ids: Int64VectorBuilder,
@@ -158,11 +148,7 @@ struct InformationSchemaClusterInfoBuilder {
}
impl InformationSchemaClusterInfoBuilder {
fn new(
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
start_time_ms: u64,
) -> Self {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
@@ -174,56 +160,17 @@ impl InformationSchemaClusterInfoBuilder {
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
uptimes: StringVectorBuilder::with_capacity(INIT_CAPACITY),
active_times: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_time_ms,
}
}
/// Construct the `information_schema.cluster_info` virtual table
async fn make_cluster_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let mode = utils::running_mode(&self.catalog_manager)?.unwrap_or(Mode::Standalone);
match mode {
Mode::Standalone => {
let build_info = common_version::build_info();
self.add_node_info(
&predicates,
NodeInfo {
// For the standalone:
// - id always 0
// - empty string for peer_addr
peer: Peer {
id: 0,
addr: "".to_string(),
},
last_activity_ts: -1,
status: NodeStatus::Standalone,
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
// Use `self.start_time_ms` instead.
// It's not precise but enough.
start_time_ms: self.start_time_ms,
},
);
}
Mode::Distributed => {
if let Some(meta_client) = utils::meta_client(&self.catalog_manager)? {
let node_infos = meta_client
.list_nodes(None)
.await
.map_err(BoxedError::new)
.context(ListNodesSnafu)?;
for node_info in node_infos {
self.add_node_info(&predicates, node_info);
}
} else {
warn!("Could not find meta client in distributed mode.");
}
}
let information_extension = utils::information_extension(&self.catalog_manager)?;
let node_infos = information_extension.nodes().await?;
for node_info in node_infos {
self.add_node_info(&predicates, node_info);
}
self.finish()
}

View File

@@ -0,0 +1,241 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID;
use common_error::ext::BoxedError;
use common_procedure::ProcedureInfo;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::timestamp::Timestamp;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder};
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::PROCEDURE_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;
const PROCEDURE_ID: &str = "procedure_id";
const PROCEDURE_TYPE: &str = "procedure_type";
const START_TIME: &str = "start_time";
const END_TIME: &str = "end_time";
const STATUS: &str = "status";
const LOCK_KEYS: &str = "lock_keys";
const INIT_CAPACITY: usize = 42;
/// The `PROCEDURE_INFO` table provides information about the current procedure information of the cluster.
///
/// - `procedure_id`: the unique identifier of the procedure.
/// - `procedure_name`: the name of the procedure.
/// - `start_time`: the starting execution time of the procedure.
/// - `end_time`: the ending execution time of the procedure.
/// - `status`: the status of the procedure.
/// - `lock_keys`: the lock keys of the procedure.
///
pub(super) struct InformationSchemaProcedureInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaProcedureInfo {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(PROCEDURE_ID, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PROCEDURE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
START_TIME,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
ColumnSchema::new(
END_TIME,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(LOCK_KEYS, ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaProcedureInfoBuilder {
InformationSchemaProcedureInfoBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaProcedureInfo {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID
}
fn table_name(&self) -> &'static str {
PROCEDURE_INFO
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_procedure_info(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaProcedureInfoBuilder {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
procedure_ids: StringVectorBuilder,
procedure_types: StringVectorBuilder,
start_times: TimestampMillisecondVectorBuilder,
end_times: TimestampMillisecondVectorBuilder,
statuses: StringVectorBuilder,
lock_keys: StringVectorBuilder,
}
impl InformationSchemaProcedureInfoBuilder {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
procedure_ids: StringVectorBuilder::with_capacity(INIT_CAPACITY),
procedure_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
end_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
lock_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.procedure_info` virtual table
async fn make_procedure_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let information_extension = utils::information_extension(&self.catalog_manager)?;
let procedures = information_extension.procedures().await?;
for (status, procedure_info) in procedures {
self.add_procedure(&predicates, status, procedure_info);
}
self.finish()
}
fn add_procedure(
&mut self,
predicates: &Predicates,
status: String,
procedure_info: ProcedureInfo,
) {
let ProcedureInfo {
id,
type_name,
start_time_ms,
end_time_ms,
lock_keys,
..
} = procedure_info;
let pid = id.to_string();
let start_time = TimestampMillisecond(Timestamp::new_millisecond(start_time_ms));
let end_time = TimestampMillisecond(Timestamp::new_millisecond(end_time_ms));
let lock_keys = lock_keys.join(",");
let row = [
(PROCEDURE_ID, &Value::from(pid.clone())),
(PROCEDURE_TYPE, &Value::from(type_name.clone())),
(START_TIME, &Value::from(start_time)),
(END_TIME, &Value::from(end_time)),
(STATUS, &Value::from(status.clone())),
(LOCK_KEYS, &Value::from(lock_keys.clone())),
];
if !predicates.eval(&row) {
return;
}
self.procedure_ids.push(Some(&pid));
self.procedure_types.push(Some(&type_name));
self.start_times.push(Some(start_time));
self.end_times.push(Some(end_time));
self.statuses.push(Some(&status));
self.lock_keys.push(Some(&lock_keys));
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.procedure_ids.finish()),
Arc::new(self.procedure_types.finish()),
Arc::new(self.start_times.finish()),
Arc::new(self.end_times.finish()),
Arc::new(self.statuses.finish()),
Arc::new(self.lock_keys.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaProcedureInfo {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_procedure_info(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -224,8 +224,8 @@ impl InformationSchemaRegionPeersBuilder {
let region_id = RegionId::new(table_id, route.region.id.region_number()).as_u64();
let peer_id = route.leader_peer.clone().map(|p| p.id);
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
let status = if let Some(status) = route.leader_status {
Some(status.as_ref().to_string())
let state = if let Some(state) = route.leader_state {
Some(state.as_ref().to_string())
} else {
// Alive by default
Some("ALIVE".to_string())
@@ -242,7 +242,7 @@ impl InformationSchemaRegionPeersBuilder {
self.peer_ids.push(peer_id);
self.peer_addrs.push(peer_addr.as_deref());
self.is_leaders.push(Some("Yes"));
self.statuses.push(status.as_deref());
self.statuses.push(state.as_deref());
self.down_seconds
.push(route.leader_down_millis().map(|m| m / 1000));
}

View File

@@ -0,0 +1,261 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::datanode::RegionStat;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{DfSendableRecordBatchStream, RecordBatch, SendableRecordBatchStream};
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder};
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, REGION_STATISTICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::information_schema::Predicates;
use crate::system_schema::utils;
use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const TABLE_ID: &str = "table_id";
const REGION_NUMBER: &str = "region_number";
const REGION_ROWS: &str = "region_rows";
const DISK_SIZE: &str = "disk_size";
const MEMTABLE_SIZE: &str = "memtable_size";
const MANIFEST_SIZE: &str = "manifest_size";
const SST_SIZE: &str = "sst_size";
const INDEX_SIZE: &str = "index_size";
const ENGINE: &str = "engine";
const REGION_ROLE: &str = "region_role";
const INIT_CAPACITY: usize = 42;
/// The `REGION_STATISTICS` table provides information about the region statistics. Including fields:
///
/// - `region_id`: The region id.
/// - `table_id`: The table id.
/// - `region_number`: The region number.
/// - `region_rows`: The number of rows in region.
/// - `memtable_size`: The memtable size in bytes.
/// - `disk_size`: The approximate disk size in bytes.
/// - `manifest_size`: The manifest size in bytes.
/// - `sst_size`: The sst data files size in bytes.
/// - `index_size`: The sst index files size in bytes.
/// - `engine`: The engine type.
/// - `region_role`: The region role.
///
pub(super) struct InformationSchemaRegionStatistics {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaRegionStatistics {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_ROWS, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(DISK_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(INDEX_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaRegionStatisticsBuilder {
InformationSchemaRegionStatisticsBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaRegionStatistics {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID
}
fn table_name(&self) -> &'static str {
REGION_STATISTICS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_statistics(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaRegionStatisticsBuilder {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
region_ids: UInt64VectorBuilder,
table_ids: UInt32VectorBuilder,
region_numbers: UInt32VectorBuilder,
region_rows: UInt64VectorBuilder,
disk_sizes: UInt64VectorBuilder,
memtable_sizes: UInt64VectorBuilder,
manifest_sizes: UInt64VectorBuilder,
sst_sizes: UInt64VectorBuilder,
index_sizes: UInt64VectorBuilder,
engines: StringVectorBuilder,
region_roles: StringVectorBuilder,
}
impl InformationSchemaRegionStatisticsBuilder {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_rows: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
disk_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
index_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct a new `InformationSchemaRegionStatistics` from the collected data.
async fn make_region_statistics(
&mut self,
request: Option<ScanRequest>,
) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let information_extension = utils::information_extension(&self.catalog_manager)?;
let region_stats = information_extension.region_stats().await?;
for region_stat in region_stats {
self.add_region_statistic(&predicates, region_stat);
}
self.finish()
}
fn add_region_statistic(&mut self, predicate: &Predicates, region_stat: RegionStat) {
let row = [
(REGION_ID, &Value::from(region_stat.id.as_u64())),
(TABLE_ID, &Value::from(region_stat.id.table_id())),
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
(REGION_ROWS, &Value::from(region_stat.num_rows)),
(DISK_SIZE, &Value::from(region_stat.approximate_bytes)),
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
(SST_SIZE, &Value::from(region_stat.sst_size)),
(INDEX_SIZE, &Value::from(region_stat.index_size)),
(ENGINE, &Value::from(region_stat.engine.as_str())),
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
];
if !predicate.eval(&row) {
return;
}
self.region_ids.push(Some(region_stat.id.as_u64()));
self.table_ids.push(Some(region_stat.id.table_id()));
self.region_numbers
.push(Some(region_stat.id.region_number()));
self.region_rows.push(Some(region_stat.num_rows));
self.disk_sizes.push(Some(region_stat.approximate_bytes));
self.memtable_sizes.push(Some(region_stat.memtable_size));
self.manifest_sizes.push(Some(region_stat.manifest_size));
self.sst_sizes.push(Some(region_stat.sst_size));
self.index_sizes.push(Some(region_stat.index_size));
self.engines.push(Some(&region_stat.engine));
self.region_roles.push(Some(&region_stat.role.to_string()));
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.region_ids.finish()),
Arc::new(self.table_ids.finish()),
Arc::new(self.region_numbers.finish()),
Arc::new(self.region_rows.finish()),
Arc::new(self.disk_sizes.finish()),
Arc::new(self.memtable_sizes.finish()),
Arc::new(self.manifest_sizes.finish()),
Arc::new(self.sst_sizes.finish()),
Arc::new(self.index_sizes.finish()),
Arc::new(self.engines.finish()),
Arc::new(self.region_roles.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaRegionStatistics {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_statistics(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/// All table names in `information_schema`.
//! All table names in `information_schema`.
pub const TABLES: &str = "tables";
pub const COLUMNS: &str = "columns";
@@ -45,3 +45,5 @@ pub const TABLE_CONSTRAINTS: &str = "table_constraints";
pub const CLUSTER_INFO: &str = "cluster_info";
pub const VIEWS: &str = "views";
pub const FLOWS: &str = "flows";
pub const PROCEDURE_INFO: &str = "procedure_info";
pub const REGION_STATISTICS: &str = "region_statistics";

View File

@@ -74,7 +74,7 @@ impl MemoryTableBuilder {
/// Construct the `information_schema.{table_name}` virtual table
pub async fn memory_records(&mut self) -> Result<RecordBatch> {
if self.columns.is_empty() {
RecordBatch::new_empty(self.schema.clone()).context(CreateRecordBatchSnafu)
Ok(RecordBatch::new_empty(self.schema.clone()))
} else {
RecordBatch::new(self.schema.clone(), std::mem::take(&mut self.columns))
.context(CreateRecordBatchSnafu)

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! The `pg_catalog.pg_namespace` table implementation.
//! namespace is a schema in greptime
pub(super) mod oid_map;
use std::sync::{Arc, Weak};
@@ -40,9 +43,6 @@ use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable;
use crate::CatalogManager;
/// The `pg_catalog.pg_namespace` table implementation.
/// namespace is a schema in greptime
const NSPNAME: &str = "nspname";
const INIT_CAPACITY: usize = 42;

View File

@@ -12,47 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod tables;
use std::sync::Weak;
use std::sync::{Arc, Weak};
use common_config::Mode;
use common_meta::key::TableMetadataManagerRef;
use meta_client::client::MetaClient;
use snafu::OptionExt;
use crate::error::{Result, UpgradeWeakCatalogManagerRefSnafu};
use crate::error::{GetInformationExtensionSnafu, Result, UpgradeWeakCatalogManagerRefSnafu};
use crate::information_schema::InformationExtensionRef;
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
/// Try to get the server running mode from `[CatalogManager]` weak reference.
pub fn running_mode(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Mode>> {
pub mod tables;
/// Try to get the `[InformationExtension]` from `[CatalogManager]` weak reference.
pub fn information_extension(
catalog_manager: &Weak<dyn CatalogManager>,
) -> Result<InformationExtensionRef> {
let catalog_manager = catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
Ok(catalog_manager
let information_extension = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|manager| manager.running_mode())
.copied())
}
.map(|manager| manager.information_extension())
.context(GetInformationExtensionSnafu)?;
/// Try to get the `[MetaClient]` from `[CatalogManager]` weak reference.
pub fn meta_client(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Arc<MetaClient>>> {
let catalog_manager = catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let meta_client = match catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
{
None => None,
Some(manager) => manager.meta_client(),
};
Ok(meta_client)
Ok(information_extension)
}
/// Try to get the `[TableMetadataManagerRef]` from `[CatalogManager]` weak reference.

View File

@@ -259,7 +259,6 @@ mod tests {
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use common_config::Mode;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
@@ -269,6 +268,8 @@ mod tests {
use datafusion::logical_expr::builder::LogicalTableSource;
use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
use crate::information_schema::NoopInformationExtension;
struct MockDecoder;
impl MockDecoder {
pub fn arc() -> Arc<Self> {
@@ -323,10 +324,10 @@ mod tests {
);
let catalog_manager = KvBackendCatalogManager::new(
Mode::Standalone,
None,
Arc::new(NoopInformationExtension),
backend.clone(),
layered_cache_registry,
None,
);
let table_metadata_manager = TableMetadataManager::new(backend);
let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);

View File

@@ -28,7 +28,7 @@ enum_dispatch = "0.3"
futures-util.workspace = true
lazy_static.workspace = true
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12"
parking_lot.workspace = true
prometheus.workspace = true
prost.workspace = true
query.workspace = true
@@ -45,7 +45,6 @@ common-grpc-expr.workspace = true
datanode.workspace = true
derive-new = "0.5"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
[dev-dependencies.substrait_proto]
package = "substrait"

View File

@@ -10,7 +10,7 @@ name = "greptime"
path = "src/bin/greptime.rs"
[features]
default = ["python"]
default = ["python", "servers/pprof", "servers/mem-prof"]
tokio-console = ["common-telemetry/tokio-console"]
python = ["frontend/python"]
@@ -78,7 +78,7 @@ table.workspace = true
tokio.workspace = true
toml.workspace = true
tonic.workspace = true
tracing-appender = "0.2"
tracing-appender.workspace = true
[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.6"

View File

@@ -158,7 +158,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
addr: String::new(),
}),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
});
}

View File

@@ -35,7 +35,6 @@ use either::Either;
use meta_client::client::MetaClientBuilder;
use query::datafusion::DatafusionQueryEngine;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
use query::query_engine::{DefaultSerializer, QueryEngineState};
use query::QueryEngine;
use rustyline::error::ReadlineError;
@@ -47,12 +46,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cli::cmd::ReplCommand;
use crate::cli::helper::RustylineHelper;
use crate::cli::AttachCommand;
use crate::error;
use crate::error::{
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
SubstraitEncodeLogicalPlanSnafu,
};
use crate::{error, DistributedInformationExtension};
/// Captures the state of the repl, gathers commands and executes them one by one
pub struct Repl {
@@ -175,11 +174,11 @@ impl Repl {
let plan = query_engine
.planner()
.plan(stmt, query_ctx.clone())
.plan(&stmt, query_ctx.clone())
.await
.context(PlanStatementSnafu)?;
let LogicalPlan::DfPlan(plan) = query_engine
let plan = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;
@@ -276,11 +275,12 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
.build(),
);
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
Mode::Distributed,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry,
None,
);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(

View File

@@ -272,9 +272,10 @@ impl StartCommand {
info!("Datanode start command: {:#?}", self);
info!("Datanode options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_datanode_plugins(&mut plugins, &opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartDatanodeSnafu)?;

View File

@@ -41,7 +41,7 @@ use crate::error::{
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
use crate::{log_versions, App, DistributedInformationExtension};
pub const APP_NAME: &str = "greptime-flownode";
@@ -269,11 +269,13 @@ impl StartCommand {
.build(),
);
let information_extension =
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
opts.mode,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry.clone(),
None,
);
let table_metadata_manager =

View File

@@ -36,8 +36,8 @@ use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use query::stats::StatementStatistics;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
@@ -46,7 +46,7 @@ use crate::error::{
Result, StartFrontendSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
use crate::{log_versions, App, DistributedInformationExtension};
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
@@ -266,9 +266,10 @@ impl StartCommand {
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartFrontendSnafu)?;
@@ -315,11 +316,13 @@ impl StartCommand {
.build(),
);
let information_extension =
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
Mode::Distributed,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry.clone(),
None,
);
let executor = HandlerGroupExecutor::new(vec![
@@ -340,6 +343,8 @@ impl StartCommand {
// Some queries are expected to take long time.
let channel_config = ChannelConfig {
timeout: None,
tcp_nodelay: opts.datanode.client.tcp_nodelay,
connect_timeout: Some(opts.datanode.client.connect_timeout),
..Default::default()
};
let client = NodeClients::new(channel_config);
@@ -351,6 +356,7 @@ impl StartCommand {
catalog_manager,
Arc::new(client),
meta_client,
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.with_local_cache_invalidator(layered_cache_registry)
@@ -469,7 +475,7 @@ mod tests {
};
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();

View File

@@ -15,7 +15,17 @@
#![feature(assert_matches, let_chains)]
use async_trait::async_trait;
use catalog::information_schema::InformationExtension;
use client::api::v1::meta::ProcedureStatus;
use common_error::ext::BoxedError;
use common_meta::cluster::{ClusterInfo, NodeInfo};
use common_meta::datanode::RegionStat;
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
use common_meta::rpc::procedure;
use common_procedure::{ProcedureInfo, ProcedureState};
use common_telemetry::{error, info};
use meta_client::MetaClientRef;
use snafu::ResultExt;
use crate::error::Result;
@@ -74,6 +84,7 @@ pub trait App: Send {
}
/// Log the versions of the application, and the arguments passed to the cli.
///
/// `version` should be the same as the output of cli "--version";
/// and the `short_version` is the short version of the codes, often consist of git branch and commit.
pub fn log_versions(version: &str, short_version: &str, app: &str) {
@@ -94,3 +105,69 @@ fn log_env_flags() {
info!("argument: {}", argument);
}
}
pub struct DistributedInformationExtension {
meta_client: MetaClientRef,
}
impl DistributedInformationExtension {
pub fn new(meta_client: MetaClientRef) -> Self {
Self { meta_client }
}
}
#[async_trait::async_trait]
impl InformationExtension for DistributedInformationExtension {
type Error = catalog::error::Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
self.meta_client
.list_nodes(None)
.await
.map_err(BoxedError::new)
.context(catalog::error::ListNodesSnafu)
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
let procedures = self
.meta_client
.list_procedures(&ExecutorContext::default())
.await
.map_err(BoxedError::new)
.context(catalog::error::ListProceduresSnafu)?
.procedures;
let mut result = Vec::with_capacity(procedures.len());
for procedure in procedures {
let pid = match procedure.id {
Some(pid) => pid,
None => return catalog::error::ProcedureIdNotFoundSnafu {}.fail(),
};
let pid = procedure::pb_pid_to_pid(&pid)
.map_err(BoxedError::new)
.context(catalog::error::ConvertProtoDataSnafu)?;
let status = ProcedureStatus::try_from(procedure.status)
.map(|v| v.as_str_name())
.unwrap_or("Unknown")
.to_string();
let procedure_info = ProcedureInfo {
id: pid,
type_name: procedure.type_name,
start_time_ms: procedure.start_time_ms,
end_time_ms: procedure.end_time_ms,
state: ProcedureState::Running,
lock_keys: procedure.lock_keys,
};
result.push((status, procedure_info));
}
Ok(result)
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
self.meta_client
.list_region_stats()
.await
.map_err(BoxedError::new)
.context(catalog::error::ListRegionStatsSnafu)
}
}

View File

@@ -48,6 +48,10 @@ impl Instance {
_guard: guard,
}
}
pub fn get_inner(&self) -> &MetasrvInstance {
&self.instance
}
}
#[async_trait]
@@ -86,6 +90,14 @@ impl Command {
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
self.subcmd.load_options(global_options)
}
pub fn config_file(&self) -> &Option<String> {
self.subcmd.config_file()
}
pub fn env_prefix(&self) -> &String {
self.subcmd.env_prefix()
}
}
#[derive(Parser)]
@@ -105,6 +117,18 @@ impl SubCommand {
SubCommand::Start(cmd) => cmd.load_options(global_options),
}
}
fn config_file(&self) -> &Option<String> {
match self {
SubCommand::Start(cmd) => &cmd.config_file,
}
}
fn env_prefix(&self) -> &String {
match self {
SubCommand::Start(cmd) => &cmd.env_prefix,
}
}
}
#[derive(Debug, Default, Parser)]
@@ -249,9 +273,10 @@ impl StartCommand {
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_metasrv_plugins(&mut plugins, &opts)
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartMetaServerSnafu)?;

View File

@@ -15,6 +15,7 @@
use clap::Parser;
use common_config::Configurable;
use common_runtime::global::RuntimeOptions;
use plugins::PluginOptions;
use serde::{Deserialize, Serialize};
#[derive(Parser, Default, Debug, Clone)]
@@ -40,6 +41,8 @@ pub struct GlobalOptions {
pub struct GreptimeOptions<T> {
/// The runtime options.
pub runtime: RuntimeOptions,
/// The plugin options.
pub plugins: Vec<PluginOptions>,
/// The options of each component (like Datanode or Standalone) of GreptimeDB.
#[serde(flatten)]

View File

@@ -12,19 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::net::SocketAddr;
use std::sync::Arc;
use std::{fs, path};
use async_trait::async_trait;
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use catalog::information_schema::InformationExtension;
use catalog::kvbackend::KvBackendCatalogManager;
use clap::Parser;
use client::api::v1::meta::RegionRole;
use common_base::Plugins;
use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
use common_error::ext::BoxedError;
use common_meta::cache::LayeredCacheRegistryBuilder;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::cluster::{NodeInfo, NodeStatus};
use common_meta::datanode::RegionStat;
use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
@@ -33,10 +38,11 @@ use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_procedure::ProcedureManagerRef;
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
use common_telemetry::info;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_time::timezone::set_default_timezone;
@@ -44,6 +50,7 @@ use common_version::{short_version, version};
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{FlowWorkerManager, FlownodeBuilder, FrontendInvoker};
use frontend::frontend::FrontendOptions;
@@ -55,6 +62,7 @@ use frontend::service_config::{
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
use query::stats::StatementStatistics;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::grpc::GrpcOptions;
@@ -243,6 +251,13 @@ pub struct Instance {
_guard: Vec<WorkerGuard>,
}
impl Instance {
/// Find the socket addr of a server by its `name`.
pub async fn server_addr(&self, name: &str) -> Option<SocketAddr> {
self.frontend.server_handlers().addr(name).await
}
}
#[async_trait]
impl App for Instance {
fn name(&self) -> &str {
@@ -333,7 +348,8 @@ pub struct StartCommand {
}
impl StartCommand {
fn load_options(
/// Load the GreptimeDB options from various sources (command line, config file or env).
pub fn load_options(
&self,
global_options: &GlobalOptions,
) -> Result<GreptimeOptions<StandaloneOptions>> {
@@ -423,7 +439,8 @@ impl StartCommand {
#[allow(unreachable_code)]
#[allow(unused_variables)]
#[allow(clippy::diverging_sub_expression)]
async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
/// Build GreptimeDB instance with the loaded options.
pub async fn build(&self, opts: GreptimeOptions<StandaloneOptions>) -> Result<Instance> {
common_runtime::init_global_runtimes(&opts.runtime);
let guard = common_telemetry::init_global_logging(
@@ -438,15 +455,16 @@ impl StartCommand {
info!("Standalone options: {opts:#?}");
let mut plugins = Plugins::new();
let plugin_opts = opts.plugins;
let opts = opts.component;
let fe_opts = opts.frontend_options();
let dn_opts = opts.datanode_options();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
.await
.context(StartFrontendSnafu)?;
plugins::setup_datanode_plugins(&mut plugins, &dn_opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
.await
.context(StartDatanodeSnafu)?;
@@ -477,22 +495,26 @@ impl StartCommand {
.build(),
);
let catalog_manager = KvBackendCatalogManager::new(
dn_opts.mode,
None,
kv_backend.clone(),
layered_cache_registry.clone(),
);
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
.with_kv_backend(kv_backend.clone())
.build()
.await
.context(StartDatanodeSnafu)?;
let information_extension = Arc::new(StandaloneInformationExtension::new(
datanode.region_server(),
procedure_manager.clone(),
));
let catalog_manager = KvBackendCatalogManager::new(
information_extension,
kv_backend.clone(),
layered_cache_registry.clone(),
Some(procedure_manager.clone()),
);
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let flow_builder = FlownodeBuilder::new(
Default::default(),
@@ -556,6 +578,7 @@ impl StartCommand {
catalog_manager.clone(),
node_manager.clone(),
ddl_task_executor.clone(),
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.try_build()
@@ -641,6 +664,93 @@ impl StartCommand {
}
}
pub struct StandaloneInformationExtension {
region_server: RegionServer,
procedure_manager: ProcedureManagerRef,
start_time_ms: u64,
}
impl StandaloneInformationExtension {
pub fn new(region_server: RegionServer, procedure_manager: ProcedureManagerRef) -> Self {
Self {
region_server,
procedure_manager,
start_time_ms: common_time::util::current_time_millis() as u64,
}
}
}
#[async_trait::async_trait]
impl InformationExtension for StandaloneInformationExtension {
type Error = catalog::error::Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
let build_info = common_version::build_info();
let node_info = NodeInfo {
// For the standalone:
// - id always 0
// - empty string for peer_addr
peer: Peer {
id: 0,
addr: "".to_string(),
},
last_activity_ts: -1,
status: NodeStatus::Standalone,
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
// Use `self.start_time_ms` instead.
// It's not precise but enough.
start_time_ms: self.start_time_ms,
};
Ok(vec![node_info])
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
self.procedure_manager
.list_procedures()
.await
.map_err(BoxedError::new)
.map(|procedures| {
procedures
.into_iter()
.map(|procedure| {
let status = procedure.state.as_str_name().to_string();
(status, procedure)
})
.collect::<Vec<_>>()
})
.context(catalog::error::ListProceduresSnafu)
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
let stats = self
.region_server
.reportable_regions()
.into_iter()
.map(|stat| {
let region_stat = self
.region_server
.region_statistic(stat.region_id)
.unwrap_or_default();
RegionStat {
id: stat.region_id,
rcus: 0,
wcus: 0,
approximate_bytes: region_stat.estimated_disk_size(),
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
})
.collect::<Vec<_>>();
Ok(stats)
}
}
#[cfg(test)]
mod tests {
use std::default::Default;
@@ -665,7 +775,7 @@ mod tests {
};
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();

View File

@@ -20,7 +20,7 @@ use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
@@ -159,8 +159,20 @@ fn test_load_metasrv_example_config() {
level: Some("info".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
slow_query: SlowQueryOptions {
enable: false,
threshold: Some(Duration::from_secs(10)),
sample_ratio: Some(1.0),
},
..Default::default()
},
datanode: meta_srv::metasrv::DatanodeOptions {
client: meta_srv::metasrv::DatanodeClientOptions {
timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(10),
tcp_nodelay: true,
},
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),

View File

@@ -8,7 +8,7 @@ license.workspace = true
workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
anymap2 = "0.13"
async-trait.workspace = true
bitvec = "1.0"
bytes.workspace = true

View File

@@ -12,20 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
/// [`Plugins`] is a wrapper of [AnyMap](https://github.com/chris-morgan/anymap) and provides a thread-safe way to store and retrieve plugins.
use anymap2::SendSyncAnyMap;
/// [`Plugins`] is a wrapper of [anymap2](https://github.com/azriel91/anymap2) and provides a thread-safe way to store and retrieve plugins.
/// Make it Cloneable and we can treat it like an Arc struct.
#[derive(Default, Clone)]
pub struct Plugins {
inner: Arc<RwLock<anymap::Map<dyn Any + Send + Sync>>>,
inner: Arc<RwLock<SendSyncAnyMap>>,
}
impl Plugins {
pub fn new() -> Self {
Self {
inner: Arc::new(RwLock::new(anymap::Map::new())),
inner: Arc::new(RwLock::new(SendSyncAnyMap::new())),
}
}
@@ -37,6 +38,18 @@ impl Plugins {
self.read().get::<T>().cloned()
}
pub fn get_or_insert<T, F>(&self, f: F) -> T
where
T: 'static + Send + Sync + Clone,
F: FnOnce() -> T,
{
let mut binding = self.write();
if !binding.contains::<T>() {
binding.insert(f());
}
binding.get::<T>().cloned().unwrap()
}
pub fn map_mut<T: 'static + Send + Sync, F, R>(&self, mapper: F) -> R
where
F: FnOnce(Option<&mut T>) -> R,
@@ -61,11 +74,11 @@ impl Plugins {
self.read().is_empty()
}
fn read(&self) -> RwLockReadGuard<anymap::Map<dyn Any + Send + Sync>> {
fn read(&self) -> RwLockReadGuard<SendSyncAnyMap> {
self.inner.read().unwrap()
}
fn write(&self) -> RwLockWriteGuard<anymap::Map<dyn Any + Send + Sync>> {
fn write(&self) -> RwLockWriteGuard<SendSyncAnyMap> {
self.inner.write().unwrap()
}
}

View File

@@ -46,8 +46,9 @@ impl From<String> for SecretString {
}
}
/// Wrapper type for values that contains secrets, which attempts to limit
/// accidental exposure and ensure secrets are wiped from memory when dropped.
/// Wrapper type for values that contains secrets.
///
/// It attempts to limit accidental exposure and ensure secrets are wiped from memory when dropped.
/// (e.g. passwords, cryptographic keys, access tokens or other credentials)
///
/// Access to the secret inner value occurs through the [`ExposeSecret`]

View File

@@ -98,14 +98,20 @@ pub const INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID: u32 = 31;
pub const INFORMATION_SCHEMA_VIEW_TABLE_ID: u32 = 32;
/// id for information_schema.FLOWS
pub const INFORMATION_SCHEMA_FLOW_TABLE_ID: u32 = 33;
/// ----- End of information_schema tables -----
/// id for information_schema.procedure_info
pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
/// id for information_schema.region_statistics
pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
// ----- End of information_schema tables -----
/// ----- Begin of pg_catalog tables -----
pub const PG_CATALOG_PG_CLASS_TABLE_ID: u32 = 256;
pub const PG_CATALOG_PG_TYPE_TABLE_ID: u32 = 257;
pub const PG_CATALOG_PG_NAMESPACE_TABLE_ID: u32 = 258;
/// ----- End of pg_catalog tables -----
// ----- End of pg_catalog tables -----
pub const MITO_ENGINE: &str = "mito";
pub const MITO2_ENGINE: &str = "mito2";
pub const METRIC_ENGINE: &str = "metric";

View File

@@ -38,6 +38,8 @@ pub enum StatusCode {
Cancelled = 1005,
/// Illegal state, can be exposed to users.
IllegalState = 1006,
/// Caused by some error originated from external system.
External = 1007,
// ====== End of common status code ================
// ====== Begin of SQL related status code =========
@@ -162,7 +164,8 @@ impl StatusCode {
| StatusCode::InvalidAuthHeader
| StatusCode::AccessDenied
| StatusCode::PermissionDenied
| StatusCode::RequestOutdated => false,
| StatusCode::RequestOutdated
| StatusCode::External => false,
}
}
@@ -177,7 +180,9 @@ impl StatusCode {
| StatusCode::IllegalState
| StatusCode::EngineExecuteQuery
| StatusCode::StorageUnavailable
| StatusCode::RuntimeResourcesExhausted => true,
| StatusCode::RuntimeResourcesExhausted
| StatusCode::External => true,
StatusCode::Success
| StatusCode::Unsupported
| StatusCode::InvalidArguments
@@ -256,7 +261,7 @@ macro_rules! define_into_tonic_status {
pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
match status_code {
StatusCode::Success => Code::Ok,
StatusCode::Unknown => Code::Unknown,
StatusCode::Unknown | StatusCode::External => Code::Unknown,
StatusCode::Unsupported => Code::Unimplemented,
StatusCode::Unexpected
| StatusCode::IllegalState

View File

@@ -9,7 +9,7 @@ workspace = true
[features]
default = ["geo"]
geo = ["geohash", "h3o"]
geo = ["geohash", "h3o", "s2"]
[dependencies]
api.workspace = true
@@ -27,6 +27,7 @@ common-time.workspace = true
common-version.workspace = true
datafusion.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
jsonb.workspace = true
@@ -34,6 +35,7 @@ num = "0.4"
num-traits = "0.2"
once_cell.workspace = true
paste = "1.0"
s2 = { version = "0.0.12", optional = true }
serde.workspace = true
serde_json.workspace = true
session.workspace = true

View File

@@ -16,7 +16,6 @@ mod argmax;
mod argmin;
mod diff;
mod mean;
mod percentile;
mod polyval;
mod scipy_stats_norm_cdf;
mod scipy_stats_norm_pdf;
@@ -28,7 +27,6 @@ pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use percentile::PercentileAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
@@ -91,8 +89,14 @@ impl AggregateFunctions {
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
register_aggr_func!("percentile", 2, PercentileAccumulatorCreator);
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
#[cfg(feature = "geo")]
register_aggr_func!(
"json_encode_path",
3,
super::geo::encoding::JsonPathEncodeFunctionCreator
);
}
}

View File

@@ -16,7 +16,10 @@ use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -16,7 +16,10 @@ use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, Result,
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -1,436 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, Result,
};
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::OrdPrimitive;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num::NumCast;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.percentile.html?highlight=percentile#numpy.percentile
// if the p is 50,then the Percentile become median
// we use two heap great and not_greater
// the not_greater push the value that smaller than P-value
// the greater push the value that bigger than P-value
// just like the percentile in numpy:
// Given a vector V of length N, the q-th percentile of V is the value q/100 of the way from the minimum to the maximum in a sorted copy of V.
// The values and distances of the two nearest neighbors as well as the method parameter will determine the percentile
// if the normalized ranking does not match the location of q exactly.
// This function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100.
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
// i+g = (q-alpha)/(n-alpha-beta+1)
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
// With 'i' being the floor and 'g' the fractional part of the result.
// the default method is linear where
// alpha = 1
// beta = 1
#[derive(Debug, Default)]
pub struct Percentile<T>
where
T: WrapperType,
{
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
not_greater: BinaryHeap<OrdPrimitive<T>>,
n: u64,
p: Option<f64>,
}
impl<T> Percentile<T>
where
T: WrapperType,
{
fn push(&mut self, value: T) {
let value = OrdPrimitive::<T>(value);
self.n += 1;
if self.not_greater.is_empty() {
self.not_greater.push(value);
return;
}
// to keep the not_greater length == floor+1
// so to ensure the peek of the not_greater is array[floor]
// and the peek of the greater is array[floor+1]
let p = self.p.unwrap_or(0.0_f64);
let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
if value <= *self.not_greater.peek().unwrap() {
self.not_greater.push(value);
if self.not_greater.len() > (floor + 1.0) as usize {
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
}
} else {
self.greater.push(Reverse(value));
if self.not_greater.len() < (floor + 1.0) as usize {
self.not_greater.push(self.greater.pop().unwrap().0);
}
}
}
}
impl<T> Accumulator for Percentile<T>
where
T: WrapperType,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.greater
.iter()
.map(|x| &x.0)
.chain(self.not_greater.iter())
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.p.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
// This is a unary accumulator, so only one column is provided.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
for i in 1..x.len() {
ensure!(first == x.get(i), InvalidInputColSnafu);
}
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(p) = self.p {
ensure!(p == first, InvalidInputColSnafu);
} else {
self.p = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`"
}
);
let p = &states[1];
let p = p
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect float64vector, got vector type {}",
p.vector_type_name()
),
})?;
let p = p.get(0);
if p.is_null() {
return Ok(());
}
let p = match p {
Value::Float64(OrderedFloat(p)) => p,
_ => unreachable!(),
};
self.p = Some(p);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.not_greater.is_empty() {
assert!(
self.greater.is_empty(),
"not expected in two-heap percentile algorithm, there must be a bug when implementing it"
);
}
let not_greater = self.not_greater.peek();
if not_greater.is_none() {
return Ok(Value::Null);
}
let not_greater = (*self.not_greater.peek().unwrap()).as_primitive();
let percentile = if self.greater.is_empty() {
NumCast::from(not_greater).unwrap()
} else {
let greater = self.greater.peek().unwrap();
let p = if let Some(p) = self.p {
p
} else {
return Ok(Value::Null);
};
let fract = (((self.n - 1) as f64) * p / 100_f64).fract();
let not_greater_v: f64 = NumCast::from(not_greater).unwrap();
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
not_greater_v * (1.0 - fract) + greater_v * fract
};
Ok(Value::from(percentile))
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct PercentileAccumulatorCreator {}
impl AggregateFunctionCreator for PercentileAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"PERCENTILE\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
// unwrap is safe because we have checked input_types len must equals 1
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut percentile = Percentile::<i32>::default();
percentile.update_batch(&[]).unwrap();
assert!(percentile.not_greater.is_empty());
assert!(percentile.greater.is_empty());
assert_eq!(Value::Null, percentile.evaluate().unwrap());
// test update one not-null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(42)])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
// test update one null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::Null, percentile.evaluate().unwrap());
// test update no null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(2_f64), percentile.evaluate().unwrap());
// test update null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
// test update with constant vector
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
// test left border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(0.0_f64),
Some(0.0_f64),
Some(0.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(-1.0_f64), percentile.evaluate().unwrap());
// test medium
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(50.0_f64),
Some(50.0_f64),
Some(50.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(1.0_f64), percentile.evaluate().unwrap());
// test right border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(2.0_f64), percentile.evaluate().unwrap());
// the following is the result of numpy.percentile
// numpy.percentile
// a = np.array([[10,7,4]])
// np.percentile(a,40)
// >> 6.400000000000
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(40.0_f64),
Some(40.0_f64),
Some(40.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(6.400000000_f64), percentile.evaluate().unwrap());
// the following is the result of numpy.percentile
// a = np.array([[10,7,4]])
// np.percentile(a,95)
// >> 9.7000000000000011
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(95.0_f64),
Some(95.0_f64),
Some(95.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(
Value::from(9.700_000_000_000_001_f64),
percentile.evaluate().unwrap()
);
}
}

View File

@@ -18,8 +18,9 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -14,18 +14,19 @@
use std::fmt;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::data_type::DataType;
use datatypes::arrow::compute::kernels::numeric;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use snafu::ensure;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// A function adds an interval value to Timestamp, Date or DateTime, and return the result.
/// A function adds an interval value to Timestamp, Date, and return the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateAddFunction;
@@ -44,7 +45,6 @@ impl Function for DateAddFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,64 +69,14 @@ impl Function for DateAddFunction {
}
);
let left = &columns[0];
let right = &columns[1];
let left = columns[0].to_arrow_array();
let right = columns[1].to_arrow_array();
let size = left.len();
let left_datatype = columns[0].data_type();
match left_datatype {
ConcreteDataType::Timestamp(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let ts = left.get(i).as_timestamp();
let interval = right.get(i).as_interval();
let new_ts = match (ts, interval) {
(Some(ts), Some(interval)) => ts.add_interval(interval),
_ => ts,
};
result.push_value_ref(ValueRef::from(new_ts));
}
Ok(result.to_vector())
}
ConcreteDataType::Date(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let date = left.get(i).as_date();
let interval = right.get(i).as_interval();
let new_date = match (date, interval) {
(Some(date), Some(interval)) => date.add_interval(interval),
_ => date,
};
result.push_value_ref(ValueRef::from(new_date));
}
Ok(result.to_vector())
}
ConcreteDataType::DateTime(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let datetime = left.get(i).as_datetime();
let interval = right.get(i).as_interval();
let new_datetime = match (datetime, interval) {
(Some(datetime), Some(interval)) => datetime.add_interval(interval),
_ => datetime,
};
result.push_value_ref(ValueRef::from(new_datetime));
}
Ok(result.to_vector())
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail(),
}
let result = numeric::add(&left, &right).context(ArrowComputeSnafu)?;
let arrow_type = result.data_type().clone();
Helper::try_into_vector(result).context(IntoVectorSnafu {
data_type: arrow_type,
})
}
}
@@ -144,8 +94,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
TimestampSecondVector,
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
};
use super::{DateAddFunction, *};
@@ -168,16 +117,15 @@ mod tests {
ConcreteDataType::date_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::datetime_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
assert!(
matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 18));
} if sigs.len() == 15),
"{:?}",
f.signature()
);
}
#[test]
@@ -243,36 +191,4 @@ mod tests {
}
}
}
#[test]
fn test_datetime_date_add() {
let f = DateAddFunction;
let dates = vec![Some(123), None, Some(42), None];
// Intervals in months
let intervals = vec![1, 2, 3, 1];
let results = [Some(2678400123), None, Some(7776000042), None];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::DateTime(date) => {
assert_eq!(date.val(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}

View File

@@ -14,18 +14,19 @@
use std::fmt;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::data_type::DataType;
use datatypes::arrow::compute::kernels::numeric;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use snafu::ensure;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// A function subtracts an interval value to Timestamp, Date or DateTime, and return the result.
/// A function subtracts an interval value to Timestamp, Date, and return the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateSubFunction;
@@ -44,7 +45,6 @@ impl Function for DateSubFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,65 +69,14 @@ impl Function for DateSubFunction {
}
);
let left = &columns[0];
let right = &columns[1];
let left = columns[0].to_arrow_array();
let right = columns[1].to_arrow_array();
let size = left.len();
let left_datatype = columns[0].data_type();
match left_datatype {
ConcreteDataType::Timestamp(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let ts = left.get(i).as_timestamp();
let interval = right.get(i).as_interval();
let new_ts = match (ts, interval) {
(Some(ts), Some(interval)) => ts.sub_interval(interval),
_ => ts,
};
result.push_value_ref(ValueRef::from(new_ts));
}
Ok(result.to_vector())
}
ConcreteDataType::Date(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let date = left.get(i).as_date();
let interval = right.get(i).as_interval();
let new_date = match (date, interval) {
(Some(date), Some(interval)) => date.sub_interval(interval),
_ => date,
};
result.push_value_ref(ValueRef::from(new_date));
}
Ok(result.to_vector())
}
ConcreteDataType::DateTime(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let datetime = left.get(i).as_datetime();
let interval = right.get(i).as_interval();
let new_datetime = match (datetime, interval) {
(Some(datetime), Some(interval)) => datetime.sub_interval(interval),
_ => datetime,
};
result.push_value_ref(ValueRef::from(new_datetime));
}
Ok(result.to_vector())
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail(),
}
let result = numeric::sub(&left, &right).context(ArrowComputeSnafu)?;
let arrow_type = result.data_type().clone();
Helper::try_into_vector(result).context(IntoVectorSnafu {
data_type: arrow_type,
})
}
}
@@ -145,8 +94,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
TimestampSecondVector,
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
};
use super::{DateSubFunction, *};
@@ -174,11 +122,15 @@ mod tests {
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
assert!(
matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 18));
} if sigs.len() == 15),
"{:?}",
f.signature()
);
}
#[test]
@@ -250,42 +202,4 @@ mod tests {
}
}
}
#[test]
fn test_datetime_date_sub() {
let f = DateSubFunction;
let millis_per_month = 3600 * 24 * 30 * 1000;
let dates = vec![
Some(123 * millis_per_month),
None,
Some(42 * millis_per_month),
None,
];
// Intervals in months
let intervals = vec![1, 2, 3, 1];
let results = [Some(316137600000), None, Some(100915200000), None];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::DateTime(date) => {
assert_eq!(date.val(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}

View File

@@ -13,11 +13,11 @@
// limitations under the License.
use std::sync::Arc;
pub(crate) mod encoding;
mod geohash;
mod h3;
use geohash::GeohashFunction;
use h3::H3Function;
mod helpers;
mod s2;
use crate::function_registry::FunctionRegistry;
@@ -25,7 +25,40 @@ pub(crate) struct GeoFunctions;
impl GeoFunctions {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(GeohashFunction));
registry.register(Arc::new(H3Function));
// geohash
registry.register(Arc::new(geohash::GeohashFunction));
registry.register(Arc::new(geohash::GeohashNeighboursFunction));
// h3 index
registry.register(Arc::new(h3::H3LatLngToCell));
registry.register(Arc::new(h3::H3LatLngToCellString));
// h3 index inspection
registry.register(Arc::new(h3::H3CellBase));
registry.register(Arc::new(h3::H3CellIsPentagon));
registry.register(Arc::new(h3::H3StringToCell));
registry.register(Arc::new(h3::H3CellToString));
registry.register(Arc::new(h3::H3CellCenterLatLng));
registry.register(Arc::new(h3::H3CellResolution));
// h3 hierarchical grid
registry.register(Arc::new(h3::H3CellCenterChild));
registry.register(Arc::new(h3::H3CellParent));
registry.register(Arc::new(h3::H3CellToChildren));
registry.register(Arc::new(h3::H3CellToChildrenSize));
registry.register(Arc::new(h3::H3CellToChildPos));
registry.register(Arc::new(h3::H3ChildPosToCell));
// h3 grid traversal
registry.register(Arc::new(h3::H3GridDisk));
registry.register(Arc::new(h3::H3GridDiskDistances));
registry.register(Arc::new(h3::H3GridDistance));
registry.register(Arc::new(h3::H3GridPathCells));
// s2
registry.register(Arc::new(s2::S2LatLngToCell));
registry.register(Arc::new(s2::S2CellLevel));
registry.register(Arc::new(s2::S2CellToToken));
registry.register(Arc::new(s2::S2CellParent));
}
}

View File

@@ -0,0 +1,223 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{self, InvalidInputStateSnafu, Result};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::AccumulatorCreatorFunction;
use common_time::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use datatypes::vectors::VectorRef;
use snafu::{ensure, ResultExt};
use super::helpers::{ensure_columns_len, ensure_columns_n};
/// Accumulator of lat, lng, timestamp tuples
#[derive(Debug)]
pub struct JsonPathAccumulator {
timestamp_type: ConcreteDataType,
lat: Vec<Option<f64>>,
lng: Vec<Option<f64>>,
timestamp: Vec<Option<Timestamp>>,
}
impl JsonPathAccumulator {
fn new(timestamp_type: ConcreteDataType) -> Self {
Self {
lat: Vec::default(),
lng: Vec::default(),
timestamp: Vec::default(),
timestamp_type,
}
}
}
impl Accumulator for JsonPathAccumulator {
fn state(&self) -> Result<Vec<Value>> {
Ok(vec![
Value::List(ListValue::new(
self.lat.iter().map(|i| Value::from(*i)).collect(),
ConcreteDataType::float64_datatype(),
)),
Value::List(ListValue::new(
self.lng.iter().map(|i| Value::from(*i)).collect(),
ConcreteDataType::float64_datatype(),
)),
Value::List(ListValue::new(
self.timestamp.iter().map(|i| Value::from(*i)).collect(),
self.timestamp_type.clone(),
)),
])
}
fn update_batch(&mut self, columns: &[VectorRef]) -> Result<()> {
// update batch as in datafusion just provides the accumulator original
// input.
//
// columns is vec of [`lat`, `lng`, `timestamp`]
// where
// - `lat` is a vector of `Value::Float64` or similar type. Each item in
// the vector is a row in given dataset.
// - so on so forth for `lng` and `timestamp`
ensure_columns_n!(columns, 3);
let lat = &columns[0];
let lng = &columns[1];
let ts = &columns[2];
let size = lat.len();
for idx in 0..size {
self.lat.push(lat.get(idx).as_f64_lossy());
self.lng.push(lng.get(idx).as_f64_lossy());
self.timestamp.push(ts.get(idx).as_timestamp());
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
// merge batch as in datafusion gives state accumulated from the data
// returned from child accumulators' state() call
// In our particular implementation, the data structure is like
//
// states is vec of [`lat`, `lng`, `timestamp`]
// where
// - `lat` is a vector of `Value::List`. Each item in the list is all
// coordinates from a child accumulator.
// - so on so forth for `lng` and `timestamp`
ensure_columns_n!(states, 3);
let lat_lists = &states[0];
let lng_lists = &states[1];
let ts_lists = &states[2];
let len = lat_lists.len();
for idx in 0..len {
if let Some(lat_list) = lat_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in lat_list.items() {
self.lat.push(v.as_f64_lossy());
}
}
if let Some(lng_list) = lng_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in lng_list.items() {
self.lng.push(v.as_f64_lossy());
}
}
if let Some(ts_list) = ts_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in ts_list.items() {
self.timestamp.push(v.as_timestamp());
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mut work_vec: Vec<(&Option<f64>, &Option<f64>, &Option<Timestamp>)> = self
.lat
.iter()
.zip(self.lng.iter())
.zip(self.timestamp.iter())
.map(|((a, b), c)| (a, b, c))
.collect();
// sort by timestamp, we treat null timestamp as 0
work_vec.sort_unstable_by_key(|tuple| tuple.2.unwrap_or_else(|| Timestamp::new_second(0)));
let result = serde_json::to_string(
&work_vec
.into_iter()
// note that we transform to lng,lat for geojson compatibility
.map(|(lat, lng, _)| vec![lng, lat])
.collect::<Vec<Vec<&Option<f64>>>>(),
)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Serialization failure: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
Ok(Value::String(result.into()))
}
}
/// This function accept rows of lat, lng and timestamp, sort with timestamp and
/// encoding them into a geojson-like path.
///
/// Example:
///
/// ```sql
/// SELECT json_encode_path(lat, lon, timestamp) FROM table [group by ...];
/// ```
///
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct JsonPathEncodeFunctionCreator {}
impl AggregateFunctionCreator for JsonPathEncodeFunctionCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let ts_type = types[2].clone();
Ok(Box::new(JsonPathAccumulator::new(ts_type)))
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 3, InvalidInputStateSnafu);
let timestamp_type = input_types[2].clone();
Ok(vec![
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
ConcreteDataType::list_datatype(timestamp_type),
])
}
}

View File

@@ -20,23 +20,69 @@ use common_query::error::{self, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::value::Value;
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
use datatypes::value::{ListValue, Value};
use datatypes::vectors::{ListVectorBuilder, MutableVector, StringVectorBuilder, VectorRef};
use geohash::Coord;
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
macro_rules! ensure_resolution_usize {
($v: ident) => {
if !($v > 0 && $v <= 12) {
Err(BoxedError::new(PlainError::new(
format!("Invalid geohash resolution {}, expect value: [1, 12]", $v),
StatusCode::EngineExecuteQuery,
)))
.context(error::ExecuteSnafu)
} else {
Ok($v as usize)
}
};
}
fn try_into_resolution(v: Value) -> Result<usize> {
match v {
Value::Int8(v) => {
ensure_resolution_usize!(v)
}
Value::Int16(v) => {
ensure_resolution_usize!(v)
}
Value::Int32(v) => {
ensure_resolution_usize!(v)
}
Value::Int64(v) => {
ensure_resolution_usize!(v)
}
Value::UInt8(v) => {
ensure_resolution_usize!(v)
}
Value::UInt16(v) => {
ensure_resolution_usize!(v)
}
Value::UInt32(v) => {
ensure_resolution_usize!(v)
}
Value::UInt64(v) => {
ensure_resolution_usize!(v)
}
_ => unreachable!(),
}
}
/// Function that return geohash string for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashFunction;
const NAME: &str = "geohash";
impl GeohashFunction {
const NAME: &'static str = "geohash";
}
impl Function for GeohashFunction {
fn name(&self) -> &str {
NAME
Self::NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -93,17 +139,7 @@ impl Function for GeohashFunction {
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let r = match resolution_vec.get(i) {
Value::Int8(v) => v as usize,
Value::Int16(v) => v as usize,
Value::Int32(v) => v as usize,
Value::Int64(v) => v as usize,
Value::UInt8(v) => v as usize,
Value::UInt16(v) => v as usize,
Value::UInt32(v) => v as usize,
Value::UInt64(v) => v as usize,
_ => unreachable!(),
};
let r = try_into_resolution(resolution_vec.get(i))?;
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
@@ -130,6 +166,134 @@ impl Function for GeohashFunction {
impl fmt::Display for GeohashFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME)
write!(f, "{}", Self::NAME)
}
}
/// Function that return geohash string for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashNeighboursFunction;
impl GeohashNeighboursFunction {
const NAME: &'static str = "geohash_neighbours";
}
impl Function for GeohashNeighboursFunction {
fn name(&self) -> &str {
GeohashNeighboursFunction::NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::list_datatype(
ConcreteDataType::string_datatype(),
))
}
fn signature(&self) -> Signature {
let mut signatures = Vec::new();
for coord_type in &[
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
] {
for resolution_type in &[
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
] {
signatures.push(TypeSignature::Exact(vec![
// latitude
coord_type.clone(),
// longitude
coord_type.clone(),
// resolution
resolution_type.clone(),
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 3,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect 3, provided : {}",
columns.len()
),
}
);
let lat_vec = &columns[0];
let lon_vec = &columns[1];
let resolution_vec = &columns[2];
let size = lat_vec.len();
let mut results =
ListVectorBuilder::with_type_capacity(ConcreteDataType::string_datatype(), size);
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let r = try_into_resolution(resolution_vec.get(i))?;
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
let coord = Coord { x: lon, y: lat };
let encoded = geohash::encode(coord, r)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Geohash error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
let neighbours = geohash::neighbors(&encoded)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Geohash error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
Some(ListValue::new(
vec![
neighbours.n,
neighbours.nw,
neighbours.w,
neighbours.sw,
neighbours.s,
neighbours.se,
neighbours.e,
neighbours.ne,
]
.into_iter()
.map(Value::from)
.collect(),
ConcreteDataType::string_datatype(),
))
}
_ => None,
};
if let Some(list_value) = result {
results.push(Some(list_value.as_scalar_ref()));
} else {
results.push(None);
}
}
Ok(results.to_vector())
}
}
impl fmt::Display for GeohashNeighboursFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", GeohashNeighboursFunction::NAME)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
macro_rules! ensure_columns_len {
($columns:ident) => {
snafu::ensure!(
$columns.windows(2).all(|c| c[0].len() == c[1].len()),
common_query::error::InvalidFuncArgsSnafu {
err_msg: "The length of input columns are in different size"
}
)
};
($column_a:ident, $column_b:ident, $($column_n:ident),*) => {
snafu::ensure!(
{
let mut result = $column_a.len() == $column_b.len();
$(
result = result && ($column_a.len() == $column_n.len());
)*
result
}
common_query::error::InvalidFuncArgsSnafu {
err_msg: "The length of input columns are in different size"
}
)
};
}
pub(super) use ensure_columns_len;
macro_rules! ensure_columns_n {
($columns:ident, $n:literal) => {
snafu::ensure!(
$columns.len() == $n,
common_query::error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of arguments is not correct, expect {}, provided : {}",
stringify!($n),
$columns.len()
),
}
);
if $n > 1 {
ensure_columns_len!($columns);
}
};
}
pub(super) use ensure_columns_n;
macro_rules! ensure_and_coerce {
($compare:expr, $coerce:expr) => {{
snafu::ensure!(
$compare,
common_query::error::InvalidFuncArgsSnafu {
err_msg: "Argument was outside of acceptable range "
}
);
Ok($coerce)
}};
}
pub(super) use ensure_and_coerce;

View File

@@ -0,0 +1,275 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::value::Value;
use datatypes::vectors::{MutableVector, StringVectorBuilder, UInt64VectorBuilder, VectorRef};
use derive_more::Display;
use once_cell::sync::Lazy;
use s2::cellid::{CellID, MAX_LEVEL};
use s2::latlng::LatLng;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint64_datatype(),
]
});
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
]
});
static LEVEL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
]
});
/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
///
/// [s2]: http://s2geometry.io
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2LatLngToCell;
impl Function for S2LatLngToCell {
fn name(&self) -> &str {
"s2_latlng_to_cell"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
let mut signatures = Vec::with_capacity(COORDINATE_TYPES.len());
for coord_type in COORDINATE_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
// latitude
coord_type.clone(),
// longitude
coord_type.clone(),
]));
}
Signature::one_of(signatures, Volatility::Stable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 2);
let lat_vec = &columns[0];
let lon_vec = &columns[1];
let size = lat_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
let coord = LatLng::from_degrees(lat, lon);
ensure!(
coord.is_valid(),
InvalidFuncArgsSnafu {
err_msg: "The input coordinates are invalid",
}
);
let cellid = CellID::from(coord);
let encoded: u64 = cellid.0;
Some(encoded)
}
_ => None,
};
results.push(result);
}
Ok(results.to_vector())
}
}
/// Return the level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellLevel;
impl Function for S2CellLevel {
fn name(&self) -> &str {
"s2_cell_level"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 1);
let cell_vec = &columns[0];
let size = cell_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let res = cell.map(|cell| cell.level());
results.push(res);
}
Ok(results.to_vector())
}
}
/// Return the string presentation of the cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellToToken;
impl Function for S2CellToToken {
fn name(&self) -> &str {
"s2_cell_to_token"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 1);
let cell_vec = &columns[0];
let size = cell_vec.len();
let mut results = StringVectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let res = cell.map(|cell| cell.to_token());
results.push(res.as_deref());
}
Ok(results.to_vector())
}
}
/// Return parent at given level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellParent;
impl Function for S2CellParent {
fn name(&self) -> &str {
"s2_cell_parent"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell_and_level()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 2);
let cell_vec = &columns[0];
let level_vec = &columns[1];
let size = cell_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let level = value_to_level(level_vec.get(i))?;
let result = cell.map(|cell| cell.parent(level).0);
results.push(result);
}
Ok(results.to_vector())
}
}
fn signature_of_cell() -> Signature {
let mut signatures = Vec::with_capacity(CELL_TYPES.len());
for cell_type in CELL_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![cell_type.clone()]));
}
Signature::one_of(signatures, Volatility::Stable)
}
fn signature_of_cell_and_level() -> Signature {
let mut signatures = Vec::with_capacity(CELL_TYPES.len() * LEVEL_TYPES.len());
for cell_type in CELL_TYPES.as_slice() {
for level_type in LEVEL_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
cell_type.clone(),
level_type.clone(),
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
}
fn cell_from_value(v: Value) -> Option<CellID> {
match v {
Value::Int64(v) => Some(CellID(v as u64)),
Value::UInt64(v) => Some(CellID(v)),
_ => None,
}
}
fn value_to_level(v: Value) -> Result<u64> {
match v {
Value::Int8(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i8, v as u64),
Value::Int16(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i16, v as u64),
Value::Int32(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i32, v as u64),
Value::Int64(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i64, v as u64),
Value::UInt8(v) => ensure_and_coerce!(v <= MAX_LEVEL as u8, v as u64),
Value::UInt16(v) => ensure_and_coerce!(v <= MAX_LEVEL as u16, v as u64),
Value::UInt32(v) => ensure_and_coerce!(v <= MAX_LEVEL as u32, v as u64),
Value::UInt64(v) => ensure_and_coerce!(v <= MAX_LEVEL, v),
_ => unreachable!(),
}
}

View File

@@ -15,6 +15,8 @@
use std::sync::Arc;
mod json_get;
mod json_is;
mod json_path_exists;
mod json_path_match;
mod json_to_string;
mod parse_json;
@@ -46,5 +48,8 @@ impl JsonFunction {
registry.register(Arc::new(JsonIsBool));
registry.register(Arc::new(JsonIsArray));
registry.register(Arc::new(JsonIsObject));
registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
}
}

View File

@@ -0,0 +1,172 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Check if the given JSON data contains the given JSON path.
#[derive(Clone, Debug, Default)]
pub struct JsonPathExistsFunction;
const NAME: &str = "json_path_exists";
impl Function for JsonPathExistsFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = BooleanVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
match json_path {
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
Err(_) => None,
}
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for JsonPathExistsFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "JSON_PATH_EXISTS")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{BinaryVector, StringVector};
use super::*;
#[test]
fn test_json_path_exists_function() {
let json_path_exists = JsonPathExistsFunction;
assert_eq!("json_path_exists", json_path_exists.name());
assert_eq!(
ConcreteDataType::boolean_datatype(),
json_path_exists
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(json_path_exists.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
];
let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
let results = [false, true, true, false];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_path_exists
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(4, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap().unwrap();
assert_eq!(*gt, result);
}
}
}

View File

@@ -0,0 +1,202 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Check if the given JSON data match the given JSON path's predicate.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;
const NAME: &str = "json_path_match";
impl Function for JsonPathMatchFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let mut results = BooleanVectorBuilder::with_capacity(size);
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
match json.data_type() {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
if !jsonb::is_null(json) {
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
match json_path {
Ok(json_path) => jsonb::path_match(json, json_path).ok(),
Err(_) => None,
}
} else {
None
}
}
_ => None,
};
results.push(result);
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
}
Ok(results.to_vector())
}
}
impl Display for JsonPathMatchFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "JSON_PATH_MATCH")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{BinaryVector, StringVector};
use super::*;
#[test]
fn test_json_path_match_function() {
let json_path_match = JsonPathMatchFunction;
assert_eq!("json_path_match", json_path_match.name());
assert_eq!(
ConcreteDataType::boolean_datatype(),
json_path_match
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(json_path_match.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()],
));
let json_strings = [
Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()),
Some(r#"[1,2,3]"#.to_string()),
Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()),
Some(r#"null"#.to_string()),
Some(r#"null"#.to_string()),
];
let paths = vec![
Some("$.a.b == 2".to_string()),
Some("$.b[1 to last] >= 2".to_string()),
Some("$.c > 0".to_string()),
Some("$[0 to last] > 0".to_string()),
Some(r#"null"#.to_string()),
Some("$.c > 0".to_string()),
Some(r#"null"#.to_string()),
];
let results = [
Some(true),
Some(true),
Some(false),
Some(true),
None,
None,
None,
];
let jsonbs = json_strings
.into_iter()
.map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
.collect::<Vec<_>>();
let json_vector = BinaryVector::from(jsonbs);
let path_vector = StringVector::from(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_path_match
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(7, vector.len());
for (i, expected) in results.iter().enumerate() {
let result = vector.get_ref(i);
match expected {
Some(expected_value) => {
assert!(!result.is_null());
let result_value = result.as_boolean().unwrap().unwrap();
assert_eq!(*expected_value, result_value);
}
None => {
assert!(result.is_null());
}
}
}
}
}

View File

@@ -25,13 +25,13 @@ use session::context::QueryContextRef;
use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64;
const DEFAULT_REPLAY_TIMEOUT_SECS: u64 = 10;
const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// A function to migrate a region from source peer to target peer.
/// Returns the submitted procedure id if success. Only available in cluster mode.
///
/// - `migrate_region(region_id, from_peer, to_peer)`, with default replay WAL timeout(10 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`
/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
///
/// The parameters:
/// - `region_id`: the region id
@@ -48,18 +48,13 @@ pub(crate) async fn migrate_region(
_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
let (region_id, from_peer, to_peer, replay_timeout) = match params.len() {
let (region_id, from_peer, to_peer, timeout) = match params.len() {
3 => {
let region_id = cast_u64(&params[0])?;
let from_peer = cast_u64(&params[1])?;
let to_peer = cast_u64(&params[2])?;
(
region_id,
from_peer,
to_peer,
Some(DEFAULT_REPLAY_TIMEOUT_SECS),
)
(region_id, from_peer, to_peer, Some(DEFAULT_TIMEOUT_SECS))
}
4 => {
@@ -82,14 +77,14 @@ pub(crate) async fn migrate_region(
}
};
match (region_id, from_peer, to_peer, replay_timeout) {
(Some(region_id), Some(from_peer), Some(to_peer), Some(replay_timeout)) => {
match (region_id, from_peer, to_peer, timeout) {
(Some(region_id), Some(from_peer), Some(to_peer), Some(timeout)) => {
let pid = procedure_service_handler
.migrate_region(MigrateRegionRequest {
region_id,
from_peer,
to_peer,
replay_timeout: Duration::from_secs(replay_timeout),
timeout: Duration::from_secs(timeout),
})
.await?;

View File

@@ -199,6 +199,7 @@ pub fn default_get_uuid(working_home: &Option<String>) -> Option<String> {
}
/// Report version info to GreptimeDB.
///
/// We do not collect any identity-sensitive information.
/// This task is scheduled to run every 30 minutes.
/// The task will be disabled default. It can be enabled by setting the build feature `greptimedb-telemetry`
@@ -324,7 +325,7 @@ mod tests {
});
let addr = ([127, 0, 0, 1], port).into();
let server = Server::bind(&addr).serve(make_svc);
let server = Server::try_bind(&addr).unwrap().serve(make_svc);
let graceful = server.with_graceful_shutdown(async {
rx.await.ok();
});

View File

@@ -18,6 +18,7 @@ common-time.workspace = true
datatypes.workspace = true
prost.workspace = true
snafu.workspace = true
store-api.workspace = true
table.workspace = true
[dev-dependencies]

View File

@@ -22,12 +22,13 @@ use api::v1::{
use common_query::AddColumnLocation;
use datatypes::schema::{ColumnSchema, RawSchema};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::region_request::ChangeOption;
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, ChangeColumnTypeRequest};
use crate::error::{
InvalidColumnDefSnafu, MissingFieldSnafu, MissingTimestampColumnSnafu, Result,
UnknownLocationTypeSnafu,
InvalidChangeTableOptionRequestSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu,
};
const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32;
@@ -92,6 +93,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterExpr) -> Result<Alter
Kind::RenameTable(RenameTable { new_table_name }) => {
AlterKind::RenameTable { new_table_name }
}
Kind::ChangeTableOptions(api::v1::ChangeTableOptions {
change_table_options,
}) => AlterKind::ChangeTableOptions {
options: change_table_options
.iter()
.map(ChangeOption::try_from)
.collect::<std::result::Result<Vec<_>, _>>()
.context(InvalidChangeTableOptionRequestSnafu)?,
},
};
let request = AlterTableRequest {

View File

@@ -19,6 +19,7 @@ use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};
use store_api::metadata::MetadataError;
#[derive(Snafu)]
#[snafu(visibility(pub))]
@@ -118,6 +119,12 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid change table option request"))]
InvalidChangeTableOptionRequest {
#[snafu(source)]
error: MetadataError,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -141,6 +148,7 @@ impl ErrorExt for Error {
Error::UnknownColumnDataType { .. } | Error::InvalidFulltextColumnType { .. } => {
StatusCode::InvalidArguments
}
Error::InvalidChangeTableOptionRequest { .. } => StatusCode::InvalidArguments,
}
}

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::helper::{convert_i128_to_interval, convert_to_pb_decimal128};
use api::helper::{convert_month_day_nano_to_pb, convert_to_pb_decimal128};
use api::v1::column::Values;
use common_base::BitVec;
use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
@@ -211,7 +211,7 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)),
IntervalMonthDayNanoVector,
interval_month_day_nano_values,
|x| { convert_i128_to_interval(x.into_native()) }
|x| { convert_month_day_nano_to_pb(x) }
),
(
ConcreteDataType::Decimal128(_),

View File

@@ -21,23 +21,19 @@ use syn::{parse_macro_input, DeriveInput, ItemStruct};
pub(crate) fn impl_aggr_func_type_store(ast: &DeriveInput) -> TokenStream {
let name = &ast.ident;
let gen = quote! {
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::error::{InvalidInputStateSnafu, Error as QueryError};
use datatypes::prelude::ConcreteDataType;
impl AggrFuncTypeStore for #name {
fn input_types(&self) -> std::result::Result<Vec<ConcreteDataType>, QueryError> {
impl common_query::logical_plan::accumulator::AggrFuncTypeStore for #name {
fn input_types(&self) -> std::result::Result<Vec<datatypes::prelude::ConcreteDataType>, common_query::error::Error> {
let input_types = self.input_types.load();
snafu::ensure!(input_types.is_some(), InvalidInputStateSnafu);
snafu::ensure!(input_types.is_some(), common_query::error::InvalidInputStateSnafu);
Ok(input_types.as_ref().unwrap().as_ref().clone())
}
fn set_input_types(&self, input_types: Vec<ConcreteDataType>) -> std::result::Result<(), QueryError> {
fn set_input_types(&self, input_types: Vec<datatypes::prelude::ConcreteDataType>) -> std::result::Result<(), common_query::error::Error> {
let old = self.input_types.swap(Some(std::sync::Arc::new(input_types.clone())));
if let Some(old) = old {
snafu::ensure!(old.len() == input_types.len(), InvalidInputStateSnafu);
snafu::ensure!(old.len() == input_types.len(), common_query::error::InvalidInputStateSnafu);
for (x, y) in old.iter().zip(input_types.iter()) {
snafu::ensure!(x == y, InvalidInputStateSnafu);
snafu::ensure!(x == y, common_query::error::InvalidInputStateSnafu);
}
}
Ok(())
@@ -51,7 +47,7 @@ pub(crate) fn impl_as_aggr_func_creator(_args: TokenStream, input: TokenStream)
let mut item_struct = parse_macro_input!(input as ItemStruct);
if let syn::Fields::Named(ref mut fields) = item_struct.fields {
let result = syn::Field::parse_named.parse2(quote! {
input_types: arc_swap::ArcSwapOption<Vec<ConcreteDataType>>
input_types: arc_swap::ArcSwapOption<Vec<datatypes::prelude::ConcreteDataType>>
});
match result {
Ok(field) => fields.named.push(field),

View File

@@ -35,7 +35,9 @@ pub fn aggr_func_type_store_derive(input: TokenStream) -> TokenStream {
}
/// A struct can be used as a creator for aggregate function if it has been annotated with this
/// attribute first. This attribute add a necessary field which is intended to store the input
/// attribute first.
///
/// This attribute add a necessary field which is intended to store the input
/// data's types to the struct.
/// This attribute is expected to be used along with derive macro [AggrFuncTypeStore].
#[proc_macro_attribute]
@@ -44,9 +46,10 @@ pub fn as_aggr_func_creator(args: TokenStream, input: TokenStream) -> TokenStrea
}
/// Attribute macro to convert an arithimetic function to a range function. The annotated function
/// should accept servaral arrays as input and return a single value as output. This procedure
/// macro can works on any number of input parameters. Return type can be either primitive type
/// or wrapped in `Option`.
/// should accept servaral arrays as input and return a single value as output.
///
/// This procedure macro can works on any number of input parameters. Return type can be either
/// primitive type or wrapped in `Option`.
///
/// # Example
/// Take `count_over_time()` in PromQL as an example:

View File

@@ -24,5 +24,5 @@ struct Foo {}
fn test_derive() {
let _ = Foo::default();
assert_fields!(Foo: input_types);
assert_impl_all!(Foo: std::fmt::Debug, Default, AggrFuncTypeStore);
assert_impl_all!(Foo: std::fmt::Debug, Default, common_query::logical_plan::accumulator::AggrFuncTypeStore);
}

View File

@@ -60,7 +60,7 @@ table.workspace = true
tokio.workspace = true
tokio-postgres = { workspace = true, optional = true }
tonic.workspace = true
typetag = "0.2"
typetag.workspace = true
[dev-dependencies]
chrono.workspace = true

View File

@@ -20,6 +20,7 @@ use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use crate::datanode::RegionStat;
use crate::error::{
DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
InvalidRoleSnafu, ParseNumSnafu, Result,
@@ -47,10 +48,14 @@ pub trait ClusterInfo {
role: Option<Role>,
) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
/// List all region stats in the cluster.
async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
// TODO(jeremy): Other info, like region status, etc.
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
///
/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
/// a `cluster_id`, it serves multiple clusters.
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]

View File

@@ -0,0 +1,419 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::str::FromStr;
use api::v1::meta::{HeartbeatRequest, RequestHeader};
use common_time::util as time_util;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::region_engine::{RegionRole, RegionStatistic};
use store_api::storage::RegionId;
use table::metadata::TableId;
use crate::error::Result;
use crate::{error, ClusterId};
pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
pub const REGION_STATISTIC_KEY: &str = "__region_statistic";
lazy_static! {
pub(crate) static ref DATANODE_LEASE_KEY_PATTERN: Regex =
Regex::new(&format!("^{DATANODE_LEASE_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
static ref DATANODE_STAT_KEY_PATTERN: Regex =
Regex::new(&format!("^{DATANODE_STAT_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
static ref INACTIVE_REGION_KEY_PATTERN: Regex = Regex::new(&format!(
"^{INACTIVE_REGION_PREFIX}-([0-9]+)-([0-9]+)-([0-9]+)$"
))
.unwrap();
}
/// The key of the datanode stat in the storage.
///
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Stat {
pub timestamp_millis: i64,
pub cluster_id: ClusterId,
// The datanode Id.
pub id: u64,
// The datanode address.
pub addr: String,
/// The read capacity units during this period
pub rcus: i64,
/// The write capacity units during this period
pub wcus: i64,
/// How many regions on this node
pub region_num: u64,
pub region_stats: Vec<RegionStat>,
// The node epoch is used to check whether the node has restarted or redeployed.
pub node_epoch: u64,
}
/// The statistics of a region.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegionStat {
/// The region_id.
pub id: RegionId,
/// The read capacity units during this period
pub rcus: i64,
/// The write capacity units during this period
pub wcus: i64,
/// Approximate bytes of this region
pub approximate_bytes: u64,
/// The engine name.
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The number of rows
pub num_rows: u64,
/// The size of the memtable in bytes.
pub memtable_size: u64,
/// The size of the manifest in bytes.
pub manifest_size: u64,
/// The size of the SST data files in bytes.
pub sst_size: u64,
/// The size of the SST index files in bytes.
pub index_size: u64,
}
impl Stat {
#[inline]
pub fn is_empty(&self) -> bool {
self.region_stats.is_empty()
}
pub fn stat_key(&self) -> DatanodeStatKey {
DatanodeStatKey {
cluster_id: self.cluster_id,
node_id: self.id,
}
}
/// Returns a tuple array containing [RegionId] and [RegionRole].
pub fn regions(&self) -> Vec<(RegionId, RegionRole)> {
self.region_stats.iter().map(|s| (s.id, s.role)).collect()
}
/// Returns all table ids in the region stats.
pub fn table_ids(&self) -> HashSet<TableId> {
self.region_stats.iter().map(|s| s.id.table_id()).collect()
}
/// Retains the active region stats and updates the rcus, wcus, and region_num.
pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
if inactive_region_ids.is_empty() {
return;
}
self.region_stats
.retain(|r| !inactive_region_ids.contains(&r.id));
self.rcus = self.region_stats.iter().map(|s| s.rcus).sum();
self.wcus = self.region_stats.iter().map(|s| s.wcus).sum();
self.region_num = self.region_stats.len() as u64;
}
}
impl TryFrom<&HeartbeatRequest> for Stat {
type Error = Option<RequestHeader>;
fn try_from(value: &HeartbeatRequest) -> std::result::Result<Self, Self::Error> {
let HeartbeatRequest {
header,
peer,
region_stats,
node_epoch,
..
} = value;
match (header, peer) {
(Some(header), Some(peer)) => {
let region_stats = region_stats
.iter()
.map(RegionStat::from)
.collect::<Vec<_>>();
Ok(Self {
timestamp_millis: time_util::current_time_millis(),
cluster_id: header.cluster_id,
// datanode id
id: peer.id,
// datanode address
addr: peer.addr.clone(),
rcus: region_stats.iter().map(|s| s.rcus).sum(),
wcus: region_stats.iter().map(|s| s.wcus).sum(),
region_num: region_stats.len() as u64,
region_stats,
node_epoch: *node_epoch,
})
}
(header, _) => Err(header.clone()),
}
}
}
impl From<&api::v1::meta::RegionStat> for RegionStat {
fn from(value: &api::v1::meta::RegionStat) -> Self {
let region_stat = value
.extensions
.get(REGION_STATISTIC_KEY)
.and_then(|value| RegionStatistic::deserialize_from_slice(value))
.unwrap_or_default();
Self {
id: RegionId::from_u64(value.region_id),
rcus: value.rcus,
wcus: value.wcus,
approximate_bytes: value.approximate_bytes as u64,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
num_rows: region_stat.num_rows,
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
index_size: region_stat.index_size,
}
}
}
/// The key of the datanode stat in the memory store.
///
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DatanodeStatKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl DatanodeStatKey {
/// The key prefix.
pub fn prefix_key() -> Vec<u8> {
format!("{DATANODE_STAT_PREFIX}-").into_bytes()
}
/// The key prefix with the cluster id.
pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
}
}
impl From<DatanodeStatKey> for Vec<u8> {
fn from(value: DatanodeStatKey) -> Self {
format!(
"{}-{}-{}",
DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
)
.into_bytes()
}
}
impl FromStr for DatanodeStatKey {
type Err = error::Error;
fn from_str(key: &str) -> Result<Self> {
let caps = DATANODE_STAT_KEY_PATTERN
.captures(key)
.context(error::InvalidStatKeySnafu { key })?;
ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self {
cluster_id,
node_id,
})
}
}
impl TryFrom<Vec<u8>> for DatanodeStatKey {
type Error = error::Error;
fn try_from(bytes: Vec<u8>) -> Result<Self> {
String::from_utf8(bytes)
.context(error::FromUtf8Snafu {
name: "DatanodeStatKey",
})
.map(|x| x.parse())?
}
}
/// The value of the datanode stat in the memory store.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(transparent)]
pub struct DatanodeStatValue {
pub stats: Vec<Stat>,
}
impl DatanodeStatValue {
/// Get the latest number of regions.
pub fn region_num(&self) -> Option<u64> {
self.stats.last().map(|x| x.region_num)
}
/// Get the latest node addr.
pub fn node_addr(&self) -> Option<String> {
self.stats.last().map(|x| x.addr.clone())
}
}
impl TryFrom<DatanodeStatValue> for Vec<u8> {
type Error = error::Error;
fn try_from(stats: DatanodeStatValue) -> Result<Self> {
Ok(serde_json::to_string(&stats)
.context(error::SerializeToJsonSnafu {
input: format!("{stats:?}"),
})?
.into_bytes())
}
}
impl FromStr for DatanodeStatValue {
type Err = error::Error;
fn from_str(value: &str) -> Result<Self> {
serde_json::from_str(value).context(error::DeserializeFromJsonSnafu { input: value })
}
}
impl TryFrom<Vec<u8>> for DatanodeStatValue {
type Error = error::Error;
fn try_from(value: Vec<u8>) -> Result<Self> {
String::from_utf8(value)
.context(error::FromUtf8Snafu {
name: "DatanodeStatValue",
})
.map(|x| x.parse())?
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_stat_key() {
let stat = Stat {
cluster_id: 3,
id: 101,
region_num: 10,
..Default::default()
};
let stat_key = stat.stat_key();
assert_eq!(3, stat_key.cluster_id);
assert_eq!(101, stat_key.node_id);
}
#[test]
fn test_stat_val_round_trip() {
let stat = Stat {
cluster_id: 0,
id: 101,
region_num: 100,
..Default::default()
};
let stat_val = DatanodeStatValue { stats: vec![stat] };
let bytes: Vec<u8> = stat_val.try_into().unwrap();
let stat_val: DatanodeStatValue = bytes.try_into().unwrap();
let stats = stat_val.stats;
assert_eq!(1, stats.len());
let stat = stats.first().unwrap();
assert_eq!(0, stat.cluster_id);
assert_eq!(101, stat.id);
assert_eq!(100, stat.region_num);
}
#[test]
fn test_get_addr_from_stat_val() {
let empty = DatanodeStatValue { stats: vec![] };
let addr = empty.node_addr();
assert!(addr.is_none());
let stat_val = DatanodeStatValue {
stats: vec![
Stat {
addr: "1".to_string(),
..Default::default()
},
Stat {
addr: "2".to_string(),
..Default::default()
},
Stat {
addr: "3".to_string(),
..Default::default()
},
],
};
let addr = stat_val.node_addr().unwrap();
assert_eq!("3", addr);
}
#[test]
fn test_get_region_num_from_stat_val() {
let empty = DatanodeStatValue { stats: vec![] };
let region_num = empty.region_num();
assert!(region_num.is_none());
let wrong = DatanodeStatValue {
stats: vec![Stat {
region_num: 0,
..Default::default()
}],
};
let right = wrong.region_num();
assert_eq!(Some(0), right);
let stat_val = DatanodeStatValue {
stats: vec![
Stat {
region_num: 1,
..Default::default()
},
Stat {
region_num: 0,
..Default::default()
},
Stat {
region_num: 2,
..Default::default()
},
],
};
let region_num = stat_val.region_num().unwrap();
assert_eq!(2, region_num);
}
}

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::meta::ProcedureDetailResponse;
use common_telemetry::tracing_context::W3cTrace;
use store_api::storage::{RegionId, RegionNumber, TableId};
@@ -82,6 +83,8 @@ pub trait ProcedureExecutor: Send + Sync {
ctx: &ExecutorContext,
pid: &str,
) -> Result<ProcedureStateResponse>;
async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
}
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;

View File

@@ -43,10 +43,10 @@ impl AlterLogicalTablesProcedure {
&self.data.physical_columns,
);
// Updates physical table's metadata
// Updates physical table's metadata, and we don't need to touch per-region settings.
self.context
.table_metadata_manager
.update_table_info(physical_table_info, new_raw_table_info)
.update_table_info(physical_table_info, None, new_raw_table_info)
.await?;
Ok(())

View File

@@ -43,10 +43,10 @@ use crate::ddl::DdlContext;
use crate::error::{Error, Result};
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::DeserializedValueWithBytes;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders};
use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
use crate::{metrics, ClusterId};
/// The alter table procedure
@@ -101,6 +101,9 @@ impl AlterTableProcedure {
.get_physical_table_route(table_id)
.await?;
self.data.region_distribution =
Some(region_distribution(&physical_table_route.region_routes));
let leaders = find_leaders(&physical_table_route.region_routes);
let mut alter_region_tasks = Vec::with_capacity(leaders.len());
@@ -161,8 +164,14 @@ impl AlterTableProcedure {
self.on_update_metadata_for_rename(new_table_name.to_string(), table_info_value)
.await?;
} else {
self.on_update_metadata_for_alter(new_info.into(), table_info_value)
.await?;
// region distribution is set in submit_alter_region_requests
let region_distribution = self.data.region_distribution.as_ref().unwrap().clone();
self.on_update_metadata_for_alter(
new_info.into(),
region_distribution,
table_info_value,
)
.await?;
}
info!("Updated table metadata for table {table_ref}, table_id: {table_id}");
@@ -271,6 +280,8 @@ pub struct AlterTableData {
table_id: TableId,
/// Table info value before alteration.
table_info_value: Option<DeserializedValueWithBytes<TableInfoValue>>,
/// Region distribution for table in case we need to update region options.
region_distribution: Option<RegionDistribution>,
}
impl AlterTableData {
@@ -281,6 +292,7 @@ impl AlterTableData {
table_id,
cluster_id,
table_info_value: None,
region_distribution: None,
}
}

View File

@@ -106,6 +106,7 @@ fn create_proto_alter_kind(
})))
}
Kind::RenameTable(_) => Ok(None),
Kind::ChangeTableOptions(v) => Ok(Some(alter_request::Kind::ChangeTableOptions(v.clone()))),
}
}
@@ -187,7 +188,7 @@ mod tests {
region: Region::new_test(region_id),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
}]),
HashMap::new(),

View File

@@ -20,7 +20,7 @@ use table::requests::AlterKind;
use crate::ddl::alter_table::AlterTableProcedure;
use crate::error::{self, Result};
use crate::key::table_info::TableInfoValue;
use crate::key::DeserializedValueWithBytes;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
impl AlterTableProcedure {
/// Builds new_meta
@@ -51,7 +51,9 @@ impl AlterTableProcedure {
AlterKind::RenameTable { new_table_name } => {
new_info.name = new_table_name.to_string();
}
AlterKind::DropColumns { .. } | AlterKind::ChangeColumnTypes { .. } => {}
AlterKind::DropColumns { .. }
| AlterKind::ChangeColumnTypes { .. }
| AlterKind::ChangeTableOptions { .. } => {}
}
Ok(new_info)
@@ -75,11 +77,16 @@ impl AlterTableProcedure {
pub(crate) async fn on_update_metadata_for_alter(
&self,
new_table_info: RawTableInfo,
region_distribution: RegionDistribution,
current_table_info_value: &DeserializedValueWithBytes<TableInfoValue>,
) -> Result<()> {
let table_metadata_manager = &self.context.table_metadata_manager;
table_metadata_manager
.update_table_info(current_table_info_value, new_table_info)
.update_table_info(
current_table_info_value,
Some(region_distribution),
new_table_info,
)
.await?;
Ok(())

View File

@@ -58,10 +58,10 @@ impl CreateLogicalTablesProcedure {
&new_table_info.name,
);
// Update physical table's metadata
// Update physical table's metadata and we don't need to touch per-region settings.
self.context
.table_metadata_manager
.update_table_info(&physical_table_info, new_table_info)
.update_table_info(&physical_table_info, None, new_table_info)
.await?;
// Invalid physical table cache

View File

@@ -29,7 +29,10 @@ use crate::test_util::MockDatanodeHandler;
#[async_trait::async_trait]
impl MockDatanodeHandler for () {
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
unreachable!()
Ok(RegionResponse {
affected_rows: 0,
extensions: Default::default(),
})
}
async fn handle_query(

Some files were not shown because too many files have changed in this diff Show More