Compare commits

...

102 Commits

Author SHA1 Message Date
Yingwen
1008af5324 feat!: Divide flush and compaction job pool (#4871)
* feat: divide flush/compact job pool

* feat!: divide bg jobs config

* docs: update config examples

* test: fix tests
2024-10-25 23:36:16 +00:00
discord9
2485f66077 chore: graceful exit on bind fail (#4882) 2024-10-25 09:29:39 +00:00
Weny Xu
4f3afb13b6 fix: fix broken import (#4880) 2024-10-25 07:09:51 +00:00
shuiyisong
32a0023010 chore: add schema urls to otlp logs (#4876)
* chore: add schema urls to otlp logs table

* chore: update meter-macros version to remove anymap warning

* chore: change span id and trace id to field
2024-10-25 03:45:24 +00:00
Kaifeng Zheng
4e9c251041 feat: add json_path_match udf (#4864)
* add json_path_match udf

* sql tests for json_path_match

* fix clippy & comment

* fix null value behavior

* added null tests

* adjust function's behavior on nulls

* update test cases

* fix null check of json
2024-10-25 03:13:34 +00:00
Lei, HUANG
e328c7067c chore: udapte Rust toolchain to 2024-10-19 (#4857)
* update rust toolchain

* change toolchain to 2024-10-17

* fix: clippy

* fix: ut

* bump shadow-rs

* fix: use nightly-2024-10-19

* fix: clippy

* chore/udapte-toolchain-2024-10-17: Update DEV_BUILDER_IMAGE_TAG to 2024-10-19-a5c00e85-20241024184445 in Makefile
2024-10-25 00:23:32 +00:00
Weny Xu
8b307e4548 feat: introduce the PluginOptions (#4835)
* feat: introduce the `PluginOptions`

* chore: apply suggestions from CR
2024-10-24 12:02:10 +00:00
discord9
ff38abde2e chore: better column schema check for flow (#4855)
* chore: better column schema check for flow

* chore: better msg

* tests: clean up after tests

* chore: better msg

* chore: per review

* tests: sqlness
2024-10-24 09:43:32 +00:00
jeremyhi
aa9a265984 chore: make pusher log easy to understand (#4841)
* chore: make pusher log easy to understand

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

* chore: by comment

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-10-24 07:44:16 +00:00
pa
9d3ee6384a feat: Limit CPU in runtime (#3685) (#4782)
feat: add throttle runtime (#3685)
2024-10-24 07:30:24 +00:00
localhost
fcde0a4874 feat: Add functionality to the Opentelemetry write interface to extract fields from attr to top-level data. (#4859)
* chore: add otlp select

* chore: change otlp select

* chore: remove json path

* chore: format toml

* chore: change opentelemetry extract keys header name

* chore: add some doc and remove useless code and lib

* chore: make clippy happy

* chore: fix by pr comment

* chore: fix by pr comment

* chore: opentelemetry logs select key change some type default semantic type
2024-10-24 05:55:57 +00:00
Weny Xu
5d42e63ab0 fix!: replace timeout_millis and connect_timeout_millis with Duration in DatanodeClientOptions (#4867)
* fix: correct options struct

* fix: fix unit test
2024-10-23 08:20:34 +00:00
discord9
0c01532a37 feat: Sort within each PartitionRange (#4847)
* feat: PartSort

* chore: rm unused

* chore: typo

* chore: mem pool df

* chore: add location to arrow error

* refactor: test_util

* refactor: per review

* chore: rm unused

* chore: more cases

* chore: test&buffer clear

* fix: remove fetch

* chore: fmt

* chore: per review

* chore: rm unused
2024-10-23 07:01:55 +00:00
ZonaHe
6d503b047a feat: update dashboard to v0.6.0 (#4861)
Co-authored-by: ZonaHex <ZonaHex@users.noreply.github.com>
2024-10-22 02:34:09 +00:00
Yingwen
5d28f7a912 feat: yields empty batch after reading a range (#4845)
* feat: add empty batch to end of range stream

* feat: add batch validation

* fix: validate batch order

* fix: not yield empty batch in compaction

* fix: empty record batch

* feat: add a flag to enable empty batch
2024-10-21 13:52:47 +00:00
Lei, HUANG
a50eea76a6 chore: bump greptime-meter (#4858)
chore/bump-greptime-meter: Add meter-core package and update meter-core dependency across various packages to
new git revision.
2024-10-21 08:18:30 +00:00
Yingwen
2ee1ce2ba1 docs: change cpu/mem panel to time-series (#4844)
* docs: change cpu/mem panel to time-series

* docs: update version
2024-10-18 08:42:01 +00:00
Weny Xu
c02b5dae93 chore: bump version to 0.9.5 (#4853) 2024-10-18 08:07:13 +00:00
Weny Xu
081c6d9e74 fix: flush metric metadata region (#4852)
* fix: flush metric metadata region

* chore: apply suggestions from CR
2024-10-18 07:21:35 +00:00
Weny Xu
ca6e02980e fix: overwrite entry_id if entry id is less than start_offset (#4842)
* fix: overwrite entry_id if entry id is less than start_offset

* feat: add `overwrite_entry_start_id` to options

* chore: update config.md
2024-10-18 06:31:02 +00:00
Weny Xu
74bdba4613 fix: fix metadata forward compatibility issue (#4846) 2024-10-18 06:26:41 +00:00
Weny Xu
2e0e82ddc8 chore: update greptime-proto to b4d3011 (#4850) 2024-10-18 04:10:22 +00:00
Yingwen
e0c4157ad8 feat: Seq scanner scans data by time range (#4809)
* feat: seq scan by partition

* feat: part metrics

* chore: remove unused codes

* chore: fmt stream

* feat: build ranges returns smallvec

* feat: move scan mem/file ranges to util and reuse

* feat: log metrics

* chore: correct some metrics

* feat: get explain info from ranges

* test: group test and remove unused codes

* chore: fix clippy

* feat: change PartitionRange end to exclusive

* test: add tests
2024-10-17 11:05:12 +00:00
discord9
613e07afb4 feat: window sort physical plan (#4814)
* WIP

* feat: range split& tests

* WIP: split range

* add sort exprs

* chore: typo

* WIP

* feat: find successive runs

* WIP

* READY FOR REVIEW PART ONE: more tests

* refactor: break into smaller functions

* feat: precompute working range(need testing)

* tests: on working range

* tests: on working range

* feat: support rev working range

* feat(to be tested): core logic of merge sort

* fix: poll results

* fix: find_slice_from_range&test

* chore: remove some unused util func&fields

* chore: typos

* chore: impl exec plan for WindowedSortExec

* test(WIP): window sort stream

* test: window sort stream

* chore: remove unused

* fix: fetch

* fix: WIP intersection remaining

* test: fix and test!

* chore: remove outdated comments

* chore: rename test

* chore: remove dbg line

* chore: sorted runs

* feat: handling unexpected data

* chore: unused

* chore: remove a print in test

* chore: per review

* docs: wrong comment

* chore: more test cases
2024-10-16 11:50:25 +00:00
Weny Xu
0ce93f0b88 chore: add more metrics for region migration (#4838) 2024-10-16 09:36:57 +00:00
Ning Sun
c231eee7c1 fix: respect feature flags for geo function (#4836) 2024-10-16 07:46:31 +00:00
Yiran
176f2df5b3 fix: dead links (#4837) 2024-10-16 07:43:14 +00:00
localhost
4622412dfe feat: add API to write OpenTelemetry logs to GreptimeDB (#4755)
* chore: otlp logs api

* feat: add API to write OpenTelemetry logs to GreptimeDB

* chore: fix test data schema error

* chore: modify the underlying data structure of the pipeline value map type from hashmap to btremap to keep key order

* chore: fix by pr comment

* chore: resolve conflicts and add some test

* chore: remove useless error

* chore: change otlp header name

* chore: fmt code

* chore: fix integration test for otlp log write api

* chore: fix by pr comment

* chore: set otlp body with fulltext default
2024-10-16 04:36:08 +00:00
jeremyhi
59ec90299b refactor: metasrv cannot be cloned (#4834)
* refactor: metasrv cannot be cloned

* chore: remove MetasrvInstance's clone
2024-10-15 11:36:48 +00:00
discord9
16b8cdc3d5 chore: bump version v0.9.4 (#4833) 2024-10-15 10:48:03 +00:00
Weny Xu
3197b8b535 feat: introduce default customizers (#4831)
* feat: introduce `DefaultHeartbeatHandlerGroupBuilderCustomizer` and `DefaultLeadershipChangeNotifierCustomizer`

* chore: code styling
2024-10-15 09:48:13 +00:00
zyy17
972c2441af chore: bump promql-parser to v0.4.1 and use to_string() for EvalStmt (#4832)
chore: bump promql-parser to v0.4.1 and use to_string() for EvalStmt
2024-10-15 08:50:37 +00:00
Ning Sun
bb8b54b5d3 feat: add some s2 geo functions (#4823)
* feat: add first batch of s2 functions

* refactor: update reusable code from main

* test: add sqlness tests for s2

* feat: add tostring function for s2

* Update src/common/function/src/scalars/geo/s2.rs

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* Apply suggestions from code review

* one more change

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-15 06:47:29 +00:00
Weny Xu
b5233e500b feat: defer HeartbeatHandlerGroup construction and enhance LeadershipChangeNotifier (#4826)
* feat: enhance `HeartbeatHandlerGroup`

* chore: apply suggestions from CR

* chore: minor refactoring

* chore: code styling

* chore: apply suggestions from CR
2024-10-15 03:35:31 +00:00
Ruihang Xia
b61a388d04 refactor: replace info logs with debug logs in region server (#4829)
* refactor: replace info logs with debug logs in region server

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: update error handling for closing and opening nonexistent regions

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 12:46:07 +00:00
Ruihang Xia
06e565d25a feat: cache logical region's metadata (#4827)
* feat: cache logical region's metadata

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: implement logical region locking for metadata operations

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: correct typo in comment for MetadataRegion struct

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 08:44:13 +00:00
Yingwen
3b2ce31a19 feat: enable prof features by default (#4815)
* feat: enable prof by default

* docs: don't need to build with features

* feat: add common-pprof as optional dep for pprof feature

* build: remove optional

* feat: use dump_text
2024-10-14 03:32:47 +00:00
Ruihang Xia
a889ea88ca fix: case sensitive for __field__ matcher (#4822)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-14 03:18:59 +00:00
Yingwen
2f2b4b306c feat!: implement interval type by multiple structs (#4772)
* define structs and methods

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: re-implement interval types in time crate

* feat: use new

* feat: interval value

* feat: query crate interval

* feat: pg and mysql interval

* chore: remove unused imports

* chore: remove commented codes

* feat: make flow compile but may not work

* feat: flow datetime

* test: fix some tests

* test: fix some flow tests(WIP)

* chore: some fix test&docs

* fix: change interval order

* chore: remove unused codes

* chore: fix cilppy

* chore: now signature change

* chore: remove todo

* feat: update error message

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: discord9 <discord9@163.com>
2024-10-14 03:09:03 +00:00
jeremyhi
856c0280f5 feat: remove the distributed lock (#4825)
* feat: remove the distributed lock as we do not need it any more

* chore: delete todo comment

* chore: remove unused error
2024-10-12 09:04:22 +00:00
Ning Sun
aaa9b32908 feat: add more h3 functions (#4770)
* feat: add more h3 grid functions

* feat: add more traversal functions

* refactor: update some function definitions

* style: format

* refactor: avoid creating slice in nested loop

* feat: ensure column number and length

* refactor: fix lint warnings

* refactor: merge main

* Apply suggestions from code review

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

* Update src/common/function/src/scalars/geo/h3.rs

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>

* style: format

---------

Co-authored-by: LFC <990479+MichaelScofield@users.noreply.github.com>
2024-10-11 17:57:54 +00:00
Weny Xu
4bb1f4f184 feat: introduce LeadershipChangeNotifier and LeadershipChangeListener (#4817)
* feat: introduce `LeadershipChangeNotifier`

* refactor: use `LeadershipChangeNotifier`

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: adjust log styling
2024-10-11 12:48:53 +00:00
Weny Xu
0f907ef99e fix: correct table name formatting (#4819) 2024-10-11 11:32:15 +00:00
Ruihang Xia
a61c0bd1d8 fix: error in admin function is not formatted properly (#4820)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-10-11 10:02:45 +00:00
Lei, HUANG
7dd0e3ab37 fix: Panic in UNION ALL queries (#4796)
* fix/union_all_panic:
 Improve MetricCollector by incrementing level and fix underflow issue; add tests for UNION ALL queries

* chore: remove useless documentation

* fix/union_all_panic: Add order by clause to UNION ALL select queries in tests
2024-10-11 08:23:01 +00:00
Yingwen
d168bde226 feat!: move v1/prof API to debug/prof (#4810)
* feat!: move v1/prof to debug/prof

* docs: update readme

* docs: move prof docs to docs dir

* chore: update message

* feat!: remove v1/prof

* docs: update mem prof docs
2024-10-11 04:16:37 +00:00
jeremyhi
4b34f610aa feat: information extension (#4811)
* feat: information extension

* Update manager.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* chore: by comment

---------

Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-10-11 03:13:49 +00:00
Weny Xu
695ff1e037 feat: expose RegionMigrationManagerRef (#4812)
* chore: expose `RegionMigrationProcedureTask`

* fix: fix typos

* chore: expose `tracker`
2024-10-11 02:40:51 +00:00
Yohan Wal
288fdc3145 feat: json_path_exists udf (#4807)
* feat: json_path_exists udf

* chore: fix comments

* fix: caution when copy&paste QAQ
2024-10-10 14:15:34 +00:00
discord9
a8ed3db0aa feat: Merge sort Logical plan (#4768)
* feat(WIP): MergeSort

* wip

* feat: MergeSort LogicalPlan

* update sqlness result

* Apply suggestions from code review

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

* refactor: per review advice

* refactor: more per review

* chore: per review

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-09 09:37:27 +00:00
Kaifeng Zheng
0dd11f53f5 feat: add json format output for http interface (#4797)
* feat: json output format for http

* feat: add json result test case

* fix: typo and refactor a piece of code

* fix: cargo check

* move affected_rows to top level
2024-10-09 07:11:57 +00:00
Ning Sun
19918928c5 feat: add function to aggregate path into a geojson path (#4798)
* feat: add geojson function to aggregate paths

* test: add sqlness results

* test: add sqlness

* refactor: corrected to aggregation function

* chore: update comments

* fix: make linter happy again

* refactor: rename to remove `geo` from `geojson` function name

The return type is not geojson at all. It's just compatible with geojson's
coordinates part and superset's deckgl path plugin.
2024-10-09 02:38:44 +00:00
shuiyisong
5f0a83b2b1 fix: ts conversion during transform phase (#4790)
* fix: allow ts conversion during transform phase

* chore: replace `unimplemented` with snafu
2024-10-08 17:54:44 +00:00
localhost
71a66d15f7 chore: add json write (#4744)
* chore: add json write

* chore: add test for write json log api

* chore: enhancement of Error Handling

* chore: fix by pr comment

* chore: fix by pr comment

* chore: enhancement of error content and add some doc
2024-10-08 12:11:09 +00:00
Weny Xu
2cdd103874 feat: introduce HeartbeatHandlerGroupBuilderCustomizer (#4803)
* feat: introduce `HeartbeatHandlerGroupBuilderFinalizer`

* chore: rename to `HeartbeatHandlerGroupBuilderCustomizer`
2024-10-08 09:02:06 +00:00
Ning Sun
4dea4cac47 refactor: change sqlness ports to avoid conflict with local instance (#4794) 2024-10-08 07:33:24 +00:00
Kaifeng Zheng
a283e13da7 feat: set max log files to 720 by default, info log only (#4787)
* feat: set max log files to 720 by default, info log only

* expose max_log_files in tomls

* include dir info when panicing, limit max_log_files of err_log to 30, and that of slow_queries to opt.max_log_files

* fix clippy

* update config.md

* update expected config str

* limit err_log max files size to `max_log_files` too, include err info when panicing, put `max_l_f` in right position

* fix typos

* chore: config

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
2024-10-04 18:05:40 +00:00
JohnsonLee
47a3277d12 feat: customize channel information for sqlness tests (#4729)
* feat: add pg and mysql server_address options

* feat: start pg and mysql server(standalone)

* feat: start pg and mysql in distribute

* feat: finally get there, specify postgres sqlness

* feat: support mysql sqlness

* fix: license

* fix: remove unused import

* fix: toml

* fix: clippy

* refactor: BeginProtocolInterceptorFactory to ProtocolInterceptorFactory

* fix: sqlness pg connect

* fix: clippy

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* fix: rustfmt

* fix: reconnect pg and mysql when restart

* test: add mysql related sqlness

* fix: wait for start while restarting

* fix: clippy

* fix: cargo lock conflict

fix: Cargo.lock conflict

* fix: usage of '@@tx_isolation' in sqlness

* fix: typos

* feat: retry with backoff when create client

* fix: use millisecond rather than microseconds in backoff

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-10-04 09:26:57 +00:00
Yingwen
caf5f2c7a5 docs: add TM to logos (#4789) 2024-10-01 03:35:04 +00:00
Weny Xu
c1e8084af6 feat: add add_handler_after, add_handler_before, replace_handler (#4788)
* feat: add `add_handler_after`, `add_handler_before`, `replace_handler`

* chore: apply suggestions from CR

* test: add more tests

* feat: use `Vec` instead of `LinkedList`

* Update src/meta-srv/src/lib.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-09-30 08:53:59 +00:00
Weny Xu
6e776d5f98 feat: support to reject write after flushing (#4759)
* refactor: use `RegionRoleState` instead of `RegionState`

* feat: introducing `RegionLeaderState::Downgrading`

* refactor: introduce `set_region_role_state_gracefully`

* refactor: use `set_region_role` instead of `set_writable`

* feat: support to reject write after flushing

* fix: fix unit tests

* test: add unit test for `should_reject_write`

* chore: add comments

* chore: refine comments

* fix: fix unit test

* test: enable fuzz tests for Local WAL

* chore: add logs

* chore: rename `RegionStatus` to `RegionState`

* feat: introduce `DowngradingLeader`

* chore: rename

* refactor: refactor `set_role_state` tests

* test: ensure downgrading region will reject write

* chore: enhance logs

* chore: refine name

* chore: refine comment

* test: add tests for `set_role_role_state`

* fix: fix unit tests

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2024-09-30 08:28:51 +00:00
zyy17
e39a9e6feb feat: add StatementStatistics for slow query logging implementation (#4719)
* feat: log slow query

* feat: log slow query for sql

* refactor: add slow query logging options

* ci: fix errors

* feat: add StatementStatistics

* chore: revert modification of servers crate

* docs: update config docs

* fix: clippy errors
2024-09-30 03:26:50 +00:00
Weny Xu
77af4fd981 refactor: introduce HeartbeatHandlerGroupBuilder (#4785) 2024-09-30 02:56:53 +00:00
Yingwen
cd55202136 feat: unordered scanner scans data by time ranges (#4757)
* feat: define range meta

* feat: group ranges

* feat: split range

* feat: build ranges from the scan input

* feat: get partition range from range meta

* feat: build file range

* feat: unordered scan read by ranges

* feat: wip for mem ranges

* feat: build ranges

* feat: remove unused codes

* chore: update comments

* feat: update metrics

* chore: address review comments

* chore: debug assertion
2024-09-29 07:14:48 +00:00
Lei, HUANG
50cb59587d chore: replace anymap with anymap2 (#4781) 2024-09-29 06:27:35 +00:00
Lei, HUANG
0a82b12d08 fix(sqlness): sqlness isolation (#4780)
* fix: isolate logs

* fix: copy cases

* fix: clippy
2024-09-29 05:54:01 +00:00
LFC
d9f2f0ccf0 feat: add a new status code for "external" errors (#4775)
* feat: add a new status code for "external" errors

* Update src/auth/src/error.rs

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>

* support mysql cli cleartext auth

* resolve PR comments

---------

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2024-09-29 03:38:50 +00:00
Ning Sun
cedbbcf2b8 fix: update pgwire for potential issue with connection establish (#4783) 2024-09-28 19:27:31 +00:00
Ning Sun
d6be44bc7f fix: dead loop on detecting postgres ssl handshake (#4778) 2024-09-28 01:39:29 +00:00
JohnsonLee
3a46c1b235 fix: use information_schema returns Unknown database (#4774)
* fix: use information_schema returns Unknown database 'information_schema'

* test: make sure 'use information_schma' successful
2024-09-27 23:15:48 +00:00
Lei, HUANG
934bc13967 feat(mito): limit compaction output file size (#4754)
* Commit Message

 Clarify documentation for CompactionOutput struct

 Updated the documentation for the `CompactionOutput` struct to specify that the output time range is only relevant for windowed compaction.

* Add max_output_file_size to TwcsPicker and TwcsOptions

 - Introduced `max_output_file_size` to `TwcsPicker` struct and its logic to enforce output file size limits during compaction.
 - Updated `TwcsOptions` to include `max_output_file_size` and adjusted related tests.
 - Modified `new_picker` function to initialize `TwcsPicker` with the new `max_output_file_size` field.

* feat/limit-compaction-output-size:
 Refactor compaction picker and TWCS to support append mode and improve options handling

 - Update compaction picker to accept a reference to options and append mode flag
 - Modify TWCS picker logic to consider append mode when filtering deleted rows
 - Remove VersionControl usage in compactor and simplify return type
 - Adjust enforce_max_output_size logic in TWCS picker to handle max output file size
 - Add append mode flag to TwcsPicker struct
 - Fix incorrect condition in TWCS picker for enforcing max output size
 - Update region options tests to reflect new max output file size format (1GB and 7MB)
 - Simplify InvalidTableOptionSnafu error handling in create_parser
 - Add `compaction.twcs.max_output_file_size` to mito engine option keys

* resolve some comments
2024-09-27 11:17:36 +00:00
Weny Xu
4045298cb2 feat: add region_statistics table (#4771)
* refactor: introduce `region_statistic`

* refactor: move DatanodeStat related structs to common_meta

* chore: add comments

* feat: implement `list_region_stats` for `ClusterInfo` trait

* feat: add `region_statistics` table

* feat: add table_id and region_number fields

* chore: rename unused snafu

* chore: udpate sqlness results

* chore: avoid to print source in error msg

* chore: move `procedure_info` under `greptime` catalog

* chore: apply suggestions from CR

* Update src/common/meta/src/datanode.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2024-09-27 09:54:52 +00:00
Lanqing Yang
cc4106cbd2 feat: protect datanode with concurrency limit. (#4699)
Adding parallelism in region server to protect datanode from query overload.
2024-09-27 06:12:57 +00:00
localhost
627a326273 refactor: Change the error type in the pipeline crate from String to Error (#4763)
* chore: in process

* chore: change pipeline crate error type

* chore: improve event error

* chore: fix by pr comment

* chore: use snafu context replace ok_or_else

* refactor: update snafu usage

---------

Co-authored-by: Ning Sun <sunng@protonmail.com>
2024-09-25 19:32:34 +00:00
discord9
0274e752ae chore: make sure aws-lc-sys wouldn't be built (#4767)
* chore: make sure aws-lc-sys wouldn't be built

* fix: bash

* refactor: multiple line bash
2024-09-25 17:11:37 +00:00
Lei, HUANG
cd4bf239d0 chore: relax table name constraint (#4766)
chore/relax-table-name-constraint: Updated NAME_PATTERN to allow '@' and '#' characters and adjusted tests for new table name validation rules.
2024-09-25 02:45:18 +00:00
Ning Sun
e3c0b5482f feat: returning warning instead of error on unsupported SET statement (#4761)
* feat: add capability to send warning to pgclient

* fix: refactor query context to carry query scope data

* feat: return a warning for unsupported postgres statement
2024-09-24 08:45:55 +00:00
Weny Xu
d1b252736d refactor: unify the styling in create_or_alter_tables_on_demand (#4756)
* refactor: refactor `create_or_alter_tables_on_demand`

* chore: apply suggestions from CR
2024-09-24 03:48:16 +00:00
discord9
54f6e13d13 fix: Release CI & make rustls use ring (#4750)
* feat: new dev-build

* fix: copy

* chore: rm dbg run

* fix: binstall install script

* chore: typos

* fix: update useable image

* fix: properly uninstall gcc-9

* chore: another try

* chore: print current cc version

* chore: another try to release

* fix: use gcc-10 by `update-alternatives`

* chore: update dev-build image

* remove gcc-9 again and install make/cmake

* use new image

* alias gcc-10/g++-10 to every variant

* again.....

* fix....

* again release ....

* chore: remove auto remove

* feat: rustls default to ring

* chore: update Cargo.lock

* chore: update Cargo.lock

* Apply suggestions from code review

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-24 03:11:12 +00:00
Ruihang Xia
5c64f0ce09 refactor!: simplify NativeType trait and remove percentile UDAF (#4758)
* refactor!: simplify NativeType trait and remove percentile UDAF

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove NativeType

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* recover a mis-deleted case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-23 10:55:20 +00:00
Ruihang Xia
2feddca1cb feat: include order by to commutativity rule set (#4753)
* feat: include order by to commutativity rule set

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* tune sqlness replace interceptor

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-23 08:35:06 +00:00
Ning Sun
0f99218386 feat: list/array/timezone support for postgres output (#4727)
* feat: list/array support for postgres output

* fix: implement time zone support for postgrsql

* feat: add a geohash function that returns array

* fix: typo

* fix: lint warnings

* test: add sqlness test

* refactor: check resolution range before convert value

* fix: test result for sqlness

* feat: upgrade pgwire apis
2024-09-22 02:39:38 +00:00
Weny Xu
163cea81c2 feat: migrate local WAL regions (#4715)
* feat: allow to flush region before migrating

* fix: fix unit tests

* feat: allow to set `flush_timeout`

* feat: skip to replay memtable

* fix: fix unit tests

* test: add more tests

* refactor: simplify timeout logical

* test: add unit tests

* test: add unit tests

* chore: update comments

* fix: fix unit tests

* fix: fmt and clippy

* feat: change default timeout to 30s

* fix: throw `ExceededDeadline` error

* test: add tests for `downgrade_region_with_retry`

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: apply suggestions from CR

* chore: update proto to `3633474`

* refactor: refactor `upgrade_region_with_retry`

* chore: apply suggestions from CR
2024-09-20 08:27:20 +00:00
taobo
0c9b8eb0d2 feat: improve observability for procedure (#4675)
* feat: improve observability for procedure

* fix: test error

* test: add sqlness test for information_schema.procedure_info

* fix: sqlness test error

* fix: cr comment

* chore: update proto version

* fix: apply cr comment

* update version

* fix: cr comment

* optimize procedure type output format

* upgrade dep version

* fix: clippy error

* fix: `procedure` borrowed error

* fix: optimize code
2024-09-20 06:07:53 +00:00
Ning Sun
75c6fad1a3 feat: add more h3 scalar functions (#4707)
* feat: add more h3 scalar functions

* chore: comment up
2024-09-20 04:19:50 +00:00
Yingwen
e12ffbeb2f feat: flush other workers if still need flush (#4746) 2024-09-20 02:55:31 +00:00
discord9
c4e52ebf91 feat: use new image for gcc-10 (#4748)
feat: use new image
2024-09-20 02:34:45 +00:00
Yingwen
f02410c39b fix: disable field pruning in last non null mode (#4740)
* fix: don't prune fields in last non null mode

* test: add sqlness test for field pruning

* test: add flush

* refine implementation

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-20 00:35:37 +00:00
Ruihang Xia
f5cf25b0db refactor: remove DfPlan wrapper (#4733)
* refactor: remove DfPlan wrapper

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unused errors

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix test assertion

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-09-19 12:29:33 +00:00
liyang
1acda74c26 fix: cannot input tag for the dev-builder image (#4743) 2024-09-19 11:14:34 +00:00
Yohan Wal
95787825f1 build(deps): use original jsonb repo (#4742) 2024-09-19 09:44:44 +00:00
Weny Xu
49004391d3 chore(fuzz): print table name for debugging (#4738)
* chore(fuzz): print table name for debugging

* chore: apply suggestions
2024-09-19 09:40:10 +00:00
discord9
d0f5b2ad7d fix: use gcc-10 in release dev build (#4741) 2024-09-19 09:34:06 +00:00
Yohan Wal
0295f8dbea docs: json datatype rfc (#4515)
* docs: json datatype rfc

* docs: turn to a jsonb proposal

* chore: fix typo

* feat: add store and query process

* fix: typo

* fix: use query nodes instead of query plans

* feat: a detailed overview of query

* fix: grammar

* fix: use independent cast function

* fix: unify cast function

* fix: refine, make statements clear

* docs: update rfc according to impl

* docs: refine

* docs: fix wrong arrows

* docs: refine

* docs: fix some errors qaq
2024-09-19 05:49:10 +00:00
Ning Sun
8786624515 feat: improve support for postgres extended protocol (#4721)
* feat: improve support for postgres extended protocol

* fix: lint fix

* fix: test code

* fix: adopt upstream

* refactor: remove dup code

* refactor: avoid copy on error message
2024-09-19 05:30:56 +00:00
shuiyisong
52d627e37d chore: add log ingest interceptor (#4734)
* chore: add log ingest interceptor

* chore: rename

* chore: update interceptor signature
2024-09-19 05:14:47 +00:00
Lei, HUANG
b5f7138d33 refactor(tables): improve tables performance (#4737)
* chore: cherrypick 52e8eebb2dbbbe81179583c05094004a5eedd7fd

* refactor/tables: Change variable from immutable to mutable in KvBackendCatalogManager's method

* refactor/tables: Replace unbounded channel with bounded and use semaphore for concurrency control in KvBackendCatalogManager

* refactor/tables: Add common-runtime dependency and update KvBackendCatalogManager to use common_runtime::spawn_global

* refactor/tables: Await on sending error through channel in KvBackendCatalogManager
2024-09-19 04:44:02 +00:00
Ning Sun
08bd40333c feat: add an option to turn on compression for arrow output (#4730)
* feat: add an option to turn on compression for arrow output

* fix: typo
2024-09-19 04:38:41 +00:00
discord9
d1e0602c76 fix: opensrv Use After Free update (#4732)
* chore: version skew

* fix: even more version skew

* feat: use `ring` instead of `aws-lc` for remove nasm assembler on windows

* feat: use `ring` for pgwire

* feat: change to use `aws-lc-sys` on windows instead

* feat: change back to use `ring`

* chore: provide CryptoProvider

* feat: use upstream repo

* feat: install ring crypto lib in main

* chore: use same fn to install in tests

* feat: make pgwire use `ring`
2024-09-19 04:12:13 +00:00
Weny Xu
befb6d85f0 fix: determine region role by using is_readonly (#4725)
fix: correct `is_writable` behavior
2024-09-18 22:17:39 +00:00
Yohan Wal
f73fb82133 feat: add respective json_is UDFs for JSON type (#4726)
* feat: add respective json_is UDFs

* refactor: rename to_json to parse_json

* chore: happy clippy

* chore: some rename

* fix: small fixes
2024-09-18 11:07:30 +00:00
shuiyisong
50b3bb4c0d fix: sort cargo toml (#4735) 2024-09-18 09:19:05 +00:00
540 changed files with 25252 additions and 9146 deletions

View File

@@ -50,7 +50,7 @@ runs:
BUILDX_MULTI_PLATFORM_BUILD=all \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image
shell: bash
@@ -61,7 +61,7 @@ runs:
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-android image # Only build image for amd64 platform.
shell: bash
@@ -71,6 +71,6 @@ runs:
BASE_IMAGE=android \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} && \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -269,6 +269,13 @@ jobs:
- name: Install cargo-gc-bin
shell: bash
run: cargo install cargo-gc-bin
- name: Check aws-lc-sys will not build
shell: bash
run: |
if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then
echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves."
exit 1
fi
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
@@ -435,6 +442,13 @@ jobs:
minio: true
kafka: true
values: "with-remote-wal.yaml"
include:
- target: "fuzz_migrate_mito_regions"
mode:
name: "Local WAL"
minio: true
kafka: false
values: "with-minio.yaml"
steps:
- name: Remove unused software
run: |
@@ -523,7 +537,7 @@ jobs:
with:
image-registry: localhost:5001
values-filename: ${{ matrix.mode.values }}
enable-region-failover: true
enable-region-failover: ${{ matrix.mode.kafka }}
- name: Port forward (mysql)
run: |
kubectl port-forward service/my-greptimedb-frontend 4002:4002 -n my-greptimedb&

2676
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,7 @@ members = [
"src/common/mem-prof",
"src/common/meta",
"src/common/plugins",
"src/common/pprof",
"src/common/procedure",
"src/common/procedure-test",
"src/common/query",
@@ -64,7 +65,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.9.3"
version = "0.9.5"
edition = "2021"
license = "Apache-2.0"
@@ -90,7 +91,7 @@ aquamarine = "0.3"
arrow = { version = "51.0.0", features = ["prettyprint"] }
arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "51.0"
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4"] }
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "51.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
@@ -99,7 +100,7 @@ base64 = "0.21"
bigdecimal = "0.4.2"
bitflags = "2.4.1"
bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] }
bytes = { version = "1.7", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
@@ -120,13 +121,13 @@ etcd-client = { version = "0.13" }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "973f49cde88a582fb65755cc572ebcf6fb93ccf7" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b4d301184eb0d01fd4d1042fcc7c5dfb54f3c1e3" }
humantime = "2.1"
humantime-serde = "1.1"
itertools = "0.10"
jsonb = { git = "https://github.com/CookiePieWw/jsonb.git", rev = "d0166c130fce903bf6c58643417a3173a6172d31", default-features = false }
jsonb = { git = "https://github.com/datafuselabs/jsonb.git", rev = "46ad50fc71cf75afbf98eec455f7892a6387c1fc", default-features = false }
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
moka = "0.12"
notify = "6.1"
@@ -137,15 +138,18 @@ opentelemetry-proto = { version = "0.5", features = [
"metrics",
"trace",
"with-serde",
"logs",
] }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.4" }
promql-parser = { version = "0.4.1" }
prost = "0.12"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
ratelimit = "0.9"
regex = "1.8"
regex-automata = { version = "0.4" }
reqwest = { version = "0.12", default-features = false, features = [
@@ -165,11 +169,11 @@ schemars = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
shadow-rs = "0.31"
shadow-rs = "0.35"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
similar-asserts = "1.6.0"
# on branch v0.44.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
"visitor",
@@ -208,6 +212,7 @@ common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-plugins = { path = "src/common/plugins" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-query = { path = "src/common/query" }
@@ -245,9 +250,18 @@ store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" }
[patch.crates-io]
# change all rustls dependencies to use our fork to default to `ring` to make it "just work"
hyper-rustls = { git = "https://github.com/GreptimeTeam/hyper-rustls" }
rustls = { git = "https://github.com/GreptimeTeam/rustls" }
tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls" }
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "80eb97c24c88af4dd9a86f8bbaf50e741d4eb8cd"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
[profile.release]
debug = 1

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-06-06-b4b105ad-20240827021230
DEV_BUILDER_IMAGE_TAG ?= 2024-10-19-a5c00e85-20241024184445
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu

View File

@@ -17,6 +17,7 @@
| `default_timezone` | String | Unset | The default timezone of the server. |
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -82,6 +83,7 @@
| `wal.backoff_max` | String | `10s` | The maximum backoff delay.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_base` | Integer | `2` | The exponential backoff rate, i.e. next backoff = base * current backoff.<br/>**It's only used when the provider is `kafka`**. |
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
@@ -114,7 +116,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size`. |
@@ -160,8 +164,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -246,8 +255,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -311,8 +325,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -335,6 +354,7 @@
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
@@ -392,6 +412,7 @@
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_index` | Bool | `true` | Whether to enable WAL index creation.<br/>**It's only used when the provider is `kafka`**. |
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
@@ -418,7 +439,9 @@
| `region_engine.mito.worker_request_batch_size` | Integer | `64` | Max batch size for a worker to handle requests. |
| `region_engine.mito.manifest_checkpoint_distance` | Integer | `10` | Number of meta action updated to trigger a new checkpoint for the manifest. |
| `region_engine.mito.compress_manifest` | Bool | `false` | Whether to compress manifest and checkpoint file by gzip (default false). |
| `region_engine.mito.max_background_jobs` | Integer | `4` | Max number of running background jobs |
| `region_engine.mito.max_background_flushes` | Integer | Auto | Max number of running background flush jobs (default: 1/2 of cpu cores). |
| `region_engine.mito.max_background_compactions` | Integer | Auto | Max number of running background compaction jobs (default: 1/4 of cpu cores). |
| `region_engine.mito.max_background_purges` | Integer | Auto | Max number of running background purge jobs (default: number of cpu cores). |
| `region_engine.mito.auto_flush_interval` | String | `1h` | Interval to auto flush a region if it has not flushed yet. |
| `region_engine.mito.global_write_buffer_size` | String | Auto | Global write buffer size for all regions. If not set, it's default to 1/8 of OS memory with a max limitation of 1GB. |
| `region_engine.mito.global_write_buffer_reject_size` | String | Auto | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
@@ -462,8 +485,13 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `export_metrics` | -- | -- | The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.<br/>This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape. |
| `export_metrics.enable` | Bool | `false` | whether enable export metrics. |
| `export_metrics.write_interval` | String | `30s` | The interval of export metrics. |
@@ -508,7 +536,12 @@
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
| `logging.append_stdout` | Bool | `true` | Whether to append logs to stdout. |
| `logging.log_format` | String | `text` | The log format. Can be `text`/`json`. |
| `logging.max_log_files` | Integer | `720` | The maximum amount of log files. |
| `logging.tracing_sample_ratio` | -- | -- | The percentage of tracing will be sampled and exported.<br/>Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.<br/>ratio > 1 are treated as 1. Fractions < 0 are treated as 0 |
| `logging.tracing_sample_ratio.default_ratio` | Float | `1.0` | -- |
| `logging.slow_query` | -- | -- | The slow query log options. |
| `logging.slow_query.enable` | Bool | `false` | Whether to enable slow query log. |
| `logging.slow_query.threshold` | String | Unset | The threshold of slow query. |
| `logging.slow_query.sample_ratio` | Float | Unset | The sampling ratio of slow query log. The value should be in the range of (0, 1]. |
| `tracing` | -- | -- | The tracing options. Only effect when compiled with `tokio-console` feature. |
| `tracing.tokio_console_addr` | String | Unset | The tokio console address. |

View File

@@ -19,6 +19,9 @@ enable_telemetry = true
## Parallelism of initializing regions.
init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"
@@ -210,6 +213,17 @@ create_index = true
## **It's only used when the provider is `kafka`**.
dump_index_interval = "60s"
## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
@@ -402,8 +416,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false
## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2
## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8
## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -577,12 +600,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

View File

@@ -78,12 +78,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The tracing options. Only effect when compiled with `tokio-console` feature.
[tracing]
## The tokio console address.

View File

@@ -185,12 +185,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

View File

@@ -172,12 +172,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

View File

@@ -15,6 +15,9 @@ init_regions_in_background = false
## Parallelism of initializing regions.
init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## The runtime options.
#+ [runtime]
## The number of threads to execute the runtime for global read operations.
@@ -234,6 +237,17 @@ backoff_base = 2
## **It's only used when the provider is `kafka`**.
backoff_deadline = "5mins"
## Ignore missing entries during read WAL.
## **It's only used when the provider is `kafka`**.
##
## This option ensures that when Kafka messages are deleted, the system
## can still successfully replay memtable data without throwing an
## out-of-range error.
## However, enabling this option might lead to unexpected data loss,
## as the system will skip over missing entries instead of treating
## them as critical errors.
overwrite_entry_start_id = false
# The Kafka SASL configuration.
# **It's only used when the provider is `kafka`**.
# Available SASL mechanisms:
@@ -440,8 +454,17 @@ manifest_checkpoint_distance = 10
## Whether to compress manifest and checkpoint file by gzip (default false).
compress_manifest = false
## Max number of running background jobs
max_background_jobs = 4
## Max number of running background flush jobs (default: 1/2 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_flushes = 4
## Max number of running background compaction jobs (default: 1/4 of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_compactions = 2
## Max number of running background purge jobs (default: number of cpu cores).
## @toml2docs:none-default="Auto"
#+ max_background_purges = 8
## Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h"
@@ -621,12 +644,28 @@ append_stdout = true
## The log format. Can be `text`/`json`.
log_format = "text"
## The maximum amount of log files.
max_log_files = 720
## The percentage of tracing will be sampled and exported.
## Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1.
## ratio > 1 are treated as 1. Fractions < 0 are treated as 0
[logging.tracing_sample_ratio]
default_ratio = 1.0
## The slow query log options.
[logging.slow_query]
## Whether to enable slow query log.
enable = false
## The threshold of slow query.
## @toml2docs:none-default
threshold = "10s"
## The sampling ratio of slow query log. The value should be in the range of (0, 1].
## @toml2docs:none-default
sample_ratio = 1.0
## The datanode can export its metrics and send to Prometheus compatible service (e.g. send to `greptimedb` itself) from remote-write API.
## This is only used for `greptimedb` to export its own metrics internally. It's different from prometheus scrape.
[export_metrics]

View File

@@ -0,0 +1,50 @@
#!/bin/bash
set -euxo pipefail
cd "$(mktemp -d)"
# Fix version to v1.6.6, this is different than the latest version in original install script in
# https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh
base_url="https://github.com/cargo-bins/cargo-binstall/releases/download/v1.6.6/cargo-binstall-"
os="$(uname -s)"
if [ "$os" == "Darwin" ]; then
url="${base_url}universal-apple-darwin.zip"
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
unzip cargo-binstall-universal-apple-darwin.zip
elif [ "$os" == "Linux" ]; then
machine="$(uname -m)"
if [ "$machine" == "armv7l" ]; then
machine="armv7"
fi
target="${machine}-unknown-linux-musl"
if [ "$machine" == "armv7" ]; then
target="${target}eabihf"
fi
url="${base_url}${target}.tgz"
curl -L --proto '=https' --tlsv1.2 -sSf "$url" | tar -xvzf -
elif [ "${OS-}" = "Windows_NT" ]; then
machine="$(uname -m)"
target="${machine}-pc-windows-msvc"
url="${base_url}${target}.zip"
curl -LO --proto '=https' --tlsv1.2 -sSf "$url"
unzip "cargo-binstall-${target}.zip"
else
echo "Unsupported OS ${os}"
exit 1
fi
./cargo-binstall -y --force cargo-binstall
CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
if ! [[ ":$PATH:" == *":$CARGO_HOME/bin:"* ]]; then
if [ -n "${CI:-}" ] && [ -n "${GITHUB_PATH:-}" ]; then
echo "$CARGO_HOME/bin" >> "$GITHUB_PATH"
else
echo
printf "\033[0;31mYour path is missing %s, you might want to add it.\033[0m\n" "$CARGO_HOME/bin"
echo
fi
fi

View File

@@ -32,7 +32,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
RUN cargo install cargo-binstall --version 1.6.6 --locked
# compile from source take too long, so we use the precompiled binary instead
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -24,6 +24,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
python3.10 \
python3.10-dev
# https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106
# `aws-lc-sys` require gcc >= 10.3.0 to work, hence alias to use gcc-10
RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \
apt-get install -y gcc-10 g++-10 cpp-10 make cmake && \
ln -sf /usr/bin/gcc-10 /usr/bin/gcc && ln -sf /usr/bin/g++-10 /usr/bin/g++ && \
ln -sf /usr/bin/gcc-10 /usr/bin/cc && \
ln -sf /usr/bin/g++-10 /usr/bin/cpp && ln -sf /usr/bin/g++-10 /usr/bin/c++ && \
cc --version && gcc --version && g++ --version && cpp --version && c++ --version
# Remove Python 3.8 and install pip.
RUN apt-get -y purge python3.8 && \
apt-get -y autoremove && \
@@ -57,7 +66,9 @@ RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
RUN cargo install cargo-binstall --version 1.6.6 --locked
# compile from source take too long, so we use the precompiled binary instead
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -48,4 +48,4 @@ Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [que
## Addition
- You can tune GreptimeDB's configuration to get better performance.
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options).
- You can setup GreptimeDB to use S3 as storage, see [here](https://docs.greptime.com/user-guide/deployments/configuration#storage-options).

View File

@@ -1,15 +1,9 @@
# Profiling CPU
## Build GreptimeDB with `pprof` feature
```bash
cargo build --features=pprof
```
## HTTP API
Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto).
```bash
curl -s '0:4000/v1/prof/cpu' > /tmp/pprof.out
curl -s '0:4000/debug/prof/cpu' > /tmp/pprof.out
```
Then you can use `pprof` command with the protobuf file.
@@ -19,10 +13,10 @@ go tool pprof -top /tmp/pprof.out
Sample at 99 Hertz, for 60 seconds, output report in flamegraph format.
```bash
curl -s '0:4000/v1/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
curl -s '0:4000/debug/prof/cpu?seconds=60&output=flamegraph' > /tmp/pprof.svg
```
Sample at 49 Hertz, for 10 seconds, output report in text format.
```bash
curl -s '0:4000/v1/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
curl -s '0:4000/debug/prof/cpu?seconds=10&frequency=49&output=text' > /tmp/pprof.txt
```

View File

@@ -12,16 +12,10 @@ brew install jemalloc
sudo apt install libjemalloc-dev
```
### [flamegraph](https://github.com/brendangregg/FlameGraph)
### [flamegraph](https://github.com/brendangregg/FlameGraph)
```bash
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with `mem-prof` feature.
```bash
cargo build --features=mem-prof
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
## Profiling
@@ -35,7 +29,7 @@ MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone sta
Dump memory profiling data through HTTP API:
```bash
curl localhost:4000/v1/prof/mem > greptime.hprof
curl localhost:4000/debug/prof/mem > greptime.hprof
```
You can periodically dump profiling data and compare them to find the delta memory usage.
@@ -45,6 +39,9 @@ You can periodically dump profiling data and compare them to find the delta memo
To create flamegraph according to dumped profiling data:
```bash
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
```
sudo apt install -y libjemalloc-dev
jeprof <path_to_greptime_binary> <profile_data> --collapse | ./flamegraph.pl > mem-prof.svg
jeprof <path_to_greptime_binary> --base <baseline_prof> <profile_data> --collapse | ./flamegraph.pl > output.svg
```

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -0,0 +1,197 @@
---
Feature Name: Json Datatype
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/4230
Date: 2024-8-6
Author: "Yuhan Wang <profsyb@gmail.com>"
---
# Summary
This RFC proposes a method for storing and querying JSON data in the database.
# Motivation
JSON is widely used across various scenarios. Direct support for writing and querying JSON can significantly enhance the database's flexibility.
# Details
## Storage and Query
GreptimeDB's type system is built on Arrow/DataFusion, where each data type in GreptimeDB corresponds to a data type in Arrow/DataFusion. The proposed JSON type will be implemented on top of the existing `Binary` type, leveraging the current `datatype::value::Value` and `datatype::vectors::BinaryVector` implementations, utilizing the JSONB format as the encoding of JSON data. JSON data is stored and processed similarly to binary data within the storage layer and query engine.
This approach brings problems when dealing with insertions and queries of JSON columns.
## Insertion
Users commonly write JSON data as strings. Thus we need to make conversions between string and JSONB. There are 2 ways to do this:
1. MySQL and PostgreSQL servers provide auto-conversions between strings and JSONB. When a string is inserted into a JSON column, the server will try to parse the string as JSON and convert it to JSONB. The non-JSON strings will be rejected.
2. A function `parse_json` is provided to convert string to JSONB. If the string is not a valid JSON string, the function will return an error.
For example, in MySQL client:
```SQL
CREATE TABLE IF NOT EXISTS test (
ts TIMESTAMP TIME INDEX,
a INT,
b JSON
);
INSERT INTO test VALUES(
0,
0,
'{
"name": "jHl2oDDnPc1i2OzlP5Y",
"timestamp": "2024-07-25T04:33:11.369386Z",
"attributes": { "event_attributes": 48.28667 }
}'
);
INSERT INTO test VALUES(
0,
0,
parse_json('{
"name": "jHl2oDDnPc1i2OzlP5Y",
"timestamp": "2024-07-25T04:33:11.369386Z",
"attributes": { "event_attributes": 48.28667 }
}')
);
```
Are both valid.
The dataflow of the insertion process is as follows:
```
Insert JSON strings directly through client:
Parse Insert
String(Serialized JSON)┌──────────┐Arrow Binary(JSONB)┌──────┐Arrow Binary(JSONB)
Client ---------------------->│ Server │------------------>│ Mito │------------------> Storage
└──────────┘ └──────┘
(Server identifies JSON type and performs auto-conversion)
Insert JSON strings through parse_json function:
Parse Insert
String(Serialized JSON)┌──────────┐String(Serialized JSON)┌─────┐Arrow Binary(JSONB)┌──────┐Arrow Binary(JSONB)
Client ---------------------->│ Server │---------------------->│ UDF │------------------>│ Mito │------------------> Storage
└──────────┘ └─────┘ └──────┘
(Conversion is performed by UDF inside Query Engine)
```
Servers identify JSON column through column schema and perform auto-conversions. But when using prepared statements and binding parameters, the corresponding cached plans in datafusion generated by prepared statements cannot identify JSON columns. Under this circumstance, the servers identify JSON columns through the given parameters and perform auto-conversions.
The following is an example of inserting JSON data through prepared statements:
```Rust
sqlx::query(
"create table test(ts timestamp time index, j json)",
)
.execute(&pool)
.await
.unwrap();
let json = serde_json::json!({
"code": 200,
"success": true,
"payload": {
"features": [
"serde",
"json"
],
"homepage": null
}
});
// Valid, can identify serde_json::Value as JSON type
sqlx::query("insert into test values($1, $2)")
.bind(i)
.bind(json)
.execute(&pool)
.await
.unwrap();
// Invalid, cannot identify String as JSON type
sqlx::query("insert into test values($1, $2)")
.bind(i)
.bind(json.to_string())
.execute(&pool)
.await
.unwrap();
```
## Query
Correspondingly, users prefer to display JSON data as strings. Thus we need to make conversions between JSON data and strings before presenting JSON data. There are also 2 ways to do this: auto-conversions on MySQL and PostgreSQL servers, and function `json_to_string`.
For example, in MySQL client:
```SQL
SELECT b FROM test;
SELECT json_to_string(b) FROM test;
```
Will both return the JSON as human-readable strings.
Specifically, to perform auto-conversions, we attach a message to JSON data in the `metadata` of `Field` in Arrow/Datafusion schema when scanning a JSON column. Frontend servers could identify JSON data and convert it to strings.
The dataflow of the query process is as follows:
```
Query directly through client:
Decode Scan
String(Serialized JSON)┌──────────┐Arrow Binary(JSONB)┌──────────────┐Arrow Binary(JSONB)
Client <----------------------│ Server │<------------------│ Query Engine │<----------------- Storage
└──────────┘ └──────────────┘
(Server identifies JSON type and performs auto-conversion based on column metadata)
Query through json_to_string function:
Scan & Decode
String(Serialized JSON)┌──────────┐String(Serialized JSON)┌──────────────┐Arrow Binary(JSONB)
Client <----------------------│ Server │<----------------------│ Query Engine │<----------------- Storage
└──────────┘ └──────────────┘
(Conversion is performed by UDF inside Query Engine)
```
However, if a function uses JSON type as its return type, the metadata method mentioned above is not applicable. Thus the functions of JSON type should specify the return type explicitly instead of returning a JSON type, such as `json_get_int` and `json_get_float` which return corresponding data of `INT` and `FLOAT` type respectively.
## Functions
Similar to the common JSON type, JSON data can be queried with functions.
For example:
```SQL
CREATE TABLE IF NOT EXISTS test (
ts TIMESTAMP TIME INDEX,
a INT,
b JSON
);
INSERT INTO test VALUES(
0,
0,
'{
"name": "jHl2oDDnPc1i2OzlP5Y",
"timestamp": "2024-07-25T04:33:11.369386Z",
"attributes": { "event_attributes": 48.28667 }
}'
);
SELECT json_get_string(b, 'name') FROM test;
+---------------------+
| b.name |
+---------------------+
| jHl2oDDnPc1i2OzlP5Y |
+---------------------+
SELECT json_get_float(b, 'attributes.event_attributes') FROM test;
+--------------------------------+
| b.attributes.event_attributes |
+--------------------------------+
| 48.28667 |
+--------------------------------+
```
And more functions can be added in the future.
# Drawbacks
As a general purpose JSON data type, JSONB may not be as efficient as specialized data types for specific scenarios.
The auto-conversion mechanism is not supported in all scenarios. We need to find workarounds for these scenarios.
# Alternatives
Extract and flatten JSON schema to store in a structured format through pipeline. For nested data, we can provide nested types like `STRUCT` or `ARRAY`.

View File

@@ -409,7 +409,39 @@
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"fieldMinMax": false,
"mappings": [],
@@ -438,18 +470,16 @@
},
"id": 27,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"text": {},
"textMode": "auto",
"wideLayout": true
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
@@ -467,7 +497,7 @@
}
],
"title": "CPU",
"type": "stat"
"type": "timeseries"
},
{
"datasource": {
@@ -477,7 +507,39 @@
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"decimals": 0,
"fieldMinMax": false,
@@ -503,18 +565,16 @@
},
"id": 28,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": false
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"text": {},
"textMode": "auto",
"wideLayout": true
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
"targets": [
@@ -532,7 +592,7 @@
}
],
"title": "Memory",
"type": "stat"
"type": "timeseries"
},
{
"collapsed": false,
@@ -3335,6 +3395,6 @@
"timezone": "",
"title": "GreptimeDB",
"uid": "e7097237-669b-4f8d-b751-13067afbfb68",
"version": 15,
"version": 16,
"weekStart": ""
}

View File

@@ -1,3 +1,2 @@
[toolchain]
channel = "nightly-2024-06-06"
channel = "nightly-2024-10-19"

View File

@@ -17,10 +17,11 @@ use std::sync::Arc;
use common_base::BitVec;
use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECISION};
use common_decimal::Decimal128;
use common_time::interval::IntervalUnit;
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Interval, Timestamp};
use common_time::{
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::scalars::ScalarVector;
use datatypes::types::{
@@ -456,13 +457,11 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
TimeUnit::Microsecond => values.time_microsecond_values.push(val.value()),
TimeUnit::Nanosecond => values.time_nanosecond_values.push(val.value()),
},
Value::Interval(val) => match val.unit() {
IntervalUnit::YearMonth => values.interval_year_month_values.push(val.to_i32()),
IntervalUnit::DayTime => values.interval_day_time_values.push(val.to_i64()),
IntervalUnit::MonthDayNano => values
.interval_month_day_nano_values
.push(convert_i128_to_interval(val.to_i128())),
},
Value::IntervalYearMonth(val) => values.interval_year_month_values.push(val.to_i32()),
Value::IntervalDayTime(val) => values.interval_day_time_values.push(val.to_i64()),
Value::IntervalMonthDayNano(val) => values
.interval_month_day_nano_values
.push(convert_month_day_nano_to_pb(val)),
Value::Decimal128(val) => values.decimal128_values.push(convert_to_pb_decimal128(val)),
Value::List(_) | Value::Duration(_) => unreachable!(),
});
@@ -507,14 +506,12 @@ fn ddl_request_type(request: &DdlRequest) -> &'static str {
}
}
/// Converts an i128 value to google protobuf type [IntervalMonthDayNano].
pub fn convert_i128_to_interval(v: i128) -> v1::IntervalMonthDayNano {
let interval = Interval::from_i128(v);
let (months, days, nanoseconds) = interval.to_month_day_nano();
/// Converts an interval to google protobuf type [IntervalMonthDayNano].
pub fn convert_month_day_nano_to_pb(v: IntervalMonthDayNano) -> v1::IntervalMonthDayNano {
v1::IntervalMonthDayNano {
months,
days,
nanoseconds,
months: v.months,
days: v.days,
nanoseconds: v.nanoseconds,
}
}
@@ -562,11 +559,15 @@ pub fn pb_value_to_value_ref<'a>(
ValueData::TimeMillisecondValue(t) => ValueRef::Time(Time::new_millisecond(*t)),
ValueData::TimeMicrosecondValue(t) => ValueRef::Time(Time::new_microsecond(*t)),
ValueData::TimeNanosecondValue(t) => ValueRef::Time(Time::new_nanosecond(*t)),
ValueData::IntervalYearMonthValue(v) => ValueRef::Interval(Interval::from_i32(*v)),
ValueData::IntervalDayTimeValue(v) => ValueRef::Interval(Interval::from_i64(*v)),
ValueData::IntervalYearMonthValue(v) => {
ValueRef::IntervalYearMonth(IntervalYearMonth::from_i32(*v))
}
ValueData::IntervalDayTimeValue(v) => {
ValueRef::IntervalDayTime(IntervalDayTime::from_i64(*v))
}
ValueData::IntervalMonthDayNanoValue(v) => {
let interval = Interval::from_month_day_nano(v.months, v.days, v.nanoseconds);
ValueRef::Interval(interval)
let interval = IntervalMonthDayNano::new(v.months, v.days, v.nanoseconds);
ValueRef::IntervalMonthDayNano(interval)
}
ValueData::Decimal128Value(v) => {
// get precision and scale from datatype_extension
@@ -657,7 +658,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
IntervalType::MonthDayNano(_) => {
Arc::new(IntervalMonthDayNanoVector::from_iter_values(
values.interval_month_day_nano_values.iter().map(|x| {
Interval::from_month_day_nano(x.months, x.days, x.nanoseconds).to_i128()
IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).to_i128()
}),
))
}
@@ -802,18 +803,18 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values
.interval_year_month_values
.into_iter()
.map(|v| Value::Interval(Interval::from_i32(v)))
.map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::DayTime(_)) => values
.interval_day_time_values
.into_iter()
.map(|v| Value::Interval(Interval::from_i64(v)))
.map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values
.interval_month_day_nano_values
.into_iter()
.map(|v| {
Value::Interval(Interval::from_month_day_nano(
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(
v.months,
v.days,
v.nanoseconds,
@@ -941,18 +942,16 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
value_data: Some(ValueData::TimeNanosecondValue(v.value())),
},
},
Value::Interval(v) => match v.unit() {
IntervalUnit::YearMonth => v1::Value {
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
},
IntervalUnit::DayTime => v1::Value {
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
},
IntervalUnit::MonthDayNano => v1::Value {
value_data: Some(ValueData::IntervalMonthDayNanoValue(
convert_i128_to_interval(v.to_i128()),
)),
},
Value::IntervalYearMonth(v) => v1::Value {
value_data: Some(ValueData::IntervalYearMonthValue(v.to_i32())),
},
Value::IntervalDayTime(v) => v1::Value {
value_data: Some(ValueData::IntervalDayTimeValue(v.to_i64())),
},
Value::IntervalMonthDayNano(v) => v1::Value {
value_data: Some(ValueData::IntervalMonthDayNanoValue(
convert_month_day_nano_to_pb(v),
)),
},
Value::Decimal128(v) => v1::Value {
value_data: Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
@@ -1044,13 +1043,11 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
TimeUnit::Microsecond => ValueData::TimeMicrosecondValue(v.value()),
TimeUnit::Nanosecond => ValueData::TimeNanosecondValue(v.value()),
}),
Value::Interval(v) => Some(match v.unit() {
IntervalUnit::YearMonth => ValueData::IntervalYearMonthValue(v.to_i32()),
IntervalUnit::DayTime => ValueData::IntervalDayTimeValue(v.to_i64()),
IntervalUnit::MonthDayNano => {
ValueData::IntervalMonthDayNanoValue(convert_i128_to_interval(v.to_i128()))
}
}),
Value::IntervalYearMonth(v) => Some(ValueData::IntervalYearMonthValue(v.to_i32())),
Value::IntervalDayTime(v) => Some(ValueData::IntervalDayTimeValue(v.to_i64())),
Value::IntervalMonthDayNano(v) => Some(ValueData::IntervalMonthDayNanoValue(
convert_month_day_nano_to_pb(v),
)),
Value::Decimal128(v) => Some(ValueData::Decimal128Value(convert_to_pb_decimal128(v))),
Value::List(_) | Value::Duration(_) => unreachable!(),
},
@@ -1061,6 +1058,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
mod tests {
use std::sync::Arc;
use common_time::interval::IntervalUnit;
use datatypes::types::{
Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType,
@@ -1506,11 +1504,11 @@ mod tests {
#[test]
fn test_convert_i128_to_interval() {
let i128_val = 3000;
let interval = convert_i128_to_interval(i128_val);
let i128_val = 3;
let interval = convert_month_day_nano_to_pb(IntervalMonthDayNano::from_i128(i128_val));
assert_eq!(interval.months, 0);
assert_eq!(interval.days, 0);
assert_eq!(interval.nanoseconds, 3000);
assert_eq!(interval.nanoseconds, 3);
}
#[test]
@@ -1590,9 +1588,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_year_month(1_i32)),
Value::Interval(Interval::from_year_month(2_i32)),
Value::Interval(Interval::from_year_month(3_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)),
];
assert_eq!(expect, actual);
@@ -1605,9 +1603,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_i64(1_i64)),
Value::Interval(Interval::from_i64(2_i64)),
Value::Interval(Interval::from_i64(3_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)),
];
assert_eq!(expect, actual);
@@ -1636,9 +1634,9 @@ mod tests {
},
);
let expect = vec![
Value::Interval(Interval::from_month_day_nano(1, 2, 3)),
Value::Interval(Interval::from_month_day_nano(5, 6, 7)),
Value::Interval(Interval::from_month_day_nano(9, 10, 11)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)),
];
assert_eq!(expect, actual);
}

View File

@@ -75,6 +75,16 @@ pub enum Password<'a> {
PgMD5(HashedPassword<'a>, Salt<'a>),
}
impl Password<'_> {
pub fn r#type(&self) -> &str {
match self {
Password::PlainText(_) => "plain_text",
Password::MysqlNativePassword(_, _) => "mysql_native_password",
Password::PgMD5(_, _) => "pg_md5",
}
}
}
pub fn auth_mysql(
auth_data: HashedPassword,
salt: Salt,

View File

@@ -89,7 +89,7 @@ impl ErrorExt for Error {
Error::FileWatch { .. } => StatusCode::InvalidArguments,
Error::InternalState { .. } => StatusCode::Unexpected,
Error::Io { .. } => StatusCode::StorageUnavailable,
Error::AuthBackend { .. } => StatusCode::Internal,
Error::AuthBackend { source, .. } => source.status_code(),
Error::UserNotFound { .. } => StatusCode::UserNotFound,
Error::UnsupportedPasswordType { .. } => StatusCode::UnsupportedPasswordType,

View File

@@ -57,6 +57,11 @@ pub trait UserProvider: Send + Sync {
self.authorize(catalog, schema, &user_info).await?;
Ok(user_info)
}
/// Returns whether this user provider implementation is backed by an external system.
fn external(&self) -> bool {
false
}
}
fn load_credential_from_file(filepath: &str) -> Result<Option<HashMap<String, Vec<u8>>>> {

View File

@@ -33,7 +33,7 @@ impl StaticUserProvider {
value: value.to_string(),
msg: "StaticUserProviderOption must be in format `<option>:<value>`",
})?;
return match mode {
match mode {
"file" => {
let users = load_credential_from_file(content)?
.context(InvalidConfigSnafu {
@@ -58,7 +58,7 @@ impl StaticUserProvider {
msg: "StaticUserProviderOption must be in format `file:<path>` or `cmd:<values>`",
}
.fail(),
};
}
}
}

View File

@@ -22,8 +22,10 @@ common-config.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-meta.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true
@@ -48,6 +50,7 @@ sql.workspace = true
store-api.workspace = true
table.workspace = true
tokio.workspace = true
tokio-stream = "0.1"
[dev-dependencies]
cache.workspace = true

View File

@@ -50,13 +50,20 @@ pub enum Error {
source: BoxedError,
},
#[snafu(display("Failed to list nodes in cluster: {source}"))]
#[snafu(display("Failed to list nodes in cluster"))]
ListNodes {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to region stats in cluster"))]
ListRegionStats {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to list flows in catalog {catalog}"))]
ListFlows {
#[snafu(implicit)]
@@ -82,6 +89,32 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to get information extension client"))]
GetInformationExtension {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to list procedures"))]
ListProcedures {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Procedure id not found"))]
ProcedureIdNotFound {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("convert proto data error"))]
ConvertProtoData {
#[snafu(implicit)]
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to re-compile script due to internal error"))]
CompileScriptInternal {
#[snafu(implicit)]
@@ -266,7 +299,9 @@ impl ErrorExt for Error {
| Error::FindRegionRoutes { .. }
| Error::CacheNotFound { .. }
| Error::CastManager { .. }
| Error::Json { .. } => StatusCode::Unexpected,
| Error::Json { .. }
| Error::GetInformationExtension { .. }
| Error::ProcedureIdNotFound { .. } => StatusCode::Unexpected,
Error::ViewPlanColumnsChanged { .. } => StatusCode::InvalidArguments,
@@ -283,7 +318,10 @@ impl ErrorExt for Error {
| Error::ListNodes { source, .. }
| Error::ListSchemas { source, .. }
| Error::ListTables { source, .. }
| Error::ListFlows { source, .. } => source.status_code(),
| Error::ListFlows { source, .. }
| Error::ListProcedures { source, .. }
| Error::ListRegionStats { source, .. }
| Error::ConvertProtoData { source, .. } => source.status_code(),
Error::CreateTable { source, .. } => source.status_code(),

View File

@@ -21,7 +21,6 @@ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID,
PG_CATALOG_NAME,
};
use common_config::Mode;
use common_error::ext::BoxedError;
use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef};
use common_meta::key::catalog_name::CatalogNameKey;
@@ -31,9 +30,9 @@ use common_meta::key::table_info::TableInfoValue;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_procedure::ProcedureManagerRef;
use futures_util::stream::BoxStream;
use futures_util::{StreamExt, TryStreamExt};
use meta_client::client::MetaClient;
use moka::sync::Cache;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
@@ -42,12 +41,14 @@ use table::dist_table::DistTable;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::table_name::TableName;
use table::TableRef;
use tokio::sync::Semaphore;
use tokio_stream::wrappers::ReceiverStream;
use crate::error::{
CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu,
ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::information_schema::{InformationExtensionRef, InformationSchemaProvider};
use crate::kvbackend::TableCacheRef;
use crate::system_schema::pg_catalog::PGCatalogProvider;
use crate::system_schema::SystemSchemaProvider;
@@ -60,27 +61,31 @@ use crate::CatalogManager;
/// comes from `SystemCatalog`, which is static and read-only.
#[derive(Clone)]
pub struct KvBackendCatalogManager {
mode: Mode,
meta_client: Option<Arc<MetaClient>>,
/// Provides the extension methods for the `information_schema` tables
information_extension: InformationExtensionRef,
/// Manages partition rules.
partition_manager: PartitionRuleManagerRef,
/// Manages table metadata.
table_metadata_manager: TableMetadataManagerRef,
/// A sub-CatalogManager that handles system tables
system_catalog: SystemCatalog,
/// Cache registry for all caches.
cache_registry: LayeredCacheRegistryRef,
/// Only available in `Standalone` mode.
procedure_manager: Option<ProcedureManagerRef>,
}
const CATALOG_CACHE_MAX_CAPACITY: u64 = 128;
impl KvBackendCatalogManager {
pub fn new(
mode: Mode,
meta_client: Option<Arc<MetaClient>>,
information_extension: InformationExtensionRef,
backend: KvBackendRef,
cache_registry: LayeredCacheRegistryRef,
procedure_manager: Option<ProcedureManagerRef>,
) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
mode,
meta_client,
information_extension,
partition_manager: Arc::new(PartitionRuleManager::new(
backend.clone(),
cache_registry
@@ -104,23 +109,19 @@ impl KvBackendCatalogManager {
backend,
},
cache_registry,
procedure_manager,
})
}
/// Returns the server running mode.
pub fn running_mode(&self) -> &Mode {
&self.mode
}
pub fn view_info_cache(&self) -> Result<ViewInfoCacheRef> {
self.cache_registry.get().context(CacheNotFoundSnafu {
name: "view_info_cache",
})
}
/// Returns the `[MetaClient]`.
pub fn meta_client(&self) -> Option<Arc<MetaClient>> {
self.meta_client.clone()
/// Returns the [`InformationExtension`].
pub fn information_extension(&self) -> InformationExtensionRef {
self.information_extension.clone()
}
pub fn partition_manager(&self) -> PartitionRuleManagerRef {
@@ -130,6 +131,10 @@ impl KvBackendCatalogManager {
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
&self.table_metadata_manager
}
pub fn procedure_manager(&self) -> Option<ProcedureManagerRef> {
self.procedure_manager.clone()
}
}
#[async_trait::async_trait]
@@ -179,21 +184,18 @@ impl CatalogManager for KvBackendCatalogManager {
schema: &str,
query_ctx: Option<&QueryContext>,
) -> Result<Vec<String>> {
let stream = self
let mut tables = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema);
let mut tables = stream
.tables(catalog, schema)
.map_ok(|(table_name, _)| table_name)
.try_collect::<Vec<_>>()
.await
.map_err(BoxedError::new)
.context(ListTablesSnafu { catalog, schema })?
.into_iter()
.map(|(k, _)| k)
.collect::<Vec<_>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema, query_ctx));
.context(ListTablesSnafu { catalog, schema })?;
Ok(tables.into_iter().collect())
tables.extend(self.system_catalog.table_names(schema, query_ctx));
Ok(tables)
}
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
@@ -303,36 +305,68 @@ impl CatalogManager for KvBackendCatalogManager {
}
});
let table_id_stream = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
.map_ok(|(_, v)| v.table_id());
const BATCH_SIZE: usize = 128;
let user_tables = try_stream!({
const CONCURRENCY: usize = 8;
let (tx, rx) = tokio::sync::mpsc::channel(64);
let metadata_manager = self.table_metadata_manager.clone();
let catalog = catalog.to_string();
let schema = schema.to_string();
let semaphore = Arc::new(Semaphore::new(CONCURRENCY));
common_runtime::spawn_global(async move {
let table_id_stream = metadata_manager
.table_name_manager()
.tables(&catalog, &schema)
.map_ok(|(_, v)| v.table_id());
// Split table ids into chunks
let mut table_id_chunks = table_id_stream.ready_chunks(BATCH_SIZE);
while let Some(table_ids) = table_id_chunks.next().await {
let table_ids = table_ids
let table_ids = match table_ids
.into_iter()
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(BoxedError::new)
.context(ListTablesSnafu { catalog, schema })?;
.context(ListTablesSnafu {
catalog: &catalog,
schema: &schema,
}) {
Ok(table_ids) => table_ids,
Err(e) => {
let _ = tx.send(Err(e)).await;
return;
}
};
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(&table_ids)
.await
.context(TableMetadataManagerSnafu)?;
let metadata_manager = metadata_manager.clone();
let tx = tx.clone();
let semaphore = semaphore.clone();
common_runtime::spawn_global(async move {
// we don't explicitly close the semaphore so just ignore the potential error.
let _ = semaphore.acquire().await;
let table_info_values = match metadata_manager
.table_info_manager()
.batch_get(&table_ids)
.await
.context(TableMetadataManagerSnafu)
{
Ok(table_info_values) => table_info_values,
Err(e) => {
let _ = tx.send(Err(e)).await;
return;
}
};
for table_info_value in table_info_values.into_values() {
yield build_table(table_info_value)?;
}
for table in table_info_values.into_values().map(build_table) {
if tx.send(table).await.is_err() {
return;
}
}
});
}
});
let user_tables = ReceiverStream::new(rx);
Box::pin(sys_tables.chain(user_tables))
}
}

View File

@@ -18,7 +18,9 @@ pub mod flows;
mod information_memory_table;
pub mod key_column_usage;
mod partitions;
mod procedure_info;
mod region_peers;
mod region_statistics;
mod runtime_metrics;
pub mod schemata;
mod table_constraints;
@@ -30,7 +32,11 @@ use std::collections::HashMap;
use std::sync::{Arc, Weak};
use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME};
use common_error::ext::ErrorExt;
use common_meta::cluster::NodeInfo;
use common_meta::datanode::RegionStat;
use common_meta::key::flow::FlowMetadataManager;
use common_procedure::ProcedureInfo;
use common_recordbatch::SendableRecordBatchStream;
use datatypes::schema::SchemaRef;
use lazy_static::lazy_static;
@@ -43,7 +49,7 @@ use views::InformationSchemaViews;
use self::columns::InformationSchemaColumns;
use super::{SystemSchemaProviderInner, SystemTable, SystemTableRef};
use crate::error::Result;
use crate::error::{Error, Result};
use crate::system_schema::information_schema::cluster_info::InformationSchemaClusterInfo;
use crate::system_schema::information_schema::flows::InformationSchemaFlows;
use crate::system_schema::information_schema::information_memory_table::get_schema_columns;
@@ -188,6 +194,16 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
self.catalog_name.clone(),
self.flow_metadata_manager.clone(),
)) as _),
PROCEDURE_INFO => Some(
Arc::new(procedure_info::InformationSchemaProcedureInfo::new(
self.catalog_manager.clone(),
)) as _,
),
REGION_STATISTICS => Some(Arc::new(
region_statistics::InformationSchemaRegionStatistics::new(
self.catalog_manager.clone(),
),
) as _),
_ => None,
}
}
@@ -235,6 +251,14 @@ impl InformationSchemaProvider {
CLUSTER_INFO.to_string(),
self.build_table(CLUSTER_INFO).unwrap(),
);
tables.insert(
PROCEDURE_INFO.to_string(),
self.build_table(PROCEDURE_INFO).unwrap(),
);
tables.insert(
REGION_STATISTICS.to_string(),
self.build_table(REGION_STATISTICS).unwrap(),
);
}
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
@@ -250,7 +274,6 @@ impl InformationSchemaProvider {
self.build_table(TABLE_CONSTRAINTS).unwrap(),
);
tables.insert(FLOWS.to_string(), self.build_table(FLOWS).unwrap());
// Add memory tables
for name in MEMORY_TABLES.iter() {
tables.insert((*name).to_string(), self.build_table(name).expect(name));
@@ -299,3 +322,39 @@ where
InformationTable::to_stream(self, request)
}
}
pub type InformationExtensionRef = Arc<dyn InformationExtension<Error = Error> + Send + Sync>;
/// The `InformationExtension` trait provides the extension methods for the `information_schema` tables.
#[async_trait::async_trait]
pub trait InformationExtension {
type Error: ErrorExt;
/// Gets the nodes information.
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
/// Gets the procedures information.
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error>;
/// Gets the region statistics.
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
}
pub struct NoopInformationExtension;
#[async_trait::async_trait]
impl InformationExtension for NoopInformationExtension {
type Error = Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
Ok(vec![])
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
Ok(vec![])
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
Ok(vec![])
}
}

View File

@@ -17,13 +17,10 @@ use std::time::Duration;
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID;
use common_config::Mode;
use common_error::ext::BoxedError;
use common_meta::cluster::{ClusterInfo, NodeInfo, NodeStatus};
use common_meta::peer::Peer;
use common_meta::cluster::NodeInfo;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::warn;
use common_time::timestamp::Timestamp;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
@@ -40,7 +37,7 @@ use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::CLUSTER_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, ListNodesSnafu, Result};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;
@@ -70,7 +67,6 @@ const INIT_CAPACITY: usize = 42;
pub(super) struct InformationSchemaClusterInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
start_time_ms: u64,
}
impl InformationSchemaClusterInfo {
@@ -78,7 +74,6 @@ impl InformationSchemaClusterInfo {
Self {
schema: Self::schema(),
catalog_manager,
start_time_ms: common_time::util::current_time_millis() as u64,
}
}
@@ -100,11 +95,7 @@ impl InformationSchemaClusterInfo {
}
fn builder(&self) -> InformationSchemaClusterInfoBuilder {
InformationSchemaClusterInfoBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
self.start_time_ms,
)
InformationSchemaClusterInfoBuilder::new(self.schema.clone(), self.catalog_manager.clone())
}
}
@@ -144,7 +135,6 @@ impl InformationTable for InformationSchemaClusterInfo {
struct InformationSchemaClusterInfoBuilder {
schema: SchemaRef,
start_time_ms: u64,
catalog_manager: Weak<dyn CatalogManager>,
peer_ids: Int64VectorBuilder,
@@ -158,11 +148,7 @@ struct InformationSchemaClusterInfoBuilder {
}
impl InformationSchemaClusterInfoBuilder {
fn new(
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
start_time_ms: u64,
) -> Self {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
@@ -174,56 +160,17 @@ impl InformationSchemaClusterInfoBuilder {
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
uptimes: StringVectorBuilder::with_capacity(INIT_CAPACITY),
active_times: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_time_ms,
}
}
/// Construct the `information_schema.cluster_info` virtual table
async fn make_cluster_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let mode = utils::running_mode(&self.catalog_manager)?.unwrap_or(Mode::Standalone);
match mode {
Mode::Standalone => {
let build_info = common_version::build_info();
self.add_node_info(
&predicates,
NodeInfo {
// For the standalone:
// - id always 0
// - empty string for peer_addr
peer: Peer {
id: 0,
addr: "".to_string(),
},
last_activity_ts: -1,
status: NodeStatus::Standalone,
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
// Use `self.start_time_ms` instead.
// It's not precise but enough.
start_time_ms: self.start_time_ms,
},
);
}
Mode::Distributed => {
if let Some(meta_client) = utils::meta_client(&self.catalog_manager)? {
let node_infos = meta_client
.list_nodes(None)
.await
.map_err(BoxedError::new)
.context(ListNodesSnafu)?;
for node_info in node_infos {
self.add_node_info(&predicates, node_info);
}
} else {
warn!("Could not find meta client in distributed mode.");
}
}
let information_extension = utils::information_extension(&self.catalog_manager)?;
let node_infos = information_extension.nodes().await?;
for node_info in node_infos {
self.add_node_info(&predicates, node_info);
}
self.finish()
}

View File

@@ -0,0 +1,241 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID;
use common_error::ext::BoxedError;
use common_procedure::ProcedureInfo;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::timestamp::Timestamp;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, TimestampMillisecondVectorBuilder};
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::PROCEDURE_INFO;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::system_schema::information_schema::{InformationTable, Predicates};
use crate::system_schema::utils;
use crate::CatalogManager;
const PROCEDURE_ID: &str = "procedure_id";
const PROCEDURE_TYPE: &str = "procedure_type";
const START_TIME: &str = "start_time";
const END_TIME: &str = "end_time";
const STATUS: &str = "status";
const LOCK_KEYS: &str = "lock_keys";
const INIT_CAPACITY: usize = 42;
/// The `PROCEDURE_INFO` table provides information about the current procedure information of the cluster.
///
/// - `procedure_id`: the unique identifier of the procedure.
/// - `procedure_name`: the name of the procedure.
/// - `start_time`: the starting execution time of the procedure.
/// - `end_time`: the ending execution time of the procedure.
/// - `status`: the status of the procedure.
/// - `lock_keys`: the lock keys of the procedure.
///
pub(super) struct InformationSchemaProcedureInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaProcedureInfo {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(PROCEDURE_ID, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PROCEDURE_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
START_TIME,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
ColumnSchema::new(
END_TIME,
ConcreteDataType::timestamp_millisecond_datatype(),
true,
),
ColumnSchema::new(STATUS, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(LOCK_KEYS, ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaProcedureInfoBuilder {
InformationSchemaProcedureInfoBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaProcedureInfo {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID
}
fn table_name(&self) -> &'static str {
PROCEDURE_INFO
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_procedure_info(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaProcedureInfoBuilder {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
procedure_ids: StringVectorBuilder,
procedure_types: StringVectorBuilder,
start_times: TimestampMillisecondVectorBuilder,
end_times: TimestampMillisecondVectorBuilder,
statuses: StringVectorBuilder,
lock_keys: StringVectorBuilder,
}
impl InformationSchemaProcedureInfoBuilder {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
procedure_ids: StringVectorBuilder::with_capacity(INIT_CAPACITY),
procedure_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
start_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
end_times: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
statuses: StringVectorBuilder::with_capacity(INIT_CAPACITY),
lock_keys: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct the `information_schema.procedure_info` virtual table
async fn make_procedure_info(&mut self, request: Option<ScanRequest>) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let information_extension = utils::information_extension(&self.catalog_manager)?;
let procedures = information_extension.procedures().await?;
for (status, procedure_info) in procedures {
self.add_procedure(&predicates, status, procedure_info);
}
self.finish()
}
fn add_procedure(
&mut self,
predicates: &Predicates,
status: String,
procedure_info: ProcedureInfo,
) {
let ProcedureInfo {
id,
type_name,
start_time_ms,
end_time_ms,
lock_keys,
..
} = procedure_info;
let pid = id.to_string();
let start_time = TimestampMillisecond(Timestamp::new_millisecond(start_time_ms));
let end_time = TimestampMillisecond(Timestamp::new_millisecond(end_time_ms));
let lock_keys = lock_keys.join(",");
let row = [
(PROCEDURE_ID, &Value::from(pid.clone())),
(PROCEDURE_TYPE, &Value::from(type_name.clone())),
(START_TIME, &Value::from(start_time)),
(END_TIME, &Value::from(end_time)),
(STATUS, &Value::from(status.clone())),
(LOCK_KEYS, &Value::from(lock_keys.clone())),
];
if !predicates.eval(&row) {
return;
}
self.procedure_ids.push(Some(&pid));
self.procedure_types.push(Some(&type_name));
self.start_times.push(Some(start_time));
self.end_times.push(Some(end_time));
self.statuses.push(Some(&status));
self.lock_keys.push(Some(&lock_keys));
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.procedure_ids.finish()),
Arc::new(self.procedure_types.finish()),
Arc::new(self.start_times.finish()),
Arc::new(self.end_times.finish()),
Arc::new(self.statuses.finish()),
Arc::new(self.lock_keys.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaProcedureInfo {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_procedure_info(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -224,8 +224,8 @@ impl InformationSchemaRegionPeersBuilder {
let region_id = RegionId::new(table_id, route.region.id.region_number()).as_u64();
let peer_id = route.leader_peer.clone().map(|p| p.id);
let peer_addr = route.leader_peer.clone().map(|p| p.addr);
let status = if let Some(status) = route.leader_status {
Some(status.as_ref().to_string())
let state = if let Some(state) = route.leader_state {
Some(state.as_ref().to_string())
} else {
// Alive by default
Some("ALIVE".to_string())
@@ -242,7 +242,7 @@ impl InformationSchemaRegionPeersBuilder {
self.peer_ids.push(peer_id);
self.peer_addrs.push(peer_addr.as_deref());
self.is_leaders.push(Some("Yes"));
self.statuses.push(status.as_deref());
self.statuses.push(state.as_deref());
self.down_seconds
.push(route.leader_down_millis().map(|m| m / 1000));
}

View File

@@ -0,0 +1,237 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID;
use common_error::ext::BoxedError;
use common_meta::datanode::RegionStat;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{DfSendableRecordBatchStream, RecordBatch, SendableRecordBatchStream};
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder};
use snafu::ResultExt;
use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, REGION_STATISTICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::information_schema::Predicates;
use crate::system_schema::utils;
use crate::CatalogManager;
const REGION_ID: &str = "region_id";
const TABLE_ID: &str = "table_id";
const REGION_NUMBER: &str = "region_number";
const MEMTABLE_SIZE: &str = "memtable_size";
const MANIFEST_SIZE: &str = "manifest_size";
const SST_SIZE: &str = "sst_size";
const ENGINE: &str = "engine";
const REGION_ROLE: &str = "region_role";
const INIT_CAPACITY: usize = 42;
/// The `REGION_STATISTICS` table provides information about the region statistics. Including fields:
///
/// - `region_id`: The region id.
/// - `table_id`: The table id.
/// - `region_number`: The region number.
/// - `memtable_size`: The memtable size in bytes.
/// - `manifest_size`: The manifest size in bytes.
/// - `sst_size`: The sst size in bytes.
/// - `engine`: The engine type.
/// - `region_role`: The region role.
///
pub(super) struct InformationSchemaRegionStatistics {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaRegionStatistics {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: Self::schema(),
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(REGION_ID, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(TABLE_ID, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(REGION_NUMBER, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(MEMTABLE_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(MANIFEST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(SST_SIZE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(ENGINE, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(REGION_ROLE, ConcreteDataType::string_datatype(), true),
]))
}
fn builder(&self) -> InformationSchemaRegionStatisticsBuilder {
InformationSchemaRegionStatisticsBuilder::new(
self.schema.clone(),
self.catalog_manager.clone(),
)
}
}
impl InformationTable for InformationSchemaRegionStatistics {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID
}
fn table_name(&self) -> &'static str {
REGION_STATISTICS
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
let stream = Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_statistics(Some(request))
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
));
Ok(Box::pin(
RecordBatchStreamAdapter::try_new(stream)
.map_err(BoxedError::new)
.context(InternalSnafu)?,
))
}
}
struct InformationSchemaRegionStatisticsBuilder {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
region_ids: UInt64VectorBuilder,
table_ids: UInt32VectorBuilder,
region_numbers: UInt32VectorBuilder,
memtable_sizes: UInt64VectorBuilder,
manifest_sizes: UInt64VectorBuilder,
sst_sizes: UInt64VectorBuilder,
engines: StringVectorBuilder,
region_roles: StringVectorBuilder,
}
impl InformationSchemaRegionStatisticsBuilder {
fn new(schema: SchemaRef, catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema,
catalog_manager,
region_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
table_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
region_numbers: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
memtable_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
manifest_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
sst_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
engines: StringVectorBuilder::with_capacity(INIT_CAPACITY),
region_roles: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
/// Construct a new `InformationSchemaRegionStatistics` from the collected data.
async fn make_region_statistics(
&mut self,
request: Option<ScanRequest>,
) -> Result<RecordBatch> {
let predicates = Predicates::from_scan_request(&request);
let information_extension = utils::information_extension(&self.catalog_manager)?;
let region_stats = information_extension.region_stats().await?;
for region_stat in region_stats {
self.add_region_statistic(&predicates, region_stat);
}
self.finish()
}
fn add_region_statistic(&mut self, predicate: &Predicates, region_stat: RegionStat) {
let row = [
(REGION_ID, &Value::from(region_stat.id.as_u64())),
(TABLE_ID, &Value::from(region_stat.id.table_id())),
(REGION_NUMBER, &Value::from(region_stat.id.region_number())),
(MEMTABLE_SIZE, &Value::from(region_stat.memtable_size)),
(MANIFEST_SIZE, &Value::from(region_stat.manifest_size)),
(SST_SIZE, &Value::from(region_stat.sst_size)),
(ENGINE, &Value::from(region_stat.engine.as_str())),
(REGION_ROLE, &Value::from(region_stat.role.to_string())),
];
if !predicate.eval(&row) {
return;
}
self.region_ids.push(Some(region_stat.id.as_u64()));
self.table_ids.push(Some(region_stat.id.table_id()));
self.region_numbers
.push(Some(region_stat.id.region_number()));
self.memtable_sizes.push(Some(region_stat.memtable_size));
self.manifest_sizes.push(Some(region_stat.manifest_size));
self.sst_sizes.push(Some(region_stat.sst_size));
self.engines.push(Some(&region_stat.engine));
self.region_roles.push(Some(&region_stat.role.to_string()));
}
fn finish(&mut self) -> Result<RecordBatch> {
let columns: Vec<VectorRef> = vec![
Arc::new(self.region_ids.finish()),
Arc::new(self.table_ids.finish()),
Arc::new(self.region_numbers.finish()),
Arc::new(self.memtable_sizes.finish()),
Arc::new(self.manifest_sizes.finish()),
Arc::new(self.sst_sizes.finish()),
Arc::new(self.engines.finish()),
Arc::new(self.region_roles.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}
}
impl DfPartitionStream for InformationSchemaRegionStatistics {
fn schema(&self) -> &ArrowSchemaRef {
self.schema.arrow_schema()
}
fn execute(&self, _: Arc<TaskContext>) -> DfSendableRecordBatchStream {
let schema = self.schema.arrow_schema().clone();
let mut builder = self.builder();
Box::pin(DfRecordBatchStreamAdapter::new(
schema,
futures::stream::once(async move {
builder
.make_region_statistics(None)
.await
.map(|x| x.into_df_record_batch())
.map_err(Into::into)
}),
))
}
}

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/// All table names in `information_schema`.
//! All table names in `information_schema`.
pub const TABLES: &str = "tables";
pub const COLUMNS: &str = "columns";
@@ -45,3 +45,5 @@ pub const TABLE_CONSTRAINTS: &str = "table_constraints";
pub const CLUSTER_INFO: &str = "cluster_info";
pub const VIEWS: &str = "views";
pub const FLOWS: &str = "flows";
pub const PROCEDURE_INFO: &str = "procedure_info";
pub const REGION_STATISTICS: &str = "region_statistics";

View File

@@ -74,7 +74,7 @@ impl MemoryTableBuilder {
/// Construct the `information_schema.{table_name}` virtual table
pub async fn memory_records(&mut self) -> Result<RecordBatch> {
if self.columns.is_empty() {
RecordBatch::new_empty(self.schema.clone()).context(CreateRecordBatchSnafu)
Ok(RecordBatch::new_empty(self.schema.clone()))
} else {
RecordBatch::new(self.schema.clone(), std::mem::take(&mut self.columns))
.context(CreateRecordBatchSnafu)

View File

@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! The `pg_catalog.pg_namespace` table implementation.
//! namespace is a schema in greptime
pub(super) mod oid_map;
use std::sync::{Arc, Weak};
@@ -40,9 +43,6 @@ use crate::system_schema::utils::tables::{string_column, u32_column};
use crate::system_schema::SystemTable;
use crate::CatalogManager;
/// The `pg_catalog.pg_namespace` table implementation.
/// namespace is a schema in greptime
const NSPNAME: &str = "nspname";
const INIT_CAPACITY: usize = 42;

View File

@@ -12,47 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod tables;
use std::sync::Weak;
use std::sync::{Arc, Weak};
use common_config::Mode;
use common_meta::key::TableMetadataManagerRef;
use meta_client::client::MetaClient;
use snafu::OptionExt;
use crate::error::{Result, UpgradeWeakCatalogManagerRefSnafu};
use crate::error::{GetInformationExtensionSnafu, Result, UpgradeWeakCatalogManagerRefSnafu};
use crate::information_schema::InformationExtensionRef;
use crate::kvbackend::KvBackendCatalogManager;
use crate::CatalogManager;
/// Try to get the server running mode from `[CatalogManager]` weak reference.
pub fn running_mode(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Mode>> {
pub mod tables;
/// Try to get the `[InformationExtension]` from `[CatalogManager]` weak reference.
pub fn information_extension(
catalog_manager: &Weak<dyn CatalogManager>,
) -> Result<InformationExtensionRef> {
let catalog_manager = catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
Ok(catalog_manager
let information_extension = catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
.map(|manager| manager.running_mode())
.copied())
}
.map(|manager| manager.information_extension())
.context(GetInformationExtensionSnafu)?;
/// Try to get the `[MetaClient]` from `[CatalogManager]` weak reference.
pub fn meta_client(catalog_manager: &Weak<dyn CatalogManager>) -> Result<Option<Arc<MetaClient>>> {
let catalog_manager = catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
let meta_client = match catalog_manager
.as_any()
.downcast_ref::<KvBackendCatalogManager>()
{
None => None,
Some(manager) => manager.meta_client(),
};
Ok(meta_client)
Ok(information_extension)
}
/// Try to get the `[TableMetadataManagerRef]` from `[CatalogManager]` weak reference.

View File

@@ -259,7 +259,6 @@ mod tests {
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use common_config::Mode;
use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder};
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
@@ -269,6 +268,8 @@ mod tests {
use datafusion::logical_expr::builder::LogicalTableSource;
use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};
use crate::information_schema::NoopInformationExtension;
struct MockDecoder;
impl MockDecoder {
pub fn arc() -> Arc<Self> {
@@ -323,10 +324,10 @@ mod tests {
);
let catalog_manager = KvBackendCatalogManager::new(
Mode::Standalone,
None,
Arc::new(NoopInformationExtension),
backend.clone(),
layered_cache_registry,
None,
);
let table_metadata_manager = TableMetadataManager::new(backend);
let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]);

View File

@@ -28,7 +28,7 @@ enum_dispatch = "0.3"
futures-util.workspace = true
lazy_static.workspace = true
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12"
parking_lot.workspace = true
prometheus.workspace = true
prost.workspace = true
query.workspace = true

View File

@@ -10,7 +10,7 @@ name = "greptime"
path = "src/bin/greptime.rs"
[features]
default = ["python"]
default = ["python", "servers/pprof", "servers/mem-prof"]
tokio-console = ["common-telemetry/tokio-console"]
python = ["frontend/python"]

View File

@@ -15,10 +15,11 @@
#![doc = include_str!("../../../../README.md")]
use clap::{Parser, Subcommand};
use cmd::error::Result;
use cmd::error::{InitTlsProviderSnafu, Result};
use cmd::options::GlobalOptions;
use cmd::{cli, datanode, flownode, frontend, metasrv, standalone, App};
use common_version::version;
use servers::install_ring_crypto_provider;
#[derive(Parser)]
#[command(name = "greptime", author, version, long_version = version(), about)]
@@ -94,6 +95,7 @@ async fn main() -> Result<()> {
async fn main_body() -> Result<()> {
setup_human_panic();
install_ring_crypto_provider().map_err(|msg| InitTlsProviderSnafu { msg }.build())?;
start(Command::parse()).await
}

View File

@@ -158,7 +158,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
addr: String::new(),
}),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
});
}

View File

@@ -35,7 +35,6 @@ use either::Either;
use meta_client::client::MetaClientBuilder;
use query::datafusion::DatafusionQueryEngine;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
use query::query_engine::{DefaultSerializer, QueryEngineState};
use query::QueryEngine;
use rustyline::error::ReadlineError;
@@ -47,12 +46,12 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cli::cmd::ReplCommand;
use crate::cli::helper::RustylineHelper;
use crate::cli::AttachCommand;
use crate::error;
use crate::error::{
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
SubstraitEncodeLogicalPlanSnafu,
};
use crate::{error, DistributedInformationExtension};
/// Captures the state of the repl, gathers commands and executes them one by one
pub struct Repl {
@@ -179,7 +178,7 @@ impl Repl {
.await
.context(PlanStatementSnafu)?;
let LogicalPlan::DfPlan(plan) = query_engine
let plan = query_engine
.optimize(&query_engine.engine_context(query_ctx), &plan)
.context(PlanStatementSnafu)?;
@@ -276,11 +275,12 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
.build(),
);
let information_extension = Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
Mode::Distributed,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry,
None,
);
let plugins: Plugins = Default::default();
let state = Arc::new(QueryEngineState::new(

View File

@@ -272,9 +272,10 @@ impl StartCommand {
info!("Datanode start command: {:#?}", self);
info!("Datanode options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_datanode_plugins(&mut plugins, &opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartDatanodeSnafu)?;

View File

@@ -24,6 +24,12 @@ use snafu::{Location, Snafu};
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Failed to install ring crypto provider: {}", msg))]
InitTlsProvider {
#[snafu(implicit)]
location: Location,
msg: String,
},
#[snafu(display("Failed to create default catalog and schema"))]
InitMetadata {
#[snafu(implicit)]
@@ -369,9 +375,10 @@ impl ErrorExt for Error {
}
Error::SubstraitEncodeLogicalPlan { source, .. } => source.status_code(),
Error::SerdeJson { .. } | Error::FileIo { .. } | Error::SpawnThread { .. } => {
StatusCode::Unexpected
}
Error::SerdeJson { .. }
| Error::FileIo { .. }
| Error::SpawnThread { .. }
| Error::InitTlsProvider { .. } => StatusCode::Unexpected,
Error::Other { source, .. } => source.status_code(),

View File

@@ -41,7 +41,7 @@ use crate::error::{
MissingConfigSnafu, Result, ShutdownFlownodeSnafu, StartFlownodeSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
use crate::{log_versions, App, DistributedInformationExtension};
pub const APP_NAME: &str = "greptime-flownode";
@@ -269,11 +269,13 @@ impl StartCommand {
.build(),
);
let information_extension =
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
opts.mode,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry.clone(),
None,
);
let table_metadata_manager =

View File

@@ -36,8 +36,8 @@ use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance};
use frontend::server::Services;
use meta_client::{MetaClientOptions, MetaClientType};
use query::stats::StatementStatistics;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
use tracing_appender::non_blocking::WorkerGuard;
@@ -46,7 +46,7 @@ use crate::error::{
Result, StartFrontendSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
use crate::{log_versions, App};
use crate::{log_versions, App, DistributedInformationExtension};
type FrontendOptions = GreptimeOptions<frontend::frontend::FrontendOptions>;
@@ -266,9 +266,10 @@ impl StartCommand {
info!("Frontend start command: {:#?}", self);
info!("Frontend options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartFrontendSnafu)?;
@@ -315,11 +316,13 @@ impl StartCommand {
.build(),
);
let information_extension =
Arc::new(DistributedInformationExtension::new(meta_client.clone()));
let catalog_manager = KvBackendCatalogManager::new(
Mode::Distributed,
Some(meta_client.clone()),
information_extension,
cached_meta_backend.clone(),
layered_cache_registry.clone(),
None,
);
let executor = HandlerGroupExecutor::new(vec![
@@ -340,6 +343,8 @@ impl StartCommand {
// Some queries are expected to take long time.
let channel_config = ChannelConfig {
timeout: None,
tcp_nodelay: opts.datanode.client.tcp_nodelay,
connect_timeout: Some(opts.datanode.client.connect_timeout),
..Default::default()
};
let client = NodeClients::new(channel_config);
@@ -351,6 +356,7 @@ impl StartCommand {
catalog_manager,
Arc::new(client),
meta_client,
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.with_local_cache_invalidator(layered_cache_registry)
@@ -469,7 +475,7 @@ mod tests {
};
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();

View File

@@ -15,7 +15,17 @@
#![feature(assert_matches, let_chains)]
use async_trait::async_trait;
use catalog::information_schema::InformationExtension;
use client::api::v1::meta::ProcedureStatus;
use common_error::ext::BoxedError;
use common_meta::cluster::{ClusterInfo, NodeInfo};
use common_meta::datanode::RegionStat;
use common_meta::ddl::{ExecutorContext, ProcedureExecutor};
use common_meta::rpc::procedure;
use common_procedure::{ProcedureInfo, ProcedureState};
use common_telemetry::{error, info};
use meta_client::MetaClientRef;
use snafu::ResultExt;
use crate::error::Result;
@@ -74,6 +84,7 @@ pub trait App: Send {
}
/// Log the versions of the application, and the arguments passed to the cli.
///
/// `version` should be the same as the output of cli "--version";
/// and the `short_version` is the short version of the codes, often consist of git branch and commit.
pub fn log_versions(version: &str, short_version: &str, app: &str) {
@@ -94,3 +105,69 @@ fn log_env_flags() {
info!("argument: {}", argument);
}
}
pub struct DistributedInformationExtension {
meta_client: MetaClientRef,
}
impl DistributedInformationExtension {
pub fn new(meta_client: MetaClientRef) -> Self {
Self { meta_client }
}
}
#[async_trait::async_trait]
impl InformationExtension for DistributedInformationExtension {
type Error = catalog::error::Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
self.meta_client
.list_nodes(None)
.await
.map_err(BoxedError::new)
.context(catalog::error::ListNodesSnafu)
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
let procedures = self
.meta_client
.list_procedures(&ExecutorContext::default())
.await
.map_err(BoxedError::new)
.context(catalog::error::ListProceduresSnafu)?
.procedures;
let mut result = Vec::with_capacity(procedures.len());
for procedure in procedures {
let pid = match procedure.id {
Some(pid) => pid,
None => return catalog::error::ProcedureIdNotFoundSnafu {}.fail(),
};
let pid = procedure::pb_pid_to_pid(&pid)
.map_err(BoxedError::new)
.context(catalog::error::ConvertProtoDataSnafu)?;
let status = ProcedureStatus::try_from(procedure.status)
.map(|v| v.as_str_name())
.unwrap_or("Unknown")
.to_string();
let procedure_info = ProcedureInfo {
id: pid,
type_name: procedure.type_name,
start_time_ms: procedure.start_time_ms,
end_time_ms: procedure.end_time_ms,
state: ProcedureState::Running,
lock_keys: procedure.lock_keys,
};
result.push((status, procedure_info));
}
Ok(result)
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
self.meta_client
.list_region_stats()
.await
.map_err(BoxedError::new)
.context(catalog::error::ListRegionStatsSnafu)
}
}

View File

@@ -48,6 +48,10 @@ impl Instance {
_guard: guard,
}
}
pub fn get_inner(&self) -> &MetasrvInstance {
&self.instance
}
}
#[async_trait]
@@ -86,6 +90,14 @@ impl Command {
pub fn load_options(&self, global_options: &GlobalOptions) -> Result<MetasrvOptions> {
self.subcmd.load_options(global_options)
}
pub fn config_file(&self) -> &Option<String> {
self.subcmd.config_file()
}
pub fn env_prefix(&self) -> &String {
self.subcmd.env_prefix()
}
}
#[derive(Parser)]
@@ -105,6 +117,18 @@ impl SubCommand {
SubCommand::Start(cmd) => cmd.load_options(global_options),
}
}
fn config_file(&self) -> &Option<String> {
match self {
SubCommand::Start(cmd) => &cmd.config_file,
}
}
fn env_prefix(&self) -> &String {
match self {
SubCommand::Start(cmd) => &cmd.env_prefix,
}
}
}
#[derive(Debug, Default, Parser)]
@@ -249,9 +273,10 @@ impl StartCommand {
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let plugin_opts = opts.plugins;
let opts = opts.component;
let mut plugins = Plugins::new();
plugins::setup_metasrv_plugins(&mut plugins, &opts)
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartMetaServerSnafu)?;

View File

@@ -15,6 +15,7 @@
use clap::Parser;
use common_config::Configurable;
use common_runtime::global::RuntimeOptions;
use plugins::PluginOptions;
use serde::{Deserialize, Serialize};
#[derive(Parser, Default, Debug, Clone)]
@@ -40,6 +41,8 @@ pub struct GlobalOptions {
pub struct GreptimeOptions<T> {
/// The runtime options.
pub runtime: RuntimeOptions,
/// The plugin options.
pub plugins: Vec<PluginOptions>,
/// The options of each component (like Datanode or Standalone) of GreptimeDB.
#[serde(flatten)]

View File

@@ -17,14 +17,18 @@ use std::{fs, path};
use async_trait::async_trait;
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use catalog::information_schema::InformationExtension;
use catalog::kvbackend::KvBackendCatalogManager;
use clap::Parser;
use client::api::v1::meta::RegionRole;
use common_base::Plugins;
use common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID};
use common_config::{metadata_store_dir, Configurable, KvBackendConfig};
use common_error::ext::BoxedError;
use common_meta::cache::LayeredCacheRegistryBuilder;
use common_meta::cache_invalidator::CacheInvalidatorRef;
use common_meta::cluster::{NodeInfo, NodeStatus};
use common_meta::datanode::RegionStat;
use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRef};
use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef};
use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef};
@@ -33,10 +37,11 @@ use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef};
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::node_manager::NodeManagerRef;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_procedure::ProcedureManagerRef;
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
use common_telemetry::info;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_time::timezone::set_default_timezone;
@@ -44,6 +49,7 @@ use common_version::{short_version, version};
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, StorageConfig};
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{FlowWorkerManager, FlownodeBuilder, FrontendInvoker};
use frontend::frontend::FrontendOptions;
@@ -55,6 +61,7 @@ use frontend::service_config::{
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
use query::stats::StatementStatistics;
use serde::{Deserialize, Serialize};
use servers::export_metrics::ExportMetricsOption;
use servers::grpc::GrpcOptions;
@@ -438,15 +445,16 @@ impl StartCommand {
info!("Standalone options: {opts:#?}");
let mut plugins = Plugins::new();
let plugin_opts = opts.plugins;
let opts = opts.component;
let fe_opts = opts.frontend_options();
let dn_opts = opts.datanode_options();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &fe_opts)
.await
.context(StartFrontendSnafu)?;
plugins::setup_datanode_plugins(&mut plugins, &dn_opts)
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &dn_opts)
.await
.context(StartDatanodeSnafu)?;
@@ -477,22 +485,26 @@ impl StartCommand {
.build(),
);
let catalog_manager = KvBackendCatalogManager::new(
dn_opts.mode,
None,
kv_backend.clone(),
layered_cache_registry.clone(),
);
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let datanode = DatanodeBuilder::new(dn_opts, plugins.clone())
.with_kv_backend(kv_backend.clone())
.build()
.await
.context(StartDatanodeSnafu)?;
let information_extension = Arc::new(StandaloneInformationExtension::new(
datanode.region_server(),
procedure_manager.clone(),
));
let catalog_manager = KvBackendCatalogManager::new(
information_extension,
kv_backend.clone(),
layered_cache_registry.clone(),
Some(procedure_manager.clone()),
);
let table_metadata_manager =
Self::create_table_metadata_manager(kv_backend.clone()).await?;
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(kv_backend.clone()));
let flow_builder = FlownodeBuilder::new(
Default::default(),
@@ -556,6 +568,7 @@ impl StartCommand {
catalog_manager.clone(),
node_manager.clone(),
ddl_task_executor.clone(),
StatementStatistics::new(opts.logging.slow_query.clone()),
)
.with_plugin(plugins.clone())
.try_build()
@@ -641,6 +654,91 @@ impl StartCommand {
}
}
pub struct StandaloneInformationExtension {
region_server: RegionServer,
procedure_manager: ProcedureManagerRef,
start_time_ms: u64,
}
impl StandaloneInformationExtension {
pub fn new(region_server: RegionServer, procedure_manager: ProcedureManagerRef) -> Self {
Self {
region_server,
procedure_manager,
start_time_ms: common_time::util::current_time_millis() as u64,
}
}
}
#[async_trait::async_trait]
impl InformationExtension for StandaloneInformationExtension {
type Error = catalog::error::Error;
async fn nodes(&self) -> std::result::Result<Vec<NodeInfo>, Self::Error> {
let build_info = common_version::build_info();
let node_info = NodeInfo {
// For the standalone:
// - id always 0
// - empty string for peer_addr
peer: Peer {
id: 0,
addr: "".to_string(),
},
last_activity_ts: -1,
status: NodeStatus::Standalone,
version: build_info.version.to_string(),
git_commit: build_info.commit_short.to_string(),
// Use `self.start_time_ms` instead.
// It's not precise but enough.
start_time_ms: self.start_time_ms,
};
Ok(vec![node_info])
}
async fn procedures(&self) -> std::result::Result<Vec<(String, ProcedureInfo)>, Self::Error> {
self.procedure_manager
.list_procedures()
.await
.map_err(BoxedError::new)
.map(|procedures| {
procedures
.into_iter()
.map(|procedure| {
let status = procedure.state.as_str_name().to_string();
(status, procedure)
})
.collect::<Vec<_>>()
})
.context(catalog::error::ListProceduresSnafu)
}
async fn region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error> {
let stats = self
.region_server
.reportable_regions()
.into_iter()
.map(|stat| {
let region_stat = self
.region_server
.region_statistic(stat.region_id)
.unwrap_or_default();
RegionStat {
id: stat.region_id,
rcus: 0,
wcus: 0,
approximate_bytes: region_stat.estimated_disk_size() as i64,
engine: stat.engine,
role: RegionRole::from(stat.role).into(),
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
}
})
.collect::<Vec<_>>();
Ok(stats)
}
}
#[cfg(test)]
mod tests {
use std::default::Default;
@@ -665,7 +763,7 @@ mod tests {
};
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &fe_opts)
plugins::setup_frontend_plugins(&mut plugins, &[], &fe_opts)
.await
.unwrap();

View File

@@ -20,7 +20,7 @@ use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_telemetry::logging::{LoggingOptions, DEFAULT_OTLP_ENDPOINT};
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
@@ -159,8 +159,20 @@ fn test_load_metasrv_example_config() {
level: Some("info".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
slow_query: SlowQueryOptions {
enable: false,
threshold: Some(Duration::from_secs(10)),
sample_ratio: Some(1.0),
},
..Default::default()
},
datanode: meta_srv::metasrv::DatanodeOptions {
client: meta_srv::metasrv::DatanodeClientOptions {
timeout: Duration::from_secs(10),
connect_timeout: Duration::from_secs(10),
tcp_nodelay: true,
},
},
export_metrics: ExportMetricsOption {
self_import: Some(Default::default()),
remote_write: Some(Default::default()),

View File

@@ -8,7 +8,7 @@ license.workspace = true
workspace = true
[dependencies]
anymap = "1.0.0-beta.2"
anymap2 = "0.13"
async-trait.workspace = true
bitvec = "1.0"
bytes.workspace = true

View File

@@ -12,20 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard};
/// [`Plugins`] is a wrapper of [AnyMap](https://github.com/chris-morgan/anymap) and provides a thread-safe way to store and retrieve plugins.
use anymap2::SendSyncAnyMap;
/// [`Plugins`] is a wrapper of [anymap2](https://github.com/azriel91/anymap2) and provides a thread-safe way to store and retrieve plugins.
/// Make it Cloneable and we can treat it like an Arc struct.
#[derive(Default, Clone)]
pub struct Plugins {
inner: Arc<RwLock<anymap::Map<dyn Any + Send + Sync>>>,
inner: Arc<RwLock<SendSyncAnyMap>>,
}
impl Plugins {
pub fn new() -> Self {
Self {
inner: Arc::new(RwLock::new(anymap::Map::new())),
inner: Arc::new(RwLock::new(SendSyncAnyMap::new())),
}
}
@@ -37,6 +38,18 @@ impl Plugins {
self.read().get::<T>().cloned()
}
pub fn get_or_insert<T, F>(&self, f: F) -> T
where
T: 'static + Send + Sync + Clone,
F: FnOnce() -> T,
{
let mut binding = self.write();
if !binding.contains::<T>() {
binding.insert(f());
}
binding.get::<T>().cloned().unwrap()
}
pub fn map_mut<T: 'static + Send + Sync, F, R>(&self, mapper: F) -> R
where
F: FnOnce(Option<&mut T>) -> R,
@@ -61,11 +74,11 @@ impl Plugins {
self.read().is_empty()
}
fn read(&self) -> RwLockReadGuard<anymap::Map<dyn Any + Send + Sync>> {
fn read(&self) -> RwLockReadGuard<SendSyncAnyMap> {
self.inner.read().unwrap()
}
fn write(&self) -> RwLockWriteGuard<anymap::Map<dyn Any + Send + Sync>> {
fn write(&self) -> RwLockWriteGuard<SendSyncAnyMap> {
self.inner.write().unwrap()
}
}

View File

@@ -46,8 +46,9 @@ impl From<String> for SecretString {
}
}
/// Wrapper type for values that contains secrets, which attempts to limit
/// accidental exposure and ensure secrets are wiped from memory when dropped.
/// Wrapper type for values that contains secrets.
///
/// It attempts to limit accidental exposure and ensure secrets are wiped from memory when dropped.
/// (e.g. passwords, cryptographic keys, access tokens or other credentials)
///
/// Access to the secret inner value occurs through the [`ExposeSecret`]

View File

@@ -98,14 +98,20 @@ pub const INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID: u32 = 31;
pub const INFORMATION_SCHEMA_VIEW_TABLE_ID: u32 = 32;
/// id for information_schema.FLOWS
pub const INFORMATION_SCHEMA_FLOW_TABLE_ID: u32 = 33;
/// ----- End of information_schema tables -----
/// id for information_schema.procedure_info
pub const INFORMATION_SCHEMA_PROCEDURE_INFO_TABLE_ID: u32 = 34;
/// id for information_schema.region_statistics
pub const INFORMATION_SCHEMA_REGION_STATISTICS_TABLE_ID: u32 = 35;
// ----- End of information_schema tables -----
/// ----- Begin of pg_catalog tables -----
pub const PG_CATALOG_PG_CLASS_TABLE_ID: u32 = 256;
pub const PG_CATALOG_PG_TYPE_TABLE_ID: u32 = 257;
pub const PG_CATALOG_PG_NAMESPACE_TABLE_ID: u32 = 258;
/// ----- End of pg_catalog tables -----
// ----- End of pg_catalog tables -----
pub const MITO_ENGINE: &str = "mito";
pub const MITO2_ENGINE: &str = "mito2";
pub const METRIC_ENGINE: &str = "metric";

View File

@@ -38,6 +38,8 @@ pub enum StatusCode {
Cancelled = 1005,
/// Illegal state, can be exposed to users.
IllegalState = 1006,
/// Caused by some error originated from external system.
External = 1007,
// ====== End of common status code ================
// ====== Begin of SQL related status code =========
@@ -162,7 +164,8 @@ impl StatusCode {
| StatusCode::InvalidAuthHeader
| StatusCode::AccessDenied
| StatusCode::PermissionDenied
| StatusCode::RequestOutdated => false,
| StatusCode::RequestOutdated
| StatusCode::External => false,
}
}
@@ -177,7 +180,9 @@ impl StatusCode {
| StatusCode::IllegalState
| StatusCode::EngineExecuteQuery
| StatusCode::StorageUnavailable
| StatusCode::RuntimeResourcesExhausted => true,
| StatusCode::RuntimeResourcesExhausted
| StatusCode::External => true,
StatusCode::Success
| StatusCode::Unsupported
| StatusCode::InvalidArguments
@@ -256,7 +261,7 @@ macro_rules! define_into_tonic_status {
pub fn status_to_tonic_code(status_code: StatusCode) -> Code {
match status_code {
StatusCode::Success => Code::Ok,
StatusCode::Unknown => Code::Unknown,
StatusCode::Unknown | StatusCode::External => Code::Unknown,
StatusCode::Unsupported => Code::Unimplemented,
StatusCode::Unexpected
| StatusCode::IllegalState

View File

@@ -9,7 +9,7 @@ workspace = true
[features]
default = ["geo"]
geo = ["geohash", "h3o"]
geo = ["geohash", "h3o", "s2"]
[dependencies]
api.workspace = true
@@ -27,6 +27,7 @@ common-time.workspace = true
common-version.workspace = true
datafusion.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
jsonb.workspace = true
@@ -34,6 +35,7 @@ num = "0.4"
num-traits = "0.2"
once_cell.workspace = true
paste = "1.0"
s2 = { version = "0.0.12", optional = true }
serde.workspace = true
serde_json.workspace = true
session.workspace = true

View File

@@ -16,7 +16,6 @@ mod argmax;
mod argmin;
mod diff;
mod mean;
mod percentile;
mod polyval;
mod scipy_stats_norm_cdf;
mod scipy_stats_norm_pdf;
@@ -28,7 +27,6 @@ pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use percentile::PercentileAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
@@ -91,8 +89,14 @@ impl AggregateFunctions {
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
register_aggr_func!("percentile", 2, PercentileAccumulatorCreator);
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
#[cfg(feature = "geo")]
register_aggr_func!(
"json_encode_path",
3,
super::geo::encoding::JsonPathEncodeFunctionCreator
);
}
}

View File

@@ -16,7 +16,10 @@ use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -16,7 +16,10 @@ use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Result};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, Result,
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -1,436 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, Result,
};
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::OrdPrimitive;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num::NumCast;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.percentile.html?highlight=percentile#numpy.percentile
// if the p is 50,then the Percentile become median
// we use two heap great and not_greater
// the not_greater push the value that smaller than P-value
// the greater push the value that bigger than P-value
// just like the percentile in numpy:
// Given a vector V of length N, the q-th percentile of V is the value q/100 of the way from the minimum to the maximum in a sorted copy of V.
// The values and distances of the two nearest neighbors as well as the method parameter will determine the percentile
// if the normalized ranking does not match the location of q exactly.
// This function is the same as the median if q=50, the same as the minimum if q=0 and the same as the maximum if q=100.
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
// i+g = (q-alpha)/(n-alpha-beta+1)
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
// With 'i' being the floor and 'g' the fractional part of the result.
// the default method is linear where
// alpha = 1
// beta = 1
#[derive(Debug, Default)]
pub struct Percentile<T>
where
T: WrapperType,
{
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
not_greater: BinaryHeap<OrdPrimitive<T>>,
n: u64,
p: Option<f64>,
}
impl<T> Percentile<T>
where
T: WrapperType,
{
fn push(&mut self, value: T) {
let value = OrdPrimitive::<T>(value);
self.n += 1;
if self.not_greater.is_empty() {
self.not_greater.push(value);
return;
}
// to keep the not_greater length == floor+1
// so to ensure the peek of the not_greater is array[floor]
// and the peek of the greater is array[floor+1]
let p = self.p.unwrap_or(0.0_f64);
let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
if value <= *self.not_greater.peek().unwrap() {
self.not_greater.push(value);
if self.not_greater.len() > (floor + 1.0) as usize {
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
}
} else {
self.greater.push(Reverse(value));
if self.not_greater.len() < (floor + 1.0) as usize {
self.not_greater.push(self.greater.pop().unwrap().0);
}
}
}
}
impl<T> Accumulator for Percentile<T>
where
T: WrapperType,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.greater
.iter()
.map(|x| &x.0)
.chain(self.not_greater.iter())
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.p.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
// This is a unary accumulator, so only one column is provided.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
for i in 1..x.len() {
ensure!(first == x.get(i), InvalidInputColSnafu);
}
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(p) = self.p {
ensure!(p == first, InvalidInputColSnafu);
} else {
self.p = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`"
}
);
let p = &states[1];
let p = p
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect float64vector, got vector type {}",
p.vector_type_name()
),
})?;
let p = p.get(0);
if p.is_null() {
return Ok(());
}
let p = match p {
Value::Float64(OrderedFloat(p)) => p,
_ => unreachable!(),
};
self.p = Some(p);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.not_greater.is_empty() {
assert!(
self.greater.is_empty(),
"not expected in two-heap percentile algorithm, there must be a bug when implementing it"
);
}
let not_greater = self.not_greater.peek();
if not_greater.is_none() {
return Ok(Value::Null);
}
let not_greater = (*self.not_greater.peek().unwrap()).as_primitive();
let percentile = if self.greater.is_empty() {
NumCast::from(not_greater).unwrap()
} else {
let greater = self.greater.peek().unwrap();
let p = if let Some(p) = self.p {
p
} else {
return Ok(Value::Null);
};
let fract = (((self.n - 1) as f64) * p / 100_f64).fract();
let not_greater_v: f64 = NumCast::from(not_greater).unwrap();
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
not_greater_v * (1.0 - fract) + greater_v * fract
};
Ok(Value::from(percentile))
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct PercentileAccumulatorCreator {}
impl AggregateFunctionCreator for PercentileAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"PERCENTILE\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
// unwrap is safe because we have checked input_types len must equals 1
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut percentile = Percentile::<i32>::default();
percentile.update_batch(&[]).unwrap();
assert!(percentile.not_greater.is_empty());
assert!(percentile.greater.is_empty());
assert_eq!(Value::Null, percentile.evaluate().unwrap());
// test update one not-null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(42)])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
// test update one null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::Null, percentile.evaluate().unwrap());
// test update no null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(2_f64), percentile.evaluate().unwrap());
// test update null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
// test update with constant vector
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
// test left border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(0.0_f64),
Some(0.0_f64),
Some(0.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(-1.0_f64), percentile.evaluate().unwrap());
// test medium
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(50.0_f64),
Some(50.0_f64),
Some(50.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(1.0_f64), percentile.evaluate().unwrap());
// test right border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(2.0_f64), percentile.evaluate().unwrap());
// the following is the result of numpy.percentile
// numpy.percentile
// a = np.array([[10,7,4]])
// np.percentile(a,40)
// >> 6.400000000000
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(40.0_f64),
Some(40.0_f64),
Some(40.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(Value::from(6.400000000_f64), percentile.evaluate().unwrap());
// the following is the result of numpy.percentile
// a = np.array([[10,7,4]])
// np.percentile(a,95)
// >> 9.7000000000000011
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(95.0_f64),
Some(95.0_f64),
Some(95.0_f64),
])),
];
percentile.update_batch(&v).unwrap();
assert_eq!(
Value::from(9.700_000_000_000_001_f64),
percentile.evaluate().unwrap()
);
}
}

View File

@@ -18,8 +18,9 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -17,8 +17,10 @@ use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, Result,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;

View File

@@ -14,18 +14,19 @@
use std::fmt;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::data_type::DataType;
use datatypes::arrow::compute::kernels::numeric;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use snafu::ensure;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// A function adds an interval value to Timestamp, Date or DateTime, and return the result.
/// A function adds an interval value to Timestamp, Date, and return the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateAddFunction;
@@ -44,7 +45,6 @@ impl Function for DateAddFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,64 +69,14 @@ impl Function for DateAddFunction {
}
);
let left = &columns[0];
let right = &columns[1];
let left = columns[0].to_arrow_array();
let right = columns[1].to_arrow_array();
let size = left.len();
let left_datatype = columns[0].data_type();
match left_datatype {
ConcreteDataType::Timestamp(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let ts = left.get(i).as_timestamp();
let interval = right.get(i).as_interval();
let new_ts = match (ts, interval) {
(Some(ts), Some(interval)) => ts.add_interval(interval),
_ => ts,
};
result.push_value_ref(ValueRef::from(new_ts));
}
Ok(result.to_vector())
}
ConcreteDataType::Date(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let date = left.get(i).as_date();
let interval = right.get(i).as_interval();
let new_date = match (date, interval) {
(Some(date), Some(interval)) => date.add_interval(interval),
_ => date,
};
result.push_value_ref(ValueRef::from(new_date));
}
Ok(result.to_vector())
}
ConcreteDataType::DateTime(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let datetime = left.get(i).as_datetime();
let interval = right.get(i).as_interval();
let new_datetime = match (datetime, interval) {
(Some(datetime), Some(interval)) => datetime.add_interval(interval),
_ => datetime,
};
result.push_value_ref(ValueRef::from(new_datetime));
}
Ok(result.to_vector())
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail(),
}
let result = numeric::add(&left, &right).context(ArrowComputeSnafu)?;
let arrow_type = result.data_type().clone();
Helper::try_into_vector(result).context(IntoVectorSnafu {
data_type: arrow_type,
})
}
}
@@ -144,8 +94,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
TimestampSecondVector,
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
};
use super::{DateAddFunction, *};
@@ -168,16 +117,15 @@ mod tests {
ConcreteDataType::date_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::datetime_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
assert!(
matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 18));
} if sigs.len() == 15),
"{:?}",
f.signature()
);
}
#[test]
@@ -243,36 +191,4 @@ mod tests {
}
}
}
#[test]
fn test_datetime_date_add() {
let f = DateAddFunction;
let dates = vec![Some(123), None, Some(42), None];
// Intervals in months
let intervals = vec![1, 2, 3, 1];
let results = [Some(2678400123), None, Some(7776000042), None];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::DateTime(date) => {
assert_eq!(date.val(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}

View File

@@ -14,18 +14,19 @@
use std::fmt;
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::error::{ArrowComputeSnafu, IntoVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::Signature;
use datatypes::data_type::DataType;
use datatypes::arrow::compute::kernels::numeric;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use snafu::ensure;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
use crate::helper;
/// A function subtracts an interval value to Timestamp, Date or DateTime, and return the result.
/// A function subtracts an interval value to Timestamp, Date, and return the result.
/// The implementation of datetime type is based on Date64 which is incorrect so this function
/// doesn't support the datetime type.
#[derive(Clone, Debug, Default)]
pub struct DateSubFunction;
@@ -44,7 +45,6 @@ impl Function for DateSubFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -69,65 +69,14 @@ impl Function for DateSubFunction {
}
);
let left = &columns[0];
let right = &columns[1];
let left = columns[0].to_arrow_array();
let right = columns[1].to_arrow_array();
let size = left.len();
let left_datatype = columns[0].data_type();
match left_datatype {
ConcreteDataType::Timestamp(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let ts = left.get(i).as_timestamp();
let interval = right.get(i).as_interval();
let new_ts = match (ts, interval) {
(Some(ts), Some(interval)) => ts.sub_interval(interval),
_ => ts,
};
result.push_value_ref(ValueRef::from(new_ts));
}
Ok(result.to_vector())
}
ConcreteDataType::Date(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let date = left.get(i).as_date();
let interval = right.get(i).as_interval();
let new_date = match (date, interval) {
(Some(date), Some(interval)) => date.sub_interval(interval),
_ => date,
};
result.push_value_ref(ValueRef::from(new_date));
}
Ok(result.to_vector())
}
ConcreteDataType::DateTime(_) => {
let mut result = left_datatype.create_mutable_vector(size);
for i in 0..size {
let datetime = left.get(i).as_datetime();
let interval = right.get(i).as_interval();
let new_datetime = match (datetime, interval) {
(Some(datetime), Some(interval)) => datetime.sub_interval(interval),
_ => datetime,
};
result.push_value_ref(ValueRef::from(new_datetime));
}
Ok(result.to_vector())
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail(),
}
let result = numeric::sub(&left, &right).context(ArrowComputeSnafu)?;
let arrow_type = result.data_type().clone();
Helper::try_into_vector(result).context(IntoVectorSnafu {
data_type: arrow_type,
})
}
}
@@ -145,8 +94,7 @@ mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
DateTimeVector, DateVector, IntervalDayTimeVector, IntervalYearMonthVector,
TimestampSecondVector,
DateVector, IntervalDayTimeVector, IntervalYearMonthVector, TimestampSecondVector,
};
use super::{DateSubFunction, *};
@@ -174,11 +122,15 @@ mod tests {
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
assert!(
matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 18));
} if sigs.len() == 15),
"{:?}",
f.signature()
);
}
#[test]
@@ -250,42 +202,4 @@ mod tests {
}
}
}
#[test]
fn test_datetime_date_sub() {
let f = DateSubFunction;
let millis_per_month = 3600 * 24 * 30 * 1000;
let dates = vec![
Some(123 * millis_per_month),
None,
Some(42 * millis_per_month),
None,
];
// Intervals in months
let intervals = vec![1, 2, 3, 1];
let results = [Some(316137600000), None, Some(100915200000), None];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::DateTime(date) => {
assert_eq!(date.val(), result.unwrap());
}
_ => unreachable!(),
}
}
}
}

View File

@@ -13,11 +13,11 @@
// limitations under the License.
use std::sync::Arc;
pub(crate) mod encoding;
mod geohash;
mod h3;
use geohash::GeohashFunction;
use h3::H3Function;
mod helpers;
mod s2;
use crate::function_registry::FunctionRegistry;
@@ -25,7 +25,40 @@ pub(crate) struct GeoFunctions;
impl GeoFunctions {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(GeohashFunction));
registry.register(Arc::new(H3Function));
// geohash
registry.register(Arc::new(geohash::GeohashFunction));
registry.register(Arc::new(geohash::GeohashNeighboursFunction));
// h3 index
registry.register(Arc::new(h3::H3LatLngToCell));
registry.register(Arc::new(h3::H3LatLngToCellString));
// h3 index inspection
registry.register(Arc::new(h3::H3CellBase));
registry.register(Arc::new(h3::H3CellIsPentagon));
registry.register(Arc::new(h3::H3StringToCell));
registry.register(Arc::new(h3::H3CellToString));
registry.register(Arc::new(h3::H3CellCenterLatLng));
registry.register(Arc::new(h3::H3CellResolution));
// h3 hierarchical grid
registry.register(Arc::new(h3::H3CellCenterChild));
registry.register(Arc::new(h3::H3CellParent));
registry.register(Arc::new(h3::H3CellToChildren));
registry.register(Arc::new(h3::H3CellToChildrenSize));
registry.register(Arc::new(h3::H3CellToChildPos));
registry.register(Arc::new(h3::H3ChildPosToCell));
// h3 grid traversal
registry.register(Arc::new(h3::H3GridDisk));
registry.register(Arc::new(h3::H3GridDiskDistances));
registry.register(Arc::new(h3::H3GridDistance));
registry.register(Arc::new(h3::H3GridPathCells));
// s2
registry.register(Arc::new(s2::S2LatLngToCell));
registry.register(Arc::new(s2::S2CellLevel));
registry.register(Arc::new(s2::S2CellToToken));
registry.register(Arc::new(s2::S2CellParent));
}
}

View File

@@ -0,0 +1,223 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{self, InvalidInputStateSnafu, Result};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::AccumulatorCreatorFunction;
use common_time::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::{ListValue, Value};
use datatypes::vectors::VectorRef;
use snafu::{ensure, ResultExt};
use super::helpers::{ensure_columns_len, ensure_columns_n};
/// Accumulator of lat, lng, timestamp tuples
#[derive(Debug)]
pub struct JsonPathAccumulator {
timestamp_type: ConcreteDataType,
lat: Vec<Option<f64>>,
lng: Vec<Option<f64>>,
timestamp: Vec<Option<Timestamp>>,
}
impl JsonPathAccumulator {
fn new(timestamp_type: ConcreteDataType) -> Self {
Self {
lat: Vec::default(),
lng: Vec::default(),
timestamp: Vec::default(),
timestamp_type,
}
}
}
impl Accumulator for JsonPathAccumulator {
fn state(&self) -> Result<Vec<Value>> {
Ok(vec![
Value::List(ListValue::new(
self.lat.iter().map(|i| Value::from(*i)).collect(),
ConcreteDataType::float64_datatype(),
)),
Value::List(ListValue::new(
self.lng.iter().map(|i| Value::from(*i)).collect(),
ConcreteDataType::float64_datatype(),
)),
Value::List(ListValue::new(
self.timestamp.iter().map(|i| Value::from(*i)).collect(),
self.timestamp_type.clone(),
)),
])
}
fn update_batch(&mut self, columns: &[VectorRef]) -> Result<()> {
// update batch as in datafusion just provides the accumulator original
// input.
//
// columns is vec of [`lat`, `lng`, `timestamp`]
// where
// - `lat` is a vector of `Value::Float64` or similar type. Each item in
// the vector is a row in given dataset.
// - so on so forth for `lng` and `timestamp`
ensure_columns_n!(columns, 3);
let lat = &columns[0];
let lng = &columns[1];
let ts = &columns[2];
let size = lat.len();
for idx in 0..size {
self.lat.push(lat.get(idx).as_f64_lossy());
self.lng.push(lng.get(idx).as_f64_lossy());
self.timestamp.push(ts.get(idx).as_timestamp());
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
// merge batch as in datafusion gives state accumulated from the data
// returned from child accumulators' state() call
// In our particular implementation, the data structure is like
//
// states is vec of [`lat`, `lng`, `timestamp`]
// where
// - `lat` is a vector of `Value::List`. Each item in the list is all
// coordinates from a child accumulator.
// - so on so forth for `lng` and `timestamp`
ensure_columns_n!(states, 3);
let lat_lists = &states[0];
let lng_lists = &states[1];
let ts_lists = &states[2];
let len = lat_lists.len();
for idx in 0..len {
if let Some(lat_list) = lat_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in lat_list.items() {
self.lat.push(v.as_f64_lossy());
}
}
if let Some(lng_list) = lng_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in lng_list.items() {
self.lng.push(v.as_f64_lossy());
}
}
if let Some(ts_list) = ts_lists
.get(idx)
.as_list()
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?
{
for v in ts_list.items() {
self.timestamp.push(v.as_timestamp());
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mut work_vec: Vec<(&Option<f64>, &Option<f64>, &Option<Timestamp>)> = self
.lat
.iter()
.zip(self.lng.iter())
.zip(self.timestamp.iter())
.map(|((a, b), c)| (a, b, c))
.collect();
// sort by timestamp, we treat null timestamp as 0
work_vec.sort_unstable_by_key(|tuple| tuple.2.unwrap_or_else(|| Timestamp::new_second(0)));
let result = serde_json::to_string(
&work_vec
.into_iter()
// note that we transform to lng,lat for geojson compatibility
.map(|(lat, lng, _)| vec![lng, lat])
.collect::<Vec<Vec<&Option<f64>>>>(),
)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Serialization failure: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
Ok(Value::String(result.into()))
}
}
/// This function accept rows of lat, lng and timestamp, sort with timestamp and
/// encoding them into a geojson-like path.
///
/// Example:
///
/// ```sql
/// SELECT json_encode_path(lat, lon, timestamp) FROM table [group by ...];
/// ```
///
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct JsonPathEncodeFunctionCreator {}
impl AggregateFunctionCreator for JsonPathEncodeFunctionCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let ts_type = types[2].clone();
Ok(Box::new(JsonPathAccumulator::new(ts_type)))
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 3, InvalidInputStateSnafu);
let timestamp_type = input_types[2].clone();
Ok(vec![
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::float64_datatype()),
ConcreteDataType::list_datatype(timestamp_type),
])
}
}

View File

@@ -20,23 +20,69 @@ use common_query::error::{self, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::value::Value;
use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef};
use datatypes::scalars::{Scalar, ScalarVectorBuilder};
use datatypes::value::{ListValue, Value};
use datatypes::vectors::{ListVectorBuilder, MutableVector, StringVectorBuilder, VectorRef};
use geohash::Coord;
use snafu::{ensure, ResultExt};
use crate::function::{Function, FunctionContext};
macro_rules! ensure_resolution_usize {
($v: ident) => {
if !($v > 0 && $v <= 12) {
Err(BoxedError::new(PlainError::new(
format!("Invalid geohash resolution {}, expect value: [1, 12]", $v),
StatusCode::EngineExecuteQuery,
)))
.context(error::ExecuteSnafu)
} else {
Ok($v as usize)
}
};
}
fn try_into_resolution(v: Value) -> Result<usize> {
match v {
Value::Int8(v) => {
ensure_resolution_usize!(v)
}
Value::Int16(v) => {
ensure_resolution_usize!(v)
}
Value::Int32(v) => {
ensure_resolution_usize!(v)
}
Value::Int64(v) => {
ensure_resolution_usize!(v)
}
Value::UInt8(v) => {
ensure_resolution_usize!(v)
}
Value::UInt16(v) => {
ensure_resolution_usize!(v)
}
Value::UInt32(v) => {
ensure_resolution_usize!(v)
}
Value::UInt64(v) => {
ensure_resolution_usize!(v)
}
_ => unreachable!(),
}
}
/// Function that return geohash string for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashFunction;
const NAME: &str = "geohash";
impl GeohashFunction {
const NAME: &'static str = "geohash";
}
impl Function for GeohashFunction {
fn name(&self) -> &str {
NAME
Self::NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -93,17 +139,7 @@ impl Function for GeohashFunction {
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let r = match resolution_vec.get(i) {
Value::Int8(v) => v as usize,
Value::Int16(v) => v as usize,
Value::Int32(v) => v as usize,
Value::Int64(v) => v as usize,
Value::UInt8(v) => v as usize,
Value::UInt16(v) => v as usize,
Value::UInt32(v) => v as usize,
Value::UInt64(v) => v as usize,
_ => unreachable!(),
};
let r = try_into_resolution(resolution_vec.get(i))?;
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
@@ -130,6 +166,134 @@ impl Function for GeohashFunction {
impl fmt::Display for GeohashFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME)
write!(f, "{}", Self::NAME)
}
}
/// Function that return geohash string for a given geospatial coordinate.
#[derive(Clone, Debug, Default)]
pub struct GeohashNeighboursFunction;
impl GeohashNeighboursFunction {
const NAME: &'static str = "geohash_neighbours";
}
impl Function for GeohashNeighboursFunction {
fn name(&self) -> &str {
GeohashNeighboursFunction::NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::list_datatype(
ConcreteDataType::string_datatype(),
))
}
fn signature(&self) -> Signature {
let mut signatures = Vec::new();
for coord_type in &[
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
] {
for resolution_type in &[
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
] {
signatures.push(TypeSignature::Exact(vec![
// latitude
coord_type.clone(),
// longitude
coord_type.clone(),
// resolution
resolution_type.clone(),
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 3,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect 3, provided : {}",
columns.len()
),
}
);
let lat_vec = &columns[0];
let lon_vec = &columns[1];
let resolution_vec = &columns[2];
let size = lat_vec.len();
let mut results =
ListVectorBuilder::with_type_capacity(ConcreteDataType::string_datatype(), size);
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let r = try_into_resolution(resolution_vec.get(i))?;
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
let coord = Coord { x: lon, y: lat };
let encoded = geohash::encode(coord, r)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Geohash error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
let neighbours = geohash::neighbors(&encoded)
.map_err(|e| {
BoxedError::new(PlainError::new(
format!("Geohash error: {}", e),
StatusCode::EngineExecuteQuery,
))
})
.context(error::ExecuteSnafu)?;
Some(ListValue::new(
vec![
neighbours.n,
neighbours.nw,
neighbours.w,
neighbours.sw,
neighbours.s,
neighbours.se,
neighbours.e,
neighbours.ne,
]
.into_iter()
.map(Value::from)
.collect(),
ConcreteDataType::string_datatype(),
))
}
_ => None,
};
if let Some(list_value) = result {
results.push(Some(list_value.as_scalar_ref()));
} else {
results.push(None);
}
}
Ok(results.to_vector())
}
}
impl fmt::Display for GeohashNeighboursFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", GeohashNeighboursFunction::NAME)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,75 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
macro_rules! ensure_columns_len {
($columns:ident) => {
snafu::ensure!(
$columns.windows(2).all(|c| c[0].len() == c[1].len()),
common_query::error::InvalidFuncArgsSnafu {
err_msg: "The length of input columns are in different size"
}
)
};
($column_a:ident, $column_b:ident, $($column_n:ident),*) => {
snafu::ensure!(
{
let mut result = $column_a.len() == $column_b.len();
$(
result = result && ($column_a.len() == $column_n.len());
)*
result
}
common_query::error::InvalidFuncArgsSnafu {
err_msg: "The length of input columns are in different size"
}
)
};
}
pub(super) use ensure_columns_len;
macro_rules! ensure_columns_n {
($columns:ident, $n:literal) => {
snafu::ensure!(
$columns.len() == $n,
common_query::error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of arguments is not correct, expect {}, provided : {}",
stringify!($n),
$columns.len()
),
}
);
if $n > 1 {
ensure_columns_len!($columns);
}
};
}
pub(super) use ensure_columns_n;
macro_rules! ensure_and_coerce {
($compare:expr, $coerce:expr) => {{
snafu::ensure!(
$compare,
common_query::error::InvalidFuncArgsSnafu {
err_msg: "Argument was outside of acceptable range "
}
);
Ok($coerce)
}};
}
pub(super) use ensure_and_coerce;

View File

@@ -0,0 +1,275 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_query::error::{InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, TypeSignature};
use datafusion::logical_expr::Volatility;
use datatypes::prelude::ConcreteDataType;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::value::Value;
use datatypes::vectors::{MutableVector, StringVectorBuilder, UInt64VectorBuilder, VectorRef};
use derive_more::Display;
use once_cell::sync::Lazy;
use s2::cellid::{CellID, MAX_LEVEL};
use s2::latlng::LatLng;
use snafu::ensure;
use crate::function::{Function, FunctionContext};
use crate::scalars::geo::helpers::{ensure_and_coerce, ensure_columns_len, ensure_columns_n};
static CELL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint64_datatype(),
]
});
static COORDINATE_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::float32_datatype(),
ConcreteDataType::float64_datatype(),
]
});
static LEVEL_TYPES: Lazy<Vec<ConcreteDataType>> = Lazy::new(|| {
vec![
ConcreteDataType::int8_datatype(),
ConcreteDataType::int16_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype(),
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint32_datatype(),
ConcreteDataType::uint64_datatype(),
]
});
/// Function that returns [s2] encoding cellid for a given geospatial coordinate.
///
/// [s2]: http://s2geometry.io
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2LatLngToCell;
impl Function for S2LatLngToCell {
fn name(&self) -> &str {
"s2_latlng_to_cell"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
let mut signatures = Vec::with_capacity(COORDINATE_TYPES.len());
for coord_type in COORDINATE_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
// latitude
coord_type.clone(),
// longitude
coord_type.clone(),
]));
}
Signature::one_of(signatures, Volatility::Stable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 2);
let lat_vec = &columns[0];
let lon_vec = &columns[1];
let size = lat_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let lat = lat_vec.get(i).as_f64_lossy();
let lon = lon_vec.get(i).as_f64_lossy();
let result = match (lat, lon) {
(Some(lat), Some(lon)) => {
let coord = LatLng::from_degrees(lat, lon);
ensure!(
coord.is_valid(),
InvalidFuncArgsSnafu {
err_msg: "The input coordinates are invalid",
}
);
let cellid = CellID::from(coord);
let encoded: u64 = cellid.0;
Some(encoded)
}
_ => None,
};
results.push(result);
}
Ok(results.to_vector())
}
}
/// Return the level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellLevel;
impl Function for S2CellLevel {
fn name(&self) -> &str {
"s2_cell_level"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 1);
let cell_vec = &columns[0];
let size = cell_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let res = cell.map(|cell| cell.level());
results.push(res);
}
Ok(results.to_vector())
}
}
/// Return the string presentation of the cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellToToken;
impl Function for S2CellToToken {
fn name(&self) -> &str {
"s2_cell_to_token"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 1);
let cell_vec = &columns[0];
let size = cell_vec.len();
let mut results = StringVectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let res = cell.map(|cell| cell.to_token());
results.push(res.as_deref());
}
Ok(results.to_vector())
}
}
/// Return parent at given level of current s2 cell
#[derive(Clone, Debug, Default, Display)]
#[display("{}", self.name())]
pub struct S2CellParent;
impl Function for S2CellParent {
fn name(&self) -> &str {
"s2_cell_parent"
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
signature_of_cell_and_level()
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure_columns_n!(columns, 2);
let cell_vec = &columns[0];
let level_vec = &columns[1];
let size = cell_vec.len();
let mut results = UInt64VectorBuilder::with_capacity(size);
for i in 0..size {
let cell = cell_from_value(cell_vec.get(i));
let level = value_to_level(level_vec.get(i))?;
let result = cell.map(|cell| cell.parent(level).0);
results.push(result);
}
Ok(results.to_vector())
}
}
fn signature_of_cell() -> Signature {
let mut signatures = Vec::with_capacity(CELL_TYPES.len());
for cell_type in CELL_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![cell_type.clone()]));
}
Signature::one_of(signatures, Volatility::Stable)
}
fn signature_of_cell_and_level() -> Signature {
let mut signatures = Vec::with_capacity(CELL_TYPES.len() * LEVEL_TYPES.len());
for cell_type in CELL_TYPES.as_slice() {
for level_type in LEVEL_TYPES.as_slice() {
signatures.push(TypeSignature::Exact(vec![
cell_type.clone(),
level_type.clone(),
]));
}
}
Signature::one_of(signatures, Volatility::Stable)
}
fn cell_from_value(v: Value) -> Option<CellID> {
match v {
Value::Int64(v) => Some(CellID(v as u64)),
Value::UInt64(v) => Some(CellID(v)),
_ => None,
}
}
fn value_to_level(v: Value) -> Result<u64> {
match v {
Value::Int8(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i8, v as u64),
Value::Int16(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i16, v as u64),
Value::Int32(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i32, v as u64),
Value::Int64(v) => ensure_and_coerce!(v >= 0 && v <= MAX_LEVEL as i64, v as u64),
Value::UInt8(v) => ensure_and_coerce!(v <= MAX_LEVEL as u8, v as u64),
Value::UInt16(v) => ensure_and_coerce!(v <= MAX_LEVEL as u16, v as u64),
Value::UInt32(v) => ensure_and_coerce!(v <= MAX_LEVEL as u32, v as u64),
Value::UInt64(v) => ensure_and_coerce!(v <= MAX_LEVEL, v),
_ => unreachable!(),
}
}

View File

@@ -14,12 +14,18 @@
use std::sync::Arc;
mod json_get;
mod json_is;
mod json_path_exists;
mod json_path_match;
mod json_to_string;
mod to_json;
mod parse_json;
use json_get::{JsonGetBool, JsonGetFloat, JsonGetInt, JsonGetString};
use json_is::{
JsonIsArray, JsonIsBool, JsonIsFloat, JsonIsInt, JsonIsNull, JsonIsObject, JsonIsString,
};
use json_to_string::JsonToStringFunction;
use to_json::ToJsonFunction;
use parse_json::ParseJsonFunction;
use crate::function_registry::FunctionRegistry;
@@ -28,11 +34,22 @@ pub(crate) struct JsonFunction;
impl JsonFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(JsonToStringFunction));
registry.register(Arc::new(ToJsonFunction));
registry.register(Arc::new(ParseJsonFunction));
registry.register(Arc::new(JsonGetInt));
registry.register(Arc::new(JsonGetFloat));
registry.register(Arc::new(JsonGetString));
registry.register(Arc::new(JsonGetBool));
registry.register(Arc::new(JsonIsNull));
registry.register(Arc::new(JsonIsInt));
registry.register(Arc::new(JsonIsFloat));
registry.register(Arc::new(JsonIsString));
registry.register(Arc::new(JsonIsBool));
registry.register(Arc::new(JsonIsArray));
registry.register(Arc::new(JsonIsObject));
registry.register(Arc::new(json_path_exists::JsonPathExistsFunction));
registry.register(Arc::new(json_path_match::JsonPathMatchFunction));
}
}

View File

@@ -47,7 +47,7 @@ fn get_json_by_path(json: &[u8], path: &str) -> Option<Vec<u8>> {
/// If the path does not exist or the value is not the type specified, return `NULL`.
macro_rules! json_get {
// e.g. name = JsonGetInt, type = Int64, rust_type = i64, doc = "Get the value from the JSONB by the given path and return it as an integer."
($name: ident, $type: ident, $rust_type: ident, $doc:expr) => {
($name:ident, $type:ident, $rust_type:ident, $doc:expr) => {
paste::paste! {
#[doc = $doc]
#[derive(Clone, Debug, Default)]

View File

@@ -0,0 +1,215 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Checks if the input is a JSON object of the given type.
macro_rules! json_is {
($name:ident, $json_type:ident, $doc:expr) => {
paste::paste! {
#[derive(Clone, Debug, Default)]
pub struct $name;
impl Function for $name {
fn name(&self) -> &str {
stringify!([<$name:snake>])
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(vec![ConcreteDataType::json_datatype()], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 1,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly one, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = BooleanVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let json = json.as_binary();
let result = match json {
Ok(Some(json)) => {
Some(jsonb::[<is_ $json_type>](json))
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: stringify!([<$name:snake>]),
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for $name {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", stringify!([<$name:snake>]).to_ascii_uppercase())
}
}
}
}
}
json_is!(JsonIsNull, null, "Checks if the input JSONB is null");
json_is!(
JsonIsBool,
boolean,
"Checks if the input JSONB is a boolean type JSON value"
);
json_is!(
JsonIsInt,
i64,
"Checks if the input JSONB is a integer type JSON value"
);
json_is!(
JsonIsFloat,
number,
"Checks if the input JSONB is a JSON float"
);
json_is!(
JsonIsString,
string,
"Checks if the input JSONB is a JSON string"
);
json_is!(
JsonIsArray,
array,
"Checks if the input JSONB is a JSON array"
);
json_is!(
JsonIsObject,
object,
"Checks if the input JSONB is a JSON object"
);
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::BinaryVector;
use super::*;
#[test]
fn test_json_is_functions() {
let json_is_functions: [&dyn Function; 6] = [
&JsonIsBool,
&JsonIsInt,
&JsonIsFloat,
&JsonIsString,
&JsonIsArray,
&JsonIsObject,
];
let expected_names = [
"json_is_bool",
"json_is_int",
"json_is_float",
"json_is_string",
"json_is_array",
"json_is_object",
];
for (func, expected_name) in json_is_functions.iter().zip(expected_names.iter()) {
assert_eq!(func.name(), *expected_name);
assert_eq!(
func.return_type(&[ConcreteDataType::json_datatype()])
.unwrap(),
ConcreteDataType::boolean_datatype()
);
assert_eq!(
func.signature(),
Signature::exact(
vec![ConcreteDataType::json_datatype()],
Volatility::Immutable
)
);
}
let json_strings = [
r#"true"#,
r#"1"#,
r#"1.0"#,
r#""The pig fly through a castle, and has been attracted by the princess.""#,
r#"[1, 2]"#,
r#"{"a": 1}"#,
];
let expected_results = [
[true, false, false, false, false, false],
[false, true, false, false, false, false],
// Integers are also floats
[false, true, true, false, false, false],
[false, false, false, true, false, false],
[false, false, false, false, true, false],
[false, false, false, false, false, true],
];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let args: Vec<VectorRef> = vec![Arc::new(json_vector)];
for (func, expected_result) in json_is_functions.iter().zip(expected_results.iter()) {
let vector = func.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(vector.len(), json_strings.len());
for (i, expected) in expected_result.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap().unwrap();
assert_eq!(result, *expected);
}
}
}
}

View File

@@ -0,0 +1,172 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Check if the given JSON data contains the given JSON path.
#[derive(Clone, Debug, Default)]
pub struct JsonPathExistsFunction;
const NAME: &str = "json_path_exists";
impl Function for JsonPathExistsFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let datatype = jsons.data_type();
let mut results = BooleanVectorBuilder::with_capacity(size);
match datatype {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
match json_path {
Ok(json_path) => jsonb::path_exists(json, json_path).ok(),
Err(_) => None,
}
}
_ => None,
};
results.push(result);
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
Ok(results.to_vector())
}
}
impl Display for JsonPathExistsFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "JSON_PATH_EXISTS")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{BinaryVector, StringVector};
use super::*;
#[test]
fn test_json_path_exists_function() {
let json_path_exists = JsonPathExistsFunction;
assert_eq!("json_path_exists", json_path_exists.name());
assert_eq!(
ConcreteDataType::boolean_datatype(),
json_path_exists
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(json_path_exists.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()]
));
let json_strings = [
r#"{"a": {"b": 2}, "b": 2, "c": 3}"#,
r#"{"a": 4, "b": {"c": 6}, "c": 6}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
r#"{"a": 7, "b": 8, "c": {"a": 7}}"#,
];
let paths = vec!["$.a.b.c", "$.b", "$.c.a", ".d"];
let results = [false, true, true, false];
let jsonbs = json_strings
.iter()
.map(|s| {
let value = jsonb::parse_value(s.as_bytes()).unwrap();
value.to_vec()
})
.collect::<Vec<_>>();
let json_vector = BinaryVector::from_vec(jsonbs);
let path_vector = StringVector::from_vec(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_path_exists
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(4, vector.len());
for (i, gt) in results.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_boolean().unwrap().unwrap();
assert_eq!(*gt, result);
}
}
}

View File

@@ -0,0 +1,202 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::Signature;
use datafusion::logical_expr::Volatility;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::vectors::{BooleanVectorBuilder, MutableVector};
use snafu::ensure;
use crate::function::{Function, FunctionContext};
/// Check if the given JSON data match the given JSON path's predicate.
#[derive(Clone, Debug, Default)]
pub struct JsonPathMatchFunction;
const NAME: &str = "json_path_match";
impl Function for JsonPathMatchFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::boolean_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(
vec![
ConcreteDataType::json_datatype(),
ConcreteDataType::string_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
columns.len()
),
}
);
let jsons = &columns[0];
let paths = &columns[1];
let size = jsons.len();
let mut results = BooleanVectorBuilder::with_capacity(size);
for i in 0..size {
let json = jsons.get_ref(i);
let path = paths.get_ref(i);
match json.data_type() {
// JSON data type uses binary vector
ConcreteDataType::Binary(_) => {
let json = json.as_binary();
let path = path.as_string();
let result = match (json, path) {
(Ok(Some(json)), Ok(Some(path))) => {
if !jsonb::is_null(json) {
let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes());
match json_path {
Ok(json_path) => jsonb::path_match(json, json_path).ok(),
Err(_) => None,
}
} else {
None
}
}
_ => None,
};
results.push(result);
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail();
}
}
}
Ok(results.to_vector())
}
}
impl Display for JsonPathMatchFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "JSON_PATH_MATCH")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{BinaryVector, StringVector};
use super::*;
#[test]
fn test_json_path_match_function() {
let json_path_match = JsonPathMatchFunction;
assert_eq!("json_path_match", json_path_match.name());
assert_eq!(
ConcreteDataType::boolean_datatype(),
json_path_match
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(json_path_match.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![ConcreteDataType::json_datatype(), ConcreteDataType::string_datatype()],
));
let json_strings = [
Some(r#"{"a": {"b": 2}, "b": 2, "c": 3}"#.to_string()),
Some(r#"{"a": 1, "b": [1,2,3]}"#.to_string()),
Some(r#"{"a": 1 ,"b": [1,2,3]}"#.to_string()),
Some(r#"[1,2,3]"#.to_string()),
Some(r#"{"a":1,"b":[1,2,3]}"#.to_string()),
Some(r#"null"#.to_string()),
Some(r#"null"#.to_string()),
];
let paths = vec![
Some("$.a.b == 2".to_string()),
Some("$.b[1 to last] >= 2".to_string()),
Some("$.c > 0".to_string()),
Some("$[0 to last] > 0".to_string()),
Some(r#"null"#.to_string()),
Some("$.c > 0".to_string()),
Some(r#"null"#.to_string()),
];
let results = [
Some(true),
Some(true),
Some(false),
Some(true),
None,
None,
None,
];
let jsonbs = json_strings
.into_iter()
.map(|s| s.map(|json| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()))
.collect::<Vec<_>>();
let json_vector = BinaryVector::from(jsonbs);
let path_vector = StringVector::from(paths);
let args: Vec<VectorRef> = vec![Arc::new(json_vector), Arc::new(path_vector)];
let vector = json_path_match
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(7, vector.len());
for (i, expected) in results.iter().enumerate() {
let result = vector.get_ref(i);
match expected {
Some(expected_value) => {
assert!(!result.is_null());
let result_value = result.as_boolean().unwrap().unwrap();
assert_eq!(*expected_value, result_value);
}
None => {
assert!(result.is_null());
}
}
}
}
}

View File

@@ -119,7 +119,7 @@ mod tests {
use super::*;
#[test]
fn test_get_by_path_function() {
fn test_json_to_string_function() {
let json_to_string = JsonToStringFunction;
assert_eq!("json_to_string", json_to_string.name());

View File

@@ -27,11 +27,11 @@ use crate::function::{Function, FunctionContext};
/// Parses the `String` into `JSONB`.
#[derive(Clone, Debug, Default)]
pub struct ToJsonFunction;
pub struct ParseJsonFunction;
const NAME: &str = "to_json";
const NAME: &str = "parse_json";
impl Function for ToJsonFunction {
impl Function for ParseJsonFunction {
fn name(&self) -> &str {
NAME
}
@@ -101,9 +101,9 @@ impl Function for ToJsonFunction {
}
}
impl Display for ToJsonFunction {
impl Display for ParseJsonFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TO_JSON")
write!(f, "PARSE_JSON")
}
}
@@ -119,17 +119,17 @@ mod tests {
#[test]
fn test_get_by_path_function() {
let to_json = ToJsonFunction;
let parse_json = ParseJsonFunction;
assert_eq!("to_json", to_json.name());
assert_eq!("parse_json", parse_json.name());
assert_eq!(
ConcreteDataType::json_datatype(),
to_json
parse_json
.return_type(&[ConcreteDataType::json_datatype()])
.unwrap()
);
assert!(matches!(to_json.signature(),
assert!(matches!(parse_json.signature(),
Signature {
type_signature: TypeSignature::Exact(valid_types),
volatility: Volatility::Immutable
@@ -152,13 +152,12 @@ mod tests {
let json_string_vector = StringVector::from_vec(json_strings.to_vec());
let args: Vec<VectorRef> = vec![Arc::new(json_string_vector)];
let vector = to_json.eval(FunctionContext::default(), &args).unwrap();
let vector = parse_json.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(3, vector.len());
for (i, gt) in jsonbs.iter().enumerate() {
let result = vector.get_ref(i);
let result = result.as_binary().unwrap().unwrap();
// remove whitespaces
assert_eq!(gt, result);
}
}

View File

@@ -25,13 +25,13 @@ use session::context::QueryContextRef;
use crate::handlers::ProcedureServiceHandlerRef;
use crate::helper::cast_u64;
const DEFAULT_REPLAY_TIMEOUT_SECS: u64 = 10;
const DEFAULT_TIMEOUT_SECS: u64 = 30;
/// A function to migrate a region from source peer to target peer.
/// Returns the submitted procedure id if success. Only available in cluster mode.
///
/// - `migrate_region(region_id, from_peer, to_peer)`, with default replay WAL timeout(10 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`
/// - `migrate_region(region_id, from_peer, to_peer)`, with timeout(30 seconds).
/// - `migrate_region(region_id, from_peer, to_peer, timeout(secs))`.
///
/// The parameters:
/// - `region_id`: the region id
@@ -48,18 +48,13 @@ pub(crate) async fn migrate_region(
_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
let (region_id, from_peer, to_peer, replay_timeout) = match params.len() {
let (region_id, from_peer, to_peer, timeout) = match params.len() {
3 => {
let region_id = cast_u64(&params[0])?;
let from_peer = cast_u64(&params[1])?;
let to_peer = cast_u64(&params[2])?;
(
region_id,
from_peer,
to_peer,
Some(DEFAULT_REPLAY_TIMEOUT_SECS),
)
(region_id, from_peer, to_peer, Some(DEFAULT_TIMEOUT_SECS))
}
4 => {
@@ -82,14 +77,14 @@ pub(crate) async fn migrate_region(
}
};
match (region_id, from_peer, to_peer, replay_timeout) {
(Some(region_id), Some(from_peer), Some(to_peer), Some(replay_timeout)) => {
match (region_id, from_peer, to_peer, timeout) {
(Some(region_id), Some(from_peer), Some(to_peer), Some(timeout)) => {
let pid = procedure_service_handler
.migrate_region(MigrateRegionRequest {
region_id,
from_peer,
to_peer,
replay_timeout: Duration::from_secs(replay_timeout),
timeout: Duration::from_secs(timeout),
})
.await?;

View File

@@ -199,6 +199,7 @@ pub fn default_get_uuid(working_home: &Option<String>) -> Option<String> {
}
/// Report version info to GreptimeDB.
///
/// We do not collect any identity-sensitive information.
/// This task is scheduled to run every 30 minutes.
/// The task will be disabled default. It can be enabled by setting the build feature `greptimedb-telemetry`
@@ -324,7 +325,7 @@ mod tests {
});
let addr = ([127, 0, 0, 1], port).into();
let server = Server::bind(&addr).serve(make_svc);
let server = Server::try_bind(&addr).unwrap().serve(make_svc);
let graceful = server.with_graceful_shutdown(async {
rx.await.ok();
});

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use api::helper::{convert_i128_to_interval, convert_to_pb_decimal128};
use api::helper::{convert_month_day_nano_to_pb, convert_to_pb_decimal128};
use api::v1::column::Values;
use common_base::BitVec;
use datatypes::types::{IntervalType, TimeType, TimestampType, WrapperType};
@@ -211,7 +211,7 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)),
IntervalMonthDayNanoVector,
interval_month_day_nano_values,
|x| { convert_i128_to_interval(x.into_native()) }
|x| { convert_month_day_nano_to_pb(x) }
),
(
ConcreteDataType::Decimal128(_),

View File

@@ -21,23 +21,19 @@ use syn::{parse_macro_input, DeriveInput, ItemStruct};
pub(crate) fn impl_aggr_func_type_store(ast: &DeriveInput) -> TokenStream {
let name = &ast.ident;
let gen = quote! {
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::error::{InvalidInputStateSnafu, Error as QueryError};
use datatypes::prelude::ConcreteDataType;
impl AggrFuncTypeStore for #name {
fn input_types(&self) -> std::result::Result<Vec<ConcreteDataType>, QueryError> {
impl common_query::logical_plan::accumulator::AggrFuncTypeStore for #name {
fn input_types(&self) -> std::result::Result<Vec<datatypes::prelude::ConcreteDataType>, common_query::error::Error> {
let input_types = self.input_types.load();
snafu::ensure!(input_types.is_some(), InvalidInputStateSnafu);
snafu::ensure!(input_types.is_some(), common_query::error::InvalidInputStateSnafu);
Ok(input_types.as_ref().unwrap().as_ref().clone())
}
fn set_input_types(&self, input_types: Vec<ConcreteDataType>) -> std::result::Result<(), QueryError> {
fn set_input_types(&self, input_types: Vec<datatypes::prelude::ConcreteDataType>) -> std::result::Result<(), common_query::error::Error> {
let old = self.input_types.swap(Some(std::sync::Arc::new(input_types.clone())));
if let Some(old) = old {
snafu::ensure!(old.len() == input_types.len(), InvalidInputStateSnafu);
snafu::ensure!(old.len() == input_types.len(), common_query::error::InvalidInputStateSnafu);
for (x, y) in old.iter().zip(input_types.iter()) {
snafu::ensure!(x == y, InvalidInputStateSnafu);
snafu::ensure!(x == y, common_query::error::InvalidInputStateSnafu);
}
}
Ok(())
@@ -51,7 +47,7 @@ pub(crate) fn impl_as_aggr_func_creator(_args: TokenStream, input: TokenStream)
let mut item_struct = parse_macro_input!(input as ItemStruct);
if let syn::Fields::Named(ref mut fields) = item_struct.fields {
let result = syn::Field::parse_named.parse2(quote! {
input_types: arc_swap::ArcSwapOption<Vec<ConcreteDataType>>
input_types: arc_swap::ArcSwapOption<Vec<datatypes::prelude::ConcreteDataType>>
});
match result {
Ok(field) => fields.named.push(field),

View File

@@ -35,7 +35,9 @@ pub fn aggr_func_type_store_derive(input: TokenStream) -> TokenStream {
}
/// A struct can be used as a creator for aggregate function if it has been annotated with this
/// attribute first. This attribute add a necessary field which is intended to store the input
/// attribute first.
///
/// This attribute add a necessary field which is intended to store the input
/// data's types to the struct.
/// This attribute is expected to be used along with derive macro [AggrFuncTypeStore].
#[proc_macro_attribute]
@@ -44,9 +46,10 @@ pub fn as_aggr_func_creator(args: TokenStream, input: TokenStream) -> TokenStrea
}
/// Attribute macro to convert an arithimetic function to a range function. The annotated function
/// should accept servaral arrays as input and return a single value as output. This procedure
/// macro can works on any number of input parameters. Return type can be either primitive type
/// or wrapped in `Option`.
/// should accept servaral arrays as input and return a single value as output.
///
/// This procedure macro can works on any number of input parameters. Return type can be either
/// primitive type or wrapped in `Option`.
///
/// # Example
/// Take `count_over_time()` in PromQL as an example:

View File

@@ -24,5 +24,5 @@ struct Foo {}
fn test_derive() {
let _ = Foo::default();
assert_fields!(Foo: input_types);
assert_impl_all!(Foo: std::fmt::Debug, Default, AggrFuncTypeStore);
assert_impl_all!(Foo: std::fmt::Debug, Default, common_query::logical_plan::accumulator::AggrFuncTypeStore);
}

View File

@@ -15,6 +15,7 @@ workspace = true
anymap2 = "0.13.0"
api.workspace = true
async-recursion = "1.0"
async-stream = "0.3"
async-trait.workspace = true
base64.workspace = true
bytes.workspace = true

View File

@@ -20,6 +20,7 @@ use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use crate::datanode::RegionStat;
use crate::error::{
DecodeJsonSnafu, EncodeJsonSnafu, Error, FromUtf8Snafu, InvalidNodeInfoKeySnafu,
InvalidRoleSnafu, ParseNumSnafu, Result,
@@ -47,10 +48,14 @@ pub trait ClusterInfo {
role: Option<Role>,
) -> std::result::Result<Vec<NodeInfo>, Self::Error>;
/// List all region stats in the cluster.
async fn list_region_stats(&self) -> std::result::Result<Vec<RegionStat>, Self::Error>;
// TODO(jeremy): Other info, like region status, etc.
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
///
/// This key cannot be used to describe the `Metasrv` because the `Metasrv` does not have
/// a `cluster_id`, it serves multiple clusters.
#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)]

View File

@@ -0,0 +1,413 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::str::FromStr;
use api::v1::meta::{HeartbeatRequest, RequestHeader};
use common_time::util as time_util;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::region_engine::{RegionRole, RegionStatistic};
use store_api::storage::RegionId;
use table::metadata::TableId;
use crate::error::Result;
use crate::{error, ClusterId};
pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
const DATANODE_STAT_PREFIX: &str = "__meta_datanode_stat";
pub const REGION_STATISTIC_KEY: &str = "__region_statistic";
lazy_static! {
pub(crate) static ref DATANODE_LEASE_KEY_PATTERN: Regex =
Regex::new(&format!("^{DATANODE_LEASE_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
static ref DATANODE_STAT_KEY_PATTERN: Regex =
Regex::new(&format!("^{DATANODE_STAT_PREFIX}-([0-9]+)-([0-9]+)$")).unwrap();
static ref INACTIVE_REGION_KEY_PATTERN: Regex = Regex::new(&format!(
"^{INACTIVE_REGION_PREFIX}-([0-9]+)-([0-9]+)-([0-9]+)$"
))
.unwrap();
}
/// The key of the datanode stat in the storage.
///
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Stat {
pub timestamp_millis: i64,
pub cluster_id: ClusterId,
// The datanode Id.
pub id: u64,
// The datanode address.
pub addr: String,
/// The read capacity units during this period
pub rcus: i64,
/// The write capacity units during this period
pub wcus: i64,
/// How many regions on this node
pub region_num: u64,
pub region_stats: Vec<RegionStat>,
// The node epoch is used to check whether the node has restarted or redeployed.
pub node_epoch: u64,
}
/// The statistics of a region.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegionStat {
/// The region_id.
pub id: RegionId,
/// The read capacity units during this period
pub rcus: i64,
/// The write capacity units during this period
pub wcus: i64,
/// Approximate bytes of this region
pub approximate_bytes: i64,
/// The engine name.
pub engine: String,
/// The region role.
pub role: RegionRole,
/// The size of the memtable in bytes.
pub memtable_size: u64,
/// The size of the manifest in bytes.
pub manifest_size: u64,
/// The size of the SST files in bytes.
pub sst_size: u64,
}
impl Stat {
#[inline]
pub fn is_empty(&self) -> bool {
self.region_stats.is_empty()
}
pub fn stat_key(&self) -> DatanodeStatKey {
DatanodeStatKey {
cluster_id: self.cluster_id,
node_id: self.id,
}
}
/// Returns a tuple array containing [RegionId] and [RegionRole].
pub fn regions(&self) -> Vec<(RegionId, RegionRole)> {
self.region_stats.iter().map(|s| (s.id, s.role)).collect()
}
/// Returns all table ids in the region stats.
pub fn table_ids(&self) -> HashSet<TableId> {
self.region_stats.iter().map(|s| s.id.table_id()).collect()
}
/// Retains the active region stats and updates the rcus, wcus, and region_num.
pub fn retain_active_region_stats(&mut self, inactive_region_ids: &HashSet<RegionId>) {
if inactive_region_ids.is_empty() {
return;
}
self.region_stats
.retain(|r| !inactive_region_ids.contains(&r.id));
self.rcus = self.region_stats.iter().map(|s| s.rcus).sum();
self.wcus = self.region_stats.iter().map(|s| s.wcus).sum();
self.region_num = self.region_stats.len() as u64;
}
}
impl TryFrom<&HeartbeatRequest> for Stat {
type Error = Option<RequestHeader>;
fn try_from(value: &HeartbeatRequest) -> std::result::Result<Self, Self::Error> {
let HeartbeatRequest {
header,
peer,
region_stats,
node_epoch,
..
} = value;
match (header, peer) {
(Some(header), Some(peer)) => {
let region_stats = region_stats
.iter()
.map(RegionStat::from)
.collect::<Vec<_>>();
Ok(Self {
timestamp_millis: time_util::current_time_millis(),
cluster_id: header.cluster_id,
// datanode id
id: peer.id,
// datanode address
addr: peer.addr.clone(),
rcus: region_stats.iter().map(|s| s.rcus).sum(),
wcus: region_stats.iter().map(|s| s.wcus).sum(),
region_num: region_stats.len() as u64,
region_stats,
node_epoch: *node_epoch,
})
}
(header, _) => Err(header.clone()),
}
}
}
impl From<&api::v1::meta::RegionStat> for RegionStat {
fn from(value: &api::v1::meta::RegionStat) -> Self {
let region_stat = value
.extensions
.get(REGION_STATISTIC_KEY)
.and_then(|value| RegionStatistic::deserialize_from_slice(value))
.unwrap_or_default();
Self {
id: RegionId::from_u64(value.region_id),
rcus: value.rcus,
wcus: value.wcus,
approximate_bytes: value.approximate_bytes,
engine: value.engine.to_string(),
role: RegionRole::from(value.role()),
memtable_size: region_stat.memtable_size,
manifest_size: region_stat.manifest_size,
sst_size: region_stat.sst_size,
}
}
}
/// The key of the datanode stat in the memory store.
///
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DatanodeStatKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl DatanodeStatKey {
/// The key prefix.
pub fn prefix_key() -> Vec<u8> {
format!("{DATANODE_STAT_PREFIX}-").into_bytes()
}
/// The key prefix with the cluster id.
pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
}
}
impl From<DatanodeStatKey> for Vec<u8> {
fn from(value: DatanodeStatKey) -> Self {
format!(
"{}-{}-{}",
DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
)
.into_bytes()
}
}
impl FromStr for DatanodeStatKey {
type Err = error::Error;
fn from_str(key: &str) -> Result<Self> {
let caps = DATANODE_STAT_KEY_PATTERN
.captures(key)
.context(error::InvalidStatKeySnafu { key })?;
ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self {
cluster_id,
node_id,
})
}
}
impl TryFrom<Vec<u8>> for DatanodeStatKey {
type Error = error::Error;
fn try_from(bytes: Vec<u8>) -> Result<Self> {
String::from_utf8(bytes)
.context(error::FromUtf8Snafu {
name: "DatanodeStatKey",
})
.map(|x| x.parse())?
}
}
/// The value of the datanode stat in the memory store.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(transparent)]
pub struct DatanodeStatValue {
pub stats: Vec<Stat>,
}
impl DatanodeStatValue {
/// Get the latest number of regions.
pub fn region_num(&self) -> Option<u64> {
self.stats.last().map(|x| x.region_num)
}
/// Get the latest node addr.
pub fn node_addr(&self) -> Option<String> {
self.stats.last().map(|x| x.addr.clone())
}
}
impl TryFrom<DatanodeStatValue> for Vec<u8> {
type Error = error::Error;
fn try_from(stats: DatanodeStatValue) -> Result<Self> {
Ok(serde_json::to_string(&stats)
.context(error::SerializeToJsonSnafu {
input: format!("{stats:?}"),
})?
.into_bytes())
}
}
impl FromStr for DatanodeStatValue {
type Err = error::Error;
fn from_str(value: &str) -> Result<Self> {
serde_json::from_str(value).context(error::DeserializeFromJsonSnafu { input: value })
}
}
impl TryFrom<Vec<u8>> for DatanodeStatValue {
type Error = error::Error;
fn try_from(value: Vec<u8>) -> Result<Self> {
String::from_utf8(value)
.context(error::FromUtf8Snafu {
name: "DatanodeStatValue",
})
.map(|x| x.parse())?
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_stat_key() {
let stat = Stat {
cluster_id: 3,
id: 101,
region_num: 10,
..Default::default()
};
let stat_key = stat.stat_key();
assert_eq!(3, stat_key.cluster_id);
assert_eq!(101, stat_key.node_id);
}
#[test]
fn test_stat_val_round_trip() {
let stat = Stat {
cluster_id: 0,
id: 101,
region_num: 100,
..Default::default()
};
let stat_val = DatanodeStatValue { stats: vec![stat] };
let bytes: Vec<u8> = stat_val.try_into().unwrap();
let stat_val: DatanodeStatValue = bytes.try_into().unwrap();
let stats = stat_val.stats;
assert_eq!(1, stats.len());
let stat = stats.first().unwrap();
assert_eq!(0, stat.cluster_id);
assert_eq!(101, stat.id);
assert_eq!(100, stat.region_num);
}
#[test]
fn test_get_addr_from_stat_val() {
let empty = DatanodeStatValue { stats: vec![] };
let addr = empty.node_addr();
assert!(addr.is_none());
let stat_val = DatanodeStatValue {
stats: vec![
Stat {
addr: "1".to_string(),
..Default::default()
},
Stat {
addr: "2".to_string(),
..Default::default()
},
Stat {
addr: "3".to_string(),
..Default::default()
},
],
};
let addr = stat_val.node_addr().unwrap();
assert_eq!("3", addr);
}
#[test]
fn test_get_region_num_from_stat_val() {
let empty = DatanodeStatValue { stats: vec![] };
let region_num = empty.region_num();
assert!(region_num.is_none());
let wrong = DatanodeStatValue {
stats: vec![Stat {
region_num: 0,
..Default::default()
}],
};
let right = wrong.region_num();
assert_eq!(Some(0), right);
let stat_val = DatanodeStatValue {
stats: vec![
Stat {
region_num: 1,
..Default::default()
},
Stat {
region_num: 0,
..Default::default()
},
Stat {
region_num: 2,
..Default::default()
},
],
};
let region_num = stat_val.region_num().unwrap();
assert_eq!(2, region_num);
}
}

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use std::sync::Arc;
use api::v1::meta::ProcedureDetailResponse;
use common_telemetry::tracing_context::W3cTrace;
use store_api::storage::{RegionId, RegionNumber, TableId};
@@ -82,6 +83,8 @@ pub trait ProcedureExecutor: Send + Sync {
ctx: &ExecutorContext,
pid: &str,
) -> Result<ProcedureStateResponse>;
async fn list_procedures(&self, ctx: &ExecutorContext) -> Result<ProcedureDetailResponse>;
}
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;

View File

@@ -187,7 +187,7 @@ mod tests {
region: Region::new_test(region_id),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
}]),
HashMap::new(),

View File

@@ -107,21 +107,21 @@ async fn test_on_submit_alter_request() {
region: Region::new_test(RegionId::new(table_id, 1)),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![Peer::empty(5)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 2)),
leader_peer: Some(Peer::empty(2)),
follower_peers: vec![Peer::empty(4)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 3)),
leader_peer: Some(Peer::empty(3)),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
]),
@@ -193,21 +193,21 @@ async fn test_on_submit_alter_request_with_outdated_request() {
region: Region::new_test(RegionId::new(table_id, 1)),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![Peer::empty(5)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 2)),
leader_peer: Some(Peer::empty(2)),
follower_peers: vec![Peer::empty(4)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 3)),
leader_peer: Some(Peer::empty(3)),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
]),

View File

@@ -119,21 +119,21 @@ async fn test_on_datanode_drop_regions() {
region: Region::new_test(RegionId::new(table_id, 1)),
leader_peer: Some(Peer::empty(1)),
follower_peers: vec![Peer::empty(5)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 2)),
leader_peer: Some(Peer::empty(2)),
follower_peers: vec![Peer::empty(4)],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
RegionRoute {
region: Region::new_test(RegionId::new(table_id, 3)),
leader_peer: Some(Peer::empty(3)),
follower_peers: vec![],
leader_status: None,
leader_state: None,
leader_down_since: None,
},
]),

View File

@@ -18,6 +18,7 @@ use common_procedure::error::Error as ProcedureError;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::metric_engine_consts::LOGICAL_TABLE_METADATA_KEY;
use table::metadata::TableId;
use table::table_reference::TableReference;
use crate::ddl::DetectingRegion;
use crate::error::{Error, OperateDatanodeSnafu, Result, TableNotFoundSnafu, UnsupportedSnafu};
@@ -109,8 +110,8 @@ pub async fn check_and_get_physical_table_id(
.table_name_manager()
.get(physical_table_name)
.await?
.context(TableNotFoundSnafu {
table_name: physical_table_name.to_string(),
.with_context(|| TableNotFoundSnafu {
table_name: TableReference::from(physical_table_name).to_string(),
})
.map(|table| table.table_id())
}
@@ -123,8 +124,8 @@ pub async fn get_physical_table_id(
.table_name_manager()
.get(logical_table_name)
.await?
.context(TableNotFoundSnafu {
table_name: logical_table_name.to_string(),
.with_context(|| TableNotFoundSnafu {
table_name: TableReference::from(logical_table_name).to_string(),
})
.map(|table| table.table_id())?;

View File

@@ -14,6 +14,7 @@
use std::sync::Arc;
use api::v1::meta::ProcedureDetailResponse;
use common_procedure::{
watcher, BoxedProcedureLoader, Output, ProcedureId, ProcedureManagerRef, ProcedureWithId,
};
@@ -825,6 +826,15 @@ impl ProcedureExecutor for DdlManager {
Ok(procedure::procedure_state_to_pb_response(&state))
}
async fn list_procedures(&self, _ctx: &ExecutorContext) -> Result<ProcedureDetailResponse> {
let metas = self
.procedure_manager
.list_procedures()
.await
.context(QueryProcedureSnafu)?;
Ok(procedure::procedure_details_to_pb_response(metas))
}
}
#[cfg(test)]

View File

@@ -147,6 +147,20 @@ pub enum Error {
source: common_procedure::Error,
},
#[snafu(display("Failed to start procedure manager"))]
StartProcedureManager {
#[snafu(implicit)]
location: Location,
source: common_procedure::Error,
},
#[snafu(display("Failed to stop procedure manager"))]
StopProcedureManager {
#[snafu(implicit)]
location: Location,
source: common_procedure::Error,
},
#[snafu(display(
"Failed to get procedure output, procedure id: {procedure_id}, error: {err_msg}"
))]
@@ -218,6 +232,24 @@ pub enum Error {
error: JsonError,
},
#[snafu(display("Failed to serialize to json: {}", input))]
SerializeToJson {
input: String,
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to deserialize from json: {}", input))]
DeserializeFromJson {
input: String,
#[snafu(source)]
error: serde_json::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Payload not exist"))]
PayloadNotExist {
#[snafu(implicit)]
@@ -531,13 +563,20 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid node info key: {}", key))]
#[snafu(display("Invalid node info key: {}", key))]
InvalidNodeInfoKey {
key: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid node stat key: {}", key))]
InvalidStatKey {
key: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to parse number: {}", err_msg))]
ParseNum {
err_msg: String,
@@ -627,7 +666,9 @@ impl ErrorExt for Error {
| EtcdTxnFailed { .. }
| ConnectEtcd { .. }
| MoveValues { .. }
| GetCache { .. } => StatusCode::Internal,
| GetCache { .. }
| SerializeToJson { .. }
| DeserializeFromJson { .. } => StatusCode::Internal,
ValueNotExist { .. } => StatusCode::Unexpected,
@@ -688,7 +729,9 @@ impl ErrorExt for Error {
SubmitProcedure { source, .. }
| QueryProcedure { source, .. }
| WaitProcedure { source, .. } => source.status_code(),
| WaitProcedure { source, .. }
| StartProcedureManager { source, .. }
| StopProcedureManager { source, .. } => source.status_code(),
RegisterProcedureLoader { source, .. } => source.status_code(),
External { source, .. } => source.status_code(),
OperateDatanode { source, .. } => source.status_code(),
@@ -700,6 +743,7 @@ impl ErrorExt for Error {
| InvalidNumTopics { .. }
| SchemaNotFound { .. }
| InvalidNodeInfoKey { .. }
| InvalidStatKey { .. }
| ParseNum { .. }
| InvalidRole { .. }
| EmptyDdlTasks { .. } => StatusCode::InvalidArguments,

Some files were not shown because too many files have changed in this diff Show More