Compare commits

...

45 Commits

Author SHA1 Message Date
Ning Sun
deaa1f9578 ci: move components to flakes so it won't affect builders (#5464)
* ci: move components to flakes so it won't affect builders

* chore: add gnuplot for benchmark/criterion
2025-01-31 08:55:59 +00:00
Ruihang Xia
f378d218e9 perf: optimize writing non-null primitive value (#5460)
* avoid using arrow builder

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* optimize from_vec

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-30 14:53:59 +00:00
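The gist of this optimization can be shown with a small stand-alone arrow-array sketch (illustrative only, not the code in this PR): for non-null primitive values, building the array directly from a Vec avoids per-value builder bookkeeping.

use arrow_array::builder::Int64Builder;
use arrow_array::Int64Array;

fn main() {
    let values: Vec<i64> = (0..4).collect();

    // Builder path: append one value at a time.
    let mut builder = Int64Builder::with_capacity(values.len());
    for v in &values {
        builder.append_value(*v);
    }
    let from_builder = builder.finish();

    // Direct path: construct the array from the Vec in one step, which
    // skips per-value builder bookkeeping when no values are null.
    let from_vec = Int64Array::from(values);

    assert_eq!(from_builder, from_vec);
}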
yihong
5b6279f191 fix: no need for special case since datafusion updated (#5458)
* fix: no need for special case since datafusion updated

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: drop useless tests

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-26 05:57:23 +00:00
Ning Sun
698b28c636 feat: provide options to disable or customize http cross-origin settings (#5450)
* feat: add cors headers for http server

* test: add cors test

* test: add preflight test

* feat: allow customize http cross-origin settings

* chore: typo fix

* test: update tests

* test: fix test for config

* refactor: address review comments
2025-01-26 03:55:34 +00:00
yihong
c4d10313e6 fix: better error handler for the time range close #5449 (#5453)
* fix: better error handler for the time range close #5499

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: wrong compare

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-26 03:33:12 +00:00
Ruihang Xia
f165bfb0af fix: remove metric engine's internal column from promql's query (#5032)
* fix: remove metric engine's internal column from promql's query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove unwrap

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* filter out physical table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add integration test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-26 03:10:59 +00:00
Weny Xu
4111c18d44 chore: avoid unnecessary cloning (#5454)
* chore: avoid unnecessary cloning

* Apply suggestions from code review

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-25 14:16:22 +00:00
Ruihang Xia
5abe4c141a feat: expose http endpoint for flownode and metasrv (#5437)
* feat: expose http endpoint for flownode and metasrv

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adjust health check

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-25 13:20:25 +00:00
yihong
adb5c3743c fix: flush table panic when table has interval column close #3235 (#5422)
* fix: flash table panic when table has interval column close #3235

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* Revert "fix: flash table panic when table has interval column close #3235"

This reverts commit ffc63efda39cd6ef525313b60ede061c5ec24b12.

* fix: create table do not support interval type for now close #3235

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: sqlness

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address comments fix conflict and more tests

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: address final comments drop useless sqlness tests

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-25 08:09:55 +00:00
Ruihang Xia
7c5ead90ac feat: mirror insert request to flownode in async (#5444)
* feat: mirror insert request to flownode in async

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-24 13:42:27 +00:00
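The async mirroring can be pictured as spawning the flownode call off the write path instead of awaiting it inline; the sketch below uses hypothetical request and sender types for illustration, not the actual frontend code.

use tokio::task;

// Hypothetical request type, for illustration only.
#[derive(Clone, Debug)]
struct InsertRequest {
    rows: usize,
}

async fn send_to_flownode(req: InsertRequest) -> Result<(), String> {
    // Stand-in for the real network call.
    println!("mirrored {} rows to flownode", req.rows);
    Ok(())
}

async fn handle_insert(req: InsertRequest) {
    // Mirror to the flownode without blocking the write path:
    // spawn the call and only log failures.
    let mirrored = req.clone();
    task::spawn(async move {
        if let Err(e) = send_to_flownode(mirrored).await {
            eprintln!("failed to mirror insert to flownode: {e}");
        }
    });

    // ... continue with the normal write using `req` ...
    let _ = req;
}

#[tokio::main]
async fn main() {
    handle_insert(InsertRequest { rows: 3 }).await;
    // Give the spawned mirror task a moment to run in this toy example.
    tokio::time::sleep(std::time::Duration::from_millis(10)).await;
}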
shuiyisong
d870987a65 chore: update toolchain to 2024-12-25 (#5430)
* chore: update toolchain to 2024-12-25

* chore: fix clippy

* feat: update flakes

* chore: remove `rerun-if-changed` for now

* chore: update shadow-rs

* fix: clippy

* chore: update version in DEV_BUILDER_IMAGE_TAG

---------

Co-authored-by: Ning Sun <sunning@greptime.com>
2025-01-24 09:30:54 +00:00
Ning Sun
dce4ed9f1d feat: add CORS headers for http interfaces (#5447)
* feat: add cors headers for http server

* test: add cors test

* test: add preflight test
2025-01-24 09:28:04 +00:00
yihong
bbfbc9f0f8 fix: drop unused numpy code since pyo3 rustpython do not support any more (#5442)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-24 08:20:01 +00:00
Weny Xu
b107384cc6 feat(metric-engine): support to write rows with sparse primary key encoding (#5424)
* feat: support to write rows with sparse primary key encoding

* feat: cache decoded primary key

* chore: remove unused code

* feat: create physical table based on the engine config

* chore: log primary key encoding info

* fix: correct sqlness test

* chore: correct config.md

* chore: apply suggestions from CR

* chore: apply suggestions from CR
2025-01-24 06:56:09 +00:00
zyy17
2802c8bf28 ci: update dev-builder version to fix build android image failed (#5445)
ci: update dev-builder version
2025-01-24 06:48:26 +00:00
zyy17
9b9784a557 fix: install x86-64 protoc on android dev-builder (#5443) 2025-01-24 04:39:39 +00:00
yihong
1e61d05211 fix: arm actions test failed (#5433)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-24 03:43:59 +00:00
Ning Sun
d53b9fbd03 ci: switch to nix flakes for more reproducible builds (#5426) 2025-01-24 03:30:45 +00:00
zyy17
d01bc916f1 ci: unify all protoc version to 29.3 (#5434)
Co-authored-by: Ning Sun <sunng@protonmail.com>
Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-01-24 03:29:11 +00:00
Stephan3555
8ea463f516 feat: Address different Metrics for Prometheus queries in the Dashboard and fix typo in metric name (#5441)
Fix typo in metric and add metric for prometheus compatible endpoint

Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com>
2025-01-24 03:28:05 +00:00
Zhenchi
088317fd3a fix: unexpected warning on applying bloom (#5431)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-01-24 03:09:17 +00:00
Ning Sun
69881e3bc1 ci: allow skipping tests as required tasks (#5436)
ci: allow skipping tests
2025-01-24 03:04:55 +00:00
Ruihang Xia
9af4160068 fix(log-query): panic on prometheus (#5429)
* fix(log-query): panic on prometheus

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix test environment setup

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-23 11:48:37 +00:00
zyy17
45e68603a1 ci: update dev-builder version (#5435) 2025-01-23 11:43:10 +00:00
zyy17
1eb4b8ed4f refactor: support to flatten json object in greptime_identity pipeline (#5358)
* refactor: support to flatten json object in greptime_identity pipeline

* refactor: add GreptimeIdentityPipelineParams to configure greptime_identity pipeline

* refactor: pass greptime identity pipeline params by one header kv

* refactor: code review

* refactor: make pipeline params more general for all internal pipelines

* chore: remove axum deps from pipeline

* fix: clippy errors

* chore: fix and add test

* test: adopt api change for test client

---------

Co-authored-by: shuiyisong <xixing.sys@gmail.com>
Co-authored-by: Ning Sun <sunng@protonmail.com>
Co-authored-by: Ning Sun <sunning@greptime.com>
2025-01-23 08:50:50 +00:00
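The flattening behavior this PR describes can be pictured with a small stand-alone serde_json sketch (illustrative only; the key separator and helper are assumptions, not the actual pipeline code): nested objects are collapsed into top-level keys.

use serde_json::{json, Map, Value};

/// Recursively flatten nested JSON objects into dot-separated top-level keys,
/// e.g. {"a": {"b": 1}} becomes {"a.b": 1}.
fn flatten(prefix: &str, value: &Value, out: &mut Map<String, Value>) {
    match value {
        Value::Object(map) => {
            for (k, v) in map {
                let key = if prefix.is_empty() {
                    k.clone()
                } else {
                    format!("{prefix}.{k}")
                };
                flatten(&key, v, out);
            }
        }
        other => {
            out.insert(prefix.to_string(), other.clone());
        }
    }
}

fn main() {
    let input = json!({"host": {"name": "web-1", "os": {"family": "linux"}}, "level": "info"});
    let mut flat = Map::new();
    flatten("", &input, &mut flat);
    // Prints keys like "host.name", "host.os.family", "level".
    println!("{}", Value::Object(flat));
}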
Weny Xu
05f21679d6 feat: replace DensePrimaryKeyCodec with Arc<dyn PrimaryKeyCodec> (#5408)
* feat: use `PrimaryKeyCodec` trait object

* feat: introduce `RewritePrimaryKey`

* chore: apply suggestions from CR

* fix: fix clippy

* chore: add comments
2025-01-23 08:44:17 +00:00
Yingwen
35b635f639 feat!: Bump datafusion, prost, hyper, tonic, tower, axum (#5417)
* change dep

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: adapt to arrow's interval array

* chore: fix compile errors in datatypes crate

* chore: fix api crate compiler errors

* chore: fix compiler errors in common-grpc

* chore: fix common-datasource errors

* chore: fix deprecated code in common-datasource

* fix promql and physical plan related

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* wip: upgrading network deps

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* block on updating `sqlparser`

* upgrade sqlparser

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adapt new df's trait requirements

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: fix compiler errors in mito2

* chore: fix common-function crate errors

* chore: fix catalog errors

* change import path

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: fix some errors in query crate

* chore: fix some errors in query crate

* aggr expr and some other tiny fixes

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: fix expr related errors in query crate

* chore: fix query serializer and admin command

* chore: fix grpc services

* feat: axum serve

* chore: fix http server

* remove handle_error handler
* refactor timeout layer
* serve axum

* chore: fix flow aggr functions

* chore: fix flow

* feat: fix errors in meta-srv

* boxed()
* use TokioIo

* feat!: Remove script crate and python feature (#5321)

* feat: exclude script crate

* chore: simplify feature

* feat: remove the script crate

* chore: remove python feature and some comments

* chore: fix warning

* chore: fix servers tests compiler errors

* feat: fix tests-integration errors

* chore: fix unused

* test: fix catalog test

* chore: fix compiler errors for crates using common-meta

testing feature is enabled when checking with --workspace

* test: use display for logical plan test

* test: implement rewrite for ScanHintRule

* fix: http server build panic

* test: fix mito test

* fix: sql parser type alias error

* test: fix TestClient not listen

* test: some flow tests

* test(flow): more fix

* fix: test_otlp_logs

* test: fix promql test that using deprecated method fun()

* fix: sql type replace supports Int8 ~ Int64, UInt8 ~ UInt64

* test: fix infer schema test case

* test: fix tests related to plan display

* chore: fix last flow test

* test: fix function format related assertion

* test: use larger port range for tests

* fix: test_otlp_traces

* fix: test_otlp_metrics

* fix range query and dist plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: flow handle distinct use deprecated field

* fix: can't pass Join plan expressions to LogicalPlan::with_new_exprs

* test: fix deserialize test

* test: reduce split key case num

* tests: lower case aggr func name

* test: fix some sqlness tests

* tests: more sqlness fix

* tests: fixed sqlness test

* commit non-bug changes

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: make our udf correct

* fix: implement empty methods of ContextProvider for DfContextProviderAdapter

* test: update sqlness test result

* chore: remove unused

* fix: provide alias name for AggregateExprBuilder in range plan

* test: update range query result

* fix: implement missing ContextProvider methods for DfContextProviderAdapter

* test: update timestamps, cte result

* fix: supports empty projection in mito

* test: update comment for cte test

* fix: support projection for numbers

* test: update test cases after projection fix

* fix: fix range select first_value/last_value

* fix: handle CAST and time index conflict

* fix: handle order by correctly in range first_value/last_value

* test: update sqlness result

* test: update view test result

* test: update decimal test

wait for https://github.com/apache/datafusion/pull/14126 to fix this

* feat: remove redundant physical optimization

todo(ruihang): Check if we can remove this.

* test: update sqlness test result

* chore: range select default sort use nulls_first = false

* test: update filter push down test result

* test: comment decimal test to avoid different panic message

* test: update some distributed test result

* test: update test for distributed count and filter push down

* test: update subqueries test

* fix: SessionState may overwrite our UDFs

* chore: fix compiler errors after merging main

* fix: fix elasticsearch and dashboard router panic

* chore: fix common-functions tests

* chore: update sqlness result

* test: fix id keyword and update sqlness result

* test: fix flow_null test

* fix: enlarge thread size in debug mode to avoid overflow

* chore: fix warnings in common-function

* chore: fix warning in flow

* chore: fix warnings in query crate

* chore: remove unused warnings

* chore: fix deprecated warnings for parquet

* chore: fix deprecated warning in servers crate

* style: fix clippy

* test: enlarge mito cache ttl test ttl time

* chore: fix typo

* style: fmt toml

* refactor: reimplement PartialOrd for RangeSelect

* chore: remove script crate files introduced by merge

* fix: return error if sql option is not kv

* chore: do not use ..default::default()

* chore: per review

* chore: update error message in BuildAdminFunctionArgsSnafu

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

* refactor: typed precision

* update sqlness view case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* chore: flow per review

* chore: add example in comment

* chore: warn if parquet stats of timestamp is not INT64

* style: add a newline before derive to make the comment more clear

* test: update sqlness result

* fix: flow from substrait

* chore: change update_range_context log to debug level

* chore: move axum-extra axum-macros to workspace

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: luofucong <luofc@foxmail.com>
Co-authored-by: discord9 <discord9@163.com>
Co-authored-by: shuiyisong <xixing.sys@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-01-23 06:15:40 +00:00
Yohan Wal
3ed085459c feat(remote-wal): introduce TopicRegionManager (#5407)
* feat: add manager to map region to topic

* chore: add a delete

* chore: rename keys

* chore: update config file

* fix: fix unit test

* chore: change prefix

* chore: clean up

* chore: follow review comments

* chore: follow review comments

* chore: follow review comments

* chore: follow review comments

* chore: follow review comments
2025-01-22 06:06:27 +00:00
Lei, HUANG
51a8d0a726 fix: avoid suppress manual compaction (#5399)
* fix/avoid-suppress-manual-compaction:
 **Refactor Compaction Logic**

 - Removed `PendingCompaction` struct and integrated its functionality directly into `CompactionStatus` in `compaction.rs`.
 - Simplified waiter management by consolidating waiter handling logic into `CompactionStatus`.
 - Updated `CompactionRequest` creation to directly handle waiters without intermediate structures.
 - Adjusted test cases in `compaction.rs` to align with the new waiter management approach.

(cherry picked from commit 87e2d1c2cc9bd82c02991d22e429bef25c5ee348)

* fix/avoid-suppress-manual-compaction:
 ### Add Support for Manual Compaction Requests

 - **Compaction Logic Enhancements**:
   - Updated `CompactionScheduler` in `compaction.rs` to handle manual compaction requests using `Options::StrictWindow`.
   - Introduced `PendingCompaction` struct to manage pending manual compaction requests.
   - Added logic to reschedule manual compaction requests once the current compaction task is completed.

 - **Testing**:
   - Added `test_manual_compaction_when_compaction_in_progress` to verify the handling of manual compaction requests during ongoing compaction processes.

 These changes enhance the compaction scheduling mechanism by allowing manual compaction requests to be queued and processed efficiently.

(cherry picked from commit bc38ed0f2f8ba2c4690e0d0e251aeb2acce308ca)

* chore: fix conflicts

* fix/avoid-suppress-manual-compaction:
 ### Add Error Handling for Manual Compaction Override

 - **`compaction.rs`**: Enhanced the `set_pending_request` method to handle manual compaction overrides by sending an error to the waiter if a previous request exists.
 - **`error.rs`**: Introduced a new error variant `ManualCompactionOverride` to represent manual compaction being overridden, and mapped it to the `Cancelled` status code.

* fix: format

* fix/avoid-suppress-manual-compaction:
 **Add Error Handling for Pending Compaction Requests**

 - Enhanced error handling in `compaction.rs` by adding logic to handle errors for pending compaction requests.
 - Introduced a mechanism to send errors using `waiter.send` when a pending compaction request fails, ensuring proper error propagation and context with `CompactRegionSnafu`.

* fix/avoid-suppress-manual-compaction:
 **Fix Typo and Simplify Code Logic in `compaction.rs`**

 - Corrected a typo in the license comment from "langucage" to "language".
 - Simplified the logic for handling `pending_compaction` in `CompactionStatus` by removing unnecessary pattern matching and directly accessing `waiter`.

* fix: typo
2025-01-22 05:36:39 +00:00
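The waiter handling described above follows a keep-at-most-one-pending-request pattern: a new manual request replaces the queued one, and the previous waiter is told it was overridden. A minimal sketch of that pattern with hypothetical types (not the mito2 code), using tokio oneshot channels:

use tokio::sync::oneshot;

/// Result sent back to whoever requested a manual compaction.
type Waiter = oneshot::Sender<Result<(), String>>;

/// Holds at most one pending manual compaction request.
#[derive(Default)]
struct CompactionStatus {
    pending_waiter: Option<Waiter>,
}

impl CompactionStatus {
    /// Register a new pending request. If one is already queued,
    /// its waiter is notified that it was overridden.
    fn set_pending_request(&mut self, waiter: Waiter) {
        if let Some(prev) = self.pending_waiter.replace(waiter) {
            let _ = prev.send(Err("manual compaction overridden".to_string()));
        }
    }

    /// Take the pending waiter once the current compaction finishes,
    /// so the queued manual request can be rescheduled.
    fn take_pending(&mut self) -> Option<Waiter> {
        self.pending_waiter.take()
    }
}

#[tokio::main]
async fn main() {
    let mut status = CompactionStatus::default();

    let (tx1, rx1) = oneshot::channel();
    status.set_pending_request(tx1);

    // A second manual request overrides the first one.
    let (tx2, _rx2) = oneshot::channel();
    status.set_pending_request(tx2);

    // The first waiter observes the override error.
    assert_eq!(
        rx1.await.unwrap(),
        Err("manual compaction overridden".to_string())
    );
    assert!(status.take_pending().is_some());
}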
Weny Xu
965a48656f feat(metric-engine): introduce RowModifier for MetricEngine (#5380)
* feat(metric-engine): store physical table ColumnIds in `MetricEngineState`

* feat(metric-engine): introduce `RowModifier` for MetricEngine

* chore: upgrade greptime-proto

* feat: introduce `WriteHint` to `RegionPutRequest`

* chore: apply suggestions from CR

* chore: update greptime-proto

* chore: apply suggestions from CR

* chore: add comments

* chore: update proto
2025-01-22 05:16:44 +00:00
dennis zhuang
4259975be9 feat: support not-equal matcher for PromQL metric names (#5385)
* feat: make instant_query and range_query to supports not-equal matchers

* feat: impl query_metric_names

* feat: forgot some files and refactor

* chore: test and docs

* fix: typo

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* refactor: parse_query

* chore: improve test

* fix: use current catalog to query information_schema

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2025-01-22 03:04:40 +00:00
yihong
d2f3f2e24d fix: vector function for PromQL need to ignore the time index also (#5398)
* fix: vector function for PromQL need to ignore the time index also close #5392

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: do not affect scalar function

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

* fix: better name for it

Signed-off-by: yihong0618 <zouzou0208@gmail.com>

---------

Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-22 02:51:12 +00:00
Zhenchi
f74a955504 feat: bloom filter as fulltext index v2 (Part 1) (#5406)
* feat: bloom filter as fulltext index v2

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* add unit tests for tokenizer

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor dup vars

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-01-21 23:33:11 +00:00
Ruihang Xia
6f1b5101a3 feat(grafana): update cluster dashboard to include flow-related metrics (#5419) 2025-01-21 20:21:01 +08:00
discord9
9f626ec776 chore: better error msg (#5415) 2025-01-21 07:32:32 +00:00
Niwaka
0163ce8df9 feat: add column if not exists (#5393)
* feat: add column if not exists

* chore: address reviews
2025-01-21 02:38:25 +00:00
shuiyisong
2ab235ec9d chore: extract service_name in OTLP traces by default (#5412)
chore: add service_name in traces
2025-01-21 02:34:56 +00:00
Zhenchi
281d9a5920 fix: matches incorrectly uses byte len as char len (#5411)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
2025-01-21 02:34:36 +00:00
Weny Xu
385b1bcbb0 feat(metric-engine): introduce index options from metric engine (#5374)
* feat(metric-engine): introduce index options from metric engine

* chore: fmt toml

* test: add sqlness test

* fix: ignore internal columns

* chore: remove unused dep

* chore: update sqlness result

* chore: ignore metric engine internal columns

* chore: refine code styling

* test: update sqlness test

* refactor: refactor `create_table_constraints`

* test: show index

* chore: apply suggestions from CR

* fix: set inverted index explicitly

* chore: apply suggestions from CR
2025-01-20 08:48:00 +00:00
Yohan Wal
5287d46073 refactor: use MetadataKey for kafka topic (#5351)
* refactor: use MetadataKey

* fix: match all prefix

* refactor: introduce TopicPool

* fix: fix test, some rename

* test: add unit test for legacy restore

* fix: add _ between prefix and topic id

* chore: readable legacy topics

* refactor: a refactor

* Apply suggestions from code review

* Apply suggestions from code review

* refactor: introduce TopicPool

* fix: fix unit test

* chore: fix unit test and add some comments

* fix: fix unit test

* refactor: just refactor

* refactor: rename

* chore: rename, comments and remove unnecessary clone
2025-01-20 07:38:22 +00:00
Lei, HUANG
64ce9d3744 chore(http): change authorization header (#5389)
* chore/change-authorization-header:
 ### Add Custom Authorization Header Support

 - **Files Modified**: `http.rs`, `authorize.rs`, `authorize.rs` (tests)
 - **Key Changes**:
   - Introduced a custom authorization header `x-greptime-auth` in `http.rs`.
   - Updated authorization logic in `authorize.rs` to support both `x-greptime-auth` and the standard `Authorization` header.
   - Enhanced test cases in `authorize.rs` to validate the new custom header functionality.

* chore: add more tests
2025-01-20 07:09:44 +00:00
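For context, a client would presumably send the new header in place of the standard Authorization header; a minimal reqwest sketch follows, where the endpoint path, port, and credential format are assumptions rather than details taken from this change.

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Assumption: the custom header takes the same `Basic <base64(user:pass)>`
    // value that the standard Authorization header accepts.
    let credential = "Basic dXNlcm5hbWU6cGFzc3dvcmQ="; // base64 of "username:password"

    let resp = reqwest::Client::new()
        .post("http://127.0.0.1:4000/v1/sql") // endpoint and port are assumptions
        .header("x-greptime-auth", credential) // custom header introduced by this change
        .form(&[("sql", "SELECT 1")])
        .send()
        .await?;

    println!("status: {}", resp.status());
    Ok(())
}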
yihong
80790daae0 fix: better sqlness show, replace the unwrap with better show message (#5400)
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
2025-01-20 04:59:39 +00:00
Ning Sun
5daac5fe3d ci: revert coverage runner (#5403) 2025-01-20 03:52:38 +00:00
Weny Xu
4323c20d18 feat(metric-engine): introduce experimental_sparse_primary_key_encoding to MetricEngineConfig (#5373)
* feat: introduce `experimental_sparse_primary_key_encoding` to `MetricEngineConfig`

* fix: unit test
2025-01-20 03:49:39 +00:00
Ning Sun
f53b6777cc ci: use arm builders for tests (#5395) 2025-01-20 02:12:12 +00:00
493 changed files with 15436 additions and 9207 deletions

View File

@@ -631,7 +631,7 @@ jobs:
test:
if: github.event_name != 'merge_group'
runs-on: ubuntu-20.04-8-cores
runs-on: ubuntu-22.04-arm
timeout-minutes: 60
needs: [conflict-check, clippy, fmt]
steps:
@@ -661,6 +661,7 @@ jobs:
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
RUST_MIN_STACK: 8388608 # 8MB
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}

View File

@@ -66,6 +66,11 @@ jobs:
steps:
- run: 'echo "No action required"'
test:
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
sqlness:
name: Sqlness Test (${{ matrix.mode.name }})
runs-on: ${{ matrix.os }}

View File

@@ -110,14 +110,14 @@ jobs:
cleanbuild-linux-nix:
name: Run clean build on Linux
runs-on: ubuntu-latest-8-cores
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-unstable
- run: nix-shell --pure --run "cargo build"
nix_path: nixpkgs=channel:nixos-24.11
- run: nix develop --command cargo build
check-status:
name: Check status

Cargo.lock (generated file, 1703 lines changed)

File diff suppressed because it is too large.

View File

@@ -88,14 +88,17 @@ rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", features = ["compile-time-rng"] }
aquamarine = "0.3"
arrow = { version = "51.0.0", features = ["prettyprint"] }
arrow-array = { version = "51.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "51.0"
arrow-ipc = { version = "51.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "51.0", features = ["serde"] }
arrow = { version = "53.0.0", features = ["prettyprint"] }
arrow-array = { version = "53.0.0", default-features = false, features = ["chrono-tz"] }
arrow-flight = "53.0"
arrow-ipc = { version = "53.0.0", default-features = false, features = ["lz4", "zstd"] }
arrow-schema = { version = "53.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
axum = { version = "0.6", features = ["headers"] }
# Remember to update axum-extra, axum-macros when updating axum
axum = "0.8"
axum-extra = "0.10"
axum-macros = "0.4"
backon = "1"
base64 = "0.21"
bigdecimal = "0.4.2"
@@ -107,32 +110,35 @@ clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
dashmap = "5.4"
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-functions = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-physical-plan = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "7823ef2f63663907edab46af0d51359900f608d6" }
datafusion = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-common = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-functions = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-optimizer = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-expr = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-physical-plan = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-sql = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "2464703c84c400a09cc59277018813f0e797bb4e" }
deadpool = "0.10"
deadpool-postgres = "0.12"
derive_builder = "0.12"
dotenv = "0.15"
etcd-client = "0.13"
etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ec801a91aa22f9666063d02805f1f60f7c93458a" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "683e9d10ae7f3dfb8aaabd89082fc600c17e3795" }
hex = "0.4"
http = "0.2"
http = "1"
humantime = "2.1"
humantime-serde = "1.1"
hyper = "1.1"
hyper-util = "0.1"
itertools = "0.10"
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
lazy_static = "1.4"
local-ip-address = "0.6"
loki-api = { git = "https://github.com/shuiyisong/tracing-loki", branch = "chore/prost_version" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
moka = "0.12"
@@ -140,7 +146,7 @@ nalgebra = "0.33"
notify = "6.1"
num_cpus = "1.16"
once_cell = "1.18"
opentelemetry-proto = { version = "0.5", features = [
opentelemetry-proto = { version = "0.27", features = [
"gen-tonic",
"metrics",
"trace",
@@ -148,12 +154,12 @@ opentelemetry-proto = { version = "0.5", features = [
"logs",
] }
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "53.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.4.3", features = ["ser"] }
prost = "0.12"
prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
ratelimit = "0.9"
@@ -172,17 +178,17 @@ rstest = "0.21"
rstest_reuse = "0.7"
rust_decimal = "1.33"
rustc-hash = "2.0"
rustls = { version = "0.23.20", default-features = false } # override by patch, see [patch.crates-io]
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0", features = ["float_roundtrip"] }
serde_with = "3"
shadow-rs = "0.35"
shadow-rs = "0.38"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sysinfo = "0.30"
rustls = { version = "0.23.20", default-features = false } # override by patch, see [patch.crates-io]
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
"visitor",
"serde",
] } # on branch v0.44.x
@@ -194,8 +200,8 @@ tokio-rustls = { version = "0.26.0", default-features = false } # override by pa
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.8.8"
tonic = { version = "0.11", features = ["tls", "gzip", "zstd"] }
tower = "0.4"
tonic = { version = "0.12", features = ["tls", "gzip", "zstd"] }
tower = "0.5"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] }
typetag = "0.2"

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-10-19-a5c00e85-20241024184445
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-9d0fa5d5-20250124085746
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu

View File

@@ -26,6 +26,8 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
@@ -170,6 +172,8 @@
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
| `region_engine.file` | -- | -- | Enable the file engine. |
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
@@ -214,6 +218,8 @@
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1:4001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
@@ -327,7 +333,7 @@
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>Only accepts strings that match the following regular expression pattern:<br/>[a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*<br/>i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
@@ -506,6 +512,8 @@
| `region_engine.mito.memtable.data_freeze_threshold` | Integer | `32768` | The max rows of data inside the actively writing buffer in one shard.<br/>Only available for `partition_tree` memtable. |
| `region_engine.mito.memtable.fork_dictionary_bytes` | String | `1GiB` | Max dictionary bytes.<br/>Only available for `partition_tree` memtable. |
| `region_engine.file` | -- | -- | Enable the file engine. |
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |

View File

@@ -622,6 +622,12 @@ fork_dictionary_bytes = "1GiB"
## Enable the file engine.
[region_engine.file]
[[region_engine]]
## Metric engine options.
[region_engine.metric]
## Whether to enable the experimental sparse primary key encoding.
experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.

View File

@@ -31,6 +31,12 @@ timeout = "30s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
## Customize allowed origins for HTTP CORS.
## @toml2docs:none-default
cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]

View File

@@ -129,6 +129,8 @@ num_topics = 64
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## Only accepts strings that match the following regular expression pattern:
## [a-zA-Z_:-][a-zA-Z0-9_:\-\.@#]*
## i.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
topic_name_prefix = "greptimedb_wal_topic"

View File

@@ -39,6 +39,12 @@ timeout = "30s"
## The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.
## Set to 0 to disable limit.
body_limit = "64MB"
## HTTP CORS support, it's turned on by default
## This allows browser to access http APIs without CORS restrictions
enable_cors = true
## Customize allowed origins for HTTP CORS.
## @toml2docs:none-default
cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
@@ -671,6 +677,12 @@ fork_dictionary_bytes = "1GiB"
## Enable the file engine.
[region_engine.file]
[[region_engine]]
## Metric engine options.
[region_engine.metric]
## Whether to enable the experimental sparse primary key encoding.
experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.

View File

@@ -9,12 +9,21 @@ RUN cp ${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.7/lib/
# Install dependencies.
RUN apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
git \
unzip \
build-essential \
pkg-config
# Install protoc
ARG PROTOBUF_VERSION=29.3
RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d protoc3;
RUN mv protoc3/bin/* /usr/local/bin/
RUN mv protoc3/include/* /usr/local/include/
# Trust workdir
RUN git config --global --add safe.directory /greptimedb

View File

@@ -15,8 +15,13 @@ RUN yum install -y epel-release \
which
# Install protoc
RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/
ARG PROTOBUF_VERSION=29.3
RUN curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d protoc3;
RUN mv protoc3/bin/* /usr/local/bin/
RUN mv protoc3/include/* /usr/local/include/
# Install Rust
SHELL ["/bin/bash", "-c"]

View File

@@ -22,13 +22,15 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ARG TARGETPLATFORM
RUN echo "target platform: $TARGETPLATFORM"
ARG PROTOBUF_VERSION=29.3
# Install protobuf, because the one in the apt is too old (v3.12).
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-aarch_64.zip && \
unzip protoc-29.1-linux-aarch_64.zip -d protoc3; \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-aarch_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-aarch_64.zip -d protoc3; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-x86_64.zip && \
unzip protoc-29.1-linux-x86_64.zip -d protoc3; \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d protoc3; \
fi
RUN mv protoc3/bin/* /usr/local/bin/
RUN mv protoc3/include/* /usr/local/include/

View File

@@ -21,7 +21,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
pkg-config
# Install protoc.
ENV PROTOC_VERSION=25.1
ENV PROTOC_VERSION=29.3
RUN if [ "$(uname -m)" = "x86_64" ]; then \
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip; \
elif [ "$(uname -m)" = "aarch64" ]; then \

View File

@@ -39,14 +39,16 @@ services:
container_name: metasrv
ports:
- 3002:3002
- 3000:3000
command:
- metasrv
- start
- --bind-addr=0.0.0.0:3002
- --server-addr=metasrv:3002
- --store-addrs=etcd0:2379
- --http-addr=0.0.0.0:3000
healthcheck:
test: [ "CMD", "curl", "-f", "http://metasrv:3002/health" ]
test: [ "CMD", "curl", "-f", "http://metasrv:3000/health" ]
interval: 5s
timeout: 3s
retries: 5
@@ -73,10 +75,10 @@ services:
volumes:
- /tmp/greptimedb-cluster-docker-compose/datanode0:/tmp/greptimedb
healthcheck:
test: [ "CMD", "curl", "-f", "http://datanode0:5000/health" ]
test: [ "CMD", "curl", "-fv", "http://datanode0:5000/health" ]
interval: 5s
timeout: 3s
retries: 5
retries: 10
depends_on:
metasrv:
condition: service_healthy
@@ -115,6 +117,7 @@ services:
container_name: flownode0
ports:
- 4004:4004
- 4005:4005
command:
- flownode
- start
@@ -122,9 +125,15 @@ services:
- --metasrv-addrs=metasrv:3002
- --rpc-addr=0.0.0.0:4004
- --rpc-hostname=flownode0:4004
- --http-addr=0.0.0.0:4005
depends_on:
frontend0:
condition: service_healthy
healthcheck:
test: [ "CMD", "curl", "-f", "http://flownode0:4005/health" ]
interval: 5s
timeout: 3s
retries: 5
networks:
- greptimedb

flake.lock (new generated file, 100 lines)
View File

@@ -0,0 +1,100 @@
{
"nodes": {
"fenix": {
"inputs": {
"nixpkgs": [
"nixpkgs"
],
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1737613896,
"narHash": "sha256-ldqXIglq74C7yKMFUzrS9xMT/EVs26vZpOD68Sh7OcU=",
"owner": "nix-community",
"repo": "fenix",
"rev": "303a062fdd8e89f233db05868468975d17855d80",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1737569578,
"narHash": "sha256-6qY0pk2QmUtBT9Mywdvif0i/CLVgpCjMUn6g9vB+f3M=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "47addd76727f42d351590c905d9d1905ca895b82",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-24.11",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"fenix": "fenix",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1737581772,
"narHash": "sha256-t1P2Pe3FAX9TlJsCZbmJ3wn+C4qr6aSMypAOu8WNsN0=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "582af7ee9c8d84f5d534272fc7de9f292bd849be",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

flake.nix (new file, 56 lines)
View File

@@ -0,0 +1,56 @@
{
description = "Development environment flake";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
fenix = {
url = "github:nix-community/fenix";
inputs.nixpkgs.follows = "nixpkgs";
};
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, fenix, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
buildInputs = with pkgs; [
libgit2
libz
];
lib = nixpkgs.lib;
rustToolchain = fenix.packages.${system}.fromToolchainName {
name = (lib.importTOML ./rust-toolchain.toml).toolchain.channel;
sha256 = "sha256-f/CVA1EC61EWbh0SjaRNhLL0Ypx2ObupbzigZp8NmL4=";
};
in
{
devShells.default = pkgs.mkShell {
nativeBuildInputs = with pkgs; [
pkg-config
git
clang
gcc
protobuf
gnumake
mold
(rustToolchain.withComponents [
"cargo"
"clippy"
"rust-src"
"rustc"
"rustfmt"
"rust-analyzer"
"llvm-tools"
])
cargo-nextest
cargo-llvm-cov
taplo
curl
gnuplot ## for cargo bench
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;
};
});
}

File diff suppressed because it is too large.

View File

@@ -1,3 +1,2 @@
[toolchain]
channel = "nightly-2024-10-19"
components = ["rust-analyzer", "llvm-tools"]
channel = "nightly-2024-12-25"

View File

@@ -1,31 +0,0 @@
let
nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-24.11";
fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {};
pkgs = import nixpkgs { config = {}; overlays = []; };
in
pkgs.mkShell rec {
nativeBuildInputs = with pkgs; [
pkg-config
git
clang
gcc
protobuf
gnumake
mold
(fenix.fromToolchainFile {
dir = ./.;
})
cargo-nextest
cargo-llvm-cov
taplo
curl
];
buildInputs = with pkgs; [
libgit2
libz
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;
}

View File

@@ -33,7 +33,7 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: prost::DecodeError,
error: prost::UnknownEnumValue,
},
#[snafu(display("Failed to create column datatype from {:?}", from))]

View File

@@ -86,7 +86,7 @@ impl ColumnDataTypeWrapper {
/// Get a tuple of ColumnDataType and ColumnDataTypeExtension.
pub fn to_parts(&self) -> (ColumnDataType, Option<ColumnDataTypeExtension>) {
(self.datatype, self.datatype_ext.clone())
(self.datatype, self.datatype_ext)
}
}
@@ -685,14 +685,18 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
IntervalType::YearMonth(_) => Arc::new(IntervalYearMonthVector::from_vec(
values.interval_year_month_values,
)),
IntervalType::DayTime(_) => Arc::new(IntervalDayTimeVector::from_vec(
values.interval_day_time_values,
IntervalType::DayTime(_) => Arc::new(IntervalDayTimeVector::from_iter_values(
values
.interval_day_time_values
.iter()
.map(|x| IntervalDayTime::from_i64(*x).into()),
)),
IntervalType::MonthDayNano(_) => {
Arc::new(IntervalMonthDayNanoVector::from_iter_values(
values.interval_month_day_nano_values.iter().map(|x| {
IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).to_i128()
}),
values
.interval_month_day_nano_values
.iter()
.map(|x| IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).into()),
))
}
},
@@ -1495,14 +1499,22 @@ mod tests {
column.values.as_ref().unwrap().interval_year_month_values
);
let vector = Arc::new(IntervalDayTimeVector::from_vec(vec![4, 5, 6]));
let vector = Arc::new(IntervalDayTimeVector::from_vec(vec![
IntervalDayTime::new(0, 4).into(),
IntervalDayTime::new(0, 5).into(),
IntervalDayTime::new(0, 6).into(),
]));
push_vals(&mut column, 3, vector);
assert_eq!(
vec![4, 5, 6],
column.values.as_ref().unwrap().interval_day_time_values
);
let vector = Arc::new(IntervalMonthDayNanoVector::from_vec(vec![7, 8, 9]));
let vector = Arc::new(IntervalMonthDayNanoVector::from_vec(vec![
IntervalMonthDayNano::new(0, 0, 7).into(),
IntervalMonthDayNano::new(0, 0, 8).into(),
IntervalMonthDayNano::new(0, 0, 9).into(),
]));
let len = vector.len();
push_vals(&mut column, 3, vector);
(0..len).for_each(|i| {

View File

@@ -34,10 +34,8 @@ const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
let data_type = ColumnDataTypeWrapper::try_new(
column_def.data_type,
column_def.datatype_extension.clone(),
)?;
let data_type =
ColumnDataTypeWrapper::try_new(column_def.data_type, column_def.datatype_extension)?;
let constraint = if column_def.default_constraint.is_empty() {
None
@@ -102,7 +100,7 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
pub fn contains_fulltext(options: &Option<ColumnOptions>) -> bool {
options
.as_ref()
.map_or(false, |o| o.options.contains_key(FULLTEXT_GRPC_KEY))
.is_some_and(|o| o.options.contains_key(FULLTEXT_GRPC_KEY))
}
/// Tries to construct a `ColumnOptions` from the given `FulltextOptions`.
@@ -188,7 +186,7 @@ mod tests {
case_sensitive: false,
})
.unwrap();
schema.with_inverted_index(true);
schema.set_inverted_index(true);
let options = options_from_column_schema(&schema).unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),

View File

@@ -303,7 +303,7 @@ impl KvBackend for CachedKvBackend {
.lock()
.unwrap()
.as_ref()
.map_or(false, |v| !self.validate_version(*v))
.is_some_and(|v| !self.validate_version(*v))
{
self.cache.invalidate(key).await;
}

View File

@@ -41,6 +41,7 @@ pub mod information_schema {
}
pub mod table_source;
#[async_trait::async_trait]
pub trait CatalogManager: Send + Sync {
fn as_any(&self) -> &dyn Any;

View File

@@ -64,6 +64,7 @@ const INIT_CAPACITY: usize = 42;
/// - `uptime`: the uptime of the peer.
/// - `active_time`: the time since the last activity of the peer.
///
#[derive(Debug)]
pub(super) struct InformationSchemaClusterInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,

View File

@@ -45,6 +45,7 @@ use crate::error::{
use crate::information_schema::Predicates;
use crate::CatalogManager;
#[derive(Debug)]
pub(super) struct InformationSchemaColumns {
schema: SchemaRef,
catalog_name: String,

View File

@@ -61,7 +61,7 @@ pub const FLOWNODE_IDS: &str = "flownode_ids";
pub const OPTIONS: &str = "options";
/// The `information_schema.flows` to provides information about flows in databases.
///
#[derive(Debug)]
pub(super) struct InformationSchemaFlows {
schema: SchemaRef,
catalog_name: String,

View File

@@ -62,6 +62,7 @@ pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
#[derive(Debug)]
pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef,
catalog_name: String,

View File

@@ -59,6 +59,7 @@ const INIT_CAPACITY: usize = 42;
/// The `PARTITIONS` table provides information about partitioned tables.
/// See https://dev.mysql.com/doc/refman/8.0/en/information-schema-partitions-table.html
/// We provide an extral column `greptime_partition_id` for GreptimeDB region id.
#[derive(Debug)]
pub(super) struct InformationSchemaPartitions {
schema: SchemaRef,
catalog_name: String,

View File

@@ -56,7 +56,7 @@ const INIT_CAPACITY: usize = 42;
/// - `end_time`: the ending execution time of the procedure.
/// - `status`: the status of the procedure.
/// - `lock_keys`: the lock keys of the procedure.
///
#[derive(Debug)]
pub(super) struct InformationSchemaProcedureInfo {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,

View File

@@ -59,7 +59,7 @@ const INIT_CAPACITY: usize = 42;
/// - `is_leader`: whether the peer is the leader
/// - `status`: the region status, `ALIVE` or `DOWNGRADED`.
/// - `down_seconds`: the duration of being offline, in seconds.
///
#[derive(Debug)]
pub(super) struct InformationSchemaRegionPeers {
schema: SchemaRef,
catalog_name: String,

View File

@@ -63,7 +63,7 @@ const INIT_CAPACITY: usize = 42;
/// - `index_size`: The sst index files size in bytes.
/// - `engine`: The engine type.
/// - `region_role`: The region role.
///
#[derive(Debug)]
pub(super) struct InformationSchemaRegionStatistics {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,

View File

@@ -38,6 +38,7 @@ use store_api::storage::{ScanRequest, TableId};
use super::{InformationTable, RUNTIME_METRICS};
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
#[derive(Debug)]
pub(super) struct InformationSchemaMetrics {
schema: SchemaRef,
}

View File

@@ -49,6 +49,7 @@ pub const SCHEMA_OPTS: &str = "options";
const INIT_CAPACITY: usize = 42;
/// The `information_schema.schemata` table implementation.
#[derive(Debug)]
pub(super) struct InformationSchemaSchemata {
schema: SchemaRef,
catalog_name: String,

View File

@@ -43,6 +43,7 @@ use crate::information_schema::Predicates;
use crate::CatalogManager;
/// The `TABLE_CONSTRAINTS` table describes which tables have constraints.
#[derive(Debug)]
pub(super) struct InformationSchemaTableConstraints {
schema: SchemaRef,
catalog_name: String,

View File

@@ -71,6 +71,7 @@ const TABLE_ID: &str = "table_id";
pub const ENGINE: &str = "engine";
const INIT_CAPACITY: usize = 42;
#[derive(Debug)]
pub(super) struct InformationSchemaTables {
schema: SchemaRef,
catalog_name: String,

View File

@@ -54,6 +54,7 @@ pub const CHARACTER_SET_CLIENT: &str = "character_set_client";
pub const COLLATION_CONNECTION: &str = "collation_connection";
/// The `information_schema.views` to provides information about views in databases.
#[derive(Debug)]
pub(super) struct InformationSchemaViews {
schema: SchemaRef,
catalog_name: String,

View File

@@ -33,6 +33,7 @@ use super::SystemTable;
use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
/// A memory table with specified schema and columns.
#[derive(Debug)]
pub(crate) struct MemoryTable {
pub(crate) table_id: TableId,
pub(crate) table_name: &'static str,

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
@@ -100,6 +101,15 @@ impl PGClass {
}
}
impl fmt::Debug for PGClass {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("PGClass")
.field("schema", &self.schema)
.field("catalog_name", &self.catalog_name)
.finish()
}
}
impl SystemTable for PGClass {
fn table_id(&self) -> table::metadata::TableId {
PG_CATALOG_PG_CLASS_TABLE_ID

View File

@@ -55,6 +55,15 @@ pub(super) struct PGDatabase {
namespace_oid_map: PGNamespaceOidMapRef,
}
impl std::fmt::Debug for PGDatabase {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PGDatabase")
.field("schema", &self.schema)
.field("catalog_name", &self.catalog_name)
.finish()
}
}
impl PGDatabase {
pub(super) fn new(
catalog_name: String,

View File

@@ -17,6 +17,7 @@
pub(super) mod oid_map;
use std::fmt;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
@@ -87,6 +88,15 @@ impl PGNamespace {
}
}
impl fmt::Debug for PGNamespace {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("PGNamespace")
.field("schema", &self.schema)
.field("catalog_name", &self.catalog_name)
.finish()
}
}
impl SystemTable for PGNamespace {
fn schema(&self) -> SchemaRef {
self.schema.clone()

View File

@@ -365,7 +365,7 @@ mod tests {
Projection: person.id AS a, person.name AS b
Filter: person.id > Int32(500)
TableScan: person"#,
format!("\n{:?}", source.get_logical_plan().unwrap())
format!("\n{}", source.get_logical_plan().unwrap())
);
}
}

View File

@@ -15,12 +15,12 @@
//! Dummy catalog for region server.
use std::any::Any;
use std::fmt;
use std::sync::Arc;
use async_trait::async_trait;
use common_catalog::format_full_table_name;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogProvider, CatalogProviderList};
use datafusion::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider};
use datafusion::datasource::TableProvider;
use snafu::OptionExt;
use table::table::adapter::DfTableProviderAdapter;
@@ -41,6 +41,12 @@ impl DummyCatalogList {
}
}
impl fmt::Debug for DummyCatalogList {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DummyCatalogList").finish()
}
}
impl CatalogProviderList for DummyCatalogList {
fn as_any(&self) -> &dyn Any {
self
@@ -91,6 +97,14 @@ impl CatalogProvider for DummyCatalogProvider {
}
}
impl fmt::Debug for DummyCatalogProvider {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DummyCatalogProvider")
.field("catalog_name", &self.catalog_name)
.finish()
}
}
/// A dummy schema provider for [DummyCatalogList].
#[derive(Clone)]
struct DummySchemaProvider {
@@ -127,3 +141,12 @@ impl SchemaProvider for DummySchemaProvider {
true
}
}
impl fmt::Debug for DummySchemaProvider {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DummySchemaProvider")
.field("catalog_name", &self.catalog_name)
.field("schema_name", &self.schema_name)
.finish()
}
}

View File
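
The hunks above add `#[derive(Debug)]` where possible and hand-written `Debug` impls where a field (a kv backend, table provider, or weak reference) has no `Debug` of its own. A minimal sketch of that pattern, with hypothetical type and field names standing in for the catalog types in the diff:

use std::fmt;
use std::sync::Arc;

// Stand-in for a trait-object field (e.g. a kv backend) that has no `Debug` impl.
trait Backend {}

struct CatalogView {
    catalog_name: String,
    backend: Arc<dyn Backend>,
}

impl fmt::Debug for CatalogView {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Print only the fields that are meaningful to format; skip `backend`.
        f.debug_struct("CatalogView")
            .field("catalog_name", &self.catalog_name)
            .finish()
    }
}

fn main() {
    struct Mem;
    impl Backend for Mem {}
    let v = CatalogView { catalog_name: "greptime".into(), backend: Arc::new(Mem) };
    println!("{:?}", v); // CatalogView { catalog_name: "greptime" }
}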

@@ -57,6 +57,7 @@ humantime.workspace = true
lazy_static.workspace = true
meta-client.workspace = true
meta-srv.workspace = true
metric-engine.workspace = true
mito2.workspace = true
moka.workspace = true
nu-ansi-term = "0.46"

View File

@@ -51,8 +51,7 @@ impl App for Instance {
}
async fn start(&mut self) -> Result<()> {
self.start().await.unwrap();
Ok(())
self.start().await
}
fn wait_signal(&self) -> bool {

View File

@@ -345,6 +345,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to build wal options allocator"))]
BuildWalOptionsAllocator {
#[snafu(implicit)]
location: Location,
source: common_meta::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -378,7 +385,8 @@ impl ErrorExt for Error {
Error::StartProcedureManager { source, .. }
| Error::StopProcedureManager { source, .. } => source.status_code(),
Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::BuildWalOptionsAllocator { source, .. }
| Error::StartWalOptionsAllocator { source, .. } => source.status_code(),
Error::ReplCreation { .. } | Error::Readline { .. } | Error::HttpQuerySql { .. } => {
StatusCode::Internal
}

View File

@@ -43,7 +43,7 @@ use common_meta::node_manager::NodeManagerRef;
use common_meta::peer::Peer;
use common_meta::region_keeper::MemoryRegionKeeper;
use common_meta::sequence::SequenceBuilder;
use common_meta::wal_options_allocator::{WalOptionsAllocator, WalOptionsAllocatorRef};
use common_meta::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocatorRef};
use common_procedure::{ProcedureInfo, ProcedureManagerRef};
use common_telemetry::info;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
@@ -76,10 +76,10 @@ use tokio::sync::{broadcast, RwLock};
use tracing_appender::non_blocking::WorkerGuard;
use crate::error::{
BuildCacheRegistrySnafu, CreateDirSnafu, IllegalConfigSnafu, InitDdlManagerSnafu,
InitMetadataSnafu, InitTimezoneSnafu, LoadLayeredConfigSnafu, OtherSnafu, Result,
ShutdownDatanodeSnafu, ShutdownFlownodeSnafu, ShutdownFrontendSnafu, StartDatanodeSnafu,
StartFlownodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
BuildCacheRegistrySnafu, BuildWalOptionsAllocatorSnafu, CreateDirSnafu, IllegalConfigSnafu,
InitDdlManagerSnafu, InitMetadataSnafu, InitTimezoneSnafu, LoadLayeredConfigSnafu, OtherSnafu,
Result, ShutdownDatanodeSnafu, ShutdownFlownodeSnafu, ShutdownFrontendSnafu,
StartDatanodeSnafu, StartFlownodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu,
StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu,
};
use crate::options::{GlobalOptions, GreptimeOptions};
@@ -569,10 +569,11 @@ impl StartCommand {
.step(10)
.build(),
);
let wal_options_allocator = Arc::new(WalOptionsAllocator::new(
opts.wal.clone().into(),
kv_backend.clone(),
));
let kafka_options = opts.wal.clone().into();
let wal_options_allocator = build_wal_options_allocator(&kafka_options, kv_backend.clone())
.await
.context(BuildWalOptionsAllocatorSnafu)?;
let wal_options_allocator = Arc::new(wal_options_allocator);
let table_meta_allocator = Arc::new(TableMetadataAllocator::new(
table_id_sequence,
wal_options_allocator.clone(),

View File
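
The previous two diffs add a `BuildWalOptionsAllocator` error variant and wire the new fallible builder in through snafu's generated context selector. A rough, self-contained sketch of that pattern, with made-up function names and `std::io::Error` standing in for `common_meta::error::Error`:

use snafu::{Location, ResultExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Failed to build wal options allocator"))]
    BuildWalOptionsAllocator {
        #[snafu(implicit)]
        location: Location,
        source: std::io::Error, // the real variant wraps common_meta::error::Error
    },
}

type Result<T> = std::result::Result<T, Error>;

// Hypothetical fallible builder standing in for `build_wal_options_allocator`.
fn build_allocator() -> std::io::Result<String> {
    Ok("allocator".to_string())
}

fn start() -> Result<String> {
    // `.context(...)` converts the inner error into the snafu variant above,
    // capturing the caller location implicitly.
    build_allocator().context(BuildWalOptionsAllocatorSnafu)
}

fn main() {
    println!("{}", start().unwrap());
}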

@@ -25,14 +25,16 @@ use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_E
use common_wal::config::raft_engine::RaftEngineConfig;
use common_wal::config::DatanodeWalConfig;
use datanode::config::{DatanodeOptions, RegionEngineConfig, StorageConfig};
use file_engine::config::EngineConfig;
use file_engine::config::EngineConfig as FileEngineConfig;
use frontend::frontend::FrontendOptions;
use meta_client::MetaClientOptions;
use meta_srv::metasrv::MetasrvOptions;
use meta_srv::selector::SelectorType;
use metric_engine::config::EngineConfig as MetricEngineConfig;
use mito2::config::MitoConfig;
use servers::export_metrics::ExportMetricsOption;
use servers::grpc::GrpcOptions;
use servers::http::HttpOptions;
#[allow(deprecated)]
#[test]
@@ -72,7 +74,10 @@ fn test_load_datanode_example_config() {
write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),
RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false,
}),
],
logging: LoggingOptions {
level: Some("info".to_string()),
@@ -140,6 +145,10 @@ fn test_load_frontend_example_config() {
..Default::default()
},
grpc: GrpcOptions::default().with_hostname("127.0.0.1:4001"),
http: HttpOptions {
cors_allowed_origins: vec!["https://example.com".to_string()],
..Default::default()
},
..Default::default()
},
..Default::default()
@@ -210,7 +219,10 @@ fn test_load_standalone_example_config() {
write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),
RegionEngineConfig::File(FileEngineConfig {}),
RegionEngineConfig::Metric(MetricEngineConfig {
experimental_sparse_primary_key_encoding: false,
}),
],
storage: StorageConfig {
data_home: "/tmp/greptimedb/".to_string(),
@@ -227,6 +239,10 @@ fn test_load_standalone_example_config() {
remote_write: Some(Default::default()),
..Default::default()
},
http: HttpOptions {
cors_allowed_origins: vec!["https://example.com".to_string()],
..Default::default()
},
..Default::default()
},
..Default::default()

View File

@@ -223,7 +223,6 @@ impl FileReader {
}
}
#[cfg(any(test, feature = "testing"))]
impl SizeAwareRangeReader for FileReader {
fn with_file_size_hint(&mut self, _file_size_hint: u64) {
// do nothing

View File

@@ -31,7 +31,7 @@ derive_builder.workspace = true
futures.workspace = true
lazy_static.workspace = true
object-store.workspace = true
orc-rust = { git = "https://github.com/datafusion-contrib/datafusion-orc.git", rev = "502217315726314c4008808fe169764529640599", default-features = false, features = [
orc-rust = { version = "0.5", default-features = false, features = [
"async",
] }
parquet.workspace = true

View File

@@ -126,8 +126,7 @@ impl ArrowDecoder for arrow::csv::reader::Decoder {
}
}
#[allow(deprecated)]
impl ArrowDecoder for arrow::json::RawDecoder {
impl ArrowDecoder for arrow::json::reader::Decoder {
fn decode(&mut self, buf: &[u8]) -> result::Result<usize, ArrowError> {
self.decode(buf)
}

View File

@@ -17,8 +17,7 @@ use std::str::FromStr;
use std::sync::Arc;
use arrow::csv;
#[allow(deprecated)]
use arrow::csv::reader::infer_reader_schema as infer_csv_schema;
use arrow::csv::reader::Format;
use arrow::record_batch::RecordBatch;
use arrow_schema::{Schema, SchemaRef};
use async_trait::async_trait;
@@ -161,7 +160,6 @@ impl FileOpener for CsvOpener {
}
}
#[allow(deprecated)]
#[async_trait]
impl FileFormat for CsvFormat {
async fn infer_schema(&self, store: &ObjectStore, path: &str) -> Result<Schema> {
@@ -188,9 +186,12 @@ impl FileFormat for CsvFormat {
common_runtime::spawn_blocking_global(move || {
let reader = SyncIoBridge::new(decoded);
let (schema, _records_read) =
infer_csv_schema(reader, delimiter, schema_infer_max_record, has_header)
.context(error::InferSchemaSnafu)?;
let format = Format::default()
.with_delimiter(delimiter)
.with_header(has_header);
let (schema, _records_read) = format
.infer_schema(reader, schema_infer_max_record)
.context(error::InferSchemaSnafu)?;
Ok(schema)
})
.await
@@ -253,7 +254,7 @@ mod tests {
"c7: Int64: NULL",
"c8: Int64: NULL",
"c9: Int64: NULL",
"c10: Int64: NULL",
"c10: Utf8: NULL",
"c11: Float64: NULL",
"c12: Float64: NULL",
"c13: Utf8: NULL"

View File
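
The deprecated free function `infer_reader_schema` is replaced here by arrow's `Format` builder. A small sketch of the new inference call, assuming a recent arrow release with this API:

use std::io::Cursor;
use arrow::csv::reader::Format;

fn main() -> Result<(), arrow::error::ArrowError> {
    let data = "host,cpu\nh1,0.5\nh2,0.7\n";
    let format = Format::default()
        .with_delimiter(b',')
        .with_header(true);
    // Infer from at most 100 records; returns the schema and how many records were read.
    let (schema, n) = format.infer_schema(Cursor::new(data), Some(100))?;
    println!("read {n} records, schema: {schema:?}");
    Ok(())
}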

@@ -20,8 +20,7 @@ use std::sync::Arc;
use arrow::datatypes::SchemaRef;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow::json::writer::LineDelimited;
#[allow(deprecated)]
use arrow::json::{self, RawReaderBuilder};
use arrow::json::{self, ReaderBuilder};
use arrow::record_batch::RecordBatch;
use arrow_schema::Schema;
use async_trait::async_trait;
@@ -140,7 +139,6 @@ impl JsonOpener {
}
}
#[allow(deprecated)]
impl FileOpener for JsonOpener {
fn open(&self, meta: FileMeta) -> DataFusionResult<FileOpenFuture> {
open_with_decoder(
@@ -148,7 +146,7 @@ impl FileOpener for JsonOpener {
meta.location().to_string(),
self.compression_type,
|| {
RawReaderBuilder::new(self.projected_schema.clone())
ReaderBuilder::new(self.projected_schema.clone())
.with_batch_size(self.batch_size)
.build_decoder()
.map_err(DataFusionError::from)

View File
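
Similarly, `RawReaderBuilder`/`RawDecoder` have become `ReaderBuilder`/`Decoder` in arrow-json. A hedged sketch of building a streaming decoder for newline-delimited JSON, assuming a recent arrow release:

use std::sync::Arc;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::json::ReaderBuilder;

fn main() -> Result<(), arrow::error::ArrowError> {
    let schema = Arc::new(Schema::new(vec![Field::new("v", DataType::Int64, true)]));
    let mut decoder = ReaderBuilder::new(schema)
        .with_batch_size(1024)
        .build_decoder()?;

    // Feed raw bytes; `decode` returns how many bytes were consumed.
    let consumed = decoder.decode(b"{\"v\": 1}\n{\"v\": 2}\n")?;
    assert!(consumed > 0);

    // `flush` yields a RecordBatch once enough rows (or the remainder) are buffered.
    if let Some(batch) = decoder.flush()? {
        println!("decoded {} rows", batch.num_rows());
    }
    Ok(())
}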

@@ -42,7 +42,7 @@ struct Test<'a, T: FileOpener> {
expected: Vec<&'a str>,
}
impl<'a, T: FileOpener> Test<'a, T> {
impl<T: FileOpener> Test<'_, T> {
pub async fn run(self) {
let result = FileStream::new(
&self.config,

View File

@@ -164,7 +164,7 @@ impl FromStr for Decimal128 {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let len = s.as_bytes().len();
let len = s.len();
if len <= BYTES_TO_OVERFLOW_RUST_DECIMAL {
let rd = RustDecimal::from_str_exact(s).context(ParseRustDecimalStrSnafu { raw: s })?;
Ok(Self::from(rd))

View File

@@ -25,7 +25,6 @@ use crate::scalars::expression::ExpressionFunction;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::numpy::NumpyFunction;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::vector::VectorFunction;
use crate::system::SystemFunction;
@@ -103,7 +102,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
// Utility functions
MathFunction::register(&function_registry);
NumpyFunction::register(&function_registry);
TimestampFunction::register(&function_registry);
DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry);

View File

@@ -20,7 +20,6 @@ pub mod geo;
pub mod json;
pub mod matches;
pub mod math;
pub mod numpy;
pub mod vector;
#[cfg(test)]

View File

@@ -91,6 +91,7 @@ mod tests {
use std::sync::Arc;
use common_query::prelude::{TypeSignature, Volatility};
use datatypes::arrow::datatypes::IntervalDayTime;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
@@ -134,7 +135,12 @@ mod tests {
let times = vec![Some(123), None, Some(42), None];
// Intervals in milliseconds
let intervals = vec![1000, 2000, 3000, 1000];
let intervals = vec![
IntervalDayTime::new(0, 1000),
IntervalDayTime::new(0, 2000),
IntervalDayTime::new(0, 3000),
IntervalDayTime::new(0, 1000),
];
let results = [Some(124), None, Some(45), None];
let time_vector = TimestampSecondVector::from(times.clone());

View File

@@ -91,6 +91,7 @@ mod tests {
use std::sync::Arc;
use common_query::prelude::{TypeSignature, Volatility};
use datatypes::arrow::datatypes::IntervalDayTime;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{
@@ -139,7 +140,12 @@ mod tests {
let times = vec![Some(123), None, Some(42), None];
// Intervals in milliseconds
let intervals = vec![1000, 2000, 3000, 1000];
let intervals = vec![
IntervalDayTime::new(0, 1000),
IntervalDayTime::new(0, 2000),
IntervalDayTime::new(0, 3000),
IntervalDayTime::new(0, 1000),
];
let results = [Some(122), None, Some(39), None];
let time_vector = TimestampSecondVector::from(times.clone());

View File
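
These test updates track arrow's change of the day-time interval representation from a packed i64 to a dedicated struct; the same construction shows up again in the gRPC value-conversion tests further down. A tiny sketch of the new type, assuming a recent arrow release:

use arrow::datatypes::IntervalDayTime;

fn main() {
    // 0 days and 1500 milliseconds; the two components are stored as separate i32s.
    let interval = IntervalDayTime::new(0, 1500);
    assert_eq!(interval.days, 0);
    assert_eq!(interval.milliseconds, 1500);
}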

@@ -21,10 +21,9 @@ use common_query::error::{
};
use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeIterator, TreeNodeRecursion};
use datafusion::common::{DFSchema, Result as DfResult};
use datafusion::execution::context::SessionState;
use datafusion::execution::SessionStateBuilder;
use datafusion::logical_expr::{self, Expr, Volatility};
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionConfig;
use datatypes::arrow::array::RecordBatch;
use datatypes::arrow::datatypes::{DataType, Field};
use datatypes::prelude::VectorRef;
@@ -104,8 +103,7 @@ impl MatchesFunction {
let like_expr = ast.into_like_expr(col_name);
let input_schema = Self::input_schema();
let session_state =
SessionState::new_with_config_rt(SessionConfig::default(), Arc::default());
let session_state = SessionStateBuilder::new().with_default_features().build();
let planner = DefaultPhysicalPlanner::default();
let physical_expr = planner
.create_physical_expr(&like_expr, &input_schema, &session_state)
@@ -131,7 +129,7 @@ impl MatchesFunction {
}
fn input_schema() -> DFSchema {
DFSchema::from_unqualifed_fields(
DFSchema::from_unqualified_fields(
[Arc::new(Field::new("data", DataType::Utf8, true))].into(),
HashMap::new(),
)
@@ -725,7 +723,8 @@ struct Tokenizer {
impl Tokenizer {
pub fn tokenize(mut self, pattern: &str) -> Result<Vec<Token>> {
let mut tokens = vec![];
while self.cursor < pattern.len() {
let char_len = pattern.chars().count();
while self.cursor < char_len {
// TODO: collect pattern into Vec<char> if this tokenizer is bottleneck in the future
let c = pattern.chars().nth(self.cursor).unwrap();
match c {
@@ -794,7 +793,8 @@ impl Tokenizer {
let mut phase = String::new();
let mut is_quote_present = false;
while self.cursor < pattern.len() {
let char_len = pattern.chars().count();
while self.cursor < char_len {
let mut c = pattern.chars().nth(self.cursor).unwrap();
match c {
@@ -899,6 +899,26 @@ mod test {
Phase("c".to_string()),
],
),
(
r#"中文 测试"#,
vec![Phase("中文".to_string()), Phase("测试".to_string())],
),
(
r#"中文 AND 测试"#,
vec![Phase("中文".to_string()), And, Phase("测试".to_string())],
),
(
r#"中文 +测试"#,
vec![Phase("中文".to_string()), Must, Phase("测试".to_string())],
),
(
r#"中文 -测试"#,
vec![
Phase("中文".to_string()),
Negative,
Phase("测试".to_string()),
],
),
];
for (query, expected) in cases {
@@ -1030,6 +1050,61 @@ mod test {
],
},
),
(
r#"中文 测试"#,
PatternAst::Binary {
op: BinaryOp::Or,
children: vec![
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "测试".to_string(),
},
],
},
),
(
r#"中文 AND 测试"#,
PatternAst::Binary {
op: BinaryOp::And,
children: vec![
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "测试".to_string(),
},
],
},
),
(
r#"中文 +测试"#,
PatternAst::Literal {
op: UnaryOp::Must,
pattern: "测试".to_string(),
},
),
(
r#"中文 -测试"#,
PatternAst::Binary {
op: BinaryOp::And,
children: vec![
PatternAst::Literal {
op: UnaryOp::Negative,
pattern: "测试".to_string(),
},
PatternAst::Literal {
op: UnaryOp::Optional,
pattern: "中文".to_string(),
},
],
},
),
];
for (query, expected) in cases {

View File
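
The first hunk in this file swaps the removed `SessionState::new_with_config_rt` constructor for DataFusion's builder. A minimal sketch of constructing a default session state this way, assuming a recent DataFusion release:

use datafusion::execution::SessionStateBuilder;

fn main() {
    // Builds a SessionState with DataFusion's default catalog, functions and planners registered.
    let state = SessionStateBuilder::new()
        .with_default_features()
        .build();
    // `state` can then be handed to DefaultPhysicalPlanner::create_physical_expr, as in the hunk above.
    let _ = state;
}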
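
The tokenizer hunks fix an index mismatch: `self.cursor` counts characters, while `pattern.len()` counts bytes, and the two disagree for multi-byte UTF-8 input such as the new Chinese test cases, letting the loop run past the last character. A short illustration of the difference:

fn main() {
    let pattern = "中文 test";
    // Byte length counts each Chinese character as 3 bytes.
    assert_eq!(pattern.len(), 11);
    // Character count is what a char-indexed cursor should be compared against.
    assert_eq!(pattern.chars().count(), 7);
}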

@@ -1,30 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod clip;
mod interp;
use std::sync::Arc;
use clip::ClipFunction;
use crate::function_registry::FunctionRegistry;
pub(crate) struct NumpyFunction;
impl NumpyFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(ClipFunction));
}
}

View File

@@ -1,298 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use std::sync::Arc;
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::ArrowPrimitiveType;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::*;
use datatypes::vectors::PrimitiveVector;
use paste::paste;
use crate::function::{Function, FunctionContext};
use crate::scalars::expression::{scalar_binary_op, EvalContext};
/// numpy.clip function, <https://numpy.org/doc/stable/reference/generated/numpy.clip.html>
#[derive(Clone, Debug, Default)]
pub struct ClipFunction;
macro_rules! define_eval {
($O: ident) => {
paste! {
fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
fn cast_vector(input: &VectorRef) -> VectorRef {
Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
).unwrap()) as _
}
let operator_1 = cast_vector(&columns[0]);
let operator_2 = cast_vector(&columns[1]);
let operator_3 = cast_vector(&columns[2]);
// clip(a, min, max) is equals to min(max(a, min), max)
let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
&operator_1,
&operator_2,
scalar_max,
&mut EvalContext::default(),
)?);
let col = scalar_binary_op::<$O, $O, $O, _>(
&col,
&operator_3,
scalar_min,
&mut EvalContext::default(),
)?;
Ok(Arc::new(col))
}
}
};
}
define_eval!(i64);
define_eval!(u64);
define_eval!(f64);
impl Function for ClipFunction {
fn name(&self) -> &str {
"clip"
}
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
if input_types.iter().all(ConcreteDataType::is_signed) {
Ok(ConcreteDataType::int64_datatype())
} else if input_types.iter().all(ConcreteDataType::is_unsigned) {
Ok(ConcreteDataType::uint64_datatype())
} else {
Ok(ConcreteDataType::float64_datatype())
}
}
fn signature(&self) -> Signature {
Signature::uniform(3, ConcreteDataType::numerics(), Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.iter().all(|v| v.data_type().is_signed()) {
eval_i64(columns)
} else if columns.iter().all(|v| v.data_type().is_unsigned()) {
eval_u64(columns)
} else {
eval_f64(columns)
}
}
}
#[inline]
pub fn min<T: PartialOrd>(input: T, min: T) -> T {
if input < min {
input
} else {
min
}
}
#[inline]
pub fn max<T: PartialOrd>(input: T, max: T) -> T {
if input > max {
input
} else {
max
}
}
#[inline]
fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(min(left, right)),
_ => None,
}
}
#[inline]
fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(max(left, right)),
_ => None,
}
}
impl fmt::Display for ClipFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "CLIP")
}
}
#[cfg(test)]
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::value::Value;
use datatypes::vectors::{
ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
UInt32Vector, UInt8Vector,
};
use super::*;
#[test]
fn test_clip_signature() {
let clip = ClipFunction;
assert_eq!("clip", clip.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
clip.return_type(&[]).unwrap()
);
assert_eq!(
ConcreteDataType::int64_datatype(),
clip.return_type(&[
ConcreteDataType::int16_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::int8_datatype()
])
.unwrap()
);
assert_eq!(
ConcreteDataType::uint64_datatype(),
clip.return_type(&[
ConcreteDataType::uint16_datatype(),
ConcreteDataType::uint64_datatype(),
ConcreteDataType::uint8_datatype()
])
.unwrap()
);
assert_eq!(
ConcreteDataType::float64_datatype(),
clip.return_type(&[
ConcreteDataType::uint16_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::uint8_datatype()
])
.unwrap()
);
assert!(matches!(clip.signature(),
Signature {
type_signature: TypeSignature::Uniform(3, valid_types),
volatility: Volatility::Immutable
} if valid_types == ConcreteDataType::numerics()
));
}
#[test]
fn test_clip_fn_signed() {
// eval with signed integers
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(Int8Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(Int16Vector::from_vec(vec![6])),
10,
)),
];
let vector = ClipFunction
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(10, vector.len());
// clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
for i in 0..10 {
if i <= 3 {
assert!(matches!(vector.get(i), Value::Int64(v) if v == 3));
} else if i <= 6 {
assert!(matches!(vector.get(i), Value::Int64(v) if v == (i as i64)));
} else {
assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
}
}
}
#[test]
fn test_clip_fn_unsigned() {
// eval with unsigned integers
let args: Vec<VectorRef> = vec![
Arc::new(UInt8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(UInt16Vector::from_vec(vec![6])),
10,
)),
];
let vector = ClipFunction
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(10, vector.len());
// clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
for i in 0..10 {
if i <= 3 {
assert!(matches!(vector.get(i), Value::UInt64(v) if v == 3));
} else if i <= 6 {
assert!(matches!(vector.get(i), Value::UInt64(v) if v == (i as u64)));
} else {
assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
}
}
}
#[test]
fn test_clip_fn_float() {
// eval with floats
let args: Vec<VectorRef> = vec![
Arc::new(Int8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(Float32Vector::from_vec(vec![6f32])),
10,
)),
];
let vector = ClipFunction
.eval(FunctionContext::default(), &args)
.unwrap();
assert_eq!(10, vector.len());
// clip([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 6) = [3, 3, 3, 3, 4, 5, 6, 6, 6, 6]
for i in 0..10 {
if i <= 3 {
assert!(matches!(vector.get(i), Value::Float64(v) if v == 3.0));
} else if i <= 6 {
assert!(matches!(vector.get(i), Value::Float64(v) if v == (i as f64)));
} else {
assert!(matches!(vector.get(i), Value::Float64(v) if v == 6.0));
}
}
}
}

View File

@@ -1,360 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::error::{self, Result};
use datatypes::arrow::compute::cast;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ScalarVector;
use datatypes::value::Value;
use datatypes::vectors::{Float64Vector, Vector, VectorRef};
use datatypes::with_match_primitive_type_id;
use snafu::{ensure, ResultExt};
/// search the biggest number that smaller than x in xp
fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
for i in 0..xp.len() {
if x < xp.get(i) {
return i - 1;
}
}
xp.len() - 1
}
/// search the biggest number that smaller than x in xp
fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
let mut left = 0;
let mut right = xp.len();
/* If len <= 4 use linear search. */
if xp.len() <= 4 {
return linear_search_ascending_vector(key, xp);
}
/* find index by bisection */
while left < right {
let mid = left + ((right - left) >> 1);
if key >= xp.get(mid) {
left = mid + 1;
} else {
right = mid;
}
}
left - 1
}
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
let tmp = arg.to_arrow_array();
let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
typ: ArrowDataType::Float64,
})?;
// Safety: array has been cast to Float64Array.
Ok(Float64Vector::try_from_arrow_array(array).unwrap())
},{
unreachable!()
})
}
/// One-dimensional linear interpolation for monotonically increasing sample points. Refers to
/// <https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491>
#[allow(unused)]
pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
let mut left = None;
let mut right = None;
ensure!(
args.len() >= 3,
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
3,
args.len()
),
}
);
let x = concrete_type_to_primitive_vector(&args[0])?;
let xp = concrete_type_to_primitive_vector(&args[1])?;
let fp = concrete_type_to_primitive_vector(&args[2])?;
// make sure the args.len() is 3 or 5
if args.len() > 3 {
ensure!(
args.len() == 5,
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not enough, expect at least: {}, have: {}",
5,
args.len()
),
}
);
left = concrete_type_to_primitive_vector(&args[3])
.unwrap()
.get_data(0);
right = concrete_type_to_primitive_vector(&args[4])
.unwrap()
.get_data(0);
}
ensure!(
x.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample x is empty",
}
);
ensure!(
xp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample xp is empty",
}
);
ensure!(
fp.len() != 0,
error::InvalidFuncArgsSnafu {
err_msg: "The sample fp is empty",
}
);
ensure!(
xp.len() == fp.len(),
error::InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the len1: {} don't match the length of the len2: {}",
xp.len(),
fp.len()
),
}
);
/* Get left and right fill values. */
let left = match left {
Some(left) => Some(left),
_ => fp.get_data(0),
};
let right = match right {
Some(right) => Some(right),
_ => fp.get_data(fp.len() - 1),
};
let res;
if xp.len() == 1 {
let data = x
.iter_data()
.map(|x| {
if Value::from(x) < xp.get(0) {
left
} else if Value::from(x) > xp.get(xp.len() - 1) {
right
} else {
fp.get_data(0)
}
})
.collect::<Vec<_>>();
res = Float64Vector::from(data);
} else {
let mut j = 0;
/* only pre-calculate slopes if there are relatively few of them. */
let mut slopes: Option<Vec<_>> = None;
if x.len() >= xp.len() {
let mut slopes_tmp = Vec::with_capacity(xp.len() - 1);
for i in 0..xp.len() - 1 {
let slope = match (
fp.get_data(i + 1),
fp.get_data(i),
xp.get_data(i + 1),
xp.get_data(i),
) {
(Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => {
if xp1 == xp2 {
None
} else {
Some((fp1 - fp2) / (xp1 - xp2))
}
}
_ => None,
};
slopes_tmp.push(slope);
}
slopes = Some(slopes_tmp);
}
let data = x
.iter_data()
.map(|x| match x {
Some(xi) => {
if Value::from(xi) > xp.get(xp.len() - 1) {
right
} else if Value::from(xi) < xp.get(0) {
left
} else {
j = binary_search_ascending_vector(Value::from(xi), &xp);
if j == xp.len() - 1 || xp.get(j) == Value::from(xi) {
fp.get_data(j)
} else {
let slope = match &slopes {
Some(slopes) => slopes[j],
_ => match (
fp.get_data(j + 1),
fp.get_data(j),
xp.get_data(j + 1),
xp.get_data(j),
) {
(Some(fp1), Some(fp2), Some(xp1), Some(xp2)) => {
if xp1 == xp2 {
None
} else {
Some((fp1 - fp2) / (xp1 - xp2))
}
}
_ => None,
},
};
/* If we get nan in one direction, try the other */
let ans = match (slope, xp.get_data(j), fp.get_data(j)) {
(Some(slope), Some(xp), Some(fp)) => Some(slope * (xi - xp) + fp),
_ => None,
};
let ans = match ans {
Some(ans) => Some(ans),
_ => match (slope, xp.get_data(j + 1), fp.get_data(j + 1)) {
(Some(slope), Some(xp), Some(fp)) => {
Some(slope * (xi - xp) + fp)
}
_ => None,
},
};
let ans = match ans {
Some(ans) => Some(ans),
_ => {
if fp.get_data(j) == fp.get_data(j + 1) {
fp.get_data(j)
} else {
None
}
}
};
ans
}
}
}
_ => None,
})
.collect::<Vec<_>>();
res = Float64Vector::from(data);
}
Ok(Arc::new(res) as _)
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::{Int32Vector, Int64Vector};
use super::*;
#[test]
fn test_basic_interp() {
// x xp fp
let x = 2.5;
let xp = vec![1i32, 2i32, 3i32];
let fp = vec![3i64, 2i64, 0i64];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(vec![x])),
Arc::new(Int32Vector::from_vec(xp.clone())),
Arc::new(Int64Vector::from_vec(fp.clone())),
];
let vector = interp(&args).unwrap();
assert_eq!(vector.len(), 1);
assert!(matches!(vector.get(0), Value::Float64(v) if v==1.0));
let x = vec![0.0, 1.0, 1.5, 3.2];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(x)),
Arc::new(Int32Vector::from_vec(xp)),
Arc::new(Int64Vector::from_vec(fp)),
];
let vector = interp(&args).unwrap();
assert_eq!(4, vector.len());
let res = [3.0, 3.0, 2.5, 0.0];
for (i, item) in res.iter().enumerate().take(vector.len()) {
assert!(matches!(vector.get(i),Value::Float64(v) if v==*item));
}
}
#[test]
fn test_left_right() {
let x = vec![0.0, 1.0, 1.5, 2.0, 3.0, 4.0];
let xp = vec![1i32, 2i32, 3i32];
let fp = vec![3i64, 2i64, 0i64];
let left = vec![-1];
let right = vec![2];
let expect = [-1.0, 3.0, 2.5, 2.0, 0.0, 2.0];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(x)),
Arc::new(Int32Vector::from_vec(xp)),
Arc::new(Int64Vector::from_vec(fp)),
Arc::new(Int32Vector::from_vec(left)),
Arc::new(Int32Vector::from_vec(right)),
];
let vector = interp(&args).unwrap();
for (i, item) in expect.iter().enumerate().take(vector.len()) {
assert!(matches!(vector.get(i),Value::Float64(v) if v==*item));
}
}
#[test]
fn test_scalar_interpolation_point() {
// x=0 output:0
let x = vec![0];
let xp = vec![0, 1, 5];
let fp = vec![0, 1, 5];
let args: Vec<VectorRef> = vec![
Arc::new(Int64Vector::from_vec(x.clone())),
Arc::new(Int64Vector::from_vec(xp.clone())),
Arc::new(Int64Vector::from_vec(fp.clone())),
];
let vector = interp(&args).unwrap();
assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
// x=0.3 output:0.3
let x = vec![0.3];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(x.clone())),
Arc::new(Int64Vector::from_vec(xp.clone())),
Arc::new(Int64Vector::from_vec(fp.clone())),
];
let vector = interp(&args).unwrap();
assert!(matches!(vector.get(0), Value::Float64(v) if v == x[0]));
// x=None output:Null
let input = vec![None, Some(0.0), Some(0.3)];
let x = Float64Vector::from(input);
let args: Vec<VectorRef> = vec![
Arc::new(x),
Arc::new(Int64Vector::from_vec(xp)),
Arc::new(Int64Vector::from_vec(fp)),
];
let vector = interp(&args).unwrap();
assert!(matches!(vector.get(0), Value::Null));
}
}

View File

@@ -42,7 +42,7 @@ impl Function for BuildFunction {
}
fn signature(&self) -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
Signature::nullary(Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
@@ -56,8 +56,6 @@ impl Function for BuildFunction {
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use super::*;
#[test]
fn test_build_function() {
@@ -67,12 +65,7 @@ mod tests {
ConcreteDataType::string_datatype(),
build.return_type(&[]).unwrap()
);
assert!(matches!(build.signature(),
Signature {
type_signature: TypeSignature::Uniform(0, valid_types),
volatility: Volatility::Immutable
} if valid_types.is_empty()
));
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let build_info = common_version::build_info().to_string();
let vector = build.eval(FunctionContext::default(), &[]).unwrap();
let expect: VectorRef = Arc::new(StringVector::from(vec![build_info]));

View File

@@ -44,7 +44,7 @@ impl Function for DatabaseFunction {
}
fn signature(&self) -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
Signature::nullary(Volatility::Immutable)
}
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
@@ -116,7 +116,6 @@ impl fmt::Display for SessionUserFunction {
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use session::context::QueryContextBuilder;
use super::*;
@@ -128,12 +127,7 @@ mod tests {
ConcreteDataType::string_datatype(),
build.return_type(&[]).unwrap()
);
assert!(matches!(build.signature(),
Signature {
type_signature: TypeSignature::Uniform(0, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![]
));
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let query_ctx = QueryContextBuilder::default()
.current_schema("test_db".to_string())

View File

@@ -38,7 +38,7 @@ impl Function for TimezoneFunction {
}
fn signature(&self) -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
Signature::nullary(Volatility::Immutable)
}
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
@@ -58,7 +58,6 @@ impl fmt::Display for TimezoneFunction {
mod tests {
use std::sync::Arc;
use common_query::prelude::TypeSignature;
use session::context::QueryContextBuilder;
use super::*;
@@ -70,12 +69,7 @@ mod tests {
ConcreteDataType::string_datatype(),
build.return_type(&[]).unwrap()
);
assert!(matches!(build.signature(),
Signature {
type_signature: TypeSignature::Uniform(0, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![]
));
assert_eq!(build.signature(), Signature::nullary(Volatility::Immutable));
let query_ctx = QueryContextBuilder::default().build().into();

View File

@@ -42,7 +42,7 @@ impl Function for VersionFunction {
}
fn signature(&self) -> Signature {
Signature::exact(vec![], Volatility::Immutable)
Signature::nullary(Volatility::Immutable)
}
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {

View File
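
The four function implementations above migrate zero-argument signatures from `Signature::uniform(0, vec![], ...)` or `Signature::exact(vec![], ...)` to the dedicated nullary constructor. A tiny sketch reusing the `Signature`/`Volatility` types imported in the diffs:

use common_query::prelude::{Signature, Volatility};

fn main() {
    // A function that takes no arguments advertises a nullary signature.
    let sig = Signature::nullary(Volatility::Immutable);
    // This is the value the updated tests compare `build.signature()` against.
    println!("{:?}", sig);
}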

@@ -107,7 +107,7 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: prost::DecodeError,
error: prost::UnknownEnumValue,
},
#[snafu(display(
@@ -137,7 +137,7 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: prost::DecodeError,
error: prost::UnknownEnumValue,
},
#[snafu(display("Missing alter index options"))]

View File

@@ -255,7 +255,7 @@ mod tests {
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
decimal_column.data_type,
decimal_column.datatype_extension.clone(),
decimal_column.datatype_extension,
)
.unwrap()
)
@@ -351,7 +351,6 @@ mod tests {
.as_ref()
.unwrap()
.datatype_extension
.clone()
)
.unwrap()
)

View File

@@ -166,7 +166,7 @@ pub fn build_create_table_expr(
default_constraint: vec![],
semantic_type,
comment: String::new(),
datatype_extension: datatype_extension.clone(),
datatype_extension: *datatype_extension,
options: options.clone(),
});
}
@@ -208,7 +208,7 @@ pub fn extract_new_columns(
default_constraint: vec![],
semantic_type: expr.semantic_type,
comment: String::new(),
datatype_extension: expr.datatype_extension.clone(),
datatype_extension: *expr.datatype_extension,
options: expr.options.clone(),
});
AddColumn {

View File

@@ -19,7 +19,8 @@ common-telemetry.workspace = true
common-time.workspace = true
dashmap.workspace = true
datatypes.workspace = true
flatbuffers = "23.1"
flatbuffers = "24"
hyper.workspace = true
lazy_static.workspace = true
prost.workspace = true
snafu.workspace = true
@@ -29,6 +30,7 @@ tower.workspace = true
[dev-dependencies]
criterion = "0.4"
hyper-util = { workspace = true, features = ["tokio"] }
rand.workspace = true
[[bench]]

View File

@@ -25,7 +25,7 @@ use snafu::{OptionExt, ResultExt};
use tonic::transport::{
Certificate, Channel as InnerChannel, ClientTlsConfig, Endpoint, Identity, Uri,
};
use tower::make::MakeConnection;
use tower::Service;
use crate::error::{CreateChannelSnafu, InvalidConfigFilePathSnafu, InvalidTlsConfigSnafu, Result};
@@ -137,8 +137,8 @@ impl ChannelManager {
connector: C,
) -> Result<InnerChannel>
where
C: MakeConnection<Uri> + Send + 'static,
C::Connection: Unpin + Send + 'static,
C: Service<Uri> + Send + 'static,
C::Response: hyper::rt::Read + hyper::rt::Write + Send + Unpin,
C::Future: Send + 'static,
Box<dyn std::error::Error + Send + Sync>: From<C::Error> + Send + 'static,
{
@@ -607,7 +607,7 @@ mod tests {
});
let (client, _) = tokio::io::duplex(1024);
let mut client = Some(client);
let mut client = Some(hyper_util::rt::TokioIo::new(client));
let res = mgr.reset_with_connector(
addr,
service_fn(move |_| {

View File
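
This hunk tracks the hyper 1.x / tower migration: a custom gRPC connector is now any `tower::Service<Uri>` whose connection implements `hyper::rt::Read + Write`, and a plain tokio stream is adapted with `hyper_util::rt::TokioIo`. A rough sketch of wrapping an in-memory duplex stream that way, assuming the `hyper-util`, `tower`, `tokio`, and `tonic` crates used above (`tonic::transport::Uri` stands in for the `Uri` the channel manager expects):

use hyper_util::rt::TokioIo;
use tower::service_fn;

fn main() {
    // An in-memory byte pipe standing in for a real TCP or Unix connection.
    let (client_side, _server_side) = tokio::io::duplex(1024);
    let mut client_side = Some(client_side);

    // The connector hands out the wrapped stream once; TokioIo bridges
    // tokio's AsyncRead/AsyncWrite to hyper::rt::Read/Write.
    let _connector = service_fn(move |_uri: tonic::transport::Uri| {
        let io = TokioIo::new(client_side.take().expect("connector called once"));
        async move { Ok::<_, std::io::Error>(io) }
    });
    // `_connector` could then be passed to an endpoint's connect_with_connector-style API.
}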

@@ -205,7 +205,7 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
ConcreteDataType::Interval(IntervalType::DayTime(_)),
IntervalDayTimeVector,
interval_day_time_values,
|x| { x.into_native() }
|x| { x.to_i64() }
),
(
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)),
@@ -232,6 +232,8 @@ pub fn values(arrays: &[VectorRef]) -> Result<Values> {
mod tests {
use std::sync::Arc;
use datatypes::arrow::datatypes::{IntervalDayTime, IntervalMonthDayNano};
use super::*;
#[test]
@@ -266,7 +268,12 @@ mod tests {
#[test]
fn test_convert_arrow_array_interval_day_time() {
let array = IntervalDayTimeVector::from(vec![Some(1), Some(2), None, Some(3)]);
let array = IntervalDayTimeVector::from(vec![
Some(IntervalDayTime::new(0, 1)),
Some(IntervalDayTime::new(0, 2)),
None,
Some(IntervalDayTime::new(0, 3)),
]);
let array: VectorRef = Arc::new(array);
let values = values(&[array]).unwrap();
@@ -276,7 +283,12 @@ mod tests {
#[test]
fn test_convert_arrow_array_interval_month_day_nano() {
let array = IntervalMonthDayNanoVector::from(vec![Some(1), Some(2), None, Some(3)]);
let array = IntervalMonthDayNanoVector::from(vec![
Some(IntervalMonthDayNano::new(0, 0, 1)),
Some(IntervalMonthDayNano::new(0, 0, 2)),
None,
Some(IntervalMonthDayNano::new(0, 0, 3)),
]);
let array: VectorRef = Arc::new(array);
let values = values(&[array]).unwrap();

View File

@@ -138,7 +138,7 @@ fn build_struct(
datafusion_expr::create_udf(
Self::name(),
Self::input_type(),
Arc::new(Self::return_type()),
Self::return_type(),
Volatility::Immutable,
Arc::new(Self::calc) as _,
)

View File

@@ -47,7 +47,7 @@ pub(crate) fn build_template(create_table_expr: &CreateTableExpr) -> Result<Crea
default_constraint: c.default_constraint.clone(),
semantic_type: semantic_type as i32,
comment: String::new(),
datatype_extension: c.datatype_extension.clone(),
datatype_extension: c.datatype_extension,
options: c.options.clone(),
}),
column_id: i as u32,

View File

@@ -78,7 +78,7 @@ impl DropTableProcedure {
})
}
pub(crate) async fn on_prepare<'a>(&mut self) -> Result<Status> {
pub(crate) async fn on_prepare(&mut self) -> Result<Status> {
if self.executor.on_prepare(&self.context).await?.stop() {
return Ok(Status::done());
}

View File

@@ -703,6 +703,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid topic name prefix: {}", prefix))]
InvalidTopicNamePrefix {
prefix: String,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -770,7 +777,8 @@ impl ErrorExt for Error {
| MismatchPrefix { .. }
| TlsConfig { .. }
| InvalidSetDatabaseOption { .. }
| InvalidUnsetDatabaseOption { .. } => StatusCode::InvalidArguments,
| InvalidUnsetDatabaseOption { .. }
| InvalidTopicNamePrefix { .. } => StatusCode::InvalidArguments,
FlowNotFound { .. } => StatusCode::FlowNotFound,
FlowRouteNotFound { .. } => StatusCode::Unexpected,

View File

@@ -51,10 +51,17 @@
//! 10. Table flow key: `__flow/source_table/{table_id}/{flownode_id}/{flow_id}/{partition_id}`
//! - Mapping source table's {table_id} to {flownode_id}
//! - Used in `Flownode` booting.
//!
//! 11. View info key: `__view_info/{view_id}`
//! - The value is a [ViewInfoValue] struct; it contains the encoded logical plan.
//! - This key is mainly used in constructing the view in Datanode and Frontend.
//!
//! 12. Kafka topic key: `__topic_name/kafka/{topic_name}`
//! - The key is used to mark existing topics in Kafka for the WAL.
//!
//! 13. Topic name to region map key: `__topic_region/{topic_name}/{region_id}`
//! - Mapping {topic_name} to {region_id}
//!
//! All keys have related managers. The managers take care of the serialization and deserialization
//! of keys and values, and the interaction with the underlying KV store backend.
//!
@@ -100,6 +107,8 @@ pub mod table_route;
#[cfg(any(test, feature = "testing"))]
pub mod test_utils;
mod tombstone;
pub mod topic_name;
pub mod topic_region;
pub(crate) mod txn_helper;
pub mod view_info;
@@ -158,6 +167,10 @@ pub const CATALOG_NAME_KEY_PREFIX: &str = "__catalog_name";
pub const SCHEMA_NAME_KEY_PREFIX: &str = "__schema_name";
pub const TABLE_ROUTE_PREFIX: &str = "__table_route";
pub const NODE_ADDRESS_PREFIX: &str = "__node_address";
pub const KAFKA_TOPIC_KEY_PREFIX: &str = "__topic_name/kafka";
// The legacy topic key prefix is used to store the topic name in previous versions.
pub const LEGACY_TOPIC_KEY_PREFIX: &str = "__created_wal_topics/kafka";
pub const TOPIC_REGION_PREFIX: &str = "__topic_region";
/// The keys with these prefixes will be loaded into the cache when the leader starts.
pub const CACHE_KEY_PREFIXES: [&str; 5] = [
@@ -175,6 +188,10 @@ pub type FlowId = u32;
/// The partition of flow.
pub type FlowPartitionId = u32;
lazy_static! {
pub static ref NAME_PATTERN_REGEX: Regex = Regex::new(NAME_PATTERN).unwrap();
}
lazy_static! {
static ref TABLE_INFO_KEY_PATTERN: Regex =
Regex::new(&format!("^{TABLE_INFO_KEY_PREFIX}/([0-9]+)$")).unwrap();
@@ -223,6 +240,18 @@ lazy_static! {
Regex::new(&format!("^{NODE_ADDRESS_PREFIX}/([0-9]+)/([0-9]+)$")).unwrap();
}
lazy_static! {
pub static ref KAFKA_TOPIC_KEY_PATTERN: Regex =
Regex::new(&format!("^{KAFKA_TOPIC_KEY_PREFIX}/(.*)$")).unwrap();
}
lazy_static! {
pub static ref TOPIC_REGION_PATTERN: Regex = Regex::new(&format!(
"^{TOPIC_REGION_PREFIX}/({NAME_PATTERN})/([0-9]+)$"
))
.unwrap();
}
/// The key of metadata.
pub trait MetadataKey<'a, T> {
fn to_bytes(&self) -> Vec<u8>;

View File
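
The module documentation above fixes the shape of the two new metadata keys, `__topic_name/kafka/{topic_name}` and `__topic_region/{topic_name}/{region_id}`. As a simplified illustration of how such a key round-trips (the real managers go through `MetadataKey` and the regex patterns added above rather than string splitting):

fn main() {
    // Building the key exactly as documented: `__topic_region/{topic_name}/{region_id}`.
    let topic = "greptimedb_wal_topic_0";
    let region_id: u64 = 42;
    let key = format!("__topic_region/{topic}/{region_id}");

    // Parsing it back; the real code validates the topic with NAME_PATTERN_REGEX instead.
    let mut parts = key.trim_start_matches("__topic_region/").rsplitn(2, '/');
    let parsed_region: u64 = parts.next().unwrap().parse().unwrap();
    let parsed_topic = parts.next().unwrap();
    assert_eq!(parsed_topic, topic);
    assert_eq!(parsed_region, region_id);
}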

@@ -378,6 +378,12 @@ impl FlowMetadataManager {
}
}
impl std::fmt::Debug for FlowMetadataManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FlowMetadataManager").finish()
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;

View File

@@ -0,0 +1,210 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use crate::error::{DecodeJsonSnafu, Error, InvalidMetadataSnafu, Result};
use crate::key::{
MetadataKey, KAFKA_TOPIC_KEY_PATTERN, KAFKA_TOPIC_KEY_PREFIX, LEGACY_TOPIC_KEY_PREFIX,
};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{BatchPutRequest, RangeRequest};
use crate::rpc::KeyValue;
#[derive(Debug, Clone, PartialEq)]
pub struct TopicNameKey<'a> {
pub topic: &'a str,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct TopicNameValue;
impl<'a> TopicNameKey<'a> {
pub fn new(topic: &'a str) -> Self {
Self { topic }
}
pub fn gen_with_id_and_prefix(id: usize, prefix: &'a str) -> String {
format!("{}_{}", prefix, id)
}
pub fn range_start_key() -> String {
KAFKA_TOPIC_KEY_PREFIX.to_string()
}
}
impl<'a> MetadataKey<'a, TopicNameKey<'a>> for TopicNameKey<'_> {
fn to_bytes(&self) -> Vec<u8> {
self.to_string().into_bytes()
}
fn from_bytes(bytes: &'a [u8]) -> Result<TopicNameKey<'a>> {
let key = std::str::from_utf8(bytes).map_err(|e| {
InvalidMetadataSnafu {
err_msg: format!(
"TopicNameKey '{}' is not a valid UTF8 string: {e}",
String::from_utf8_lossy(bytes)
),
}
.build()
})?;
TopicNameKey::try_from(key)
}
}
impl Display for TopicNameKey<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}/{}", KAFKA_TOPIC_KEY_PREFIX, self.topic)
}
}
impl<'a> TryFrom<&'a str> for TopicNameKey<'a> {
type Error = Error;
fn try_from(value: &'a str) -> Result<TopicNameKey<'a>> {
let captures = KAFKA_TOPIC_KEY_PATTERN
.captures(value)
.context(InvalidMetadataSnafu {
err_msg: format!("Invalid topic name key: {}", value),
})?;
// Safety: pass the regex check above
Ok(TopicNameKey {
topic: captures.get(1).unwrap().as_str(),
})
}
}
/// Convert a key-value pair to a topic name.
fn topic_decoder(kv: &KeyValue) -> Result<String> {
let key = TopicNameKey::from_bytes(&kv.key)?;
Ok(key.topic.to_string())
}
pub struct TopicNameManager {
kv_backend: KvBackendRef,
}
impl TopicNameManager {
pub fn new(kv_backend: KvBackendRef) -> Self {
Self { kv_backend }
}
/// Updates topics stored in the legacy format to the new format.
pub async fn update_legacy_topics(&self) -> Result<()> {
if let Some(kv) = self
.kv_backend
.get(LEGACY_TOPIC_KEY_PREFIX.as_bytes())
.await?
{
let topics =
serde_json::from_slice::<Vec<String>>(&kv.value).context(DecodeJsonSnafu)?;
let mut reqs = topics
.iter()
.map(|topic| {
let key = TopicNameKey::new(topic);
TxnOp::Put(key.to_bytes(), vec![])
})
.collect::<Vec<_>>();
let delete_req = TxnOp::Delete(LEGACY_TOPIC_KEY_PREFIX.as_bytes().to_vec());
reqs.push(delete_req);
let txn = Txn::new().and_then(reqs);
self.kv_backend.txn(txn).await?;
}
Ok(())
}
/// Range query for topics.
/// Caution: this method returns the keys of the range query (as `String`s) rather than the values, since the topic names are stored in the keys.
pub async fn range(&self) -> Result<Vec<String>> {
let prefix = TopicNameKey::range_start_key();
let raw_prefix = prefix.as_bytes();
let req = RangeRequest::new().with_prefix(raw_prefix);
let resp = self.kv_backend.range(req).await?;
resp.kvs
.iter()
.map(topic_decoder)
.collect::<Result<Vec<String>>>()
}
/// Put topics into kvbackend.
pub async fn batch_put(&self, topic_name_keys: Vec<TopicNameKey<'_>>) -> Result<()> {
let req = BatchPutRequest {
kvs: topic_name_keys
.iter()
.map(|key| KeyValue {
key: key.to_bytes(),
value: vec![],
})
.collect(),
prev_kv: false,
};
self.kv_backend.batch_put(req).await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::KvBackend;
use crate::rpc::store::PutRequest;
#[tokio::test]
async fn test_topic_name_key_manager() {
let kv_backend = Arc::new(MemoryKvBackend::default());
let manager = TopicNameManager::new(kv_backend.clone());
let mut all_topics = (0..16)
.map(|i| format!("{}/{}", KAFKA_TOPIC_KEY_PREFIX, i))
.collect::<Vec<_>>();
all_topics.sort();
let topic_name_keys = all_topics
.iter()
.map(|topic| TopicNameKey::new(topic))
.collect::<Vec<_>>();
manager.batch_put(topic_name_keys.clone()).await.unwrap();
let topics = manager.range().await.unwrap();
assert_eq!(topics, all_topics);
kv_backend
.put(PutRequest {
key: LEGACY_TOPIC_KEY_PREFIX.as_bytes().to_vec(),
value: serde_json::to_vec(&all_topics).unwrap(),
prev_kv: false,
})
.await
.unwrap();
manager.update_legacy_topics().await.unwrap();
let res = kv_backend
.get(LEGACY_TOPIC_KEY_PREFIX.as_bytes())
.await
.unwrap();
assert!(res.is_none());
let topics = manager.range().await.unwrap();
assert_eq!(topics, all_topics);
let topics = manager.range().await.unwrap();
assert_eq!(topics, all_topics);
}
}

View File

@@ -0,0 +1,223 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Display};
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use store_api::storage::RegionId;
use crate::error::{Error, InvalidMetadataSnafu, Result};
use crate::key::{MetadataKey, TOPIC_REGION_PATTERN, TOPIC_REGION_PREFIX};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::{BatchPutRequest, PutRequest, RangeRequest};
use crate::rpc::KeyValue;
#[derive(Debug, Clone, PartialEq)]
pub struct TopicRegionKey<'a> {
pub region_id: RegionId,
pub topic: &'a str,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct TopicRegionValue;
impl<'a> TopicRegionKey<'a> {
pub fn new(region_id: RegionId, topic: &'a str) -> Self {
Self { region_id, topic }
}
pub fn range_topic_key(topic: &str) -> String {
format!("{}/{}", TOPIC_REGION_PREFIX, topic)
}
}
impl<'a> MetadataKey<'a, TopicRegionKey<'a>> for TopicRegionKey<'a> {
fn to_bytes(&self) -> Vec<u8> {
self.to_string().into_bytes()
}
fn from_bytes(bytes: &'a [u8]) -> Result<TopicRegionKey<'a>> {
let key = std::str::from_utf8(bytes).map_err(|e| {
InvalidMetadataSnafu {
err_msg: format!(
"TopicRegionKey '{}' is not a valid UTF8 string: {e}",
String::from_utf8_lossy(bytes)
),
}
.build()
})?;
TopicRegionKey::try_from(key)
}
}
impl Display for TopicRegionKey<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{}/{}",
Self::range_topic_key(self.topic),
self.region_id.as_u64()
)
}
}
impl<'a> TryFrom<&'a str> for TopicRegionKey<'a> {
type Error = Error;
/// Value is of format `{prefix}/{topic}/{region_id}`
fn try_from(value: &'a str) -> Result<TopicRegionKey<'a>> {
let captures = TOPIC_REGION_PATTERN
.captures(value)
.context(InvalidMetadataSnafu {
err_msg: format!("Invalid TopicRegionKey: {}", value),
})?;
let topic = captures.get(1).map(|m| m.as_str()).unwrap();
let region_id = captures[2].parse::<u64>().map_err(|_| {
InvalidMetadataSnafu {
err_msg: format!("Invalid region id in TopicRegionKey: {}", value),
}
.build()
})?;
Ok(TopicRegionKey {
region_id: RegionId::from_u64(region_id),
topic,
})
}
}
fn topic_region_decoder(value: &KeyValue) -> Result<TopicRegionKey<'_>> {
let key = TopicRegionKey::from_bytes(&value.key)?;
Ok(key)
}
/// Manages the mapping between topics and regions in the kv backend.
pub struct TopicRegionManager {
kv_backend: KvBackendRef,
}
impl TopicRegionManager {
pub fn new(kv_backend: KvBackendRef) -> Self {
Self { kv_backend }
}
pub async fn put(&self, key: TopicRegionKey<'_>) -> Result<()> {
let put_req = PutRequest {
key: key.to_bytes(),
value: vec![],
prev_kv: false,
};
self.kv_backend.put(put_req).await?;
Ok(())
}
pub async fn batch_put(&self, keys: Vec<TopicRegionKey<'_>>) -> Result<()> {
let req = BatchPutRequest {
kvs: keys
.into_iter()
.map(|key| KeyValue {
key: key.to_bytes(),
value: vec![],
})
.collect(),
prev_kv: false,
};
self.kv_backend.batch_put(req).await?;
Ok(())
}
/// Returns the list of region ids that use the specified topic.
pub async fn regions(&self, topic: &str) -> Result<Vec<RegionId>> {
let prefix = TopicRegionKey::range_topic_key(topic);
let req = RangeRequest::new().with_prefix(prefix.as_bytes());
let resp = self.kv_backend.range(req).await?;
let region_ids = resp
.kvs
.iter()
.map(topic_region_decoder)
.collect::<Result<Vec<_>>>()?;
Ok(region_ids.iter().map(|key| key.region_id).collect())
}
pub async fn delete(&self, key: TopicRegionKey<'_>) -> Result<()> {
let raw_key = key.to_bytes();
self.kv_backend.delete(&raw_key, false).await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
#[tokio::test]
async fn test_topic_region_manager() {
let kv_backend = Arc::new(MemoryKvBackend::default());
let manager = TopicRegionManager::new(kv_backend.clone());
let topics = (0..16).map(|i| format!("topic_{}", i)).collect::<Vec<_>>();
let keys = (0..64)
.map(|i| TopicRegionKey::new(RegionId::from_u64(i), &topics[(i % 16) as usize]))
.collect::<Vec<_>>();
manager.batch_put(keys.clone()).await.unwrap();
let mut key_values = manager.regions(&topics[0]).await.unwrap();
let expected = keys
.iter()
.filter_map(|key| {
if key.topic == topics[0] {
Some(key.region_id)
} else {
None
}
})
.collect::<Vec<_>>();
key_values.sort_by_key(|id| id.as_u64());
assert_eq!(key_values, expected);
let key = TopicRegionKey::new(RegionId::from_u64(0), "topic_0");
manager.delete(key.clone()).await.unwrap();
let mut key_values = manager.regions(&topics[0]).await.unwrap();
let expected = keys
.iter()
.filter_map(|key| {
if key.topic == topics[0] && key.region_id != RegionId::from_u64(0) {
Some(key.region_id)
} else {
None
}
})
.collect::<Vec<_>>();
key_values.sort_by_key(|id| id.as_u64());
assert_eq!(key_values, expected);
}
}

View File

@@ -54,7 +54,7 @@ impl TxnOpGetResponseSet {
impl From<&mut Vec<TxnOpResponse>> for TxnOpGetResponseSet {
fn from(value: &mut Vec<TxnOpResponse>) -> Self {
let value = value
.extract_if(|resp| matches!(resp, TxnOpResponse::ResponseGet(_)))
.extract_if(.., |resp| matches!(resp, TxnOpResponse::ResponseGet(_)))
.flat_map(|resp| {
// Safety: checked
let TxnOpResponse::ResponseGet(r) = resp else {

View File
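
This one-line change tracks the updated nightly `Vec::extract_if` signature, which now takes a range argument in addition to the predicate (the crate already enables the `extract_if` feature, as the following hunk shows). A small sketch, assuming a recent nightly toolchain:

#![feature(extract_if)] // nightly-only, matching the crate-level feature in the next diff

fn main() {
    let mut responses = vec![1, 2, 3, 4, 5];
    // Drain the matching elements out of the whole vector (`..`), keeping the rest in place.
    let evens: Vec<_> = responses.extract_if(.., |x| *x % 2 == 0).collect();
    assert_eq!(evens, vec![2, 4]);
    assert_eq!(responses, vec![1, 3, 5]);
}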

@@ -14,7 +14,6 @@
#![feature(assert_matches)]
#![feature(btree_extract_if)]
#![feature(async_closure)]
#![feature(let_chains)]
#![feature(extract_if)]
#![feature(hash_extract_if)]

View File

@@ -290,7 +290,8 @@ mod tests {
num_per_range: u32,
max_bytes: u32,
) {
let num_cases = rand::thread_rng().gen_range(1..=26);
let num_cases = rand::thread_rng().gen_range(1..=8);
common_telemetry::info!("num_cases: {}", num_cases);
let mut cases = Vec::with_capacity(num_cases);
for i in 0..num_cases {
let size = rand::thread_rng().gen_range(size_limit..=max_bytes);
@@ -324,6 +325,7 @@ mod tests {
// Puts the values
for TestCase { key, value, .. } in &cases {
common_telemetry::info!("put key: {}, size: {}", key, value.len());
store.put(key, value.clone()).await.unwrap();
}
@@ -332,6 +334,7 @@ mod tests {
let data = walk_top_down(prefix).await;
assert_eq!(data.len(), 1);
let (keyset, got) = data.into_iter().next().unwrap();
common_telemetry::info!("get key: {}", keyset.key());
let num_expected_keys = value.len().div_ceil(size_limit as usize);
assert_eq!(&got, value);
assert_eq!(keyset.key(), key);
@@ -364,6 +367,7 @@ mod tests {
let prefix = "test_etcd_store_split_value/";
let endpoints = env::var("GT_ETCD_ENDPOINTS").unwrap_or_default();
let kv_backend: KvBackendRef = if endpoints.is_empty() {
common_telemetry::info!("Using MemoryKvBackend");
Arc::new(MemoryKvBackend::new())
} else {
let endpoints = endpoints

View File

@@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod kafka;
mod selector;
mod topic_creator;
mod topic_manager;
mod topic_pool;
use std::collections::HashMap;
use std::sync::Arc;
@@ -20,41 +23,33 @@ use std::sync::Arc;
use async_trait::async_trait;
use common_wal::config::MetasrvWalConfig;
use common_wal::options::{KafkaWalOptions, WalOptions, WAL_OPTIONS_KEY};
use snafu::ResultExt;
use snafu::{ensure, ResultExt};
use store_api::storage::{RegionId, RegionNumber};
use crate::error::{EncodeWalOptionsSnafu, Result};
use crate::error::{EncodeWalOptionsSnafu, InvalidTopicNamePrefixSnafu, Result};
use crate::key::NAME_PATTERN_REGEX;
use crate::kv_backend::KvBackendRef;
use crate::leadership_notifier::LeadershipChangeListener;
use crate::wal_options_allocator::kafka::topic_manager::TopicManager as KafkaTopicManager;
use crate::wal_options_allocator::topic_creator::build_kafka_topic_creator;
use crate::wal_options_allocator::topic_pool::KafkaTopicPool;
/// Allocates wal options in region granularity.
#[derive(Default)]
#[derive(Default, Debug)]
pub enum WalOptionsAllocator {
#[default]
RaftEngine,
Kafka(KafkaTopicManager),
Kafka(KafkaTopicPool),
}
/// Arc wrapper of WalOptionsAllocator.
pub type WalOptionsAllocatorRef = Arc<WalOptionsAllocator>;
impl WalOptionsAllocator {
/// Creates a WalOptionsAllocator.
pub fn new(config: MetasrvWalConfig, kv_backend: KvBackendRef) -> Self {
match config {
MetasrvWalConfig::RaftEngine => Self::RaftEngine,
MetasrvWalConfig::Kafka(kafka_config) => {
Self::Kafka(KafkaTopicManager::new(kafka_config, kv_backend))
}
}
}
/// Tries to start the allocator.
pub async fn start(&self) -> Result<()> {
match self {
Self::RaftEngine => Ok(()),
Self::Kafka(kafka_topic_manager) => kafka_topic_manager.start().await,
Self::Kafka(kafka_topic_manager) => kafka_topic_manager.activate().await,
}
}
@@ -111,6 +106,26 @@ impl LeadershipChangeListener for WalOptionsAllocator {
}
}
/// Builds a wal options allocator based on the given configuration.
pub async fn build_wal_options_allocator(
config: &MetasrvWalConfig,
kv_backend: KvBackendRef,
) -> Result<WalOptionsAllocator> {
match config {
MetasrvWalConfig::RaftEngine => Ok(WalOptionsAllocator::RaftEngine),
MetasrvWalConfig::Kafka(kafka_config) => {
let prefix = &kafka_config.kafka_topic.topic_name_prefix;
ensure!(
NAME_PATTERN_REGEX.is_match(prefix),
InvalidTopicNamePrefixSnafu { prefix }
);
let topic_creator = build_kafka_topic_creator(kafka_config).await?;
let topic_pool = KafkaTopicPool::new(kafka_config, kv_backend, topic_creator);
Ok(WalOptionsAllocator::Kafka(topic_pool))
}
}
}
/// Allocates wal options for each region. The allocated wal options are encoded immediately.
pub fn allocate_region_wal_options(
regions: Vec<RegionNumber>,
@@ -140,20 +155,24 @@ pub fn prepare_wal_options(
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
use common_wal::config::kafka::MetasrvKafkaConfig;
use common_wal::test_util::run_test_with_kafka_wal;
use super::*;
use crate::error::Error;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::wal_options_allocator::kafka::topic_selector::RoundRobinTopicSelector;
// Tests that the wal options allocator could successfully allocate raft-engine wal options.
#[tokio::test]
async fn test_allocator_with_raft_engine() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let wal_config = MetasrvWalConfig::RaftEngine;
let allocator = WalOptionsAllocator::new(wal_config, kv_backend);
let allocator = build_wal_options_allocator(&wal_config, kv_backend)
.await
.unwrap();
allocator.start().await.unwrap();
let num_regions = 32;
@@ -168,6 +187,22 @@ mod tests {
assert_eq!(got, expected);
}
#[tokio::test]
async fn test_refuse_invalid_topic_name_prefix() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let wal_config = MetasrvWalConfig::Kafka(MetasrvKafkaConfig {
kafka_topic: KafkaTopicConfig {
topic_name_prefix: "``````".to_string(),
..Default::default()
},
..Default::default()
});
let got = build_wal_options_allocator(&wal_config, kv_backend)
.await
.unwrap_err();
assert_matches!(got, Error::InvalidTopicNamePrefix { .. });
}
// Tests that the wal options allocator could successfully allocate Kafka wal options.
#[tokio::test]
async fn test_allocator_with_kafka() {
@@ -191,14 +226,13 @@ mod tests {
..Default::default()
};
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let mut topic_manager = KafkaTopicManager::new(config.clone(), kv_backend);
// Replaces the default topic pool with the constructed topics.
topic_manager.topic_pool.clone_from(&topics);
// Replaces the default selector with a round-robin selector without shuffling.
topic_manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
let topic_creator = build_kafka_topic_creator(&config).await.unwrap();
let mut topic_pool = KafkaTopicPool::new(&config, kv_backend, topic_creator);
topic_pool.topics.clone_from(&topics);
topic_pool.selector = Arc::new(selector::RoundRobinTopicSelector::default());
// Creates an options allocator.
let allocator = WalOptionsAllocator::Kafka(topic_manager);
let allocator = WalOptionsAllocator::Kafka(topic_pool);
allocator.start().await.unwrap();
let num_regions = 32;

View File
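
To recap the new construction path above, here is a crate-internal sketch that mirrors the raft-engine test; the module paths follow the `crate::` imports in this diff and the wrapping helper is hypothetical:

use std::sync::Arc;

use common_wal::config::MetasrvWalConfig;

use crate::error::Result;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::kv_backend::KvBackendRef;
use crate::wal_options_allocator::{build_wal_options_allocator, WalOptionsAllocator};

// Hypothetical helper: validate the config (e.g. the Kafka topic name prefix)
// while building the allocator, then start it.
async fn started_allocator(config: &MetasrvWalConfig) -> Result<WalOptionsAllocator> {
    let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
    let allocator = build_wal_options_allocator(config, kv_backend).await?;
    allocator.start().await?;
    Ok(allocator)
}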

@@ -1,350 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::sync::Arc;
use common_telemetry::{error, info};
use common_wal::config::kafka::MetasrvKafkaConfig;
use common_wal::TopicSelectorType;
use rskafka::client::controller::ControllerClient;
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
use rskafka::client::partition::{Compression, UnknownTopicHandling};
use rskafka::client::{Client, ClientBuilder};
use rskafka::record::Record;
use rskafka::BackoffConfig;
use snafu::{ensure, ResultExt};
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
CreateKafkaWalTopicSnafu, DecodeJsonSnafu, EncodeJsonSnafu, InvalidNumTopicsSnafu,
ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result, TlsConfigSnafu,
};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::PutRequest;
use crate::wal_options_allocator::kafka::topic_selector::{
RoundRobinTopicSelector, TopicSelectorRef,
};
const CREATED_TOPICS_KEY: &str = "__created_wal_topics/kafka/";
// Each topic only has one partition for now.
// The `DEFAULT_PARTITION` refers to the index of the partition.
const DEFAULT_PARTITION: i32 = 0;
/// Manages topic initialization and selection.
pub struct TopicManager {
config: MetasrvKafkaConfig,
pub(crate) topic_pool: Vec<String>,
pub(crate) topic_selector: TopicSelectorRef,
kv_backend: KvBackendRef,
}
impl TopicManager {
/// Creates a new topic manager.
pub fn new(config: MetasrvKafkaConfig, kv_backend: KvBackendRef) -> Self {
// Topics should be created.
let topics = (0..config.kafka_topic.num_topics)
.map(|topic_id| format!("{}_{topic_id}", config.kafka_topic.topic_name_prefix))
.collect::<Vec<_>>();
let selector = match config.kafka_topic.selector_type {
TopicSelectorType::RoundRobin => RoundRobinTopicSelector::with_shuffle(),
};
Self {
config,
topic_pool: topics,
topic_selector: Arc::new(selector),
kv_backend,
}
}
/// Tries to initialize the topic manager.
/// The initializer first tries to restore persisted topics from the kv backend.
/// If not enough topics are retrieved, the initializer contacts the Kafka cluster to create the missing topics.
pub async fn start(&self) -> Result<()> {
// Skip creating topics.
if !self.config.auto_create_topics {
return Ok(());
}
let num_topics = self.config.kafka_topic.num_topics;
ensure!(num_topics > 0, InvalidNumTopicsSnafu { num_topics });
// Topics should be created.
let topics = &self.topic_pool;
// Topics already created.
// There may be extra topics already created, but that's okay since they won't break topic allocation.
let created_topics = Self::restore_created_topics(&self.kv_backend)
.await?
.into_iter()
.collect::<HashSet<String>>();
// Creates missing topics.
let to_be_created = topics
.iter()
.enumerate()
.filter_map(|(i, topic)| {
if created_topics.contains(topic) {
return None;
}
Some(i)
})
.collect::<Vec<_>>();
if !to_be_created.is_empty() {
self.try_create_topics(topics, &to_be_created).await?;
Self::persist_created_topics(topics, &self.kv_backend).await?;
}
Ok(())
}
/// Tries to create topics specified by indexes in `to_be_created`.
async fn try_create_topics(&self, topics: &[String], to_be_created: &[usize]) -> Result<()> {
// Builds a Kafka controller client for creating topics.
let backoff_config = BackoffConfig {
init_backoff: self.config.backoff.init,
max_backoff: self.config.backoff.max,
base: self.config.backoff.base as f64,
deadline: self.config.backoff.deadline,
};
let broker_endpoints =
common_wal::resolve_to_ipv4(&self.config.connection.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(backoff_config);
if let Some(sasl) = &self.config.connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};
if let Some(tls) = &self.config.connection.tls {
builder = builder.tls_config(tls.to_tls_config().await.context(TlsConfigSnafu)?)
};
let client = builder
.build()
.await
.with_context(|_| BuildKafkaClientSnafu {
broker_endpoints: self.config.connection.broker_endpoints.clone(),
})?;
let control_client = client
.controller_client()
.context(BuildKafkaCtrlClientSnafu)?;
// Try to create missing topics.
let tasks = to_be_created
.iter()
.map(|i| async {
self.try_create_topic(&topics[*i], &control_client).await?;
self.try_append_noop_record(&topics[*i], &client).await?;
Ok(())
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await.map(|_| ())
}
/// Selects one topic from the topic pool through the topic selector.
pub fn select(&self) -> Result<&String> {
self.topic_selector.select(&self.topic_pool)
}
/// Selects a batch of topics from the topic pool through the topic selector.
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&String>> {
(0..num_topics)
.map(|_| self.topic_selector.select(&self.topic_pool))
.collect()
}
async fn try_append_noop_record(&self, topic: &String, client: &Client) -> Result<()> {
let partition_client = client
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
.await
.context(BuildKafkaPartitionClientSnafu {
topic,
partition: DEFAULT_PARTITION,
})?;
partition_client
.produce(
vec![Record {
key: None,
value: None,
timestamp: chrono::Utc::now(),
headers: Default::default(),
}],
Compression::Lz4,
)
.await
.context(ProduceRecordSnafu { topic })?;
Ok(())
}
async fn try_create_topic(&self, topic: &String, client: &ControllerClient) -> Result<()> {
match client
.create_topic(
topic.clone(),
self.config.kafka_topic.num_partitions,
self.config.kafka_topic.replication_factor,
self.config.kafka_topic.create_topic_timeout.as_millis() as i32,
)
.await
{
Ok(_) => {
info!("Successfully created topic {}", topic);
Ok(())
}
Err(e) => {
if Self::is_topic_already_exist_err(&e) {
info!("The topic {} already exists", topic);
Ok(())
} else {
error!("Failed to create a topic {}, error {:?}", topic, e);
Err(e).context(CreateKafkaWalTopicSnafu)
}
}
}
}
async fn restore_created_topics(kv_backend: &KvBackendRef) -> Result<Vec<String>> {
kv_backend
.get(CREATED_TOPICS_KEY.as_bytes())
.await?
.map_or_else(
|| Ok(vec![]),
|key_value| serde_json::from_slice(&key_value.value).context(DecodeJsonSnafu),
)
}
async fn persist_created_topics(topics: &[String], kv_backend: &KvBackendRef) -> Result<()> {
let raw_topics = serde_json::to_vec(topics).context(EncodeJsonSnafu)?;
kv_backend
.put(PutRequest {
key: CREATED_TOPICS_KEY.as_bytes().to_vec(),
value: raw_topics,
prev_kv: false,
})
.await
.map(|_| ())
}
fn is_topic_already_exist_err(e: &RsKafkaError) -> bool {
matches!(
e,
&RsKafkaError::ServerError {
protocol_error: TopicAlreadyExists,
..
}
)
}
}
#[cfg(test)]
mod tests {
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
use common_wal::test_util::run_test_with_kafka_wal;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
// Tests that topics can be successfully persisted into the kv backend and can be successfully restored from the kv backend.
#[tokio::test]
async fn test_restore_persisted_topics() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let topic_name_prefix = "greptimedb_wal_topic";
let num_topics = 16;
// Constructs mock topics.
let topics = (0..num_topics)
.map(|topic| format!("{topic_name_prefix}{topic}"))
.collect::<Vec<_>>();
// Persists topics to kv backend.
TopicManager::persist_created_topics(&topics, &kv_backend)
.await
.unwrap();
// Restores topics from kv backend.
let restored_topics = TopicManager::restore_created_topics(&kv_backend)
.await
.unwrap();
assert_eq!(topics, restored_topics);
}
/// Tests that the topic manager could allocate topics correctly.
#[tokio::test]
async fn test_alloc_topics() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
// Constructs topics that should be created.
let topics = (0..256)
.map(|i| format!("test_alloc_topics_{}_{}", i, uuid::Uuid::new_v4()))
.collect::<Vec<_>>();
// Creates a topic manager.
let kafka_topic = KafkaTopicConfig {
replication_factor: broker_endpoints.len() as i16,
..Default::default()
};
let config = MetasrvKafkaConfig {
connection: KafkaConnectionConfig {
broker_endpoints,
..Default::default()
},
kafka_topic,
..Default::default()
};
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let mut manager = TopicManager::new(config.clone(), kv_backend);
// Replaces the default topic pool with the constructed topics.
manager.topic_pool.clone_from(&topics);
// Replaces the default selector with a round-robin selector without shuffling.
manager.topic_selector = Arc::new(RoundRobinTopicSelector::default());
manager.start().await.unwrap();
// Selects exactly the number of `num_topics` topics one by one.
let got = (0..topics.len())
.map(|_| manager.select().unwrap())
.cloned()
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects exactly the number of `num_topics` topics in a batching manner.
let got = manager
.select_batch(topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects more than the number of `num_topics` topics.
let got = manager
.select_batch(2 * topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
let expected = vec![topics.clone(); 2]
.into_iter()
.flatten()
.collect::<Vec<_>>();
assert_eq!(got, expected);
})
})
.await;
}
}

View File

@@ -0,0 +1,159 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common_telemetry::{error, info};
use common_wal::config::kafka::MetasrvKafkaConfig;
use rskafka::client::error::Error as RsKafkaError;
use rskafka::client::error::ProtocolError::TopicAlreadyExists;
use rskafka::client::partition::{Compression, UnknownTopicHandling};
use rskafka::client::{Client, ClientBuilder};
use rskafka::record::Record;
use rskafka::BackoffConfig;
use snafu::ResultExt;
use crate::error::{
BuildKafkaClientSnafu, BuildKafkaCtrlClientSnafu, BuildKafkaPartitionClientSnafu,
CreateKafkaWalTopicSnafu, ProduceRecordSnafu, ResolveKafkaEndpointSnafu, Result,
TlsConfigSnafu,
};
// Each topic only has one partition for now.
// The `DEFAULT_PARTITION` refers to the index of the partition.
const DEFAULT_PARTITION: i32 = 0;
/// Creates topics in kafka.
pub struct KafkaTopicCreator {
client: Client,
/// The number of partitions per topic.
num_partitions: i32,
/// The replication factor of each topic.
replication_factor: i16,
/// The timeout of topic creation in milliseconds.
create_topic_timeout: i32,
}
impl KafkaTopicCreator {
async fn create_topic(&self, topic: &String, client: &Client) -> Result<()> {
let controller = client
.controller_client()
.context(BuildKafkaCtrlClientSnafu)?;
match controller
.create_topic(
topic,
self.num_partitions,
self.replication_factor,
self.create_topic_timeout,
)
.await
{
Ok(_) => {
info!("Successfully created topic {}", topic);
Ok(())
}
Err(e) => {
if Self::is_topic_already_exist_err(&e) {
info!("The topic {} already exists", topic);
Ok(())
} else {
error!("Failed to create a topic {}, error {:?}", topic, e);
Err(e).context(CreateKafkaWalTopicSnafu)
}
}
}
}
async fn append_noop_record(&self, topic: &String, client: &Client) -> Result<()> {
let partition_client = client
.partition_client(topic, DEFAULT_PARTITION, UnknownTopicHandling::Retry)
.await
.context(BuildKafkaPartitionClientSnafu {
topic,
partition: DEFAULT_PARTITION,
})?;
partition_client
.produce(
vec![Record {
key: None,
value: None,
timestamp: chrono::Utc::now(),
headers: Default::default(),
}],
Compression::Lz4,
)
.await
.context(ProduceRecordSnafu { topic })?;
Ok(())
}
/// Prepares topics in Kafka.
/// 1. Creates missing topics.
/// 2. Appends a noop record to each topic.
pub async fn prepare_topics(&self, topics: &[&String]) -> Result<()> {
// Try to create missing topics.
let tasks = topics
.iter()
.map(|topic| async {
self.create_topic(topic, &self.client).await?;
self.append_noop_record(topic, &self.client).await?;
Ok(())
})
.collect::<Vec<_>>();
futures::future::try_join_all(tasks).await.map(|_| ())
}
fn is_topic_already_exist_err(e: &RsKafkaError) -> bool {
matches!(
e,
&RsKafkaError::ServerError {
protocol_error: TopicAlreadyExists,
..
}
)
}
}
pub async fn build_kafka_topic_creator(config: &MetasrvKafkaConfig) -> Result<KafkaTopicCreator> {
// Builds a Kafka client for creating topics.
let backoff_config = BackoffConfig {
init_backoff: config.backoff.init,
max_backoff: config.backoff.max,
base: config.backoff.base as f64,
deadline: config.backoff.deadline,
};
let broker_endpoints = common_wal::resolve_to_ipv4(&config.connection.broker_endpoints)
.await
.context(ResolveKafkaEndpointSnafu)?;
let mut builder = ClientBuilder::new(broker_endpoints).backoff_config(backoff_config);
if let Some(sasl) = &config.connection.sasl {
builder = builder.sasl_config(sasl.config.clone().into_sasl_config());
};
if let Some(tls) = &config.connection.tls {
builder = builder.tls_config(tls.to_tls_config().await.context(TlsConfigSnafu)?)
};
let client = builder
.build()
.await
.with_context(|_| BuildKafkaClientSnafu {
broker_endpoints: config.connection.broker_endpoints.clone(),
})?;
Ok(KafkaTopicCreator {
client,
num_partitions: config.kafka_topic.num_partitions,
replication_factor: config.kafka_topic.replication_factor,
create_topic_timeout: config.kafka_topic.create_topic_timeout.as_millis() as i32,
})
}

View File
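
A crate-internal sketch of how the creator is meant to be driven (the wrapper is hypothetical; the calls are the `build_kafka_topic_creator` and `prepare_topics` shown above):

use common_wal::config::kafka::MetasrvKafkaConfig;

use crate::error::Result;
use crate::wal_options_allocator::topic_creator::build_kafka_topic_creator;

// Hypothetical helper: make sure every topic in `topics` exists in Kafka and
// has a noop record appended, which is exactly what `prepare_topics` does.
async fn ensure_topics(config: &MetasrvKafkaConfig, topics: &[String]) -> Result<()> {
    let creator = build_kafka_topic_creator(config).await?;
    let refs: Vec<&String> = topics.iter().collect();
    creator.prepare_topics(&refs).await
}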

@@ -0,0 +1,166 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use crate::error::Result;
use crate::key::topic_name::{TopicNameKey, TopicNameManager};
use crate::kv_backend::KvBackendRef;
/// Manages topics in kvbackend.
/// Responsible for:
/// 1. Restores and persists topics in kvbackend.
/// 2. Clears topics in legacy format and restores them in the new format.
/// 3. Stores and fetches topic-region mapping in kvbackend.
pub struct KafkaTopicManager {
topic_name_manager: TopicNameManager,
}
impl KafkaTopicManager {
pub fn new(kv_backend: KvBackendRef) -> Self {
Self {
topic_name_manager: TopicNameManager::new(kv_backend.clone()),
}
}
async fn restore_topics(&self) -> Result<Vec<String>> {
self.topic_name_manager.update_legacy_topics().await?;
let topics = self.topic_name_manager.range().await?;
Ok(topics)
}
/// Restores topics from the key-value backend and returns the topics that are not stored in kvbackend.
pub async fn get_topics_to_create<'a>(
&self,
all_topics: &'a [String],
) -> Result<Vec<&'a String>> {
let existing_topics = self.restore_topics().await?;
let existing_topic_set = existing_topics.iter().collect::<HashSet<_>>();
let mut topics_to_create = Vec::with_capacity(all_topics.len());
for topic in all_topics {
if !existing_topic_set.contains(topic) {
topics_to_create.push(topic);
}
}
Ok(topics_to_create)
}
/// Persists topics into the key-value backend.
pub async fn persist_topics(&self, topics: &[String]) -> Result<()> {
self.topic_name_manager
.batch_put(
topics
.iter()
.map(|topic| TopicNameKey::new(topic))
.collect(),
)
.await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::key::LEGACY_TOPIC_KEY_PREFIX;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::rpc::store::PutRequest;
#[tokio::test]
async fn test_restore_legacy_persisted_topics() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let topic_kvbackend_manager = KafkaTopicManager::new(kv_backend.clone());
let all_topics = (0..16)
.map(|i| format!("greptimedb_wal_topic_{}", i))
.collect::<Vec<_>>();
// No legacy topics.
let mut topics_to_be_created = topic_kvbackend_manager
.get_topics_to_create(&all_topics)
.await
.unwrap();
topics_to_be_created.sort();
let mut expected = all_topics.iter().collect::<Vec<_>>();
expected.sort();
assert_eq!(expected, topics_to_be_created);
// A topic pool with 16 topics stored in kvbackend in legacy format.
let topics = "[\"greptimedb_wal_topic_0\",\"greptimedb_wal_topic_1\",\"greptimedb_wal_topic_2\",\"greptimedb_wal_topic_3\",\"greptimedb_wal_topic_4\",\"greptimedb_wal_topic_5\",\"greptimedb_wal_topic_6\",\"greptimedb_wal_topic_7\",\"greptimedb_wal_topic_8\",\"greptimedb_wal_topic_9\",\"greptimedb_wal_topic_10\",\"greptimedb_wal_topic_11\",\"greptimedb_wal_topic_12\",\"greptimedb_wal_topic_13\",\"greptimedb_wal_topic_14\",\"greptimedb_wal_topic_15\"]";
let put_req = PutRequest {
key: LEGACY_TOPIC_KEY_PREFIX.as_bytes().to_vec(),
value: topics.as_bytes().to_vec(),
prev_kv: true,
};
let res = kv_backend.put(put_req).await.unwrap();
assert!(res.prev_kv.is_none());
let topics_to_be_created = topic_kvbackend_manager
.get_topics_to_create(&all_topics)
.await
.unwrap();
assert!(topics_to_be_created.is_empty());
// Legacy topics should be deleted after restoring.
let legacy_topics = kv_backend
.get(LEGACY_TOPIC_KEY_PREFIX.as_bytes())
.await
.unwrap();
assert!(legacy_topics.is_none());
// Then we can restore it from the new format.
let mut restored_topics = topic_kvbackend_manager.restore_topics().await.unwrap();
restored_topics.sort();
let mut expected = all_topics.clone();
expected.sort();
assert_eq!(expected, restored_topics);
}
// Tests that topics can be successfully persisted into the kv backend and can be successfully restored from the kv backend.
#[tokio::test]
async fn test_restore_persisted_topics() {
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let topic_name_prefix = "greptimedb_wal_topic";
let num_topics = 16;
let all_topics = (0..num_topics)
.map(|i| format!("{}_{}", topic_name_prefix, i))
.collect::<Vec<_>>();
// Constructs mock topics.
let topic_kvbackend_manager = KafkaTopicManager::new(kv_backend);
let mut topics_to_be_created = topic_kvbackend_manager
.get_topics_to_create(&all_topics)
.await
.unwrap();
topics_to_be_created.sort();
let mut expected = all_topics.iter().collect::<Vec<_>>();
expected.sort();
assert_eq!(expected, topics_to_be_created);
// Persists topics to kv backend.
topic_kvbackend_manager
.persist_topics(&all_topics)
.await
.unwrap();
let topics_to_be_created = topic_kvbackend_manager
.get_topics_to_create(&all_topics)
.await
.unwrap();
assert!(topics_to_be_created.is_empty());
}
}

View File
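
The intended reconcile loop around this manager, as a crate-internal sketch (the helper is hypothetical; `KafkaTopicPool::activate` in the next file performs the same steps together with the actual topic creation):

use crate::error::Result;
use crate::wal_options_allocator::topic_manager::KafkaTopicManager;

// Hypothetical helper: report how many of `all_topics` still need to be
// created in Kafka, then record the full set once they exist.
async fn reconcile(manager: &KafkaTopicManager, all_topics: &[String]) -> Result<usize> {
    let to_create = manager.get_topics_to_create(all_topics).await?;
    let num_missing = to_create.len();
    // ... create the missing topics in Kafka via the topic creator ...
    manager.persist_topics(all_topics).await?;
    Ok(num_missing)
}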

@@ -0,0 +1,190 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::{self, Formatter};
use std::sync::Arc;
use common_wal::config::kafka::MetasrvKafkaConfig;
use common_wal::TopicSelectorType;
use snafu::ensure;
use crate::error::{InvalidNumTopicsSnafu, Result};
use crate::kv_backend::KvBackendRef;
use crate::wal_options_allocator::selector::{RoundRobinTopicSelector, TopicSelectorRef};
use crate::wal_options_allocator::topic_creator::KafkaTopicCreator;
use crate::wal_options_allocator::topic_manager::KafkaTopicManager;
/// Topic pool for kafka remote wal.
/// Responsible for:
/// 1. Persists topics in kvbackend.
/// 2. Creates topics in kafka.
/// 3. Selects topics for regions.
pub struct KafkaTopicPool {
pub(crate) topics: Vec<String>,
// Manages topics in kvbackend.
topic_manager: KafkaTopicManager,
// Creates topics in kafka.
topic_creator: KafkaTopicCreator,
pub(crate) selector: TopicSelectorRef,
auto_create_topics: bool,
}
impl fmt::Debug for KafkaTopicPool {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_struct("KafkaTopicPool")
.field("topics", &self.topics)
.field("auto_create_topics", &self.auto_create_topics)
.finish()
}
}
impl KafkaTopicPool {
pub fn new(
config: &MetasrvKafkaConfig,
kvbackend: KvBackendRef,
topic_creator: KafkaTopicCreator,
) -> Self {
let num_topics = config.kafka_topic.num_topics;
let prefix = config.kafka_topic.topic_name_prefix.clone();
let topics = (0..num_topics)
.map(|i| format!("{}_{}", prefix, i))
.collect();
let selector = match config.kafka_topic.selector_type {
TopicSelectorType::RoundRobin => RoundRobinTopicSelector::with_shuffle(),
};
let topic_manager = KafkaTopicManager::new(kvbackend);
Self {
topics,
topic_manager,
topic_creator,
selector: Arc::new(selector),
auto_create_topics: config.auto_create_topics,
}
}
/// Tries to activate the topic pool when metasrv becomes the leader.
/// First tries to restore persisted topics from the kv backend.
/// If not enough topics are retrieved, it contacts the Kafka cluster to create the missing topics.
pub async fn activate(&self) -> Result<()> {
if !self.auto_create_topics {
return Ok(());
}
let num_topics = self.topics.len();
ensure!(num_topics > 0, InvalidNumTopicsSnafu { num_topics });
let topics_to_be_created = self
.topic_manager
.get_topics_to_create(&self.topics)
.await?;
if !topics_to_be_created.is_empty() {
self.topic_creator
.prepare_topics(&topics_to_be_created)
.await?;
self.topic_manager.persist_topics(&self.topics).await?;
}
Ok(())
}
/// Selects one topic from the topic pool through the topic selector.
pub fn select(&self) -> Result<&String> {
self.selector.select(&self.topics)
}
/// Selects a batch of topics from the topic pool through the topic selector.
pub fn select_batch(&self, num_topics: usize) -> Result<Vec<&String>> {
(0..num_topics)
.map(|_| self.selector.select(&self.topics))
.collect()
}
}
#[cfg(test)]
mod tests {
use common_wal::config::kafka::common::{KafkaConnectionConfig, KafkaTopicConfig};
use common_wal::test_util::run_test_with_kafka_wal;
use super::*;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::wal_options_allocator::topic_creator::build_kafka_topic_creator;
/// Tests that the topic manager could allocate topics correctly.
#[tokio::test]
async fn test_alloc_topics() {
run_test_with_kafka_wal(|broker_endpoints| {
Box::pin(async {
// Constructs topics that should be created.
let topics = (0..256)
.map(|i| format!("test_alloc_topics_{}_{}", i, uuid::Uuid::new_v4()))
.collect::<Vec<_>>();
// Creates a topic manager.
let kafka_topic = KafkaTopicConfig {
replication_factor: broker_endpoints.len() as i16,
..Default::default()
};
let config = MetasrvKafkaConfig {
connection: KafkaConnectionConfig {
broker_endpoints,
..Default::default()
},
kafka_topic,
..Default::default()
};
let kv_backend = Arc::new(MemoryKvBackend::new()) as KvBackendRef;
let topic_creator = build_kafka_topic_creator(&config).await.unwrap();
let mut topic_pool = KafkaTopicPool::new(&config, kv_backend, topic_creator);
// Replaces the default topic pool with the constructed topics.
topic_pool.topics.clone_from(&topics);
// Replaces the default selector with a round-robin selector without shuffling.
topic_pool.selector = Arc::new(RoundRobinTopicSelector::default());
topic_pool.activate().await.unwrap();
// Selects exactly the number of `num_topics` topics one by one.
let got = (0..topics.len())
.map(|_| topic_pool.select().unwrap())
.cloned()
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects exactly the number of `num_topics` topics in a batching manner.
let got = topic_pool
.select_batch(topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
assert_eq!(got, topics);
// Selects more than the number of `num_topics` topics.
let got = topic_pool
.select_batch(2 * topics.len())
.unwrap()
.into_iter()
.map(ToString::to_string)
.collect::<Vec<_>>();
let expected = vec![topics.clone(); 2]
.into_iter()
.flatten()
.collect::<Vec<_>>();
assert_eq!(got, expected);
})
})
.await;
}
}

View File
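
And the consumer side of the pool, sketched crate-internally with a hypothetical helper: activate once when metasrv becomes leader, then hand out one topic per region.

use crate::error::Result;
use crate::wal_options_allocator::topic_pool::KafkaTopicPool;

// Hypothetical helper: allocate one topic per region, round-robin over the
// pool's configured topics.
async fn topics_for_regions(pool: &KafkaTopicPool, num_regions: usize) -> Result<Vec<String>> {
    pool.activate().await?;
    let selected = pool.select_batch(num_regions)?;
    Ok(selected.into_iter().cloned().collect())
}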

@@ -237,6 +237,15 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to register UDF: {}", name))]
RegisterUdf {
name: String,
#[snafu(source)]
error: DataFusionError,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -268,7 +277,8 @@ impl ErrorExt for Error {
Error::MissingTableMutationHandler { .. }
| Error::MissingProcedureServiceHandler { .. }
| Error::MissingFlowServiceHandler { .. } => StatusCode::Unexpected,
| Error::MissingFlowServiceHandler { .. }
| Error::RegisterUdf { .. } => StatusCode::Unexpected,
Error::UnsupportedInputDataType { .. }
| Error::TypeCast { .. }

View File
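
The new variant is raised through the usual snafu `context` pattern; a hedged sketch (the call site and the registration stand-in are hypothetical; `RegisterUdfSnafu` is the selector snafu generates for the variant above):

use snafu::ResultExt;

use crate::error::{RegisterUdfSnafu, Result};

// Hypothetical call site: wrap the DataFusionError returned by registration.
fn register_udf_checked(name: &str) -> Result<()> {
    do_register(name).context(RegisterUdfSnafu { name })?;
    Ok(())
}

// Stand-in for the actual registration call; it returns a DataFusionError on
// failure, matching the variant's `#[snafu(source)]` field.
fn do_register(_name: &str) -> datafusion_common::Result<()> {
    Ok(())
}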

@@ -92,10 +92,7 @@ pub fn rename_logical_plan_columns(
};
let (qualifier_rename, field_rename) =
match plan.schema().qualified_field_from_column(&old_column) {
Ok(qualifier_and_field) => qualifier_and_field,
Err(err) => return Err(err),
};
plan.schema().qualified_field_from_column(&old_column)?;
for (qualifier, field) in plan.schema().iter() {
if qualifier.eq(&qualifier_rename) && field.as_ref() == field_rename {
@@ -214,8 +211,7 @@ mod tests {
];
// call the function
let result = (df_udf.fun())(&args).unwrap();
let result = df_udf.invoke_batch(&args, 4).unwrap();
match result {
DfColumnarValue::Array(arr) => {
let arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
@@ -308,7 +304,7 @@ mod tests {
Projection: person.id AS a, person.name AS b
Filter: person.id > Int32(500)
TableScan: person"#,
format!("\n{:?}", new_plan)
format!("\n{}", new_plan)
);
}

View File

@@ -39,6 +39,10 @@ pub enum TypeSignature {
Any(usize),
/// One of a list of signatures
OneOf(Vec<TypeSignature>),
/// Zero arguments.
/// This is the new signature for functions with zero arguments.
/// TODO(discord9): make all other usize nonzero usize
NullAry,
}
///The Signature of a function defines its supported input types as well as its volatility.
@@ -112,6 +116,13 @@ impl Signature {
volatility,
}
}
pub fn nullary(volatility: Volatility) -> Self {
Signature {
type_signature: TypeSignature::NullAry,
volatility,
}
}
}
/// Conversions between the datafusion signature and our signature
@@ -122,16 +133,25 @@ impl From<TypeSignature> for DfTypeSignature {
DfTypeSignature::Variadic(concrete_types_to_arrow_types(types))
}
TypeSignature::Uniform(n, types) => {
if n == 0 {
return DfTypeSignature::NullAry;
}
DfTypeSignature::Uniform(n, concrete_types_to_arrow_types(types))
}
TypeSignature::Exact(types) => {
DfTypeSignature::Exact(concrete_types_to_arrow_types(types))
}
TypeSignature::Any(n) => DfTypeSignature::Any(n),
TypeSignature::Any(n) => {
if n == 0 {
return DfTypeSignature::NullAry;
}
DfTypeSignature::Any(n)
}
TypeSignature::OneOf(ts) => {
DfTypeSignature::OneOf(ts.into_iter().map(Into::into).collect())
}
TypeSignature::VariadicAny => DfTypeSignature::VariadicAny,
TypeSignature::NullAry => DfTypeSignature::NullAry,
}
}
}

View File
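
A short sketch of the zero-argument signature in use (crate-internal; the `crate::signature` path and the `Volatility` re-export are assumptions based on the constructors above):

use crate::signature::{Signature, TypeSignature, Volatility};

// A function taking no arguments now declares itself with the dedicated
// variant rather than `Any(0)` or `Uniform(0, ...)`, all of which the
// conversion above maps to the DataFusion nullary signature.
fn now_signature() -> Signature {
    Signature::nullary(Volatility::Immutable)
}

fn is_nullary(sig: &Signature) -> bool {
    matches!(sig.type_signature, TypeSignature::NullAry)
}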

@@ -119,6 +119,10 @@ impl ExecutionPlan for StreamScanAdapter {
.ok_or_else(|| DataFusionError::Execution("Stream already exhausted".to_string()))?;
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
}
fn name(&self) -> &str {
"StreamScanAdapter"
}
}
#[cfg(test)]

View File

@@ -17,6 +17,7 @@ use std::slice;
use std::sync::Arc;
use datafusion::arrow::util::pretty::pretty_format_batches;
use datatypes::arrow::array::RecordBatchOptions;
use datatypes::prelude::DataType;
use datatypes::schema::SchemaRef;
use datatypes::value::Value;
@@ -73,6 +74,21 @@ impl RecordBatch {
}
}
/// Create an empty [`RecordBatch`] from `schema` with `num_rows`.
pub fn new_with_count(schema: SchemaRef, num_rows: usize) -> Result<Self> {
let df_record_batch = DfRecordBatch::try_new_with_options(
schema.arrow_schema().clone(),
vec![],
&RecordBatchOptions::new().with_row_count(Some(num_rows)),
)
.context(error::NewDfRecordBatchSnafu)?;
Ok(RecordBatch {
schema,
columns: vec![],
df_record_batch,
})
}
pub fn try_project(&self, indices: &[usize]) -> Result<Self> {
let schema = Arc::new(self.schema.try_project(indices).context(DataTypesSnafu)?);
let mut columns = Vec::with_capacity(indices.len());

View File
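
The new `new_with_count` constructor leans on arrow's `RecordBatchOptions`; the underlying arrow mechanism on its own looks roughly like this (standalone, using the `arrow` crate directly):

use std::sync::Arc;

use arrow::array::RecordBatchOptions;
use arrow::datatypes::Schema;
use arrow::record_batch::RecordBatch;

fn main() {
    // A batch with zero columns can still carry an explicit row count, which
    // is what the wrapper above relies on.
    let schema = Arc::new(Schema::empty());
    let options = RecordBatchOptions::new().with_row_count(Some(42));
    let batch = RecordBatch::try_new_with_options(schema, vec![], &options).unwrap();
    assert_eq!(batch.num_rows(), 42);
    assert_eq!(batch.num_columns(), 0);
}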

@@ -142,16 +142,25 @@ impl Builder {
impl BuilderBuild<DefaultRuntime> for Builder {
fn build(&mut self) -> Result<DefaultRuntime> {
let runtime = self
let builder = self
.builder
.enable_all()
.thread_name(self.thread_name.clone())
.on_thread_start(on_thread_start(self.thread_name.clone()))
.on_thread_stop(on_thread_stop(self.thread_name.clone()))
.on_thread_park(on_thread_park(self.thread_name.clone()))
.on_thread_unpark(on_thread_unpark(self.thread_name.clone()))
.build()
.context(BuildRuntimeSnafu)?;
.on_thread_unpark(on_thread_unpark(self.thread_name.clone()));
let runtime = if cfg!(debug_assertions) {
// Set the thread stack size to 8 MB so it won't overflow under heavy stack usage in debug mode.
// This is necessary to avoid stack overflow while running sqlness.
// https://github.com/rust-lang/rust/issues/34283
builder
.thread_stack_size(8 * 1024 * 1024)
.build()
.context(BuildRuntimeSnafu)?
} else {
builder.build().context(BuildRuntimeSnafu)?
};
let name = self.runtime_name.clone();
let handle = runtime.handle().clone();

View File
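
The same tokio knob in isolation, for anyone reproducing the debug-only stack bump outside this builder (standalone sketch):

use tokio::runtime::Builder;

fn main() {
    let mut builder = Builder::new_multi_thread();
    builder.enable_all().thread_name("example-worker");
    if cfg!(debug_assertions) {
        // Worker threads default to a 2 MiB stack; debug builds of deeply
        // recursive code can blow past that.
        builder.thread_stack_size(8 * 1024 * 1024);
    }
    let runtime = builder.build().expect("failed to build runtime");
    runtime.block_on(async { println!("running on the example runtime") });
}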

@@ -16,10 +16,10 @@ use std::sync::Arc;
use async_trait::async_trait;
use bytes::{Buf, Bytes, BytesMut};
use datafusion::catalog::CatalogProviderList;
use datafusion::execution::context::SessionState;
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion::execution::SessionStateBuilder;
use datafusion::prelude::SessionConfig;
use datafusion_expr::LogicalPlan;
use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
use datafusion_substrait::logical_plan::producer::to_substrait_plan;
@@ -41,13 +41,10 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
async fn decode<B: Buf + Send>(
&self,
message: B,
catalog_list: Arc<dyn CatalogProviderList>,
state: SessionState,
) -> Result<Self::Plan, Self::Error> {
let mut context = SessionContext::new_with_state(state);
context.register_catalog_list(catalog_list);
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
let df_plan = from_substrait_plan(&context, &plan)
let df_plan = from_substrait_plan(&state, &plan)
.await
.context(DecodeDfPlanSnafu)?;
Ok(df_plan)
@@ -72,11 +69,12 @@ impl DFLogicalSubstraitConvertor {
plan: &LogicalPlan,
serializer: impl SerializerRegistry + 'static,
) -> Result<Box<Plan>, Error> {
let session_state =
SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()))
.with_serializer_registry(Arc::new(serializer));
let context = SessionContext::new_with_state(session_state);
to_substrait_plan(plan, &context).context(EncodeDfPlanSnafu)
let state = SessionStateBuilder::new()
.with_config(SessionConfig::new())
.with_runtime_env(Arc::new(RuntimeEnv::default()))
.with_default_features()
.with_serializer_registry(Arc::new(serializer))
.build();
to_substrait_plan(plan, &state).context(EncodeDfPlanSnafu)
}
}

View File
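
The `SessionStateBuilder` path shown standalone, without the custom serializer registry; `with_default_features()` restores the stock planners and function registry that the removed `SessionState::new_with_config_rt` constructor set up implicitly:

use std::sync::Arc;

use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::execution::SessionStateBuilder;
use datafusion::prelude::SessionConfig;

fn main() {
    let state = SessionStateBuilder::new()
        .with_config(SessionConfig::new())
        .with_runtime_env(Arc::new(RuntimeEnv::default()))
        .with_default_features()
        .build();
    // The state can be passed to from_substrait_plan / to_substrait_plan
    // directly, with no SessionContext wrapper needed.
    println!("session id: {}", state.session_id());
}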

@@ -22,6 +22,7 @@ use promql::extension_plan::{
EmptyMetric, InstantManipulate, RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize,
};
#[derive(Debug)]
pub struct ExtensionSerializer;
impl SerializerRegistry for ExtensionSerializer {

Some files were not shown because too many files have changed in this diff.