Compare commits

...

44 Commits

Author SHA1 Message Date
Yingwen
3f6cbc378d ci: Disable arm64 release temporarily (#1141) 2023-03-08 19:13:00 +08:00
Yingwen
9619940569 ci: Allow error when building release for non-x86 platform (#1140) 2023-03-08 18:12:06 +08:00
Weny Xu
ed8252157a chore: code styling (#1137)
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-08 08:10:12 +00:00
Ruihang Xia
3e0fb7e75b test: ignore two test cases due to arrow-datafusion#5513 (#1138)
* test: ignore two test cases due to arrow-datafusion#5513

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-08 07:29:34 +00:00
Bohan Wu
ba3ce436df refactor(SST): UUID as id in FileMeta (#1116)
* feat(SST): use a newType named FileId for FileMeta

* chore: rename some functions

* fix: compatible for previous FileMeta format

* fix: alias for file_id when getting deserialized
2023-03-08 14:27:20 +08:00
Eugene Tolbakov
b31a6cb506 refactor: replace tempdir with tempfile (#1123)
* refactor: replace tempdir with tempfile

* refactor(query): move tempfile dependency under the workspace's Cargo.toml

* refactor(tempfile): create common-test-util

* refactor(tempfile): fix toml format

* refactor(tempfile): remove tempfile out of dependencies

* refactor(tempfile): fix incorrect toml
2023-03-08 11:15:56 +08:00
SSebo
95090592f0 feat: mysql prepare replacing sql placeholder to param (#1086)
* feat: mysql prepare by replace ? in sql to param

* chore: mysql prepare statment support time param

* chore: prepare test more types

* chore: add TODO
2023-03-08 11:02:29 +08:00
Ruihang Xia
3a527c0fd5 feat: impl proc macro range_fn and some aggr_over_time functions (#1072)
* impl range_fn proc macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl some aggr_over_time fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl present_over_time and absent_over_time

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* accomplish planner, and correct type cast

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* document the macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update irate/idelta test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-07 23:39:45 +08:00
elijah
819b60ca13 feat(datatypes): implement VectorOp::take (#1115)
* feat: add take index method for VectorOp

* chore: make clippy happy

* chore: make clippy happy

* chore: improve the code

* chore: improve the code

* chore: add take null test

* chore: fix clippy
2023-03-07 19:27:33 +08:00
Weny Xu
7169fe2989 feat: implement Copy From (#1064) 2023-03-07 17:54:11 +08:00
Zheming Li
b70672be77 feat: track disk usage of regions (#1125)
* feat: track disk usage of regions

Signed-off-by: Zheming Li <nkdudu@126.com>

* calculate disk usage when call

* add default on file meta

---------

Signed-off-by: Zheming Li <nkdudu@126.com>
2023-03-07 17:13:12 +08:00
Lei, HUANG
a4c01f4a3a feat: memory profiling (#1124)
* feat: use jemalloc as default allocator

* feat: add feature for mem-prof

* feat: add errors

* make common-mem-prof optional dep

* fix: toml format

* doc: add profile doc

* fix: typo
2023-03-07 17:12:51 +08:00
Weny Xu
bd98a26cca chore: bump greptime-proto to latest(ad01872) (#1102) 2023-03-07 10:52:42 +08:00
shuiyisong
1b4236d698 refactor: use split instead of serde_urlencoded in http auth (#1110)
* refactor: change from urlencoded to regex

* refactor: change from urlencoded to regex

* chore: add unit test

* chore: update comment

* chore: remove local benchmark test

* chore: minor fix

* chore: remove unused dep
2023-03-07 10:51:47 +08:00
Lei, HUANG
e8cc9b4b29 test: add manifest compatibility tests (#1130)
* tests: add manifest compatibility tests

* fix: clippy
2023-03-06 19:31:54 +08:00
discord9
379f581780 test: add Integrated Test for Coprocessor& fix minor bugs (#1122)
* feat: cache `Runtime`

* fix: coprstream schema not set

* test: integrated tests for Coprocessor

* fix: UDF fixed

* style: remove unused import

* chore: remove more unused import

* feat: `filter`, (r)floordiv for Vector

* chore: CR advices

* feat: auto convert to `lit`

* chore: fix typo

* feat: from&to `pyarrow.array`

* feat: allow `pyarrow.array` as args to builtins

* chore: cargo fmt

* test: CI add `pyarrow`

* test: install Python&PyArrow in CI

* test: not cache depend for now

* chore: CR advices

* test: fix name

* style: rename
2023-03-06 19:20:59 +08:00
fys
ff6cfe8e70 refactor: move the batch_get to KvStore trait (#1029)
* move batch_get from KvStoreExt to KvStore

* add some unit tests

* add some unit test

* add some unit tests

* expose batch_get grpc method
2023-03-06 17:35:43 +08:00
Igor Morozov
5a397917c0 docs(contributingmd): add run tests commands (#1129)
* docs(contributingmd): add run tests commands

* docs(contributingmd): add link to nextest website

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
2023-03-06 15:54:16 +08:00
fys
559880cb84 fix: can not find catalog when create table (#1118)
* fix: get catalog by name in RemoteCatalogManager

* cr

* cr

* cr

* fix: ut failed
2023-03-06 14:44:40 +08:00
Ruihang Xia
b76b27f3bf refactor: try to remove unnecessary tests in error mod (#750)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-06 12:31:30 +08:00
yuanbohan
d4e0dc3685 feat: specify prom server start addr (#1111)
* feat: specify promql server start addr

* refactor: rename promql to prom in Prometheus API server scenario
2023-03-06 11:07:21 +08:00
Eugene Tolbakov
b022556b79 fix: apply ttl and write_buffer_size options when a table is created via procedure (#1117)
* fix: apply ttl and write_buffer_size options when a table is created via procedure

* fix: address code review suggestion

* fix: use borrowing of table_options correctly
2023-03-05 19:37:23 +08:00
shuiyisong
bd065ea6e8 fix: remove incorrect continue (#1114) 2023-03-02 19:52:17 +08:00
yuanbohan
9a87f5edf8 fix(grpc): support timestamp precision (#1113) 2023-03-02 17:33:59 +08:00
Weny Xu
e851b6d019 feat: implement Copy From parser (#1092)
* feat: implement Copy From parser

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-03-02 14:03:13 +08:00
Ruihang Xia
e7b92f24e8 feat: impl EmptyMetric plan and time() function (#1100)
* impl EmptyMetric plan

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl planner part

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* adapt new datafusion changes

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-02 03:15:55 +00:00
Igor Morozov
4b8db408cf style(contributingmd): fix markdown issues and typos (#1107)
* style(contributingmd): fix markdown issues and typos

* style(contributingmd): remove code blocks in lists
2023-03-01 20:00:36 +08:00
Yingwen
98659899c0 refactor: Move mito engine tests to a separate file (#1104)
* refactor(mito): Move tests to a separate file

* chore(query): Remove empty mod function
2023-03-01 11:46:39 +00:00
Ruihang Xia
b1311801da ci: update breaking-change labeler (#1109)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-01 19:24:21 +08:00
Yingwen
f1b65d9b77 test: fix datanode::test_read_from_config_file (#1106)
* test: Fix datanode::test_read_from_config_file

* test: frontend and metasrv don't read example toml file
2023-03-01 18:31:40 +08:00
Ruihang Xia
d5a2a26916 chore(deps): bump sqlness to v0.4 (#1101)
deps: bump sqlness to v0.4

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-01 17:27:16 +08:00
Ning Sun
8e7e68708f docs: correct readme format (#1105)
* docs: correct readme format

* ci: fix config name
2023-03-01 16:59:11 +08:00
Ruihang Xia
9c1118b06d ci: adjust title labeler's rule (#1079)
* ci: adjust title labeler's rule

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
2023-03-01 15:16:21 +08:00
Yingwen
3fb93efbd0 docs: Document fields in the config examples (#1098)
* docs: Add comments to standalone config example

* docs: Add comments to datanode config example

* docs: Add comments to frontend config example

* docs: Add comments to meta-srv config example

* docs: Use "GB" instead of "GiB"

* docs: Add link to the selector doc

* docs: Fix grammar
2023-03-01 15:14:08 +08:00
Yingwen
3fd9c2f144 feat: Store error in procedure state (#1062)
* docs: Change comment position

* refactor(procedure): Store error in ProcedureState

* test: Mock instance with procedure enabled

* feat: Add wait method to wait for procedure

* test(datanode): Test create table by procedure

* chore: Fix clippy
2023-03-01 14:37:50 +08:00
Ning Sun
75e48c5f20 ci: fix apidoc generation 2023-03-01 14:09:47 +08:00
Ning Sun
d402f83442 ci: generate apidocs when pushing to default branch (#1093)
* ci: generate apidocs when pushing to default branch

* ci: require clippy before running tests

* fix: resolve new clippy warnings on primitive slice

* fix: resolve more clippy warnings

* Update .github/workflows/apidoc.yml

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* ci: add an index html to redirect

---------

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-01 13:18:26 +08:00
discord9
c5c6494e0b feat: add PyO3(Hence CPython as a Optional Backend (#976)
* refactor: ffi_types

* style: fmt

* refactor: use `String` for return when possible

* todo: vector_impl

* feat: pyobj_try_typed_val

* refactor: more backend indep function

* feat: +-*/ magic methods

* refactor: copr

* style: fmt

* feat: add paired tests

* refactor: more

* refactor: move inside `python` folder

* refactor: all but test code

* feat: builtins for PyO3

* chore: add licenses

* chore: remove unused&add todos

* refactor: remove old files

* chore: mark unused

* chore: fmt

* chore: license

* feat: query in PyO3

* test: paired testcases for rspy&pyo3

* feat: PyDataFrame(Untested)

* feat: some allow_threads

* style: fmt

* style: add license

* feat: rebase manually of #962

* feat: more `allow_threads`

* chore: typo

* chore: remove some `TODO`

* test: allow margin of epsilon

* chore: code review advices

* chore: more CR adjust

* chore: more adjust

* feat: kwargs&its test

* chore: remove some `dbg!`

* chore: allow params

* fix: put `dataframe` into scope

* chore: newline

* fix: adjust after rebase

* fix: test serde skip attr

* style: taplo

* feat: add `pyo3_backend` feature

* doc: update CI&readme
2023-03-01 10:45:55 +08:00
shuiyisong
dc50095af3 fix: use catalog from connection (#1099)
* fix: using schema instead of full database

* fix: using schema instead of full database

* fix: using schema instead of full database

* chore: add debug log

* chore: remove debug log

* chore: remove debug log

* chore: fix cr
2023-03-01 10:34:57 +08:00
LFC
8cd69f441e feat: REPL issues logical plan to DB (#1097) 2023-02-28 16:59:48 +08:00
Weny Xu
f52fc9b7d4 fix: fix panic when the root is not specified (#1089) 2023-02-28 10:54:52 +08:00
shuiyisong
50d2685365 fix: fix catalog parsing issue (#1091)
fix: try fix catalog parsing issue
2023-02-27 22:51:49 +08:00
LFC
11d45e2918 refactor: upgrade DataFusion, Arrow and Sqlparser (#1074)
* refactor: upgrade DataFusion, Arrow and Sqlparser

* fix: resolve PR comments
2023-02-27 22:20:08 +08:00
shuiyisong
30287e7e41 fix: continue if parsing err catalog (#1090)
* fix: continue if parsing err catalog

* fix: change from warn to error
2023-02-27 11:28:45 +00:00
323 changed files with 12049 additions and 4755 deletions


@@ -0,0 +1,13 @@
{
"LABEL": {
"name": "breaking change",
"color": "D93F0B"
},
"CHECKS": {
"regexp": "^(?:(?!!:).)*$",
"ignoreLabels": [
"ignore-title"
],
"alwaysPassCI": true
}
}


@@ -1,10 +1,12 @@
{
"LABEL": {
"name": "Invalid PR Title",
"color": "B60205"
},
"CHECKS": {
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?:.*",
"ignoreLabels" : ["ignore-title"]
}
"LABEL": {
"name": "Invalid PR Title",
"color": "B60205"
},
"CHECKS": {
"regexp": "^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\\(.*\\))?\\!?:.*",
"ignoreLabels": [
"ignore-title"
]
}
}
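
The only functional change in this hunk is the `\!?` added to the `regexp`, so conventional-commit titles that carry the breaking-change marker (for example `feat!: …`) now pass the title check; the `ignoreLabels` lines are only reformatted. Below is a minimal sketch, not repository code, that compares the old and new patterns with the Rust `regex` crate against two made-up titles.
```
// Sketch only: evaluate the old and new title patterns from the config above.
use regex::Regex;

fn main() {
    let old = Regex::new(
        r"^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\(.*\))?:.*",
    )
    .unwrap();
    let new = Regex::new(
        r"^(feat|fix|test|refactor|chore|style|docs|perf|build|ci|revert)(\(.*\))?\!?:.*",
    )
    .unwrap();

    // Made-up titles: the second one only matches the new pattern.
    for title in ["feat: implement Copy From", "feat!: change the wire format"] {
        println!("{title}: old={} new={}", old.is_match(title), new.is_match(title));
    }
}
```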

.github/workflows/apidoc.yml

@@ -0,0 +1,42 @@
on:
push:
branches:
- develop
paths-ignore:
- 'docs/**'
- 'config/**'
- '**.md'
- '.dockerignore'
- 'docker/**'
- '.gitignore'
name: Build API docs
env:
RUST_TOOLCHAIN: nightly-2023-02-26
jobs:
apidoc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- run: cargo doc --workspace --no-deps --document-private-items
- run: |
cat <<EOF > target/doc/index.html
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="0; url='greptime/'" />
</head>
<body></body></html>
EOF
- name: Publish dist directory
uses: JamesIves/github-pages-deploy-action@v4
with:
folder: target/doc


@@ -24,7 +24,7 @@ on:
name: CI
env:
RUST_TOOLCHAIN: nightly-2023-02-14
RUST_TOOLCHAIN: nightly-2023-02-26
jobs:
typos:
@@ -116,6 +116,7 @@ jobs:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
needs: [clippy]
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
@@ -131,7 +132,7 @@ jobs:
ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
@@ -188,6 +189,7 @@ jobs:
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
needs: [clippy]
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
@@ -205,10 +207,16 @@ jobs:
uses: Swatinem/rust-cache@v2
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Collect coverage data
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
RUST_BACKTRACE: 1


@@ -18,3 +18,12 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false
configuration_path: ".github/pr-title-checker-config.json"
breaking:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: thehanimo/pr-title-checker@v1.3.4
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
pass_on_octokit_error: false
configuration_path: ".github/pr-title-breaking-change-label-config.json"


@@ -10,7 +10,7 @@ on:
name: Release
env:
RUST_TOOLCHAIN: nightly-2023-02-14
RUST_TOOLCHAIN: nightly-2023-02-26
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
@@ -30,15 +30,15 @@ jobs:
- arch: x86_64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-amd64
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-darwin-arm64
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-darwin-amd64
# - arch: aarch64-unknown-linux-gnu
# os: ubuntu-2004-16-cores
# file: greptime-linux-arm64
# - arch: aarch64-apple-darwin
# os: macos-latest
# file: greptime-darwin-arm64
# - arch: x86_64-apple-darwin
# os: macos-latest
# file: greptime-darwin-amd64
runs-on: ${{ matrix.os }}
if: github.repository == 'GreptimeTeam/greptimedb'
steps:
@@ -76,10 +76,10 @@ jobs:
ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
nohup etcd >/tmp/etcd.log 2>&1 &
@@ -193,17 +193,17 @@ jobs:
tar xvf greptime-linux-amd64.tgz
rm greptime-linux-amd64.tgz
- name: Download arm64 binary
uses: actions/download-artifact@v3
with:
name: greptime-linux-arm64
path: arm64
# - name: Download arm64 binary
# uses: actions/download-artifact@v3
# with:
# name: greptime-linux-arm64
# path: arm64
- name: Unzip the arm64 artifacts
run: |
cd arm64
tar xvf greptime-linux-arm64.tgz
rm greptime-linux-arm64.tgz
# - name: Unzip the arm64 artifacts
# run: |
# cd arm64
# tar xvf greptime-linux-arm64.tgz
# rm greptime-linux-arm64.tgz
- name: Login to UCloud Container Registry
uses: docker/login-action@v2
@@ -245,7 +245,8 @@ jobs:
context: .
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
# platforms: linux/amd64,linux/arm64
platforms: linux/amd64
tags: |
greptime/greptimedb:latest
greptime/greptimedb:${{ env.IMAGE_TAG }}


@@ -1,4 +1,4 @@
# Welcome!
# Welcome 👋
Thanks a lot for considering contributing to GreptimeDB. We believe people like you would make GreptimeDB a great product. We intend to build a community where individuals can have open talks, show respect for one another, and speak with true ❤️. Meanwhile, we are to keep transparency and make your effort count here.
@@ -50,34 +50,33 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
- Make sure all unit tests are passed.
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
#### `pre-commit` Hooks
You could setup the [`pre-commit`](https://pre-commit.com/#plugins) hooks to run these checks on every commit automatically.
1. Install `pre-commit`
```
$ pip install pre-commit
```
or
```
$ brew install pre-commit
```
pip install pre-commit
or
brew install pre-commit
2. Install the `pre-commit` hooks
```
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
$ pre-commit install
pre-commit installed at .git/hooks/pre-commit
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-pus
```
$ pre-commit install --hook-type commit-msg
pre-commit installed at .git/hooks/commit-msg
now `pre-commit` will run automatically on `git commit`.
$ pre-commit install --hook-type pre-push
pre-commit installed at .git/hooks/pre-push
Now, `pre-commit` will run automatically on `git commit`.
### Title
@@ -102,10 +101,12 @@ of what you were trying to do and what went wrong. You can also reach for help i
## Community
The core team will be thrilled if you participate in any way you like. When you are stuck, try ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out:
- [GreptimeDB Community Slack](https://greptime.com/slack)
- [GreptimeDB Github Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)
Also, see some extra GreptimeDB content:
- [GreptimeDB Docs](https://greptime.com/docs)
- [Learn GreptimeDB](https://greptime.com/products/db)
- [Greptime Inc. Website](https://greptime.com)

Cargo.lock generated

File diff suppressed because it is too large.


@@ -12,12 +12,14 @@ members = [
"src/common/function-macro",
"src/common/grpc",
"src/common/grpc-expr",
"src/common/mem-prof",
"src/common/procedure",
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/substrait",
"src/common/telemetry",
"src/common/test-util",
"src/common/time",
"src/datanode",
"src/datatypes",
@@ -48,29 +50,29 @@ edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
arrow = "29.0"
arrow-array = "29.0"
arrow-flight = "29.0"
arrow-schema = { version = "29.0", features = ["serde"] }
arrow = { version = "33.0", features = ["pyarrow"] }
arrow-array = "33.0"
arrow-flight = "33.0"
arrow-schema = { version = "33.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
# TODO(LFC): Use released Datafusion when it officially dependent on Arrow 29.0
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "fad360df0132a2fcb264a7c07b2b02f0b1dfc644" }
futures = "0.3"
futures-util = "0.3"
parquet = "29.0"
parquet = "33.0"
paste = "1.0"
prost = "0.11"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = "0.28"
sqlparser = "0.30"
tempfile = "3"
tokio = { version = "1.24.2", features = ["full"] }
tokio-util = "0.7"
tonic = { version = "0.8", features = ["tls"] }


@@ -61,6 +61,12 @@ To compile GreptimeDB from source, you'll need:
find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
keyword. You can check it with `protoc --version`.
- python3-dev or python3-devel(Optional, only needed if you want to run scripts
in cpython): this install a Python shared library required for running python
scripting engine(In CPython Mode). This is available as `python3-dev` on
ubuntu, you can install it with `sudo apt install python3-dev`, or
`python3-devel` on RPM based distributions (e.g. Fedora, Red Hat, SuSE). Mac's
`Python3` package should have this shared library by default.
#### Build with Docker


@@ -208,6 +208,7 @@ fn build_values(column: &ArrayRef) -> Values {
| DataType::Dictionary(_, _)
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _)
| DataType::RunEndEncoded(_, _)
| DataType::Map(_, _) => todo!(),
}
}


@@ -1,35 +1,52 @@
node_id = 42
mode = 'distributed'
rpc_addr = '127.0.0.1:3001'
rpc_hostname = '127.0.0.1'
rpc_runtime_size = 8
mysql_addr = '127.0.0.1:4406'
mysql_runtime_size = 4
# Node running mode, see `standalone.example.toml`.
mode = "distributed"
# Whether to use in-memory catalog, see `standalone.example.toml`.
enable_memory_catalog = false
# The datanode identifier, should be unique.
node_id = 42
# gRPC server address, "127.0.0.1:3001" by default.
rpc_addr = "127.0.0.1:3001"
# Hostname of this node.
rpc_hostname = "127.0.0.1"
# The number of gRPC server worker threads, 8 by default.
rpc_runtime_size = 8
# MySQL server address, "127.0.0.1:4406" by default.
mysql_addr = "127.0.0.1:4406"
# The number of MySQL server worker threads, 2 by default.
mysql_runtime_size = 2
# Metasrv client options.
[meta_client_options]
# Metasrv address list.
metasrv_addrs = ["127.0.0.1:3002"]
# Operation timeout in milliseconds, 3000 by default.
timeout_millis = 3000
# Connect server timeout in milliseconds, 5000 by default.
connect_timeout_millis = 5000
# `TCP_NODELAY` option for accepted connections, true by default.
tcp_nodelay = true
# WAL options, see `standalone.example.toml`.
[wal]
dir = "/tmp/greptimedb/wal"
file_size = '1GB'
purge_interval = '10m'
purge_threshold = '50GB'
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
read_batch_size = 128
sync_write = false
# Storage options, see `standalone.example.toml`.
[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'
[meta_client_options]
metasrv_addrs = ['127.0.0.1:3002']
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false
type = "File"
data_dir = "/tmp/greptimedb/data/"
# Compaction options, see `standalone.example.toml`.
[compaction]
max_inflight_tasks = 4
max_files_in_level0 = 16
max_files_in_level0 = 8
max_purge_tasks = 32
[procedure.store]
type = 'File'
data_dir = '/tmp/greptimedb/procedure/'
# Procedure storage options, see `standalone.example.toml`.
# [procedure.store]
# type = 'File'
# data_dir = '/tmp/greptimedb/procedure/'


@@ -1,12 +1,58 @@
mode = 'distributed'
datanode_rpc_addr = '127.0.0.1:3001'
# Node running mode, see `standalone.example.toml`.
mode = "distributed"
# HTTP server options, see `standalone.example.toml`.
[http_options]
addr = '127.0.0.1:4000'
addr = "127.0.0.1:4000"
timeout = "30s"
# gRPC server options, see `standalone.example.toml`.
[grpc_options]
addr = "127.0.0.1:4001"
runtime_size = 8
# MySQL server options, see `standalone.example.toml`.
[mysql_options]
addr = "127.0.0.1:4002"
runtime_size = 2
# MySQL server TLS options, see `standalone.example.toml`.
[mysql_options.tls]
mode = "disable"
cert_path = ""
key_path = ""
# PostgresSQL server options, see `standalone.example.toml`.
[postgres_options]
addr = "127.0.0.1:4003"
runtime_size = 2
# PostgresSQL server TLS options, see `standalone.example.toml`.
[postgres_options.tls]
mode = "disable"
cert_path = ""
key_path = ""
# OpenTSDB protocol options, see `standalone.example.toml`.
[opentsdb_options]
addr = "127.0.0.1:4242"
runtime_size = 2
# InfluxDB protocol options, see `standalone.example.toml`.
[influxdb_options]
enable = true
# Prometheus protocol options, see `standalone.example.toml`.
[prometheus_options]
enable = true
# Prometheus protocol options, see `standalone.example.toml`.
[prom_options]
addr = "127.0.0.1:4004"
# Metasrv client options, see `datanode.example.toml`.
[meta_client_options]
metasrv_addrs = ['127.0.0.1:3002']
metasrv_addrs = ["127.0.0.1:3002"]
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false
tcp_nodelay = true


@@ -1,6 +1,15 @@
bind_addr = '127.0.0.1:3002'
server_addr = '127.0.0.1:3002'
store_addr = '127.0.0.1:2379'
# The bind address of metasrv, "127.0.0.1:3002" by default.
bind_addr = "127.0.0.1:3002"
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
server_addr = "127.0.0.1:3002"
# Etcd server address, "127.0.0.1:2379" by default.
store_addr = "127.0.0.1:2379"
# Datanode lease in seconds, 15 seconds by default.
datanode_lease_secs = 15
# selector: 'LeaseBased', 'LoadBased'
selector = 'LeaseBased'
# Datanode selector type.
# - "LeaseBased" (default value).
# - "LoadBased"
# For details, please see "https://docs.greptime.com/developer-guide/meta/selector".
selector = "LeaseBased"
# Store data in memory, false by default.
use_memory_store = false


@@ -1,47 +1,116 @@
node_id = 0
mode = 'standalone'
# Node running mode, "standalone" or "distributed".
mode = "standalone"
# Whether to use in-memory catalog, `false` by default.
enable_memory_catalog = false
# HTTP server options.
[http_options]
addr = '127.0.0.1:4000'
# Server address, "127.0.0.1:4000" by default.
addr = "127.0.0.1:4000"
# HTTP request timeout, 30s by default.
timeout = "30s"
[wal]
dir = "/tmp/greptimedb/wal"
file_size = '1GB'
purge_interval = '10m'
purge_threshold = '50GB'
read_batch_size = 128
sync_write = false
[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'
# gRPC server options.
[grpc_options]
addr = '127.0.0.1:4001'
# Server address, "127.0.0.1:4001" by default.
addr = "127.0.0.1:4001"
# The number of server worker threads, 8 by default.
runtime_size = 8
# MySQL server options.
[mysql_options]
addr = '127.0.0.1:4002'
# Server address, "127.0.0.1:4002" by default.
addr = "127.0.0.1:4002"
# The number of server worker threads, 2 by default.
runtime_size = 2
[influxdb_options]
enable = true
[opentsdb_options]
addr = '127.0.0.1:4242'
enable = true
runtime_size = 2
[prometheus_options]
enable = true
# MySQL server TLS options.
[mysql_options.tls]
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
# - "disable" (default value)
# - "prefer"
# - "require"
# - "verify-ca"
# - "verify-full"
mode = "disable"
# Certificate file path.
cert_path = ""
# Private key file path.
key_path = ""
# PostgresSQL server options.
[postgres_options]
addr = '127.0.0.1:4003'
# Server address, "127.0.0.1:4003" by default.
addr = "127.0.0.1:4003"
# The number of server worker threads, 2 by default.
runtime_size = 2
check_pwd = false
[procedure.store]
type = 'File'
data_dir = '/tmp/greptimedb/procedure/'
# PostgresSQL server TLS options, see `[mysql_options.tls]` section.
[postgres_options.tls]
# TLS mode.
mode = "disable"
# certificate file path.
cert_path = ""
# private key file path.
key_path = ""
# OpenTSDB protocol options.
[opentsdb_options]
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
addr = "127.0.0.1:4242"
# The number of server worker threads, 2 by default.
runtime_size = 2
# InfluxDB protocol options.
[influxdb_options]
# Whether to enable InfluxDB protocol in HTTP API, true by default.
enable = true
# Prometheus protocol options.
[prometheus_options]
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true
# Prom protocol options.
[prom_options]
# Prometheus API server address, "127.0.0.1:4004" by default.
addr = "127.0.0.1:4004"
# WAL options.
[wal]
# WAL data directory.
dir = "/tmp/greptimedb/wal"
# WAL file size in bytes.
file_size = "1GB"
# WAL purge threshold in bytes.
purge_threshold = "50GB"
# WAL purge interval in seconds.
purge_interval = "10m"
# WAL read batch size.
read_batch_size = 128
# Whether to sync log file after every write.
sync_write = false
# Storage options.
[storage]
# Storage type.
type = "File"
# Data directory, "/tmp/greptimedb/data" by default.
data_dir = "/tmp/greptimedb/data/"
# Compaction options.
[compaction]
# Max task number that can concurrently run.
max_inflight_tasks = 4
# Max files in level 0 to trigger compaction.
max_files_in_level0 = 8
# Max task number for SST purge task after compaction.
max_purge_tasks = 32
# Procedure storage options.
# Uncomment to enable.
# [procedure.store]
# # Storage type.
# type = "File"
# # Procedure data path.
# data_dir = "/tmp/greptimedb/procedure/"


@@ -9,7 +9,10 @@ RUN apt-get update && apt-get install -y \
protobuf-compiler \
curl \
build-essential \
pkg-config
pkg-config \
python3 \
python3-dev \
&& pip install pyarrow
# Install Rust.
SHELL ["/bin/bash", "-c"]


@@ -1,2 +1,2 @@
[toolchain]
channel = "nightly-2023-02-14"
channel = "nightly-2023-02-26"


@@ -10,7 +10,7 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1599ae2a0d1d8f42ee23ed26e4ad7a7b34134c60" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ad0187295035e83f76272da553453e649b7570de" }
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
tonic.workspace = true


@@ -18,25 +18,28 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
dashmap = "5.4"
datafusion.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util.workspace = true
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
parking_lot = "0.12"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] }
storage = { path = "../storage" }
table = { path = "../table" }
tokio.workspace = true
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
chrono.workspace = true
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
storage = { path = "../storage" }
tempdir = "0.3"
tokio.workspace = true


@@ -201,6 +201,9 @@ pub enum Error {
#[snafu(backtrace)]
source: common_catalog::error::Error,
},
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
QueryAccessDenied { catalog: String, schema: String },
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -246,6 +249,7 @@ impl ErrorExt for Error {
}
Error::Unimplemented { .. } => StatusCode::Unsupported,
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
}
}


@@ -34,6 +34,7 @@ pub mod local;
pub mod remote;
pub mod schema;
pub mod system;
pub mod table_source;
pub mod tables;
/// Represent a list of named catalogs
@@ -107,7 +108,12 @@ pub trait CatalogManager: CatalogList {
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>>;
/// Returns the table by catalog, schema and table name.
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>>;
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>>;
}
pub type CatalogManagerRef = Arc<dyn CatalogManager>;
@@ -186,7 +192,8 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = if let Some(table) = manager.table(catalog_name, schema_name, table_name)? {
let table = manager.table(catalog_name, schema_name, table_name).await?;
let table = if let Some(table) = table {
table
} else {
let table = engine
@@ -219,7 +226,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
}
/// The number of regions in the datanode node.
pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
pub async fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
let mut region_number: u64 = 0;
for catalog_name in catalog_manager.catalog_names()? {
@@ -239,11 +246,13 @@ pub fn region_number(catalog_manager: &CatalogManagerRef) -> Result<u64> {
})?;
for table_name in schema.table_names()? {
let table = schema
.table(&table_name)?
.context(error::TableNotFoundSnafu {
table_info: &table_name,
})?;
let table =
schema
.table(&table_name)
.await?
.context(error::TableNotFoundSnafu {
table_info: &table_name,
})?;
let region_numbers = &table.table_info().meta.region_numbers;
region_number += region_numbers.len() as u64;


@@ -345,7 +345,7 @@ impl CatalogManager for LocalCatalogManager {
{
let _lock = self.register_lock.lock().await;
if let Some(existing) = schema.table(&request.table_name)? {
if let Some(existing) = schema.table(&request.table_name).await? {
if existing.table_info().ident.table_id != request.table_id {
error!(
"Unexpected table register request: {:?}, existing: {:?}",
@@ -434,9 +434,10 @@ impl CatalogManager for LocalCatalogManager {
} = &request;
let table_id = self
.catalogs
.table(catalog, schema, table_name)?
.table(catalog, schema, table_name)
.await?
.with_context(|| error::TableNotExistSnafu {
table: format!("{catalog}.{schema}.{table_name}"),
table: format_full_table_name(catalog, schema, table_name),
})?
.table_info()
.ident
@@ -505,7 +506,7 @@ impl CatalogManager for LocalCatalogManager {
.schema(schema)
}
fn table(
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
@@ -521,7 +522,7 @@ impl CatalogManager for LocalCatalogManager {
catalog: catalog_name,
schema: schema_name,
})?;
schema.table(table_name)
schema.table(table_name).await
}
}


@@ -18,6 +18,7 @@ use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use async_trait::async_trait;
use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::{ensure, OptionExt};
@@ -155,16 +156,20 @@ impl CatalogManager for MemoryCatalogManager {
}
}
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Result<Option<TableRef>> {
let c = self.catalogs.read().unwrap();
let catalog = if let Some(c) = c.get(catalog) {
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let catalog = {
let c = self.catalogs.read().unwrap();
let Some(c) = c.get(catalog) else { return Ok(None) };
c.clone()
} else {
return Ok(None);
};
match catalog.schema(schema)? {
None => Ok(None),
Some(s) => s.table(table_name),
Some(s) => s.table(table_name).await,
}
}
}
@@ -283,6 +288,7 @@ impl Default for MemorySchemaProvider {
}
}
#[async_trait]
impl SchemaProvider for MemorySchemaProvider {
fn as_any(&self) -> &dyn Any {
self
@@ -293,7 +299,7 @@ impl SchemaProvider for MemorySchemaProvider {
Ok(tables.keys().cloned().collect())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
let tables = self.tables.read().unwrap();
Ok(tables.get(name).cloned())
}
@@ -355,8 +361,8 @@ mod tests {
use super::*;
#[test]
fn test_new_memory_catalog_list() {
#[tokio::test]
async fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_list().unwrap();
let default_catalog = catalog_list.catalog(DEFAULT_CATALOG_NAME).unwrap().unwrap();
@@ -369,9 +375,9 @@ mod tests {
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.unwrap();
let table = default_schema.table("numbers").unwrap();
let table = default_schema.table("numbers").await.unwrap();
assert!(table.is_some());
assert!(default_schema.table("not_exists").unwrap().is_none());
assert!(default_schema.table("not_exists").await.unwrap().is_none());
}
#[tokio::test]
@@ -419,7 +425,7 @@ mod tests {
// test new table name exists
assert!(provider.table_exist(new_table_name).unwrap());
let registered_table = provider.table(new_table_name).unwrap().unwrap();
let registered_table = provider.table(new_table_name).await.unwrap().unwrap();
assert_eq!(
registered_table.table_info().ident.table_id,
test_table.table_info().ident.table_id
@@ -468,6 +474,7 @@ mod tests {
let registered_table = catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);


@@ -13,16 +13,19 @@
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::pin::Pin;
use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_telemetry::{debug, info};
use common_telemetry::{debug, error, info};
use dashmap::DashMap;
use futures::Stream;
use futures_util::StreamExt;
use parking_lot::RwLock;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
@@ -38,6 +41,7 @@ use crate::error::{
use crate::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
CATALOG_KEY_PREFIX,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
@@ -50,10 +54,9 @@ use crate::{
pub struct RemoteCatalogManager {
node_id: u64,
backend: KvBackendRef,
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
catalogs: Arc<RwLock<DashMap<String, CatalogProviderRef>>>,
engine: TableEngineRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogManager {
@@ -64,7 +67,6 @@ impl RemoteCatalogManager {
backend,
catalogs: Default::default(),
system_table_requests: Default::default(),
mutex: Default::default(),
}
}
@@ -108,9 +110,13 @@ impl RemoteCatalogManager {
debug!("Ignoring non-catalog key: {}", String::from_utf8_lossy(&k));
continue;
}
let key = CatalogKey::parse(&String::from_utf8_lossy(&k))
.context(InvalidCatalogValueSnafu)?;
yield Ok(key)
let catalog_key = String::from_utf8_lossy(&k);
if let Ok(key) = CatalogKey::parse(&catalog_key) {
yield Ok(key)
} else {
error!("Invalid catalog key: {:?}", catalog_key);
}
}
}))
}
@@ -381,7 +387,14 @@ impl CatalogManager for RemoteCatalogManager {
"Initialized catalogs: {:?}",
catalogs.keys().cloned().collect::<Vec<_>>()
);
self.catalogs.store(Arc::new(catalogs));
{
let self_catalogs = self.catalogs.read();
catalogs.into_iter().for_each(|(k, v)| {
self_catalogs.insert(k, v);
});
}
info!("Max table id allocated: {}", max_table_id);
let mut system_table_requests = self.system_table_requests.lock().await;
@@ -468,7 +481,7 @@ impl CatalogManager for RemoteCatalogManager {
.schema(schema)
}
fn table(
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
@@ -483,7 +496,7 @@ impl CatalogManager for RemoteCatalogManager {
catalog: catalog_name,
schema: schema_name,
})?;
schema.table(table_name)
schema.table(table_name).await
}
}
@@ -499,12 +512,10 @@ impl CatalogList for RemoteCatalogManager {
) -> Result<Option<CatalogProviderRef>> {
let key = self.build_catalog_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let catalogs = self.catalogs.clone();
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
@@ -513,11 +524,10 @@ impl CatalogList for RemoteCatalogManager {
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_catalogs = catalogs.load();
let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
new_catalogs.clone_from(&prev_catalogs);
let prev = new_catalogs.insert(name, catalog);
catalogs.store(Arc::new(new_catalogs));
let catalogs = catalogs.read();
let prev = catalogs.insert(name, catalog.clone());
Ok(prev)
})
})
@@ -527,12 +537,65 @@ impl CatalogList for RemoteCatalogManager {
/// List all catalogs from metasrv
fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
let catalogs = self.catalogs.read();
Ok(catalogs.iter().map(|k| k.key().to_string()).collect())
}
/// Read catalog info of given name from metasrv.
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
Ok(self.catalogs.load().get(name).cloned())
{
let catalogs = self.catalogs.read();
let catalog = catalogs.get(name);
if let Some(catalog) = catalog {
return Ok(Some(catalog.clone()));
}
}
let catalogs = self.catalogs.write();
let catalog = catalogs.get(name);
if let Some(catalog) = catalog {
return Ok(Some(catalog.clone()));
}
// It's for lack of incremental catalog syncing between datanode and meta. Here we fetch catalog
// from meta on demand. This can be removed when incremental catalog syncing is done in datanode.
let backend = self.backend.clone();
let catalogs_from_meta: HashSet<String> = std::thread::spawn(|| {
common_runtime::block_on_read(async move {
let mut stream = backend.range(CATALOG_KEY_PREFIX.as_bytes());
let mut catalogs = HashSet::new();
while let Some(catalog) = stream.next().await {
if let Ok(catalog) = catalog {
let catalog_key = String::from_utf8_lossy(&catalog.0);
if let Ok(key) = CatalogKey::parse(&catalog_key) {
catalogs.insert(key.catalog_name);
}
}
}
catalogs
})
})
.join()
.unwrap();
catalogs.retain(|catalog_name, _| catalogs_from_meta.get(catalog_name).is_some());
for catalog in catalogs_from_meta {
catalogs
.entry(catalog.clone())
.or_insert(self.new_catalog_provider(&catalog));
}
let catalog = catalogs.get(name);
Ok(catalog.as_deref().cloned())
}
}
@@ -692,6 +755,7 @@ impl RemoteSchemaProvider {
}
}
#[async_trait]
impl SchemaProvider for RemoteSchemaProvider {
fn as_any(&self) -> &dyn Any {
self
@@ -701,7 +765,7 @@ impl SchemaProvider for RemoteSchemaProvider {
Ok(self.tables.load().keys().cloned().collect::<Vec<_>>())
}
fn table(&self, name: &str) -> Result<Option<TableRef>> {
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
Ok(self.tables.load().get(name).cloned())
}


@@ -15,11 +15,13 @@
use std::any::Any;
use std::sync::Arc;
use async_trait::async_trait;
use table::TableRef;
use crate::error::Result;
/// Represents a schema, comprising a number of named tables.
#[async_trait]
pub trait SchemaProvider: Sync + Send {
/// Returns the schema provider as [`Any`](std::any::Any)
/// so that it can be downcast to a specific implementation.
@@ -29,7 +31,7 @@ pub trait SchemaProvider: Sync + Send {
fn table_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific table from the schema by name, provided it exists.
fn table(&self, name: &str) -> Result<Option<TableRef>>;
async fn table(&self, name: &str) -> Result<Option<TableRef>>;
/// If supported by the implementation, adds a new table to this schema.
/// If a table of the same name existed before, it returns "Table already exists" error.
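
This hunk makes `SchemaProvider::table` an `async fn` via `#[async_trait]`, which is why call sites elsewhere in this compare gain an `.await`. The sketch below shows the pattern with simplified stand-in types (an `Option` return and an `Arc<String>` "table"), not the actual trait definition.
```
// Sketch: an async trait method with async_trait, mirroring the table() change above.
use std::sync::Arc;
use async_trait::async_trait;

type TableRef = Arc<String>; // stand-in for the real table handle

#[async_trait]
trait SchemaProvider: Send + Sync {
    async fn table(&self, name: &str) -> Option<TableRef>;
}

struct MemorySchema;

#[async_trait]
impl SchemaProvider for MemorySchema {
    async fn table(&self, name: &str) -> Option<TableRef> {
        // A real provider may query a remote catalog here, hence the async signature.
        (name == "numbers").then(|| Arc::new("numbers".to_string()))
    }
}

#[tokio::main]
async fn main() {
    let schema = MemorySchema;
    assert!(schema.table("numbers").await.is_some());
    assert!(schema.table("not_exists").await.is_none());
}
```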


@@ -219,7 +219,7 @@ fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<Strin
let mut m = HashMap::with_capacity(3);
m.insert(
"entry_type".to_string(),
Arc::new(UInt8Vector::from_slice(&[entry_type as u8])) as _,
Arc::new(UInt8Vector::from_slice([entry_type as u8])) as _,
);
m.insert(
"key".to_string(),
@@ -228,7 +228,7 @@ fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<Strin
// Timestamp in key part is intentionally left to 0
m.insert(
"timestamp".to_string(),
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
Arc::new(TimestampMillisecondVector::from_slice([0])) as _,
);
m
}
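
The `&[…]` to `[…]` edits in this file match the "resolve new clippy warnings on primitive slice" item earlier in this compare: when a constructor accepts a generic argument (something like `impl AsRef<[T]>`), borrowing a fixed-size array adds nothing and clippy flags it. The real signature of `from_slice` is not shown here, so the sketch below uses a hypothetical helper to illustrate why both forms compile and why the borrow is redundant.
```
// Hypothetical helper: a generic parameter accepts an array by value or by reference.
fn sum_values(values: impl AsRef<[i64]>) -> i64 {
    values.as_ref().iter().sum()
}

fn main() {
    assert_eq!(sum_values(&[1, 2, 3]), 6); // compiles, but the borrow is redundant
    assert_eq!(sum_values([1, 2, 3]), 6);  // the form this diff switches to
}
```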
@@ -258,12 +258,12 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
let now = util::current_time_millis();
columns_values.insert(
"gmt_created".to_string(),
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
);
columns_values.insert(
"gmt_modified".to_string(),
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
);
InsertRequest {
@@ -395,6 +395,7 @@ pub struct TableEntryValue {
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::value::Value;
use log_store::NoopLogStore;
use mito::config::EngineConfig;
@@ -405,7 +406,6 @@ mod tests {
use storage::EngineImpl;
use table::metadata::TableType;
use table::metadata::TableType::Base;
use tempdir::TempDir;
use super::*;
@@ -480,7 +480,7 @@ mod tests {
}
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = TempDir::new("system-table-test").unwrap();
let dir = create_temp_dir("system-table-test");
let store_dir = dir.path().to_string_lossy();
let accessor = object_store::services::Fs::default()
.root(&store_dir)


@@ -0,0 +1,178 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::format_full_table_name;
use datafusion::common::{OwnedTableReference, ResolvedTableReference, TableReference};
use datafusion::datasource::provider_as_source;
use datafusion::logical_expr::TableSource;
use session::context::QueryContext;
use snafu::{ensure, OptionExt};
use table::table::adapter::DfTableProviderAdapter;
use crate::error::{
CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
};
use crate::CatalogListRef;
pub struct DfTableSourceProvider {
catalog_list: CatalogListRef,
resolved_tables: HashMap<String, Arc<dyn TableSource>>,
disallow_cross_schema_query: bool,
default_catalog: String,
default_schema: String,
}
impl DfTableSourceProvider {
pub fn new(
catalog_list: CatalogListRef,
disallow_cross_schema_query: bool,
query_ctx: &QueryContext,
) -> Self {
Self {
catalog_list,
disallow_cross_schema_query,
resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog(),
default_schema: query_ctx.current_schema(),
}
}
pub fn resolve_table_ref<'a>(
&'a self,
table_ref: TableReference<'a>,
) -> Result<ResolvedTableReference<'a>> {
if self.disallow_cross_schema_query {
match &table_ref {
TableReference::Bare { .. } => (),
TableReference::Partial { schema, .. } => {
ensure!(
schema.as_ref() == self.default_schema,
QueryAccessDeniedSnafu {
catalog: &self.default_catalog,
schema: schema.as_ref(),
}
);
}
TableReference::Full {
catalog, schema, ..
} => {
ensure!(
catalog.as_ref() == self.default_catalog
&& schema.as_ref() == self.default_schema,
QueryAccessDeniedSnafu {
catalog: catalog.as_ref(),
schema: schema.as_ref()
}
);
}
};
}
Ok(table_ref.resolve(&self.default_catalog, &self.default_schema))
}
pub async fn resolve_table(
&mut self,
table_ref: OwnedTableReference,
) -> Result<Arc<dyn TableSource>> {
let table_ref = table_ref.as_table_reference();
let table_ref = self.resolve_table_ref(table_ref)?;
let resolved_name = table_ref.to_string();
if let Some(table) = self.resolved_tables.get(&resolved_name) {
return Ok(table.clone());
}
let catalog_name = table_ref.catalog.as_ref();
let schema_name = table_ref.schema.as_ref();
let table_name = table_ref.table.as_ref();
let catalog = self
.catalog_list
.catalog(catalog_name)?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog.schema(schema_name)?.context(SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?;
let table = schema
.table(table_name)
.await?
.with_context(|| TableNotExistSnafu {
table: format_full_table_name(catalog_name, schema_name, table_name),
})?;
let table = DfTableProviderAdapter::new(table);
let table = provider_as_source(Arc::new(table));
self.resolved_tables.insert(resolved_name, table.clone());
Ok(table)
}
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use session::context::QueryContext;
use super::*;
use crate::local::MemoryCatalogManager;
#[test]
fn test_validate_table_ref() {
let query_ctx = &QueryContext::with("greptime", "public");
let table_provider =
DfTableSourceProvider::new(Arc::new(MemoryCatalogManager::default()), true, query_ctx);
let table_ref = TableReference::Bare {
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Partial {
schema: Cow::Borrowed("wrong_schema"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_ok());
let table_ref = TableReference::Full {
catalog: Cow::Borrowed("wrong_catalog"),
schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"),
};
let result = table_provider.resolve_table_ref(table_ref);
assert!(result.is_err());
}
}


@@ -20,6 +20,7 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use common_error::ext::BoxedError;
use common_query::logical_plan::Expr;
@@ -200,6 +201,7 @@ pub struct InformationSchema {
pub system: Arc<SystemCatalogTable>,
}
#[async_trait]
impl SchemaProvider for InformationSchema {
fn as_any(&self) -> &dyn Any {
self
@@ -212,7 +214,7 @@ impl SchemaProvider for InformationSchema {
])
}
fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
if name.eq_ignore_ascii_case("tables") {
Ok(Some(self.tables.clone()))
} else if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {


@@ -71,6 +71,7 @@ mod tests {
let registered_table = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
@@ -158,6 +159,7 @@ mod tests {
let table = guard.as_ref().unwrap();
let table_registered = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
.await
.unwrap()
.unwrap();
assert_eq!(


@@ -55,10 +55,18 @@ impl Database {
}
}
pub fn catalog(&self) -> &String {
&self.catalog
}
pub fn set_catalog(&mut self, catalog: impl Into<String>) {
self.catalog = catalog.into();
}
pub fn schema(&self) -> &String {
&self.schema
}
pub fn set_schema(&mut self, schema: impl Into<String>) {
self.schema = schema.into();
}
@@ -118,7 +126,7 @@ impl Database {
request: Some(request),
};
let request = Ticket {
ticket: request.encode_to_vec(),
ticket: request.encode_to_vec().into(),
};
let mut client = self.client.make_client()?;


@@ -9,8 +9,12 @@ default-run = "greptime"
name = "greptime"
path = "src/bin/greptime.rs"
[features]
mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
[dependencies]
anymap = "1.0.0-beta.2"
catalog = { path = "../catalog" }
clap = { version = "3.1", features = ["derive"] }
client = { path = "../client" }
common-base = { path = "../common/base" }
@@ -27,17 +31,24 @@ futures.workspace = true
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
nu-ansi-term = "0.46"
partition = { path = "../partition" }
query = { path = "../query" }
rustyline = "10.1"
serde.workspace = true
servers = { path = "../servers" }
session = { path = "../session" }
snafu.workspace = true
substrait = { path = "../common/substrait" }
tikv-jemalloc-ctl = { version = "0.5", optional = true }
tikv-jemallocator = { version = "0.5", optional = true }
tokio.workspace = true
toml = "0.5"
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
rexpect = "0.5"
serde.workspace = true
tempdir = "0.3"
[build-dependencies]
build-data = "0.1.3"

View File

@@ -87,6 +87,10 @@ fn print_version() -> &'static str {
)
}
#[cfg(feature = "mem-prof")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[tokio::main]
async fn main() -> Result<()> {
let cmd = Command::parse();

View File

@@ -50,13 +50,15 @@ impl SubCommand {
pub(crate) struct AttachCommand {
#[clap(long)]
pub(crate) grpc_addr: String,
#[clap(long)]
pub(crate) meta_addr: Option<String>,
#[clap(long, action)]
pub(crate) disable_helper: bool,
}
impl AttachCommand {
async fn run(self) -> Result<()> {
let mut repl = Repl::try_new(&self)?;
let mut repl = Repl::try_new(&self).await?;
repl.run().await
}
}

View File

@@ -13,24 +13,39 @@
// limitations under the License.
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Instant;
use catalog::remote::MetaKvBackend;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::prelude::ErrorExt;
use common_query::Output;
use common_recordbatch::RecordBatches;
use common_telemetry::logging;
use either::Either;
use frontend::catalog::FrontendCatalogManager;
use frontend::datanode::DatanodeClients;
use meta_client::client::MetaClientBuilder;
use partition::manager::PartitionRuleManager;
use partition::route::TableRoutes;
use query::datafusion::DatafusionQueryEngine;
use query::logical_optimizer::LogicalOptimizer;
use query::parser::QueryLanguageParser;
use query::plan::LogicalPlan;
use query::QueryEngine;
use rustyline::error::ReadlineError;
use rustyline::Editor;
use session::context::QueryContext;
use snafu::{ErrorCompat, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cli::cmd::ReplCommand;
use crate::cli::helper::RustylineHelper;
use crate::cli::AttachCommand;
use crate::error::{
CollectRecordBatchesSnafu, PrettyPrintRecordBatchesSnafu, ReadlineSnafu, ReplCreationSnafu,
RequestDatabaseSnafu, Result,
CollectRecordBatchesSnafu, ParseSqlSnafu, PlanStatementSnafu, PrettyPrintRecordBatchesSnafu,
ReadlineSnafu, ReplCreationSnafu, RequestDatabaseSnafu, Result, StartMetaClientSnafu,
SubstraitEncodeLogicalPlanSnafu,
};
/// Captures the state of the repl, gathers commands and executes them one by one
@@ -43,6 +58,8 @@ pub(crate) struct Repl {
/// Client for interacting with GreptimeDB
database: Database,
query_engine: Option<DatafusionQueryEngine>,
}
#[allow(clippy::print_stdout)]
@@ -51,7 +68,7 @@ impl Repl {
println!("{}", ReplCommand::help())
}
pub(crate) fn try_new(cmd: &AttachCommand) -> Result<Self> {
pub(crate) async fn try_new(cmd: &AttachCommand) -> Result<Self> {
let mut rl = Editor::new().context(ReplCreationSnafu)?;
if !cmd.disable_helper {
@@ -69,10 +86,17 @@ impl Repl {
let client = Client::with_urls([&cmd.grpc_addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
let query_engine = if let Some(meta_addr) = &cmd.meta_addr {
create_query_engine(meta_addr).await.map(Some)?
} else {
None
};
Ok(Self {
rl,
prompt: "> ".to_string(),
database,
query_engine,
})
}
@@ -134,11 +158,29 @@ impl Repl {
async fn do_execute_sql(&self, sql: String) -> Result<()> {
let start = Instant::now();
let output = self
.database
.sql(&sql)
.await
.context(RequestDatabaseSnafu { sql: &sql })?;
let output = if let Some(query_engine) = &self.query_engine {
let stmt = QueryLanguageParser::parse_sql(&sql)
.with_context(|_| ParseSqlSnafu { sql: sql.clone() })?;
let query_ctx = Arc::new(QueryContext::with(
self.database.catalog(),
self.database.schema(),
));
let LogicalPlan::DfPlan(plan) = query_engine
.statement_to_plan(stmt, query_ctx)
.await
.and_then(|x| query_engine.optimize(&x))
.context(PlanStatementSnafu)?;
let plan = DFLogicalSubstraitConvertor {}
.encode(plan)
.context(SubstraitEncodeLogicalPlanSnafu)?;
self.database.logical_plan(plan.to_vec()).await
} else {
self.database.sql(&sql).await
}
.context(RequestDatabaseSnafu { sql: &sql })?;
let either = match output {
Output::Stream(s) => {
@@ -197,3 +239,29 @@ fn history_file() -> PathBuf {
buf.push(".greptimedb_cli_history");
buf
}
async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let mut meta_client = MetaClientBuilder::default().enable_store().build();
meta_client
.start([meta_addr])
.await
.context(StartMetaClientSnafu)?;
let meta_client = Arc::new(meta_client);
let backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),
});
let table_routes = Arc::new(TableRoutes::new(meta_client));
let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
let datanode_clients = Arc::new(DatanodeClients::default());
let catalog_list = Arc::new(FrontendCatalogManager::new(
backend,
partition_manager,
datanode_clients,
));
Ok(DatafusionQueryEngine::new(catalog_list, Default::default()))
}

View File

@@ -150,8 +150,10 @@ impl TryFrom<StartCommand> for DatanodeOptions {
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::io::Write;
use std::time::Duration;
use common_test_util::temp_dir::create_named_temp_file;
use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
use servers::Mode;
@@ -159,18 +161,57 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let mut file = create_named_temp_file();
let toml_str = r#"
mode = "distributed"
enable_memory_catalog = false
node_id = 42
rpc_addr = "127.0.0.1:3001"
rpc_hostname = "127.0.0.1"
rpc_runtime_size = 8
mysql_addr = "127.0.0.1:4406"
mysql_runtime_size = 2
[meta_client_options]
metasrv_addrs = ["127.0.0.1:3002"]
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = true
[wal]
dir = "/tmp/greptimedb/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
read_batch_size = 128
sync_write = false
[storage]
type = "File"
data_dir = "/tmp/greptimedb/data/"
[compaction]
max_inflight_tasks = 4
max_files_in_level0 = 8
max_purge_tasks = 32
"#;
write!(file, "{}", toml_str).unwrap();
let cmd = StartCommand {
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
config_file: Some(file.path().to_str().unwrap().to_string()),
..Default::default()
};
let options: DatanodeOptions = cmd.try_into().unwrap();
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal.dir);
assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
assert_eq!(4, options.mysql_runtime_size);
assert_eq!(2, options.mysql_runtime_size);
assert_eq!(Some(42), options.node_id);
assert_eq!(Duration::from_secs(600), options.wal.purge_interval);
assert_eq!(1024 * 1024 * 1024, options.wal.file_size.0);
assert_eq!(1024 * 1024 * 1024 * 50, options.wal.purge_threshold.0);
assert!(!options.wal.sync_write);
let MetaClientOptions {
metasrv_addrs: metasrv_addr,
timeout_millis,
@@ -181,7 +222,7 @@ mod tests {
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
assert_eq!(5000, connect_timeout_millis);
assert_eq!(3000, timeout_millis);
assert!(!tcp_nodelay);
assert!(tcp_nodelay);
match options.storage {
ObjectStoreConfig::File(FileConfig { data_dir }) => {
@@ -194,7 +235,7 @@ mod tests {
assert_eq!(
CompactionConfig {
max_inflight_tasks: 4,
max_files_in_level0: 16,
max_files_in_level0: 8,
max_purge_tasks: 32,
},
options.compaction
@@ -232,32 +273,4 @@ mod tests {
})
.unwrap();
}
#[test]
fn test_merge_config() {
let dn_opts = DatanodeOptions::try_from(StartCommand {
config_file: Some(format!(
"{}/../../config/datanode.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
..Default::default()
})
.unwrap();
assert_eq!("/tmp/greptimedb/wal", dn_opts.wal.dir);
assert_eq!(Duration::from_secs(600), dn_opts.wal.purge_interval);
assert_eq!(1024 * 1024 * 1024, dn_opts.wal.file_size.0);
assert_eq!(1024 * 1024 * 1024 * 50, dn_opts.wal.purge_threshold.0);
assert!(!dn_opts.wal.sync_write);
assert_eq!(Some(42), dn_opts.node_id);
let MetaClientOptions {
metasrv_addrs: metasrv_addr,
timeout_millis,
connect_timeout_millis,
tcp_nodelay,
} = dn_opts.meta_client_options.unwrap();
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
assert_eq!(3000, timeout_millis);
assert_eq!(5000, connect_timeout_millis);
assert!(!tcp_nodelay);
}
}

View File

@@ -103,6 +103,31 @@ pub enum Error {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to start Meta client, source: {}", source))]
StartMetaClient {
#[snafu(backtrace)]
source: meta_client::error::Error,
},
#[snafu(display("Failed to parse SQL: {}, source: {}", sql, source))]
ParseSql {
sql: String,
#[snafu(backtrace)]
source: query::error::Error,
},
#[snafu(display("Failed to plan statement, source: {}", source))]
PlanStatement {
#[snafu(backtrace)]
source: query::error::Error,
},
#[snafu(display("Failed to encode logical plan in substrait, source: {}", source))]
SubstraitEncodeLogicalPlan {
#[snafu(backtrace)]
source: substrait::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -126,6 +151,11 @@ impl ErrorExt for Error {
Error::CollectRecordBatches { source } | Error::PrettyPrintRecordBatches { source } => {
source.status_code()
}
Error::StartMetaClient { source } => source.status_code(),
Error::ParseSql { source, .. } | Error::PlanStatement { source } => {
source.status_code()
}
Error::SubstraitEncodeLogicalPlan { source } => source.status_code(),
}
}

View File

@@ -23,6 +23,7 @@ use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prom::PromOptions;
use meta_client::MetaClientOptions;
use servers::auth::UserProviderRef;
use servers::http::HttpOptions;
@@ -67,6 +68,8 @@ pub struct StartCommand {
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
prom_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
opentsdb_addr: Option<String>,
@@ -141,6 +144,9 @@ impl TryFrom<StartCommand> for FrontendOptions {
..Default::default()
});
}
if let Some(addr) = cmd.prom_addr {
opts.prom_options = Some(PromOptions { addr });
}
if let Some(addr) = cmd.postgres_addr {
opts.postgres_options = Some(PostgresOptions {
addr,
@@ -173,8 +179,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
#[cfg(test)]
mod tests {
use std::io::Write;
use std::time::Duration;
use common_test_util::temp_dir::create_named_temp_file;
use servers::auth::{Identity, Password, UserProviderRef};
use super::*;
@@ -184,6 +192,7 @@ mod tests {
let command = StartCommand {
http_addr: Some("127.0.0.1:1234".to_string()),
grpc_addr: None,
prom_addr: Some("127.0.0.1:4444".to_string()),
mysql_addr: Some("127.0.0.1:5678".to_string()),
postgres_addr: Some("127.0.0.1:5432".to_string()),
opentsdb_addr: Some("127.0.0.1:4321".to_string()),
@@ -207,6 +216,7 @@ mod tests {
opts.opentsdb_options.as_ref().unwrap().addr,
"127.0.0.1:4321"
);
assert_eq!(opts.prom_options.as_ref().unwrap().addr, "127.0.0.1:4444");
let default_opts = FrontendOptions::default();
assert_eq!(
@@ -231,17 +241,25 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let mut file = create_named_temp_file();
let toml_str = r#"
mode = "distributed"
[http_options]
addr = "127.0.0.1:4000"
timeout = "30s"
"#;
write!(file, "{}", toml_str).unwrap();
let command = StartCommand {
http_addr: None,
grpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
influxdb_enable: None,
config_file: Some(format!(
"{}/../../config/frontend.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
config_file: Some(file.path().to_str().unwrap().to_string()),
metasrv_addr: None,
tls_mode: None,
tls_cert_path: None,
@@ -267,6 +285,7 @@ mod tests {
http_addr: None,
grpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
influxdb_enable: None,

View File

@@ -113,6 +113,9 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
#[cfg(test)]
mod tests {
use std::io::Write;
use common_test_util::temp_dir::create_named_temp_file;
use meta_srv::selector::SelectorType;
use super::*;
@@ -136,15 +139,23 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let mut file = create_named_temp_file();
let toml_str = r#"
bind_addr = "127.0.0.1:3002"
server_addr = "127.0.0.1:3002"
store_addr = "127.0.0.1:2379"
datanode_lease_secs = 15
selector = "LeaseBased"
use_memory_store = false
"#;
write!(file, "{}", toml_str).unwrap();
let cmd = StartCommand {
bind_addr: None,
server_addr: None,
store_addr: None,
selector: None,
config_file: Some(format!(
"{}/../../config/metasrv.example.toml",
std::env::current_dir().unwrap().as_path().to_str().unwrap()
)),
config_file: Some(file.path().to_str().unwrap().to_string()),
use_memory_store: false,
};
let options: MetaSrvOptions = cmd.try_into().unwrap();

View File

@@ -28,8 +28,8 @@ use frontend::instance::Instance as FeInstance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prom::PromOptions;
use frontend::prometheus::PrometheusOptions;
use frontend::promql::PromqlOptions;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
@@ -77,7 +77,7 @@ pub struct StandaloneOptions {
pub opentsdb_options: Option<OpentsdbOptions>,
pub influxdb_options: Option<InfluxdbOptions>,
pub prometheus_options: Option<PrometheusOptions>,
pub promql_options: Option<PromqlOptions>,
pub prom_options: Option<PromOptions>,
pub wal: WalConfig,
pub storage: ObjectStoreConfig,
pub compaction: CompactionConfig,
@@ -96,7 +96,7 @@ impl Default for StandaloneOptions {
opentsdb_options: Some(OpentsdbOptions::default()),
influxdb_options: Some(InfluxdbOptions::default()),
prometheus_options: Some(PrometheusOptions::default()),
promql_options: Some(PromqlOptions::default()),
prom_options: Some(PromOptions::default()),
wal: WalConfig::default(),
storage: ObjectStoreConfig::default(),
compaction: CompactionConfig::default(),
@@ -116,7 +116,7 @@ impl StandaloneOptions {
opentsdb_options: self.opentsdb_options,
influxdb_options: self.influxdb_options,
prometheus_options: self.prometheus_options,
promql_options: self.promql_options,
prom_options: self.prom_options,
meta_client_options: None,
}
}
@@ -142,6 +142,8 @@ struct StartCommand {
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
prom_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
opentsdb_addr: Option<String>,
@@ -254,6 +256,11 @@ impl TryFrom<StartCommand> for FrontendOptions {
..Default::default()
})
}
if let Some(addr) = cmd.prom_addr {
opts.prom_options = Some(PromOptions { addr })
}
if let Some(addr) = cmd.postgres_addr {
opts.postgres_options = Some(PostgresOptions {
addr,
@@ -302,6 +309,7 @@ mod tests {
http_addr: None,
rpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
config_file: Some(format!(
@@ -347,6 +355,7 @@ mod tests {
let command = StartCommand {
http_addr: None,
rpc_addr: None,
prom_addr: None,
mysql_addr: None,
postgres_addr: None,
opentsdb_addr: None,

View File

@@ -29,9 +29,9 @@ mod tests {
use std::fs::File;
use std::io::Write;
use common_test_util::temp_dir::create_temp_dir;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use tempdir::TempDir;
use super::*;
use crate::error::Result;
@@ -62,7 +62,7 @@ mod tests {
host: "greptime.test".to_string(),
};
let dir = TempDir::new("test_from_file").unwrap();
let dir = create_temp_dir("test_from_file");
let test_file = format!("{}/test.toml", dir.path().to_str().unwrap());
let s = toml::to_string(&config).unwrap();

View File

@@ -18,8 +18,8 @@ mod tests {
use std::process::{Command, Stdio};
use std::time::Duration;
use common_test_util::temp_dir::create_temp_dir;
use rexpect::session::PtyReplSession;
use tempdir::TempDir;
struct Repl {
repl: PtyReplSession,
@@ -48,8 +48,8 @@ mod tests {
#[test]
fn test_repl() {
let data_dir = TempDir::new_in("/tmp", "data").unwrap();
let wal_dir = TempDir::new_in("/tmp", "wal").unwrap();
let data_dir = create_temp_dir("data");
let wal_dir = create_temp_dir("wal");
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
bin_path.push("../../target/debug");

View File

@@ -20,6 +20,12 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize)]
pub struct Bytes(bytes::Bytes);
impl From<Bytes> for bytes::Bytes {
fn from(value: Bytes) -> Self {
value.0
}
}
impl From<bytes::Bytes> for Bytes {
fn from(bytes: bytes::Bytes) -> Bytes {
Bytes(bytes)

View File

@@ -17,5 +17,4 @@ snafu = { version = "0.7", features = ["backtraces"] }
[dev-dependencies]
chrono.workspace = true
tempdir = "0.3"
tokio.workspace = true

View File

@@ -10,6 +10,7 @@ proc-macro = true
[dependencies]
quote = "1.0"
syn = "1.0"
proc-macro2 = "1.0"
[dev-dependencies]
arc-swap = "1.0"

View File

@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod range_fn;
use proc_macro::TokenStream;
use quote::{quote, quote_spanned};
use range_fn::process_range_fn;
use syn::parse::Parser;
use syn::spanned::Spanned;
use syn::{parse_macro_input, DeriveInput, ItemStruct};
@@ -83,3 +86,31 @@ pub fn as_aggr_func_creator(_args: TokenStream, input: TokenStream) -> TokenStre
}
.into()
}
/// Attribute macro to convert an arithmetic function into a range function. The annotated function
/// should accept several arrays as input and return a single value as output. This procedural
/// macro works with any number of input parameters. The return type can be either a primitive type
/// or one wrapped in `Option`.
///
/// # Example
/// Take `count_over_time()` in PromQL as an example:
/// ```rust, ignore
/// /// The count of all values in the specified interval.
/// #[range_fn(
/// name = "CountOverTime",
/// ret = "Float64Array",
/// display_name = "prom_count_over_time"
/// )]
/// pub fn count_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
/// values.len() as f64
/// }
/// ```
///
/// # Arguments
/// - `name`: The name of the generated [ScalarUDF] struct.
/// - `ret`: The return type of the generated UDF function.
/// - `display_name`: The display name of the generated UDF function.
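///
/// # Generated items (sketch)
/// Conceptually, the macro keeps the annotated function (with its `pub` modifier stripped)
/// and generates a struct named by `name` next to it. A simplified sketch of the expansion
/// is shown below; the real output also wires up `scalar_udf()`, `input_type()`,
/// `return_type()` and a `calc()` method that evaluates the function over each range.
/// ```rust, ignore
/// #[derive(Debug)]
/// pub struct CountOverTime {}
///
/// impl CountOverTime {
///     pub const fn name() -> &'static str {
///         "prom_count_over_time"
///     }
///     // ... plus scalar_udf(), input_type(), return_type() and calc()
/// }
/// ```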
#[proc_macro_attribute]
pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
process_range_fn(args, input)
}

View File

@@ -0,0 +1,230 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::token::Comma;
use syn::{
parse_macro_input, Attribute, AttributeArgs, FnArg, Ident, ItemFn, Meta, MetaNameValue,
NestedMeta, Signature, Type, TypeReference, Visibility,
};
/// Internal utility macro to return early on error.
macro_rules! ok {
($item:expr) => {
match $item {
Ok(item) => item,
Err(e) => return e.into_compile_error().into(),
}
};
}
pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
// extract arg map
let arg_pairs = parse_macro_input!(args as AttributeArgs);
let arg_span = arg_pairs[0].span();
let arg_map = ok!(extract_arg_map(arg_pairs));
// decompose the fn block
let compute_fn = parse_macro_input!(input as ItemFn);
let ItemFn {
attrs,
vis,
sig,
block,
} = compute_fn;
// extract fn arg list
let Signature {
inputs,
ident: fn_name,
..
} = &sig;
let arg_types = ok!(extract_input_types(inputs));
// build the struct and its impl block
let struct_code = build_struct(
attrs,
vis,
ok!(get_ident(&arg_map, "name", arg_span)),
ok!(get_ident(&arg_map, "display_name", arg_span)),
);
let calc_fn_code = build_calc_fn(
ok!(get_ident(&arg_map, "name", arg_span)),
arg_types,
fn_name.clone(),
ok!(get_ident(&arg_map, "ret", arg_span)),
);
// preserve this fn, but remove its `pub` modifier
let input_fn_code: TokenStream = quote! {
#sig { #block }
}
.into();
let mut result = TokenStream::new();
result.extend(struct_code);
result.extend(calc_fn_code);
result.extend(input_fn_code);
result
}
/// Extract a String <-> Ident map from the attribute args.
fn extract_arg_map(args: Vec<NestedMeta>) -> Result<HashMap<String, Ident>, syn::Error> {
args.into_iter()
.map(|meta| {
if let NestedMeta::Meta(Meta::NameValue(MetaNameValue { path, lit, .. })) = meta {
let name = path.get_ident().unwrap().to_string();
let ident = match lit {
syn::Lit::Str(lit_str) => lit_str.parse::<Ident>(),
_ => Err(syn::Error::new(
lit.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
)),
}?;
Ok((name, ident))
} else {
Err(syn::Error::new(
meta.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
))
}
})
.collect::<Result<HashMap<String, Ident>, syn::Error>>()
}
/// Helper function to get an Ident from the previous arg map.
fn get_ident(map: &HashMap<String, Ident>, key: &str, span: Span) -> Result<Ident, syn::Error> {
map.get(key)
.cloned()
.ok_or_else(|| syn::Error::new(span, format!("Expect attribute {key} but not found")))
}
/// Extract the argument list from the annotated function.
fn extract_input_types(inputs: &Punctuated<FnArg, Comma>) -> Result<Vec<Type>, syn::Error> {
inputs
.iter()
.map(|arg| match arg {
FnArg::Receiver(receiver) => Err(syn::Error::new(receiver.span(), "unexpected `self` receiver")),
FnArg::Typed(pat_type) => Ok(*pat_type.ty.clone()),
})
.collect()
}
fn build_struct(
attrs: Vec<Attribute>,
vis: Visibility,
name: Ident,
display_name_ident: Ident,
) -> TokenStream {
let display_name = display_name_ident.to_string();
quote! {
#(#attrs)*
#[derive(Debug)]
#vis struct #name {}
impl #name {
pub const fn name() -> &'static str {
#display_name
}
pub fn scalar_udf() -> ScalarUDF {
ScalarUDF {
name: Self::name().to_string(),
signature: Signature::new(
TypeSignature::Exact(Self::input_type()),
Volatility::Immutable,
),
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
fun: Arc::new(Self::calc),
}
}
// TODO(ruihang): this should be parameterized
// time index column and value column
fn input_type() -> Vec<DataType> {
vec![
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
RangeArray::convert_data_type(DataType::Float64),
]
}
// TODO(ruihang): this should be parameterized
fn return_type() -> DataType {
DataType::Float64
}
}
}
.into()
}
fn build_calc_fn(
name: Ident,
param_types: Vec<Type>,
fn_name: Ident,
ret_type: Ident,
) -> TokenStream {
let param_names = param_types
.iter()
.enumerate()
.map(|(i, ty)| Ident::new(&format!("param_{}", i), ty.span()))
.collect::<Vec<_>>();
let unref_param_types = param_types
.iter()
.map(|ty| {
if let Type::Reference(TypeReference { elem, .. }) = ty {
elem.as_ref().clone()
} else {
ty.clone()
}
})
.collect::<Vec<_>>();
let num_params = param_types.len();
let param_numbers = (0..num_params).collect::<Vec<_>>();
let range_array_names = param_names
.iter()
.map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
.collect::<Vec<_>>();
let first_range_array_name = range_array_names.first().unwrap().clone();
quote! {
impl #name {
fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), #num_params);
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
// TODO(ruihang): add ensure!()
let mut result_array = Vec::new();
for index in 0..#first_range_array_name.len(){
#( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*
// TODO(ruihang): add ensure!() to check length
let result = #fn_name(#( &#param_names, )*);
result_array.push(result);
}
let result = ColumnarValue::Array(Arc::new(#ret_type::from_iter(result_array)));
Ok(result)
}
}
}
.into()
}

View File

@@ -14,9 +14,9 @@
use std::sync::Arc;
use common_query::error::{ExecuteFunctionSnafu, FromScalarValueSnafu};
use common_query::error::FromScalarValueSnafu;
use common_query::prelude::{
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf,
};
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::*;
@@ -54,16 +54,8 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
.collect();
let result = func_cloned.eval(func_ctx, &args.context(FromScalarValueSnafu)?);
let udf = if len.is_some() {
result.map(ColumnarValue::Vector)?
} else {
ScalarValue::try_from_array(&result?.to_arrow_array(), 0)
.map(ColumnarValue::Scalar)
.context(ExecuteFunctionSnafu)?
};
Ok(udf)
let udf_result = result.map(ColumnarValue::Vector)?;
Ok(udf_result)
});
ScalarUdf::new(func.name(), &func.signature(), &return_type, &fun)

View File

@@ -26,6 +26,7 @@ use common_time::{Date, DateTime};
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::{ValueRef, VectorRef};
use datatypes::schema::SchemaRef;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::MutableVector;
use snafu::{ensure, OptionExt, ResultExt};
@@ -414,11 +415,26 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
.into_iter()
.map(|v| Value::Date(v.into()))
.collect(),
ConcreteDataType::Timestamp(_) => values
ConcreteDataType::Timestamp(TimestampType::Second(_)) => values
.ts_second_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_second(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => values
.ts_millisecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => values
.ts_microsecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_microsecond(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => values
.ts_nanosecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_nanosecond(v)))
.collect(),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
unreachable!()
}
@@ -444,6 +460,7 @@ mod tests {
use common_time::timestamp::Timestamp;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use datatypes::types::{TimestampMillisecondType, TimestampSecondType, TimestampType};
use datatypes::value::Value;
use snafu::ResultExt;
use table::error::Result as TableResult;
@@ -647,6 +664,39 @@ mod tests {
);
}
#[test]
fn test_convert_timestamp_values() {
// second
let actual = convert_values(
&ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType)),
Values {
ts_second_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Timestamp(Timestamp::new_second(1_i64)),
Value::Timestamp(Timestamp::new_second(2_i64)),
Value::Timestamp(Timestamp::new_second(3_i64)),
];
assert_eq!(expect, actual);
// millisecond
let actual = convert_values(
&ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType)),
Values {
ts_millisecond_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Timestamp(Timestamp::new_millisecond(1_i64)),
Value::Timestamp(Timestamp::new_millisecond(2_i64)),
Value::Timestamp(Timestamp::new_millisecond(3_i64)),
];
assert_eq!(expect, actual);
}
#[test]
fn test_is_null() {
let null_mask = BitVec::from_slice(&[0b0000_0001, 0b0000_1000]);

View File

@@ -16,7 +16,7 @@ common-runtime = { path = "../runtime" }
dashmap = "5.4"
datafusion.workspace = true
datatypes = { path = "../../datatypes" }
flatbuffers = "22"
flatbuffers = "23.1"
futures = "0.3"
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }

View File

@@ -16,8 +16,9 @@ use std::collections::HashMap;
use std::sync::Arc;
use api::v1::{AffectedRows, FlightMetadata};
use arrow_flight::utils::{flight_data_from_arrow_batch, flight_data_to_arrow_batch};
use arrow_flight::utils::flight_data_to_arrow_batch;
use arrow_flight::{FlightData, IpcMessage, SchemaAsIpc};
use common_base::bytes::Bytes;
use common_recordbatch::{RecordBatch, RecordBatches};
use datatypes::arrow;
use datatypes::arrow::datatypes::Schema as ArrowSchema;
@@ -39,38 +40,58 @@ pub enum FlightMessage {
AffectedRows(usize),
}
#[derive(Default)]
pub struct FlightEncoder {
write_options: writer::IpcWriteOptions,
data_gen: writer::IpcDataGenerator,
dictionary_tracker: writer::DictionaryTracker,
}
impl Default for FlightEncoder {
fn default() -> Self {
Self {
write_options: writer::IpcWriteOptions::default(),
data_gen: writer::IpcDataGenerator::default(),
dictionary_tracker: writer::DictionaryTracker::new(false),
}
}
}
impl FlightEncoder {
pub fn encode(&self, flight_message: FlightMessage) -> FlightData {
pub fn encode(&mut self, flight_message: FlightMessage) -> FlightData {
match flight_message {
FlightMessage::Schema(schema) => {
SchemaAsIpc::new(schema.arrow_schema(), &self.write_options).into()
}
FlightMessage::Recordbatch(recordbatch) => {
let (flight_dictionaries, flight_batch) = flight_data_from_arrow_batch(
recordbatch.df_record_batch(),
&self.write_options,
);
let (encoded_dictionaries, encoded_batch) = self
.data_gen
.encoded_batch(
recordbatch.df_record_batch(),
&mut self.dictionary_tracker,
&self.write_options,
)
.expect("DictionaryTracker configured above to not fail on replacement");
// TODO(LFC): Handle dictionary as FlightData here, once we support Arrow's Dictionary DataType.
// Currently we don't have a datatype corresponding to Arrow's Dictionary DataType,
// so there won't be any "dictionaries" here. Assert on that to be sure, as a testing
// guard in case we forget to handle the possible "dictionaries" here in the future.
debug_assert_eq!(flight_dictionaries.len(), 0);
debug_assert_eq!(encoded_dictionaries.len(), 0);
flight_batch
encoded_batch.into()
}
FlightMessage::AffectedRows(rows) => {
let metadata = FlightMetadata {
affected_rows: Some(AffectedRows { value: rows as _ }),
}
.encode_to_vec();
FlightData::new(None, IpcMessage(build_none_flight_msg()), metadata, vec![])
FlightData::new(
None,
IpcMessage(build_none_flight_msg().into()),
metadata,
vec![],
)
}
}
}
@@ -83,7 +104,8 @@ pub struct FlightDecoder {
impl FlightDecoder {
pub fn try_decode(&mut self, flight_data: FlightData) -> Result<FlightMessage> {
let message = root_as_message(flight_data.data_header.as_slice()).map_err(|e| {
let bytes = flight_data.data_header.slice(..);
let message = root_as_message(&bytes).map_err(|e| {
InvalidFlightDataSnafu {
reason: e.to_string(),
}
@@ -91,7 +113,7 @@ impl FlightDecoder {
})?;
match message.header_type() {
MessageHeader::NONE => {
let metadata = FlightMetadata::decode(flight_data.app_metadata.as_slice())
let metadata = FlightMetadata::decode(flight_data.app_metadata)
.context(DecodeFlightDataSnafu)?;
if let Some(AffectedRows { value }) = metadata.affected_rows {
return Ok(FlightMessage::AffectedRows(value as _));
@@ -176,7 +198,7 @@ pub fn flight_messages_to_recordbatches(messages: Vec<FlightMessage>) -> Result<
}
}
fn build_none_flight_msg() -> Vec<u8> {
fn build_none_flight_msg() -> Bytes {
let mut builder = FlatBufferBuilder::new();
let mut message = arrow::ipc::MessageBuilder::new(&mut builder);
@@ -187,7 +209,7 @@ fn build_none_flight_msg() -> Vec<u8> {
let data = message.finish();
builder.finish(data, None);
builder.finished_data().to_vec()
builder.finished_data().into()
}
#[cfg(test)]

View File

@@ -0,0 +1,20 @@
[package]
name = "common-mem-prof"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
common-error = { path = "../error" }
snafu.workspace = true
tempfile = "3.4"
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
tikv-jemallocator = "0.5"
tokio.workspace = true
[dependencies.tikv-jemalloc-sys]
version = "0.5"
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
[profile.release]
debug = true

View File

@@ -0,0 +1,50 @@
# Profile memory usage of GreptimeDB
This crate provides an easy way to dump memory profiling data.
## Prerequisites
### jemalloc
```bash
# for macOS
brew install jemalloc
# for Ubuntu
sudo apt install libjemalloc-dev
```
### [flamegraph](https://github.com/brendangregg/FlameGraph)
```bash
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with the `mem-prof` feature
```bash
cargo build --features=mem-prof
```
## Profiling
Start a GreptimeDB instance with the `MALLOC_CONF` environment variable set:
```bash
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
```
Dump memory profiling data through the HTTP API:
```bash
curl localhost:4000/v1/prof/mem > greptime.hprof
```
You can periodically dump profiling data and compare the dumps to find the delta in memory usage.
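A minimal sketch of such a periodic dump loop (assuming the default HTTP port 4000 and a local `dumps/` directory of your choice):
```bash
# Dump a heap profile every 60 seconds into timestamped files, so consecutive
# dumps can be compared later with `jeprof --base=<earlier_dump>`.
mkdir -p dumps
while true; do
    curl -s localhost:4000/v1/prof/mem > "dumps/greptime-$(date +%s).hprof"
    sleep 60
done
```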
## Analyze profiling data with flamegraph
To create a flamegraph from the dumped profiling data:
```bash
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
```
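For example, a hypothetical invocation comparing two dumps produced by the loop above, using the earlier one as the baseline (the file names are placeholders):
```bash
# The resulting SVG highlights allocations made between the two dumps.
jeprof --svg ./target/debug/greptime \
    --base=dumps/greptime-1700000000.hprof \
    dumps/greptime-1700000060.hprof > output.svg
```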

View File

@@ -0,0 +1,66 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::path::PathBuf;
use common_error::prelude::{ErrorExt, StatusCode};
use snafu::{Backtrace, Snafu};
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Failed to read OPT_PROF"))]
ReadOptProf { source: tikv_jemalloc_ctl::Error },
#[snafu(display("Memory profiling is not enabled"))]
ProfilingNotEnabled,
#[snafu(display("Failed to build temp file from given path: {:?}", path))]
BuildTempPath { path: PathBuf, backtrace: Backtrace },
#[snafu(display("Failed to open temp file: {}", path))]
OpenTempFile {
path: String,
source: std::io::Error,
},
#[snafu(display("Failed to dump profiling data to temp file: {:?}", path))]
DumpProfileData {
path: PathBuf,
source: tikv_jemalloc_ctl::Error,
},
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ReadOptProf { .. } => StatusCode::Internal,
Error::ProfilingNotEnabled => StatusCode::InvalidArguments,
Error::BuildTempPath { .. } => StatusCode::Internal,
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
snafu::ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,74 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
use std::ffi::{c_char, CString};
use std::path::PathBuf;
use snafu::{ensure, ResultExt};
use tokio::io::AsyncReadExt;
use crate::error::{
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
ReadOptProfSnafu,
};
const PROF_DUMP: &[u8] = b"prof.dump\0";
const OPT_PROF: &[u8] = b"opt.prof\0";
pub async fn dump_profile() -> error::Result<Vec<u8>> {
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
let tmp_path = tempfile::tempdir().map_err(|_| {
BuildTempPathSnafu {
path: std::env::temp_dir(),
}
.build()
})?;
let mut path_buf = PathBuf::from(tmp_path.path());
path_buf.push("greptimedb.hprof");
let path = path_buf
.to_str()
.ok_or_else(|| BuildTempPathSnafu { path: &path_buf }.build())?
.to_string();
let mut bytes = CString::new(path.as_str())
.map_err(|_| BuildTempPathSnafu { path: &path_buf }.build())?
.into_bytes_with_nul();
{
// #safety: we always expect a valid temp file path to write profiling data to.
let ptr = bytes.as_mut_ptr() as *mut c_char;
unsafe {
tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr)
.context(DumpProfileDataSnafu { path: path_buf })?
}
}
let mut f = tokio::fs::File::open(path.as_str())
.await
.context(OpenTempFileSnafu { path: &path })?;
let mut buf = vec![];
f.read_to_end(&mut buf)
.await
.context(OpenTempFileSnafu { path })?;
Ok(buf)
}
fn is_prof_enabled() -> error::Result<bool> {
// safety: OPT_PROF variable, if present, is always a boolean value.
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
}

View File

@@ -19,5 +19,5 @@ tokio.workspace = true
uuid.workspace = true
[dev-dependencies]
common-test-util = { path = "../test-util" }
futures-util.workspace = true
tempdir = "0.3"

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::sync::Arc;
use common_error::prelude::*;
@@ -81,6 +82,21 @@ pub enum Error {
#[snafu(backtrace)]
source: BoxedError,
},
#[snafu(display("Procedure panics, procedure_id: {}", procedure_id))]
ProcedurePanic { procedure_id: ProcedureId },
#[snafu(display("Failed to wait watcher, source: {}", source))]
WaitWatcher {
source: tokio::sync::watch::error::RecvError,
backtrace: Backtrace,
},
#[snafu(display("Failed to execute procedure, source: {}", source))]
ProcedureExec {
source: Arc<Error>,
backtrace: Backtrace,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -95,10 +111,13 @@ impl ErrorExt for Error {
| Error::ListState { .. }
| Error::ReadState { .. }
| Error::FromJson { .. }
| Error::RetryLater { .. } => StatusCode::Internal,
| Error::RetryLater { .. }
| Error::WaitWatcher { .. } => StatusCode::Internal,
Error::LoaderConflict { .. } | Error::DuplicateProcedure { .. } => {
StatusCode::InvalidArguments
}
Error::ProcedurePanic { .. } => StatusCode::Unexpected,
Error::ProcedureExec { source, .. } => source.status_code(),
}
}

View File

@@ -18,9 +18,11 @@ pub mod error;
pub mod local;
mod procedure;
mod store;
pub mod watcher;
pub use crate::error::{Error, Result};
pub use crate::procedure::{
BoxedProcedure, Context, ContextProvider, LockKey, Procedure, ProcedureId, ProcedureManager,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status, Watcher,
ProcedureManagerRef, ProcedureState, ProcedureWithId, Status,
};
pub use crate::watcher::Watcher;

View File

@@ -334,7 +334,7 @@ impl LocalManager {
common_runtime::spawn_bg(async move {
// Run the root procedure.
let _ = runner.run().await;
runner.run().await;
});
Ok(watcher)
@@ -409,9 +409,9 @@ impl ProcedureManager for LocalManager {
/// Create a new [ProcedureMeta] for test purpose.
#[cfg(test)]
mod test_util {
use common_test_util::temp_dir::TempDir;
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
@@ -430,7 +430,7 @@ mod test_util {
mod tests {
use common_error::mock::MockError;
use common_error::prelude::StatusCode;
use tempdir::TempDir;
use common_test_util::temp_dir::create_temp_dir;
use super::*;
use crate::error::Error;
@@ -447,9 +447,9 @@ mod tests {
assert!(ctx.try_insert_procedure(meta.clone()));
assert!(ctx.contains_procedure(meta.id));
assert_eq!(ProcedureState::Running, ctx.state(meta.id).unwrap());
assert!(ctx.state(meta.id).unwrap().is_running());
meta.set_state(ProcedureState::Done);
assert_eq!(ProcedureState::Done, ctx.state(meta.id).unwrap());
assert!(ctx.state(meta.id).unwrap().is_done());
}
#[test]
@@ -540,7 +540,7 @@ mod tests {
#[test]
fn test_register_loader() {
let dir = TempDir::new("register").unwrap();
let dir = create_temp_dir("register");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};
@@ -558,7 +558,7 @@ mod tests {
#[tokio::test]
async fn test_recover() {
let dir = TempDir::new("recover").unwrap();
let dir = create_temp_dir("recover");
let object_store = test_util::new_object_store(&dir);
let config = ManagerConfig {
object_store: object_store.clone(),
@@ -603,7 +603,7 @@ mod tests {
#[tokio::test]
async fn test_submit_procedure() {
let dir = TempDir::new("submit").unwrap();
let dir = create_temp_dir("submit");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};
@@ -634,7 +634,7 @@ mod tests {
// Wait for the procedure to finish.
let mut watcher = manager.procedure_watcher(procedure_id).unwrap();
watcher.changed().await.unwrap();
assert_eq!(ProcedureState::Done, *watcher.borrow());
assert!(watcher.borrow().is_done());
// Try to submit procedure with same id again.
let err = manager
@@ -649,7 +649,7 @@ mod tests {
#[tokio::test]
async fn test_state_changed_on_err() {
let dir = TempDir::new("on_err").unwrap();
let dir = create_temp_dir("on_err");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};
@@ -697,7 +697,7 @@ mod tests {
.unwrap();
// Wait for the notification.
watcher.changed().await.unwrap();
assert_eq!(ProcedureState::Failed, *watcher.borrow());
assert!(watcher.borrow().is_failed());
}
};

View File

@@ -18,7 +18,7 @@ use std::time::Duration;
use common_telemetry::logging;
use tokio::time;
use crate::error::{Error, Result};
use crate::error::{ProcedurePanicSnafu, Result};
use crate::local::{ManagerContext, ProcedureMeta, ProcedureMetaRef};
use crate::store::ProcedureStore;
use crate::{BoxedProcedure, Context, ProcedureId, ProcedureState, ProcedureWithId, Status};
@@ -30,7 +30,7 @@ enum ExecResult {
Continue,
Done,
RetryLater,
Failed(Error),
Failed,
}
#[cfg(test)]
@@ -48,7 +48,7 @@ impl ExecResult {
}
fn is_failed(&self) -> bool {
matches!(self, ExecResult::Failed(_))
matches!(self, ExecResult::Failed)
}
}
@@ -83,7 +83,11 @@ impl Drop for ProcedureGuard {
// Set the state to failed. This is useful in tests, as the runtime may not abort when the runner task panics.
// See https://github.com/tokio-rs/tokio/issues/2002 .
// We call set_panic_hook() in the application's main function, but our tests don't have this panic hook.
self.meta.set_state(ProcedureState::Failed);
let err = ProcedurePanicSnafu {
procedure_id: self.meta.id,
}
.build();
self.meta.set_state(ProcedureState::failed(Arc::new(err)));
}
// Notify parent procedure.
@@ -109,7 +113,7 @@ pub(crate) struct Runner {
impl Runner {
/// Run the procedure.
pub(crate) async fn run(mut self) -> Result<()> {
pub(crate) async fn run(mut self) {
// Ensure we can update the procedure state.
let guard = ProcedureGuard::new(self.meta.clone(), self.manager_ctx.clone());
@@ -129,12 +133,9 @@ impl Runner {
.await;
}
let mut result = Ok(());
// Execute the procedure. We need to release the lock whether the execution
// succeeds or fails.
if let Err(e) = self.execute_procedure_in_loop().await {
result = Err(e);
}
self.execute_procedure_in_loop().await;
// We can't remove the metadata of the procedure now as users and its parent might
// need to query its state.
@@ -155,11 +156,9 @@ impl Runner {
self.procedure.type_name(),
self.meta.id
);
result
}
async fn execute_procedure_in_loop(&mut self) -> Result<()> {
async fn execute_procedure_in_loop(&mut self) {
let ctx = Context {
procedure_id: self.meta.id,
provider: self.manager_ctx.clone(),
@@ -168,11 +167,10 @@ impl Runner {
loop {
match self.execute_once(&ctx).await {
ExecResult::Continue => (),
ExecResult::Done => return Ok(()),
ExecResult::Done | ExecResult::Failed => return,
ExecResult::RetryLater => {
self.wait_on_err().await;
}
ExecResult::Failed(e) => return Err(e),
}
}
}
@@ -222,14 +220,14 @@ impl Runner {
return ExecResult::RetryLater;
}
self.meta.set_state(ProcedureState::Failed);
self.meta.set_state(ProcedureState::failed(Arc::new(e)));
// Write rollback key so we can skip this procedure while recovering procedures.
if self.rollback_procedure().await.is_err() {
return ExecResult::RetryLater;
}
ExecResult::Failed(e)
ExecResult::Failed
}
}
}
@@ -397,14 +395,14 @@ mod tests {
use common_error::ext::PlainError;
use common_error::mock::MockError;
use common_error::prelude::StatusCode;
use common_test_util::temp_dir::create_temp_dir;
use futures_util::future::BoxFuture;
use futures_util::{FutureExt, TryStreamExt};
use object_store::ObjectStore;
use tempdir::TempDir;
use super::*;
use crate::local::test_util;
use crate::{ContextProvider, LockKey, Procedure};
use crate::{ContextProvider, Error, LockKey, Procedure};
const ROOT_ID: &str = "9f805a1f-05f7-490c-9f91-bd56e3cc54c1";
@@ -513,7 +511,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("normal").unwrap();
let dir = create_temp_dir("normal");
let meta = normal.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -561,7 +559,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("suspend").unwrap();
let dir = create_temp_dir("suspend");
let meta = suspend.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -630,9 +628,14 @@ mod tests {
// Wait for subprocedures.
let mut all_child_done = true;
for id in children_ids {
if ctx.provider.procedure_state(id).await.unwrap()
!= Some(ProcedureState::Done)
{
let is_not_done = ctx
.provider
.procedure_state(id)
.await
.unwrap()
.map(|s| !s.is_done())
.unwrap_or(true);
if is_not_done {
all_child_done = false;
}
}
@@ -655,7 +658,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("parent").unwrap();
let dir = create_temp_dir("parent");
let meta = parent.new_meta(ROOT_ID);
let procedure_id = meta.id;
@@ -668,7 +671,7 @@ mod tests {
// Replace the manager ctx.
runner.manager_ctx = manager_ctx;
runner.run().await.unwrap();
runner.run().await;
// Check files on store.
for child_id in children_ids {
@@ -697,7 +700,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("fail").unwrap();
let dir = create_temp_dir("fail");
let meta = fail.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -706,7 +709,7 @@ mod tests {
let res = runner.execute_once(&ctx).await;
assert!(res.is_failed(), "{res:?}");
assert_eq!(ProcedureState::Failed, meta.state());
assert!(meta.state().is_failed());
check_files(&object_store, ctx.procedure_id, &["0000000000.rollback"]).await;
}
@@ -732,7 +735,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("retry_later").unwrap();
let dir = create_temp_dir("retry_later");
let meta = retry_later.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -741,11 +744,11 @@ mod tests {
let res = runner.execute_once(&ctx).await;
assert!(res.is_retry_later(), "{res:?}");
assert_eq!(ProcedureState::Running, meta.state());
assert!(meta.state().is_running());
let res = runner.execute_once(&ctx).await;
assert!(res.is_done(), "{res:?}");
assert_eq!(ProcedureState::Done, meta.state());
assert!(meta.state().is_done());
check_files(&object_store, ctx.procedure_id, &["0000000000.commit"]).await;
}
@@ -779,7 +782,8 @@ mod tests {
} else {
// Wait for subprocedures.
let state = ctx.provider.procedure_state(child_id).await.unwrap();
if state == Some(ProcedureState::Failed) {
let is_failed = state.map(|s| s.is_failed()).unwrap_or(false);
if is_failed {
// The parent procedure aborts itself if the child procedure failed.
Err(Error::from_error_ext(PlainError::new(
"subprocedure failed".to_string(),
@@ -802,7 +806,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("child_err").unwrap();
let dir = create_temp_dir("child_err");
let meta = parent.new_meta(ROOT_ID);
let object_store = test_util::new_object_store(&dir);
@@ -811,12 +815,13 @@ mod tests {
let manager_ctx = Arc::new(ManagerContext::new());
// Manually add this procedure to the manager ctx.
assert!(manager_ctx.try_insert_procedure(meta));
assert!(manager_ctx.try_insert_procedure(meta.clone()));
// Replace the manager ctx.
runner.manager_ctx = manager_ctx;
// Run the runner and execute the procedure.
let err = runner.run().await.unwrap_err();
assert!(err.to_string().contains("subprocedure failed"), "{err}");
runner.run().await;
let err = meta.state().error().unwrap().to_string();
assert!(err.contains("subprocedure failed"), "{err}");
}
}

View File

@@ -20,10 +20,10 @@ use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use smallvec::{smallvec, SmallVec};
use snafu::{ResultExt, Snafu};
use tokio::sync::watch::Receiver;
use uuid::Uuid;
use crate::error::Result;
use crate::error::{Error, Result};
use crate::watcher::Watcher;
/// Procedure execution status.
#[derive(Debug)]
@@ -198,20 +198,47 @@ impl FromStr for ProcedureId {
/// Loader to recover the [Procedure] instance from serialized data.
pub type BoxedProcedureLoader = Box<dyn Fn(&str) -> Result<BoxedProcedure> + Send>;
// TODO(yingwen): Find a way to return the error message if the procedure is failed.
/// State of a submitted procedure.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Default, Clone)]
pub enum ProcedureState {
/// The procedure is running.
#[default]
Running,
/// The procedure is finished.
Done,
/// The procedure is failed and cannot proceed anymore.
Failed,
Failed { error: Arc<Error> },
}
/// Watcher to watch procedure state.
pub type Watcher = Receiver<ProcedureState>;
impl ProcedureState {
/// Returns a [ProcedureState] with failed state.
pub fn failed(error: Arc<Error>) -> ProcedureState {
ProcedureState::Failed { error }
}
/// Returns true if the procedure state is running.
pub fn is_running(&self) -> bool {
matches!(self, ProcedureState::Running)
}
/// Returns true if the procedure state is done.
pub fn is_done(&self) -> bool {
matches!(self, ProcedureState::Done)
}
/// Returns true if the procedure state failed.
pub fn is_failed(&self) -> bool {
matches!(self, ProcedureState::Failed { .. })
}
/// Returns the error.
pub fn error(&self) -> Option<&Arc<Error>> {
match self {
ProcedureState::Failed { error } => Some(error),
_ => None,
}
}
}
// TODO(yingwen): Shutdown
/// `ProcedureManager` executes [Procedure] submitted to it.
@@ -244,6 +271,9 @@ pub type ProcedureManagerRef = Arc<dyn ProcedureManager>;
#[cfg(test)]
mod tests {
use common_error::mock::MockError;
use common_error::prelude::StatusCode;
use super::*;
#[test]
@@ -311,4 +341,17 @@ mod tests {
let parsed = serde_json::from_str(&json).unwrap();
assert_eq!(id, parsed);
}
#[test]
fn test_procedure_state() {
assert!(ProcedureState::Running.is_running());
assert!(ProcedureState::Running.error().is_none());
assert!(ProcedureState::Done.is_done());
let state = ProcedureState::failed(Arc::new(Error::external(MockError::new(
StatusCode::Unexpected,
))));
assert!(state.is_failed());
assert!(state.error().is_some());
}
}

View File

@@ -246,9 +246,9 @@ impl ParsedKey {
#[cfg(test)]
mod tests {
use async_trait::async_trait;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
use crate::{Context, LockKey, Procedure, Status};
@@ -373,7 +373,7 @@ mod tests {
#[tokio::test]
async fn test_store_procedure() {
let dir = TempDir::new("store_procedure").unwrap();
let dir = create_temp_dir("store_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -398,7 +398,7 @@ mod tests {
#[tokio::test]
async fn test_commit_procedure() {
let dir = TempDir::new("commit_procedure").unwrap();
let dir = create_temp_dir("commit_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -416,7 +416,7 @@ mod tests {
#[tokio::test]
async fn test_rollback_procedure() {
let dir = TempDir::new("rollback_procedure").unwrap();
let dir = create_temp_dir("rollback_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -434,7 +434,7 @@ mod tests {
#[tokio::test]
async fn test_load_messages() {
let dir = TempDir::new("load_messages").unwrap();
let dir = create_temp_dir("load_messages");
let store = procedure_store_for_test(&dir);
// store 3 steps

View File

@@ -115,15 +115,15 @@ impl StateStore for ObjectStateStore {
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
#[tokio::test]
async fn test_object_state_store() {
let dir = TempDir::new("state_store").unwrap();
let dir = create_temp_dir("state_store");
let store_dir = dir.path().to_str().unwrap();
let accessor = Builder::default().root(store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor).finish();

View File

@@ -0,0 +1,38 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use snafu::ResultExt;
use tokio::sync::watch::Receiver;
use crate::error::{ProcedureExecSnafu, Result, WaitWatcherSnafu};
use crate::procedure::ProcedureState;
/// Watcher to watch procedure state.
pub type Watcher = Receiver<ProcedureState>;
/// Waits on the [Watcher] until the [ProcedureState] is done, returning an error if the procedure failed.
pub async fn wait(watcher: &mut Watcher) -> Result<()> {
loop {
watcher.changed().await.context(WaitWatcherSnafu)?;
match &*watcher.borrow() {
ProcedureState::Running => (),
ProcedureState::Done => {
return Ok(());
}
ProcedureState::Failed { error } => {
return Err(error.clone()).context(ProcedureExecSnafu);
}
}
}
}

View File

@@ -239,7 +239,6 @@ impl From<BoxedError> for Error {
#[cfg(test)]
mod tests {
use datatypes::arrow::error::ArrowError;
use snafu::GenerateImplicitData;
use super::*;
@@ -286,7 +285,7 @@ mod tests {
fn test_convert_df_recordbatch_stream_error() {
let result: std::result::Result<i32, common_recordbatch::error::Error> =
Err(common_recordbatch::error::Error::PollStream {
source: ArrowError::DivideByZero,
source: DataFusionError::Internal("blabla".to_string()),
backtrace: Backtrace::generate(),
});
let error = result

View File

@@ -315,7 +315,11 @@ mod test {
.unwrap()
.build()
.unwrap();
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
let physical_plan = ctx
.state()
.create_physical_plan(&logical_plan)
.await
.unwrap();
let df_recordbatches = collect(physical_plan, Arc::new(TaskContext::from(&ctx)))
.await
.unwrap();

View File

@@ -18,9 +18,9 @@ use std::sync::Arc;
use std::task::{Context, Poll};
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::error::Result as DfResult;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::DataFusionError;
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
use datatypes::schema::{Schema, SchemaRef};
use futures::ready;
use snafu::ResultExt;
@@ -57,14 +57,14 @@ impl DfRecordBatchStream for DfRecordBatchStreamAdapter {
}
impl Stream for DfRecordBatchStreamAdapter {
type Item = ArrowResult<DfRecordBatch>;
type Item = DfResult<DfRecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(recordbatch)) => match recordbatch {
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.into_df_record_batch()))),
Err(e) => Poll::Ready(Some(Err(ArrowError::ExternalError(Box::new(e))))),
Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
},
Poll::Ready(None) => Poll::Ready(None),
}
@@ -242,12 +242,12 @@ mod test {
)]));
let batch1 = RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice(&[1])) as _],
vec![Arc::new(Int32Vector::from_slice([1])) as _],
)
.unwrap();
let batch2 = RecordBatch::new(
schema.clone(),
vec![Arc::new(Int32Vector::from_slice(&[2])) as _],
vec![Arc::new(Int32Vector::from_slice([2])) as _],
)
.unwrap();
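
The adapter change above swaps the stream's item error type from ArrowError to DataFusionError, boxing the source error into DataFusionError::External. A small sketch of that conversion on a plain futures stream, assuming the datafusion-common and futures crates (SourceError is a hypothetical stand-in for the record batch error):

use datafusion_common::DataFusionError;
use futures::{stream, StreamExt, TryStreamExt};

// Hypothetical source error type, standing in for common_recordbatch's error.
#[derive(Debug)]
struct SourceError(String);

impl std::fmt::Display for SourceError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl std::error::Error for SourceError {}

fn main() {
    let src = stream::iter(vec![Ok(1u32), Err(SourceError("poll failed".into()))]);
    // Same conversion as poll_next above: box the source error into
    // DataFusionError::External instead of ArrowError::ExternalError.
    let mapped = src.map_err(|e| DataFusionError::External(Box::new(e)));
    let results: Vec<Result<u32, DataFusionError>> = futures::executor::block_on(mapped.collect());
    assert!(results[0].is_ok());
    assert!(results[1].is_err());
}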

View File

@@ -55,7 +55,7 @@ pub enum Error {
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
source: datafusion::error::DataFusionError,
backtrace: Backtrace,
},

View File

@@ -204,7 +204,7 @@ mod tests {
);
assert!(result.is_err());
let v: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let v: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
let expected = vec![RecordBatch::new(schema.clone(), vec![v.clone()]).unwrap()];
let r = RecordBatches::try_from_columns(schema, vec![v]).unwrap();
assert_eq!(r.take(), expected);
@@ -216,7 +216,7 @@ mod tests {
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let column_c = ColumnSchema::new("c", ConcreteDataType::boolean_datatype(), false);
let va: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let va: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
let vb: VectorRef = Arc::new(StringVector::from(vec!["hello", "world"]));
let vc: VectorRef = Arc::new(BooleanVector::from(vec![true, false]));
@@ -255,11 +255,11 @@ mod tests {
let column_b = ColumnSchema::new("b", ConcreteDataType::string_datatype(), false);
let schema = Arc::new(Schema::new(vec![column_a, column_b]));
let va1: VectorRef = Arc::new(Int32Vector::from_slice(&[1, 2]));
let va1: VectorRef = Arc::new(Int32Vector::from_slice([1, 2]));
let vb1: VectorRef = Arc::new(StringVector::from(vec!["a", "b"]));
let batch1 = RecordBatch::new(schema.clone(), vec![va1, vb1]).unwrap();
let va2: VectorRef = Arc::new(Int32Vector::from_slice(&[3, 4, 5]));
let va2: VectorRef = Arc::new(Int32Vector::from_slice([3, 4, 5]));
let vb2: VectorRef = Arc::new(StringVector::from(vec!["c", "d", "e"]));
let batch2 = RecordBatch::new(schema.clone(), vec![va2, vb2]).unwrap();

View File

@@ -189,8 +189,8 @@ mod tests {
]));
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
let c1 = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
let c2 = Arc::new(UInt32Vector::from_slice(&[4, 5, 6]));
let c1 = Arc::new(UInt32Vector::from_slice([1, 2, 3]));
let c2 = Arc::new(UInt32Vector::from_slice([4, 5, 6]));
let columns: Vec<VectorRef> = vec![c1, c2];
let batch = RecordBatch::new(schema.clone(), columns.clone()).unwrap();
@@ -222,7 +222,7 @@ mod tests {
let schema = Arc::new(Schema::try_new(column_schemas).unwrap());
let numbers: Vec<u32> = (0..10).collect();
let columns = vec![Arc::new(UInt32Vector::from_slice(&numbers)) as VectorRef];
let columns = vec![Arc::new(UInt32Vector::from_slice(numbers)) as VectorRef];
let batch = RecordBatch::new(schema, columns).unwrap();
let output = serde_json::to_string(&batch).unwrap();

View File

@@ -5,6 +5,8 @@ edition.workspace = true
license.workspace = true
[dependencies]
async-recursion = "1.0"
async-trait.workspace = true
bytes = "1.1"
catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
@@ -15,6 +17,7 @@ datafusion-expr.workspace = true
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost.workspace = true
session = { path = "../../session" }
snafu.workspace = true
table = { path = "../../table" }

View File

@@ -635,8 +635,6 @@ mod utils {
Operator::Modulo => "modulo",
Operator::And => "and",
Operator::Or => "or",
Operator::Like => "like",
Operator::NotLike => "not_like",
Operator::IsDistinctFrom => "is_distinct_from",
Operator::IsNotDistinctFrom => "is_not_distinct_from",
Operator::RegexMatch => "regex_match",
@@ -649,8 +647,6 @@ mod utils {
Operator::BitwiseShiftRight => "bitwise_shift_right",
Operator::BitwiseShiftLeft => "bitwise_shift_left",
Operator::StringConcat => "string_concat",
Operator::ILike => "i_like",
Operator::NotILike => "not_i_like",
}
}

View File

@@ -14,16 +14,20 @@
use std::sync::Arc;
use async_recursion::async_recursion;
use async_trait::async_trait;
use bytes::{Buf, Bytes, BytesMut};
use catalog::table_source::DfTableSourceProvider;
use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use common_catalog::format_full_table_name;
use common_telemetry::debug;
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::common::{DFField, DFSchema};
use datafusion::common::{DFField, DFSchema, OwnedTableReference};
use datafusion::datasource::DefaultTableSource;
use datafusion::physical_plan::project_schema;
use datafusion_expr::{Filter, LogicalPlan, TableScan, TableSource};
use datafusion_expr::{Filter, LogicalPlan, TableScan};
use prost::Message;
use session::context::QueryContext;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::proto::expression::mask_expression::{StructItem, StructSelect};
use substrait_proto::proto::expression::MaskExpression;
@@ -37,8 +41,8 @@ use table::table::adapter::DfTableProviderAdapter;
use crate::context::ConvertorContext;
use crate::df_expr::{expression_from_df_expr, to_df_expr};
use crate::error::{
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error, InternalSnafu,
InvalidParametersSnafu, MissingFieldSnafu, SchemaNotMatchSnafu, TableNotFoundSnafu,
self, DFInternalSnafu, DecodeRelSnafu, EmptyPlanSnafu, EncodeRelSnafu, Error,
InvalidParametersSnafu, MissingFieldSnafu, ResolveTableSnafu, SchemaNotMatchSnafu,
UnknownPlanSnafu, UnsupportedExprSnafu, UnsupportedPlanSnafu,
};
use crate::schema::{from_schema, to_schema};
@@ -46,18 +50,19 @@ use crate::SubstraitPlan;
pub struct DFLogicalSubstraitConvertor;
#[async_trait]
impl SubstraitPlan for DFLogicalSubstraitConvertor {
type Error = Error;
type Plan = LogicalPlan;
fn decode<B: Buf + Send>(
async fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,
) -> Result<Self::Plan, Self::Error> {
let plan = Plan::decode(message).context(DecodeRelSnafu)?;
self.convert_plan(plan, catalog_manager)
self.convert_plan(plan, catalog_manager).await
}
fn encode(&self, plan: Self::Plan) -> Result<Bytes, Self::Error> {
@@ -71,7 +76,7 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor {
}
impl DFLogicalSubstraitConvertor {
fn convert_plan(
async fn convert_plan(
&self,
mut plan: Plan,
catalog_manager: CatalogManagerRef,
@@ -102,20 +107,25 @@ impl DFLogicalSubstraitConvertor {
.fail()?
};
self.rel_to_logical_plan(&mut ctx, Box::new(rel), catalog_manager)
// TODO(LFC): Create table provider from outside, respect "disallow_cross_schema_query" option in query engine state.
let mut table_provider =
DfTableSourceProvider::new(catalog_manager, false, &QueryContext::new());
self.rel_to_logical_plan(&mut ctx, Box::new(rel), &mut table_provider)
.await
}
fn rel_to_logical_plan(
#[async_recursion]
async fn rel_to_logical_plan(
&self,
ctx: &mut ConvertorContext,
rel: Box<Rel>,
catalog_manager: CatalogManagerRef,
table_provider: &mut DfTableSourceProvider,
) -> Result<LogicalPlan, Error> {
let rel_type = rel.rel_type.context(EmptyPlanSnafu)?;
// build logical plan
let logical_plan = match rel_type {
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, catalog_manager)?,
RelType::Read(read_rel) => self.convert_read_rel(ctx, read_rel, table_provider).await?,
RelType::Filter(filter) => {
let FilterRel {
common: _,
@@ -128,7 +138,7 @@ impl DFLogicalSubstraitConvertor {
field: "input",
plan: "Filter",
})?;
let input = Arc::new(self.rel_to_logical_plan(ctx, input, catalog_manager)?);
let input = Arc::new(self.rel_to_logical_plan(ctx, input, table_provider).await?);
let condition = condition.context(MissingFieldSnafu {
field: "condition",
@@ -191,11 +201,11 @@ impl DFLogicalSubstraitConvertor {
Ok(logical_plan)
}
fn convert_read_rel(
async fn convert_read_rel(
&self,
ctx: &mut ConvertorContext,
read_rel: Box<ReadRel>,
catalog_manager: CatalogManagerRef,
table_provider: &mut DfTableSourceProvider,
) -> Result<LogicalPlan, Error> {
// Extract the catalog, schema and table name from NamedTable. Assume the first three are those names.
let read_type = read_rel.read_type.context(MissingFieldSnafu {
@@ -230,17 +240,17 @@ impl DFLogicalSubstraitConvertor {
.projection
.map(|mask_expr| self.convert_mask_expression(mask_expr));
// Get table handle from catalog manager
let table_ref = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.map_err(BoxedError::new)
.context(InternalSnafu)?
.context(TableNotFoundSnafu {
name: format!("{catalog_name}.{schema_name}.{table_name}"),
let table_ref = OwnedTableReference::Full {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table: table_name.clone(),
};
let adapter = table_provider
.resolve_table(table_ref)
.await
.with_context(|_| ResolveTableSnafu {
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
})?;
let adapter = Arc::new(DefaultTableSource::new(Arc::new(
DfTableProviderAdapter::new(table_ref),
)));
// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
let stored_schema = adapter.schema();
@@ -262,7 +272,7 @@ impl DFLogicalSubstraitConvertor {
};
// Calculate the projected schema
let qualified = &format!("{catalog_name}.{schema_name}.{table_name}");
let qualified = &format_full_table_name(&catalog_name, &schema_name, &table_name);
let projected_schema = Arc::new(
project_schema(&stored_schema, projection.as_ref())
.and_then(|x| {
@@ -281,7 +291,7 @@ impl DFLogicalSubstraitConvertor {
// TODO(ruihang): Support limit(fetch)
Ok(LogicalPlan::TableScan(TableScan {
table_name: format!("{catalog_name}.{schema_name}.{table_name}"),
table_name: qualified.to_string(),
source: adapter,
projection,
projected_schema,
@@ -314,7 +324,7 @@ impl DFLogicalSubstraitConvertor {
.fail()?,
LogicalPlan::Filter(filter) => {
let input = Some(Box::new(
self.logical_plan_to_rel(ctx, filter.input().clone())?,
self.logical_plan_to_rel(ctx, filter.input.clone())?,
));
let schema = plan
@@ -324,7 +334,7 @@ impl DFLogicalSubstraitConvertor {
.context(error::ConvertDfSchemaSnafu)?;
let condition = Some(Box::new(expression_from_df_expr(
ctx,
filter.predicate(),
&filter.predicate,
&schema,
)?));
@@ -396,7 +406,10 @@ impl DFLogicalSubstraitConvertor {
| LogicalPlan::Explain(_)
| LogicalPlan::Analyze(_)
| LogicalPlan::Extension(_)
| LogicalPlan::Prepare(_) => InvalidParametersSnafu {
| LogicalPlan::Prepare(_)
| LogicalPlan::Dml(_)
| LogicalPlan::DescribeTable(_)
| LogicalPlan::Unnest(_) => InvalidParametersSnafu {
reason: format!(
"Trying to convert DDL/DML plan to substrait proto, plan: {plan:?}",
),
@@ -524,6 +537,7 @@ mod test {
use catalog::{CatalogList, CatalogProvider, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion::common::{DFSchema, ToDFSchema};
use datafusion_expr::TableSource;
use datatypes::schema::RawSchema;
use table::requests::CreateTableRequest;
use table::test_util::{EmptyTable, MockTableEngine};
@@ -572,7 +586,7 @@ mod test {
let convertor = DFLogicalSubstraitConvertor;
let proto = convertor.encode(plan.clone()).unwrap();
let tripped_plan = convertor.decode(proto, catalog).unwrap();
let tripped_plan = convertor.decode(proto, catalog).await.unwrap();
assert_eq!(format!("{plan:?}"), format!("{tripped_plan:?}"));
}
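
Because decode is now async and async fns in traits are not stable on this toolchain, the trait and its impl both carry #[async_trait], and call sites gain an .await as in the round-trip test above. A self-contained sketch of that pattern with stand-in types (not the real SubstraitPlan trait), assuming the async-trait, bytes, and tokio crates:

use async_trait::async_trait;
use bytes::{Buf, Bytes};

// Stand-in plan and error types; the real trait lives in the substrait crate.
type Plan = String;
type PlanError = std::io::Error;

#[async_trait]
trait PlanCodec {
    // #[async_trait] desugars the async fn into a boxed, Send future,
    // since async fns in traits are not available on stable Rust here.
    async fn decode<B: Buf + Send>(&self, message: B) -> Result<Plan, PlanError>;
    fn encode(&self, plan: Plan) -> Result<Bytes, PlanError>;
}

struct Utf8Codec;

#[async_trait]
impl PlanCodec for Utf8Codec {
    async fn decode<B: Buf + Send>(&self, mut message: B) -> Result<Plan, PlanError> {
        let mut buf = vec![0u8; message.remaining()];
        message.copy_to_slice(&mut buf);
        String::from_utf8(buf)
            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
    }

    fn encode(&self, plan: Plan) -> Result<Bytes, PlanError> {
        Ok(Bytes::from(plan.into_bytes()))
    }
}

#[tokio::main]
async fn main() {
    let codec = Utf8Codec;
    let bytes = codec.encode("select 1".to_string()).unwrap();
    let plan = codec.decode(bytes).await.unwrap();
    assert_eq!(plan, "select 1");
}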

View File

@@ -105,6 +105,13 @@ pub enum Error {
#[snafu(backtrace)]
source: datatypes::error::Error,
},
#[snafu(display("Unable to resolve table: {table_name}, error: {source}"))]
ResolveTable {
table_name: String,
#[snafu(backtrace)]
source: catalog::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -127,6 +134,7 @@ impl ErrorExt for Error {
| Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments,
Error::DFInternal { .. } | Error::Internal { .. } => StatusCode::Internal,
Error::ConvertDfSchema { source } => source.status_code(),
Error::ResolveTable { source, .. } => source.status_code(),
}
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#![feature(let_chains)]
#![feature(trait_upcasting)]
mod context;
mod df_expr;
@@ -21,17 +22,19 @@ pub mod error;
mod schema;
mod types;
use async_trait::async_trait;
use bytes::{Buf, Bytes};
use catalog::CatalogManagerRef;
pub use crate::df_logical::DFLogicalSubstraitConvertor;
#[async_trait]
pub trait SubstraitPlan {
type Error: std::error::Error;
type Plan;
fn decode<B: Buf + Send>(
async fn decode<B: Buf + Send>(
&self,
message: B,
catalog_manager: CatalogManagerRef,

View File

@@ -0,0 +1,8 @@
[package]
name = "common-test-util"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
tempfile.workspace = true

View File

@@ -12,4 +12,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! GreptimeDB builtin functions
pub mod temp_dir;

View File

@@ -0,0 +1,23 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub use tempfile::{NamedTempFile, TempDir};
pub fn create_temp_dir(prefix: &str) -> TempDir {
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
}
pub fn create_named_temp_file() -> NamedTempFile {
NamedTempFile::new().unwrap()
}
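
A short usage sketch of the new helpers, assuming only the tempfile crate; the directory and file are deleted when the returned guards are dropped, which is why the tests elsewhere in this diff keep the TempDir binding alive for the whole test:

use std::fs;
use tempfile::{Builder, NamedTempFile, TempDir};

// Mirrors common-test-util's create_temp_dir: a prefixed directory that is
// removed when the returned TempDir guard goes out of scope.
fn create_temp_dir(prefix: &str) -> TempDir {
    Builder::new().prefix(prefix).tempdir().unwrap()
}

fn main() {
    let dir = create_temp_dir("state_store");
    let file_path = dir.path().join("manifest");
    fs::write(&file_path, b"demo").unwrap();
    assert!(file_path.exists());

    let named = NamedTempFile::new().unwrap();
    println!("temp file at {:?}", named.path());
    // Both the directory and the named file are cleaned up on drop.
}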

View File

@@ -9,6 +9,7 @@ default = ["python"]
python = ["dep:script"]
[dependencies]
async-compat = "0.2"
async-stream.workspace = true
async-trait.workspace = true
api = { path = "../api" }
@@ -28,9 +29,11 @@ common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util.workspace = true
hyper = { version = "0.14", features = ["full"] }
humantime-serde = "1.1"
log-store = { path = "../log-store" }
@@ -42,6 +45,7 @@ object-store = { path = "../object-store" }
pin-project = "1.0"
prost.workspace = true
query = { path = "../query" }
regex = "1.6"
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
@@ -59,11 +63,12 @@ tokio-stream = { version = "0.1", features = ["net"] }
tonic.workspace = true
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
url = "2.3.1"
[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }
common-test-util = { path = "../common/test-util" }
common-query = { path = "../common/query" }
datafusion-common.workspace = true
tempdir = "0.3"
toml = "0.5"

View File

@@ -21,6 +21,7 @@ use datafusion::parquet;
use datatypes::prelude::ConcreteDataType;
use storage::error::Error as StorageError;
use table::error::Error as TableError;
use url::ParseError;
use crate::datanode::ObjectStoreConfig;
@@ -190,6 +191,12 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Failed to build backend, source: {}", source))]
BuildBackend {
source: object_store::Error,
backtrace: Backtrace,
},
#[snafu(display("Runtime resource error, source: {}", source))]
RuntimeResource {
#[snafu(backtrace)]
@@ -199,6 +206,30 @@ pub enum Error {
#[snafu(display("Invalid SQL, error: {}", msg))]
InvalidSql { msg: String },
#[snafu(display("Invalid url: {}, error :{}", url, source))]
InvalidUrl { url: String, source: ParseError },
#[snafu(display("Invalid filepath: {}", path))]
InvalidPath { path: String },
#[snafu(display("Invalid connection: {}", msg))]
InvalidConnection { msg: String },
#[snafu(display("Unsupported backend protocol: {}", protocol))]
UnsupportedBackendProtocol { protocol: String },
#[snafu(display("Failed to regex, source: {}", source))]
BuildRegex {
backtrace: Backtrace,
source: regex::Error,
},
#[snafu(display("Failed to parse the data, source: {}", source))]
ParseDataTypes {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Not support SQL, error: {}", msg))]
NotSupportSql { msg: String },
@@ -371,6 +402,22 @@ pub enum Error {
source: common_query::error::Error,
},
#[snafu(display(
"File Schema mismatch, expected table schema: {} but found :{}",
table_schema,
file_schema
))]
InvalidSchema {
table_schema: String,
file_schema: String,
},
#[snafu(display("Failed to read parquet file, source: {}", source))]
ReadParquet {
source: parquet::errors::ParquetError,
backtrace: Backtrace,
},
#[snafu(display("Failed to write parquet file, source: {}", source))]
WriteParquet {
source: parquet::errors::ParquetError,
@@ -379,10 +426,23 @@ pub enum Error {
#[snafu(display("Failed to poll stream, source: {}", source))]
PollStream {
source: datatypes::arrow::error::ArrowError,
source: datafusion_common::DataFusionError,
backtrace: Backtrace,
},
#[snafu(display("Failed to build parquet record batch stream, source: {}", source))]
BuildParquetRecordBatchStream {
backtrace: Backtrace,
source: parquet::errors::ParquetError,
},
#[snafu(display("Failed to read object in path: {}, source: {}", path, source))]
ReadObject {
path: String,
backtrace: Backtrace,
source: object_store::Error,
},
#[snafu(display("Failed to write object into path: {}, source: {}", path, source))]
WriteObject {
path: String,
@@ -390,6 +450,13 @@ pub enum Error {
source: object_store::Error,
},
#[snafu(display("Failed to lists object in path: {}, source: {}", path, source))]
ListObjects {
path: String,
backtrace: Backtrace,
source: object_store::Error,
},
#[snafu(display("Unrecognized table option: {}", source))]
UnrecognizedTableOption {
#[snafu(backtrace)]
@@ -402,23 +469,18 @@ pub enum Error {
source: common_procedure::error::Error,
},
#[snafu(display("Failed to submit procedure, source: {}", source))]
#[snafu(display("Failed to submit procedure {}, source: {}", procedure_id, source))]
SubmitProcedure {
procedure_id: ProcedureId,
#[snafu(backtrace)]
source: common_procedure::error::Error,
},
#[snafu(display("Failed to wait procedure done, source: {}", source))]
#[snafu(display("Failed to wait procedure {} done, source: {}", procedure_id, source))]
WaitProcedure {
source: tokio::sync::watch::error::RecvError,
backtrace: Backtrace,
},
// TODO(yingwen): Use procedure's error.
#[snafu(display("Failed to execute procedure, procedure_id: {}", procedure_id))]
ProcedureExec {
procedure_id: ProcedureId,
backtrace: Backtrace,
#[snafu(backtrace)]
source: common_procedure::error::Error,
},
}
@@ -455,6 +517,11 @@ impl ErrorExt for Error {
ColumnValuesNumberMismatch { .. }
| ColumnTypeMismatch { .. }
| InvalidSql { .. }
| InvalidUrl { .. }
| InvalidPath { .. }
| InvalidConnection { .. }
| UnsupportedBackendProtocol { .. }
| BuildRegex { .. }
| NotSupportSql { .. }
| KeyColumnNotFound { .. }
| IllegalPrimaryKeysDef { .. }
@@ -480,11 +547,19 @@ impl ErrorExt for Error {
| RenameTable { .. }
| Catalog { .. }
| MissingRequiredField { .. }
| BuildParquetRecordBatchStream { .. }
| InvalidSchema { .. }
| ParseDataTypes { .. }
| IncorrectInternalState { .. } => StatusCode::Internal,
InitBackend { .. } | WriteParquet { .. } | PollStream { .. } | WriteObject { .. } => {
StatusCode::StorageUnavailable
}
BuildBackend { .. }
| InitBackend { .. }
| ReadParquet { .. }
| WriteParquet { .. }
| PollStream { .. }
| ReadObject { .. }
| WriteObject { .. }
| ListObjects { .. } => StatusCode::StorageUnavailable,
OpenLogStore { source } => source.status_code(),
StartScriptManager { source } => source.status_code(),
OpenStorageEngine { source } => source.status_code(),
@@ -499,7 +574,7 @@ impl ErrorExt for Error {
RecoverProcedure { source, .. } | SubmitProcedure { source, .. } => {
source.status_code()
}
WaitProcedure { .. } | ProcedureExec { .. } => StatusCode::Internal,
WaitProcedure { source, .. } => source.status_code(),
}
}
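
For readers less familiar with the snafu pattern used by the variants added above: each #[snafu(display(...))] variant gets a generated *Snafu context selector that call sites attach with .context(...), .build(), or .fail(). A minimal sketch with two stand-in variants (no backtraces), assuming snafu 0.7 and the url crate:

use snafu::{ResultExt, Snafu};
use url::Url;

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Invalid url: {url}, error: {source}"))]
    InvalidUrl { url: String, source: url::ParseError },

    #[snafu(display("Unsupported backend protocol: {protocol}"))]
    UnsupportedBackendProtocol { protocol: String },
}

fn parse(input: &str) -> Result<Url, Error> {
    // .context(...) wraps the source error and fills in the extra fields;
    // non-source selector fields accept anything convertible via Into.
    Url::parse(input).context(InvalidUrlSnafu { url: input })
}

fn main() {
    let err = parse("not a url").unwrap_err();
    println!("{err}");

    // Variants without a source are built directly from their selector.
    let err = UnsupportedBackendProtocolSnafu { protocol: "gcs" }.build();
    println!("{err}");
}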

View File

@@ -106,7 +106,7 @@ impl HeartbeatTask {
let mut tx = Self::create_streams(&meta_client, running.clone()).await?;
common_runtime::spawn_bg(async move {
while running.load(Ordering::Acquire) {
let region_num = match region_number(&catalog_manager_clone) {
let region_num = match region_number(&catalog_manager_clone).await {
Ok(region_num) => region_num as i64,
Err(e) => {
error!("failed to get region number, err: {e:?}");

View File

@@ -177,7 +177,6 @@ impl Instance {
};
let procedure_manager = create_procedure_manager(&opts.procedure).await?;
// Recover procedures.
if let Some(procedure_manager) = &procedure_manager {
table_engine.register_procedure_loaders(&**procedure_manager);
table_procedure::register_procedure_loaders(
@@ -187,6 +186,7 @@ impl Instance {
&**procedure_manager,
);
// Recover procedures.
procedure_manager
.recover()
.await
@@ -423,7 +423,7 @@ pub(crate) async fn create_log_store(wal_config: &WalConfig) -> Result<RaftEngin
Ok(logstore)
}
async fn create_procedure_manager(
pub(crate) async fn create_procedure_manager(
procedure_config: &Option<ProcedureConfig>,
) -> Result<Option<ProcedureManagerRef>> {
let Some(procedure_config) = procedure_config else {

View File

@@ -45,6 +45,7 @@ impl Instance {
pub(crate) async fn execute_logical(&self, plan_bytes: Vec<u8>) -> Result<Output> {
let logical_plan = DFLogicalSubstraitConvertor
.decode(plan_bytes.as_slice(), self.catalog_manager.clone())
.await
.context(DecodeLogicalPlanSnafu)?;
self.query_engine
@@ -74,6 +75,7 @@ impl Instance {
let table = self
.catalog_manager
.table(catalog, schema, table_name)
.await
.context(error::CatalogSnafu)?
.context(error::TableNotFoundSnafu { table_name })?;
@@ -96,6 +98,7 @@ impl Instance {
DdlExpr::Alter(expr) => self.handle_alter(expr).await,
DdlExpr::CreateDatabase(expr) => self.handle_create_database(expr, query_ctx).await,
DdlExpr::DropTable(expr) => self.handle_drop_table(expr).await,
DdlExpr::FlushTable(_) => todo!(),
}
}
}
@@ -287,9 +290,9 @@ mod test {
+---------------------+-------+-----+
| ts | host | cpu |
+---------------------+-------+-----+
| 2022-12-30T07:09:00 | host1 | 1 |
| 2022-12-30T07:09:00 | host1 | 1.0 |
| 2022-12-30T07:09:01 | host2 | |
| 2022-12-30T07:09:02 | host3 | 3 |
| 2022-12-30T07:09:02 | host3 | 3.0 |
+---------------------+-------+-----+";
assert_eq!(recordbatches.pretty_print().unwrap(), expected);
}
@@ -325,7 +328,7 @@ mod test {
+---------------------+-------+------+--------+
| ts | host | cpu | memory |
+---------------------+-------+------+--------+
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024 |
| 2022-12-28T04:17:05 | host1 | 66.6 | 1024.0 |
| 2022-12-28T04:17:06 | host2 | 88.8 | 333.3 |
+---------------------+-------+------+--------+";
let actual = recordbatch.pretty_print().unwrap();

View File

@@ -24,15 +24,18 @@ use datatypes::schema::Schema;
use futures::StreamExt;
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
use servers::error as server_error;
use servers::promql::PromqlHandler;
use servers::prom::PromHandler;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::{QueryContext, QueryContextRef};
use snafu::prelude::*;
use sql::ast::ObjectName;
use sql::statements::copy::CopyTable;
use sql::statements::statement::Statement;
use sql::statements::tql::Tql;
use table::engine::TableReference;
use table::requests::{CopyTableRequest, CreateDatabaseRequest, DropTableRequest};
use table::requests::{
CopyTableFromRequest, CopyTableRequest, CreateDatabaseRequest, DropTableRequest,
};
use crate::error::{self, BumpTableIdSnafu, ExecuteSqlSnafu, Result, TableIdProviderNotFoundSnafu};
use crate::instance::Instance;
@@ -51,6 +54,7 @@ impl Instance {
let logical_plan = self
.query_engine
.statement_to_plan(stmt, query_ctx)
.await
.context(ExecuteSqlSnafu)?;
self.query_engine
@@ -183,22 +187,39 @@ impl Instance {
Ok(Output::RecordBatches(RecordBatches::empty()))
}
QueryStatement::Sql(Statement::Copy(copy_table)) => {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(copy_table.table_name(), query_ctx.clone())?;
let file_name = copy_table.file_name().to_string();
QueryStatement::Sql(Statement::Copy(copy_table)) => match copy_table {
CopyTable::To(copy_table) => {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(copy_table.table_name(), query_ctx.clone())?;
let file_name = copy_table.file_name().to_string();
let req = CopyTableRequest {
catalog_name,
schema_name,
table_name,
file_name,
};
let req = CopyTableRequest {
catalog_name,
schema_name,
table_name,
file_name,
};
self.sql_handler
.execute(SqlRequest::CopyTable(req), query_ctx)
.await
}
self.sql_handler
.execute(SqlRequest::CopyTable(req), query_ctx)
.await
}
CopyTable::From(copy_table) => {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(&copy_table.table_name, query_ctx.clone())?;
let req = CopyTableFromRequest {
catalog_name,
schema_name,
table_name,
connection: copy_table.connection,
pattern: copy_table.pattern,
from: copy_table.from,
};
self.sql_handler
.execute(SqlRequest::CopyTableFrom(req), query_ctx)
.await
}
},
QueryStatement::Sql(Statement::Tql(tql)) => self.execute_tql(tql, query_ctx).await,
}
}
@@ -216,6 +237,7 @@ impl Instance {
let logical_plan = self
.query_engine
.statement_to_plan(stmt, query_ctx)
.await
.context(ExecuteSqlSnafu)?;
self.query_engine
@@ -335,10 +357,15 @@ impl SqlQueryHandler for Instance {
.await
}
fn do_describe(&self, stmt: Statement, query_ctx: QueryContextRef) -> Result<Option<Schema>> {
async fn do_describe(
&self,
stmt: Statement,
query_ctx: QueryContextRef,
) -> Result<Option<Schema>> {
if let Statement::Query(_) = stmt {
self.query_engine
.describe(QueryStatement::Sql(stmt), query_ctx)
.await
.map(Some)
.context(error::DescribeStatementSnafu)
} else {
@@ -355,7 +382,7 @@ impl SqlQueryHandler for Instance {
}
#[async_trait]
impl PromqlHandler for Instance {
impl PromHandler for Instance {
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);

View File

@@ -31,9 +31,11 @@ use table::metadata::TableId;
use table::table::TableIdProvider;
use crate::datanode::DatanodeOptions;
use crate::error::{CatalogSnafu, Result};
use crate::error::{CatalogSnafu, RecoverProcedureSnafu, Result};
use crate::heartbeat::HeartbeatTask;
use crate::instance::{create_log_store, new_object_store, DefaultEngine, Instance};
use crate::instance::{
create_log_store, create_procedure_manager, new_object_store, DefaultEngine, Instance,
};
use crate::script::ScriptExecutor;
use crate::sql::SqlHandler;
@@ -93,6 +95,16 @@ impl Instance {
let script_executor =
ScriptExecutor::new(catalog_manager.clone(), query_engine.clone()).await?;
let procedure_manager = create_procedure_manager(&opts.procedure).await?;
if let Some(procedure_manager) = &procedure_manager {
table_engine.register_procedure_loaders(&**procedure_manager);
// Recover procedures.
procedure_manager
.recover()
.await
.context(RecoverProcedureSnafu)?;
}
Ok(Self {
query_engine: query_engine.clone(),
sql_handler: SqlHandler::new(
@@ -100,7 +112,7 @@ impl Instance {
catalog_manager.clone(),
query_engine.clone(),
table_engine,
None,
procedure_manager,
),
catalog_manager,
script_executor,

View File

@@ -33,6 +33,7 @@ use crate::instance::sql::table_idents_to_full_name;
mod alter;
mod copy_table;
mod copy_table_from;
mod create;
mod delete;
mod drop_table;
@@ -51,6 +52,7 @@ pub enum SqlRequest {
Explain(Box<Explain>),
Delete(Delete),
CopyTable(CopyTableRequest),
CopyTableFrom(CopyTableFromRequest),
}
// Handler to execute SQL except query
@@ -92,6 +94,7 @@ impl SqlHandler {
SqlRequest::DropTable(req) => self.drop_table(req).await,
SqlRequest::Delete(req) => self.delete(query_ctx.clone(), req).await,
SqlRequest::CopyTable(req) => self.copy_table(req).await,
SqlRequest::CopyTableFrom(req) => self.copy_table_from(req).await,
SqlRequest::ShowDatabases(req) => {
show_databases(req, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
}
@@ -105,6 +108,7 @@ impl SqlHandler {
let table = self
.catalog_manager
.table(&catalog, &schema, &table)
.await
.context(error::CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: req.name().to_string(),
@@ -146,6 +150,7 @@ mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_test_util::temp_dir::create_temp_dir;
use common_time::timestamp::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
@@ -166,7 +171,6 @@ mod tests {
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;
use tempdir::TempDir;
use super::*;
use crate::error::Error;
@@ -217,7 +221,7 @@ mod tests {
#[tokio::test]
async fn test_statement_to_request() {
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
let dir = create_temp_dir("setup_test_engine_and_table");
let store_dir = dir.path().to_string_lossy();
let accessor = Builder::default().root(&store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor).finish();
@@ -244,7 +248,7 @@ mod tests {
.unwrap(),
);
catalog_list.start().await.unwrap();
catalog_list
assert!(catalog_list
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -253,7 +257,7 @@ mod tests {
table: Arc::new(DemoTable),
})
.await
.unwrap();
.unwrap());
let factory = QueryEngineFactory::new(catalog_list.clone());
let query_engine = factory.query_engine();

View File

@@ -52,7 +52,10 @@ impl SqlHandler {
.context(error::TableScanExecSnafu)?;
let stream = Box::pin(DfRecordBatchStreamAdapter::new(stream));
let accessor = Builder::default().build().unwrap();
let accessor = Builder::default()
.root("/")
.build()
.context(error::BuildBackendSnafu)?;
let object_store = ObjectStore::new(accessor).finish();
let mut parquet_writer = ParquetWriter::new(req.file_name, stream, object_store);

View File

@@ -0,0 +1,445 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use async_compat::CompatExt;
use common_query::Output;
use common_recordbatch::error::DataTypesSnafu;
use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::vectors::{Helper, VectorRef};
use futures::future;
use futures_util::TryStreamExt;
use object_store::services::{Fs, S3};
use object_store::{Object, ObjectStore, ObjectStoreBuilder};
use regex::Regex;
use snafu::{ensure, ResultExt};
use table::engine::TableReference;
use table::requests::{CopyTableFromRequest, InsertRequest};
use tokio::io::BufReader;
use url::{ParseError, Url};
use crate::error::{self, Result};
use crate::sql::SqlHandler;
const S3_SCHEMA: &str = "S3";
const ENDPOINT_URL: &str = "ENDPOINT_URL";
const ACCESS_KEY_ID: &str = "ACCESS_KEY_ID";
const SECRET_ACCESS_KEY: &str = "SECRET_ACCESS_KEY";
const SESSION_TOKEN: &str = "SESSION_TOKEN";
const REGION: &str = "REGION";
const ENABLE_VIRTUAL_HOST_STYLE: &str = "ENABLE_VIRTUAL_HOST_STYLE";
impl SqlHandler {
pub(crate) async fn copy_table_from(&self, req: CopyTableFromRequest) -> Result<Output> {
let table_ref = TableReference {
catalog: &req.catalog_name,
schema: &req.schema_name,
table: &req.table_name,
};
let table = self.get_table(&table_ref)?;
let datasource = DataSource::new(&req.from, req.pattern, req.connection)?;
let objects = datasource.list().await?;
let mut buf: Vec<RecordBatch> = Vec::new();
for obj in objects.iter() {
let reader = obj.reader().await.context(error::ReadObjectSnafu {
path: &obj.path().to_string(),
})?;
let buf_reader = BufReader::new(reader.compat());
let builder = ParquetRecordBatchStreamBuilder::new(buf_reader)
.await
.context(error::ReadParquetSnafu)?;
ensure!(
builder.schema() == table.schema().arrow_schema(),
error::InvalidSchemaSnafu {
table_schema: table.schema().arrow_schema().to_string(),
file_schema: (*(builder.schema())).to_string()
}
);
let stream = builder
.build()
.context(error::BuildParquetRecordBatchStreamSnafu)?;
let chunk = stream
.try_collect::<Vec<_>>()
.await
.context(error::ReadParquetSnafu)?;
buf.extend(chunk.into_iter());
}
let fields = table
.schema()
.arrow_schema()
.fields()
.iter()
.map(|f| f.name().to_string())
.collect::<Vec<_>>();
// Vec<Columns>
let column_chunks = buf
.into_iter()
.map(|c| Helper::try_into_vectors(c.columns()).context(DataTypesSnafu))
.collect::<Vec<_>>();
let mut futs = Vec::with_capacity(column_chunks.len());
for column_chunk in column_chunks.into_iter() {
let column_chunk = column_chunk.context(error::ParseDataTypesSnafu)?;
let columns_values = fields
.iter()
.cloned()
.zip(column_chunk.into_iter())
.collect::<HashMap<String, VectorRef>>();
futs.push(table.insert(InsertRequest {
catalog_name: req.catalog_name.to_string(),
schema_name: req.schema_name.to_string(),
table_name: req.table_name.to_string(),
columns_values,
//TODO: support multi-regions
region_number: 0,
}))
}
let result = futures::future::try_join_all(futs)
.await
.context(error::InsertSnafu {
table_name: req.table_name.to_string(),
})?;
Ok(Output::AffectedRows(result.iter().sum()))
}
}
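
The core of copy_table_from above is streaming each parquet object into in-memory RecordBatches. A reduced sketch for a single local file, assuming the parquet crate with its async feature enabled (so tokio::fs::File satisfies the AsyncFileReader bound) and a matching arrow version; "demo.parquet" is a hypothetical path used only for illustration:

use arrow::record_batch::RecordBatch;
use futures::TryStreamExt;
use parquet::arrow::ParquetRecordBatchStreamBuilder;
use tokio::fs::File;

// Stream one parquet file into RecordBatches, mirroring the per-object loop
// in copy_table_from above (single local file, no schema comparison).
async fn read_parquet(path: &str) -> Result<Vec<RecordBatch>, Box<dyn std::error::Error>> {
    let file = File::open(path).await?;
    let builder = ParquetRecordBatchStreamBuilder::new(file).await?;
    println!("file schema: {:?}", builder.schema());
    let stream = builder.build()?;
    Ok(stream.try_collect().await?)
}

#[tokio::main]
async fn main() {
    match read_parquet("demo.parquet").await {
        Ok(batches) => println!("read {} record batches", batches.len()),
        Err(e) => eprintln!("copy-from style read failed: {e}"),
    }
}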
#[derive(Debug, Clone, PartialEq, Eq)]
enum Source {
Filename(String),
Dir,
}
struct DataSource {
object_store: ObjectStore,
source: Source,
path: String,
regex: Option<Regex>,
}
impl DataSource {
fn from_path(url: &str, regex: Option<Regex>) -> Result<DataSource> {
let result = if url.ends_with('/') {
Url::from_directory_path(url)
} else {
Url::from_file_path(url)
};
match result {
Ok(url) => {
let path = url.path();
let (path, filename) = DataSource::find_dir_and_filename(path);
let source = if let Some(filename) = filename {
Source::Filename(filename)
} else {
Source::Dir
};
let accessor = Fs::default()
.root(&path)
.build()
.context(error::BuildBackendSnafu)?;
Ok(DataSource {
object_store: ObjectStore::new(accessor).finish(),
source,
path,
regex,
})
}
Err(()) => error::InvalidPathSnafu {
path: url.to_string(),
}
.fail(),
}
}
fn build_s3_backend(
host: Option<&str>,
path: &str,
connection: HashMap<String, String>,
) -> Result<ObjectStore> {
let mut builder = S3::default();
builder.root(path);
if let Some(bucket) = host {
builder.bucket(bucket);
}
if let Some(endpoint) = connection.get(ENDPOINT_URL) {
builder.endpoint(endpoint);
}
if let Some(region) = connection.get(REGION) {
builder.region(region);
}
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
builder.access_key_id(key_id);
}
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
builder.secret_access_key(key);
}
if let Some(session_token) = connection.get(SESSION_TOKEN) {
builder.security_token(session_token);
}
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
let enable = enable_str.as_str().parse::<bool>().map_err(|e| {
error::InvalidConnectionSnafu {
msg: format!(
"failed to parse the option {}={}, {}",
ENABLE_VIRTUAL_HOST_STYLE, enable_str, e
),
}
.build()
})?;
if enable {
builder.enable_virtual_host_style();
}
}
let accessor = builder.build().context(error::BuildBackendSnafu)?;
Ok(ObjectStore::new(accessor).finish())
}
fn from_url(
url: Url,
regex: Option<Regex>,
connection: HashMap<String, String>,
) -> Result<DataSource> {
let host = url.host_str();
let path = url.path();
let schema = url.scheme();
let (dir, filename) = DataSource::find_dir_and_filename(path);
let source = if let Some(filename) = filename {
Source::Filename(filename)
} else {
Source::Dir
};
let object_store = match schema.to_uppercase().as_str() {
S3_SCHEMA => DataSource::build_s3_backend(host, &dir, connection)?,
_ => {
return error::UnsupportedBackendProtocolSnafu {
protocol: schema.to_string(),
}
.fail()
}
};
Ok(DataSource {
object_store,
source,
path: dir,
regex,
})
}
pub fn new(
url: &str,
pattern: Option<String>,
connection: HashMap<String, String>,
) -> Result<DataSource> {
let regex = if let Some(pattern) = pattern {
let regex = Regex::new(&pattern).context(error::BuildRegexSnafu)?;
Some(regex)
} else {
None
};
let result = Url::parse(url);
match result {
Ok(url) => DataSource::from_url(url, regex, connection),
Err(err) => {
if ParseError::RelativeUrlWithoutBase == err {
DataSource::from_path(url, regex)
} else {
Err(error::Error::InvalidUrl {
url: url.to_string(),
source: err,
})
}
}
}
}
pub async fn list(&self) -> Result<Vec<Object>> {
match &self.source {
Source::Dir => {
let streamer = self
.object_store
.object("/")
.list()
.await
.context(error::ListObjectsSnafu { path: &self.path })?;
streamer
.try_filter(|f| {
let res = if let Some(regex) = &self.regex {
regex.is_match(f.name())
} else {
true
};
future::ready(res)
})
.try_collect::<Vec<_>>()
.await
.context(error::ListObjectsSnafu { path: &self.path })
}
Source::Filename(filename) => {
let obj = self.object_store.object(filename);
Ok(vec![obj])
}
}
}
fn find_dir_and_filename(path: &str) -> (String, Option<String>) {
if path.is_empty() {
("/".to_string(), None)
} else if path.ends_with('/') {
(path.to_string(), None)
} else if let Some(idx) = path.rfind('/') {
(
path[..idx + 1].to_string(),
Some(path[idx + 1..].to_string()),
)
} else {
("/".to_string(), Some(path.to_string()))
}
}
}
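
The optional PATTERN clause surfaces here as a regex::Regex applied to object names during listing. A tiny sketch of that filter in isolation, assuming only the regex crate:

use regex::Regex;

// No pattern means "keep everything", mirroring DataSource::list above.
fn filter_names<'a>(names: &[&'a str], pattern: Option<&str>) -> Vec<&'a str> {
    let regex = pattern.map(|p| Regex::new(p).expect("invalid pattern"));
    names
        .iter()
        .copied()
        .filter(|name| regex.as_ref().map(|r| r.is_match(name)).unwrap_or(true))
        .collect()
}

fn main() {
    let names = ["demo.parquet_1_2", "other.parquet", "demo.csv"];
    assert_eq!(
        filter_names(&names, Some("demo.*")),
        vec!["demo.parquet_1_2", "demo.csv"]
    );
    assert_eq!(filter_names(&names, None).len(), 3);
}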
#[cfg(test)]
mod tests {
use url::Url;
use super::*;
#[test]
fn test_parse_uri() {
struct Test<'a> {
uri: &'a str,
expected_path: &'a str,
expected_schema: &'a str,
}
let tests = [
Test {
uri: "s3://bucket/to/path/",
expected_path: "/to/path/",
expected_schema: "s3",
},
Test {
uri: "fs:///to/path/",
expected_path: "/to/path/",
expected_schema: "fs",
},
Test {
uri: "fs:///to/path/file",
expected_path: "/to/path/file",
expected_schema: "fs",
},
];
for test in tests {
let parsed_uri = Url::parse(test.uri).unwrap();
assert_eq!(parsed_uri.path(), test.expected_path);
assert_eq!(parsed_uri.scheme(), test.expected_schema);
}
}
#[test]
fn test_parse_path_and_dir() {
let parsed = Url::from_file_path("/to/path/file").unwrap();
assert_eq!(parsed.path(), "/to/path/file");
let parsed = Url::from_directory_path("/to/path/").unwrap();
assert_eq!(parsed.path(), "/to/path/");
}
#[test]
fn test_find_dir_and_filename() {
struct Test<'a> {
path: &'a str,
expected_dir: &'a str,
expected_filename: Option<String>,
}
let tests = [
Test {
path: "to/path/",
expected_dir: "to/path/",
expected_filename: None,
},
Test {
path: "to/path/filename",
expected_dir: "to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/to/path/filename",
expected_dir: "/to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/",
expected_dir: "/",
expected_filename: None,
},
Test {
path: "filename",
expected_dir: "/",
expected_filename: Some("filename".into()),
},
Test {
path: "",
expected_dir: "/",
expected_filename: None,
},
];
for test in tests {
let (path, filename) = DataSource::find_dir_and_filename(test.path);
assert_eq!(test.expected_dir, path);
assert_eq!(test.expected_filename, filename)
}
}
}
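
DataSource::new above relies on url::ParseError::RelativeUrlWithoutBase to tell plain filesystem paths apart from scheme-qualified URLs. A self-contained sketch of that dispatch, assuming only the url crate (the Backend enum is a hypothetical stand-in for DataSource, not the real type):

use url::{ParseError, Url};

#[derive(Debug, PartialEq)]
enum Backend {
    S3 { bucket: String, path: String },
    Fs { path: String },
}

// Dispatch a COPY ... FROM location the way DataSource::new does: try to parse
// it as a URL first, and fall back to a local path when there is no scheme.
fn parse_location(location: &str) -> Result<Backend, String> {
    match Url::parse(location) {
        Ok(url) => match url.scheme().to_uppercase().as_str() {
            "S3" => Ok(Backend::S3 {
                bucket: url.host_str().unwrap_or_default().to_string(),
                path: url.path().to_string(),
            }),
            other => Err(format!("unsupported backend protocol: {other}")),
        },
        Err(ParseError::RelativeUrlWithoutBase) => Ok(Backend::Fs {
            path: location.to_string(),
        }),
        Err(e) => Err(format!("invalid url {location}: {e}")),
    }
}

fn main() {
    assert_eq!(
        parse_location("s3://bucket/to/path/").unwrap(),
        Backend::S3 { bucket: "bucket".into(), path: "/to/path/".into() }
    );
    assert_eq!(
        parse_location("/export/demo.parquet").unwrap(),
        Backend::Fs { path: "/export/demo.parquet".into() }
    );
}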

View File

@@ -15,7 +15,7 @@
use std::collections::HashMap;
use catalog::{RegisterSchemaRequest, RegisterTableRequest};
use common_procedure::{ProcedureManagerRef, ProcedureState, ProcedureWithId};
use common_procedure::{watcher, ProcedureManagerRef, ProcedureWithId};
use common_query::Output;
use common_telemetry::tracing::{error, info};
use datatypes::schema::RawSchema;
@@ -33,8 +33,8 @@ use table_procedure::CreateTableProcedure;
use crate::error::{
self, CatalogNotFoundSnafu, CatalogSnafu, ConstraintNotSupportedSnafu, CreateTableSnafu,
IllegalPrimaryKeysDefSnafu, InsertSystemCatalogSnafu, KeyColumnNotFoundSnafu,
ProcedureExecSnafu, RegisterSchemaSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu,
SubmitProcedureSnafu, UnrecognizedTableOptionSnafu, WaitProcedureSnafu,
RegisterSchemaSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SubmitProcedureSnafu,
UnrecognizedTableOptionSnafu, WaitProcedureSnafu,
};
use crate::sql::SqlHandler;
@@ -152,21 +152,13 @@ impl SqlHandler {
let mut watcher = procedure_manager
.submit(procedure_with_id)
.await
.context(SubmitProcedureSnafu)?;
.context(SubmitProcedureSnafu { procedure_id })?;
// TODO(yingwen): Wrap this into a function and add error to ProcedureState::Failed.
loop {
watcher.changed().await.context(WaitProcedureSnafu)?;
match *watcher.borrow() {
ProcedureState::Running => (),
ProcedureState::Done => {
return Ok(Output::AffectedRows(0));
}
ProcedureState::Failed => {
return ProcedureExecSnafu { procedure_id }.fail();
}
}
}
watcher::wait(&mut watcher)
.await
.context(WaitProcedureSnafu { procedure_id })?;
Ok(Output::AffectedRows(0))
}
/// Converts [CreateTable] to [SqlRequest::CreateTable].

View File

@@ -15,6 +15,7 @@ use std::collections::HashMap;
use std::pin::Pin;
use catalog::CatalogManagerRef;
use common_catalog::format_full_table_name;
use common_query::Output;
use common_recordbatch::RecordBatch;
use datafusion_expr::type_coercion::binary::coerce_types;
@@ -239,6 +240,7 @@ impl SqlHandler {
QueryStatement::Sql(Statement::Query(Box::new(query))),
query_ctx.clone(),
)
.await
.context(ExecuteSqlSnafu)?;
let output = self
@@ -284,9 +286,10 @@ impl SqlHandler {
let table = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await
.context(CatalogSnafu)?
.with_context(|| TableNotFoundSnafu {
table_name: table_name.clone(),
table_name: format_full_table_name(&catalog_name, &schema_name, &table_name),
})?;
if stmt.is_insert_select() {

View File

@@ -236,7 +236,7 @@ async fn test_execute_insert_by_select() {
+-------+------+--------+---------------------+
| host | cpu | memory | ts |
+-------+------+--------+---------------------+
| host1 | 66.6 | 1024 | 2022-06-15T07:02:37 |
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
+-------+------+--------+---------------------+"
.to_string();
@@ -457,8 +457,8 @@ async fn test_rename_table() {
+-------+-----+--------+---------------------+
| host | cpu | memory | ts |
+-------+-----+--------+---------------------+
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 |
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 |
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 |
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 |
+-------+-----+--------+---------------------+\
"
.to_string();
@@ -559,9 +559,9 @@ async fn test_alter_table() {
+-------+-----+--------+---------------------+--------+
| host | cpu | memory | ts | my_tag |
+-------+-----+--------+---------------------+--------+
| host1 | 1.1 | 100 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 200 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 300 | 1970-01-01T00:00:03 | |
| host1 | 1.1 | 100.0 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 200.0 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 300.0 | 1970-01-01T00:00:03 | |
+-------+-----+--------+---------------------+--------+\
"
.to_string();
@@ -594,14 +594,14 @@ async fn test_alter_table() {
let output = execute_sql(&instance, "select * from demo order by ts").await;
let expected = "\
+-------+-----+---------------------+--------+
| host | cpu | ts | my_tag |
+-------+-----+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 1970-01-01T00:00:03 | |
| host4 | 400 | 1970-01-01T00:00:04 | world |
+-------+-----+---------------------+--------+\
+-------+-------+---------------------+--------+
| host | cpu | ts | my_tag |
+-------+-------+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 | |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello |
| host3 | 3.3 | 1970-01-01T00:00:03 | |
| host4 | 400.0 | 1970-01-01T00:00:04 | world |
+-------+-------+---------------------+--------+\
"
.to_string();
check_output_stream(output, expected).await;
@@ -757,14 +757,165 @@ async fn test_delete() {
+-------+---------------------+------+--------+
| host | ts | cpu | memory |
+-------+---------------------+------+--------+
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048 |
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072 |
| host2 | 2022-06-15T07:02:38 | 77.7 | 2048.0 |
| host3 | 2022-06-15T07:02:39 | 88.8 | 3072.0 |
+-------+---------------------+------+--------+\
"
.to_string();
check_output_stream(output, expect).await;
}
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_copy_to() {
let instance = setup_test_instance("test_execute_copy_to").await;
// setups
execute_sql(
&instance,
"create table demo(host string, cpu double, memory double, ts timestamp time index);",
)
.await;
let output = execute_sql(
&instance,
r#"insert into demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));
// exports
let data_dir = instance.data_tmp_dir().path();
let copy_to_stmt = format!("Copy demo TO '{}/export/demo.parquet'", data_dir.display());
let output = execute_sql(&instance, &copy_to_stmt).await;
assert!(matches!(output, Output::AffectedRows(2)));
}
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_copy_from() {
let instance = setup_test_instance("test_execute_copy_from").await;
// setups
execute_sql(
&instance,
"create table demo(host string, cpu double, memory double, ts timestamp time index);",
)
.await;
let output = execute_sql(
&instance,
r#"insert into demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));
// export
let data_dir = instance.data_tmp_dir().path();
let copy_to_stmt = format!("Copy demo TO '{}/export/demo.parquet'", data_dir.display());
let output = execute_sql(&instance, &copy_to_stmt).await;
assert!(matches!(output, Output::AffectedRows(2)));
struct Test<'a> {
sql: &'a str,
table_name: &'a str,
}
let tests = [
Test {
sql: &format!(
"Copy with_filename FROM '{}/export/demo.parquet_1_2'",
data_dir.display()
),
table_name: "with_filename",
},
Test {
sql: &format!("Copy with_path FROM '{}/export/'", data_dir.display()),
table_name: "with_path",
},
Test {
sql: &format!(
"Copy with_pattern FROM '{}/export/' WITH (PATTERN = 'demo.*')",
data_dir.display()
),
table_name: "with_pattern",
},
];
for test in tests {
// import
execute_sql(
&instance,
&format!(
"create table {}(host string, cpu double, memory double, ts timestamp time index);",
test.table_name
),
)
.await;
let output = execute_sql(&instance, test.sql).await;
assert!(matches!(output, Output::AffectedRows(2)));
let output = execute_sql(
&instance,
&format!("select * from {} order by ts", test.table_name),
)
.await;
let expected = "\
+-------+------+--------+---------------------+
| host | cpu | memory | ts |
+-------+------+--------+---------------------+
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
+-------+------+--------+---------------------+"
.to_string();
check_output_stream(output, expected).await;
}
}
#[tokio::test(flavor = "multi_thread")]
async fn test_create_by_procedure() {
common_telemetry::init_default_ut_logging();
let instance = MockInstance::with_procedure_enabled("create_by_procedure").await;
let output = execute_sql(
&instance,
r#"create table test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(0)));
// Create if not exists
let output = execute_sql(
&instance,
r#"create table if not exists test_table(
host string,
ts timestamp,
cpu double default 0,
memory double,
TIME INDEX (ts),
PRIMARY KEY(host)
) engine=mito with(regions=1);"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(0)));
}
async fn execute_sql(instance: &MockInstance, sql: &str) -> Output {
execute_sql_in_db(instance, sql, DEFAULT_SCHEMA_NAME).await
}

View File

@@ -106,17 +106,17 @@ async fn sql_insert_tql_query_ceil() {
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:20 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:30 | 32 | 8192 | host1 |\
\n| 1970-01-01T00:00:40 | 96 | 334 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:00 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:10 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:30 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:20 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:30 | 32.0 | 8192.0 | host1 |\
\n| 1970-01-01T00:00:40 | 96.0 | 334.0 | host1 |\
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:00 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:10 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:30 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
@@ -154,12 +154,12 @@ async fn sql_insert_promql_query_ceil() {
"+---------------------+-----------+--------------+-------+\
\n| ts | ceil(cpu) | ceil(memory) | host |\
\n+---------------------+-----------+--------------+-------+\
\n| 1970-01-01T00:00:00 | 67 | 1024 | host1 |\
\n| 1970-01-01T00:00:05 | 67 | 4096 | host1 |\
\n| 1970-01-01T00:00:10 | 100 | 20480 | host1 |\
\n| 1970-01-01T00:00:50 | 12424 | 1334 | host1 |\
\n| 1970-01-01T00:01:20 | 0 | 2334 | host1 |\
\n| 1970-01-01T00:01:40 | 49 | 3334 | host1 |\
\n| 1970-01-01T00:00:00 | 67.0 | 1024.0 | host1 |\
\n| 1970-01-01T00:00:05 | 67.0 | 4096.0 | host1 |\
\n| 1970-01-01T00:00:10 | 100.0 | 20480.0 | host1 |\
\n| 1970-01-01T00:00:50 | 12424.0 | 1334.0 | host1 |\
\n| 1970-01-01T00:01:20 | 0.0 | 2334.0 | host1 |\
\n| 1970-01-01T00:01:40 | 49.0 | 3334.0 | host1 |\
\n+---------------------+-----------+--------------+-------+",
)
.await;
@@ -214,8 +214,8 @@ async fn aggregators_simple_sum() {
"+------------+---------------------+--------------------------+\
\n| group | ts | SUM(http_requests.value) |\
\n+------------+---------------------+--------------------------+\
\n| production | 1970-01-01T00:00:00 | 300 |\
\n| canary | 1970-01-01T00:00:00 | 700 |\
\n| production | 1970-01-01T00:00:00 | 300.0 |\
\n| canary | 1970-01-01T00:00:00 | 700.0 |\
\n+------------+---------------------+--------------------------+",
)
.await;
@@ -238,8 +238,8 @@ async fn aggregators_simple_avg() {
"+------------+---------------------+--------------------------+\
\n| group | ts | AVG(http_requests.value) |\
\n+------------+---------------------+--------------------------+\
\n| production | 1970-01-01T00:00:00 | 150 |\
\n| canary | 1970-01-01T00:00:00 | 350 |\
\n| production | 1970-01-01T00:00:00 | 150.0 |\
\n| canary | 1970-01-01T00:00:00 | 350.0 |\
\n+------------+---------------------+--------------------------+",
)
.await;
@@ -286,8 +286,8 @@ async fn aggregators_simple_without() {
"+------------+------------+---------------------+--------------------------+\
\n| group | job | ts | SUM(http_requests.value) |\
\n+------------+------------+---------------------+--------------------------+\
\n| production | api-server | 1970-01-01T00:00:00 | 300 |\
\n| canary | api-server | 1970-01-01T00:00:00 | 700 |\
\n| production | api-server | 1970-01-01T00:00:00 | 300.0 |\
\n| canary | api-server | 1970-01-01T00:00:00 | 700.0 |\
\n+------------+------------+---------------------+--------------------------+",
)
.await;
@@ -309,7 +309,7 @@ async fn aggregators_empty_by() {
"+---------------------+--------------------------+\
\n| ts | SUM(http_requests.value) |\
\n+---------------------+--------------------------+\
\n| 1970-01-01T00:00:00 | 1000 |\
\n| 1970-01-01T00:00:00 | 1000.0 |\
\n+---------------------+--------------------------+",
)
.await;
@@ -331,7 +331,7 @@ async fn aggregators_no_by_without() {
"+---------------------+--------------------------+\
\n| ts | SUM(http_requests.value) |\
\n+---------------------+--------------------------+\
\n| 1970-01-01T00:00:00 | 1000 |\
\n| 1970-01-01T00:00:00 | 1000.0 |\
\n+---------------------+--------------------------+",
)
.await;
@@ -354,8 +354,8 @@ async fn aggregators_empty_without() {
"+------------+----------+------------+---------------------+--------------------------+\
\n| group | instance | job | ts | SUM(http_requests.value) |\
\n+------------+----------+------------+---------------------+--------------------------+\
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100 |\
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200 |\
\n| production | 0 | api-server | 1970-01-01T00:00:00 | 100.0 |\
\n| production | 1 | api-server | 1970-01-01T00:00:00 | 200.0 |\
\n+------------+----------+------------+---------------------+--------------------------+",
)
.await;
@@ -378,8 +378,8 @@ async fn aggregators_complex_combined_aggrs() {
"+------------+-----------------------------------------------------------------------------------------------------------+\
\n| job | SUM(http_requests.value) + MIN(http_requests.value) + MAX(http_requests.value) + AVG(http_requests.value) |\
\n+------------+-----------------------------------------------------------------------------------------------------------+\
\n| api-server | 1750 |\
\n| app-server | 4550 |\
\n| api-server | 1750.0 |\
\n| app-server | 4550.0 |\
\n+------------+-----------------------------------------------------------------------------------------------------------+",
)
.await;
@@ -399,8 +399,8 @@ async fn two_aggregators_combined_aggrs() {
"+------------+-----------------------------------------------------+\
\n| job | SUM(http_requests.value) + MIN(http_requests.value) |\
\n+------------+-----------------------------------------------------+\
\n| api-server | 1100 |\
\n| app-server | 3100 |\
\n| api-server | 1100.0 |\
\n| app-server | 3100.0 |\
\n+------------+-----------------------------------------------------+",
)
.await;
@@ -444,14 +444,14 @@ async fn binary_op_plain_columns() {
"+------------+----------+------------+---------------------+-------------------------------------------+\
\n| job | instance | group | ts | http_requests.value - http_requests.value |\
\n+------------+----------+------------+---------------------+-------------------------------------------+\
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0 |\
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0 |\
\n| api-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| api-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 0 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 0 | production | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 1 | canary | 1970-01-01T00:00:00 | 0.0 |\
\n| app-server | 1 | production | 1970-01-01T00:00:00 | 0.0 |\
\n+------------+----------+------------+---------------------+-------------------------------------------+",
)
.await;

View File

@@ -17,6 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_query::Output;
use common_recordbatch::util;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use mito::config::EngineConfig;
@@ -26,9 +27,8 @@ use servers::Mode;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::requests::{CreateTableRequest, TableOptions};
use tempdir::TempDir;
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, WalConfig};
use crate::error::{CreateTableSnafu, Result};
use crate::instance::Instance;
use crate::sql::SqlHandler;
@@ -36,6 +36,7 @@ use crate::sql::SqlHandler;
pub(crate) struct MockInstance {
instance: Instance,
_guard: TestGuard,
_procedure_dir: Option<TempDir>,
}
impl MockInstance {
@@ -45,12 +46,39 @@ impl MockInstance {
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
MockInstance { instance, _guard }
MockInstance {
instance,
_guard,
_procedure_dir: None,
}
}
pub(crate) async fn with_procedure_enabled(name: &str) -> Self {
let (mut opts, _guard) = create_tmp_dir_and_datanode_opts(name);
let procedure_dir = create_temp_dir(&format!("gt_procedure_{name}"));
opts.procedure = Some(ProcedureConfig {
store: ObjectStoreConfig::File(FileConfig {
data_dir: procedure_dir.path().to_str().unwrap().to_string(),
}),
});
let instance = Instance::with_mock_meta_client(&opts).await.unwrap();
instance.start().await.unwrap();
MockInstance {
instance,
_guard,
_procedure_dir: Some(procedure_dir),
}
}
pub(crate) fn inner(&self) -> &Instance {
&self.instance
}
pub(crate) fn data_tmp_dir(&self) -> &TempDir {
&self._guard._data_tmp_dir
}
}
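
A hypothetical test using the new constructor; the test name and body are illustrative only and assume a tokio test runtime, but `with_procedure_enabled` and `inner` follow the code above:

```rust
// Illustrative only: opt a datanode test into the file-backed procedure store.
// The test name is made up; with_procedure_enabled/inner come from the code above.
#[tokio::test(flavor = "multi_thread")]
async fn procedure_enabled_instance_starts() {
    let instance = MockInstance::with_procedure_enabled("procedure_smoke").await;
    // The started Instance is available for issuing requests in the test body.
    let _datanode = instance.inner();
}
```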
struct TestGuard {
@@ -59,8 +87,8 @@ struct TestGuard {
}
fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard) {
let wal_tmp_dir = TempDir::new(&format!("gt_wal_{name}")).unwrap();
let data_tmp_dir = TempDir::new(&format!("gt_data_{name}")).unwrap();
let wal_tmp_dir = create_temp_dir(&format!("gt_wal_{name}"));
let data_tmp_dir = create_temp_dir(&format!("gt_data_{name}"));
let opts = DatanodeOptions {
wal: WalConfig {
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
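
The `TempDir::new(...)` calls from the old `tempdir` crate are replaced with a `create_temp_dir` helper provided by `common-test-util`. A minimal sketch of what such a helper might look like on top of the `tempfile` crate; the real implementation may differ:

```rust
// Sketch only: a create_temp_dir helper built on the `tempfile` crate.
// The actual common-test-util implementation may configure things differently.
use tempfile::{Builder, TempDir};

pub fn create_temp_dir(prefix: &str) -> TempDir {
    Builder::new()
        .prefix(prefix)
        .tempdir()
        .expect("failed to create temporary directory")
}
```

Unlike `tempdir::TempDir::new`, a helper of this shape returns the directory directly and panics on failure, which matches how the call sites above drop their `.unwrap()`.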

@@ -341,7 +341,7 @@ mod tests {
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
let vector = column_schema.create_default_vector_for_padding(4);
assert_eq!(4, vector.len());
let expect: VectorRef = Arc::new(Int32Vector::from_slice(&[0, 0, 0, 0]));
let expect: VectorRef = Arc::new(Int32Vector::from_slice([0, 0, 0, 0]));
assert_eq!(expect, vector);
}
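
The recurring `from_slice(&[...])` → `from_slice([...])` edits in this and the following files suggest the constructor is now generic over something like `AsRef<[T]>` rather than taking an explicit slice reference; a minimal sketch under that assumption:

```rust
// Sketch only: a from_slice signature that accepts both `[T; N]` arrays and
// `&[T]` slices. The real trait bound in the datatypes crate is an assumption.
pub struct Int32Vector {
    values: Vec<i32>,
}

impl Int32Vector {
    pub fn from_slice(slice: impl AsRef<[i32]>) -> Self {
        Int32Vector {
            values: slice.as_ref().to_vec(),
        }
    }
}

fn main() {
    // Both call styles compile against this signature.
    let _a = Int32Vector::from_slice([1, 2, 3, 4]);
    let _b = Int32Vector::from_slice(&[1, 2, 3, 4]);
}
```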

@@ -345,7 +345,7 @@ mod tests {
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();

@@ -365,7 +365,7 @@ mod tests {
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();

@@ -16,14 +16,14 @@ use std::any::Any;
use std::fmt;
use std::sync::Arc;
use arrow::array::{Array, ArrayRef};
use snafu::ResultExt;
use arrow::array::{Array, ArrayRef, UInt32Array};
use snafu::{ensure, ResultExt};
use crate::data_type::ConcreteDataType;
use crate::error::{Result, SerializeSnafu};
use crate::error::{self, Result, SerializeSnafu};
use crate::serialize::Serializable;
use crate::value::{Value, ValueRef};
use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef};
use crate::vectors::{BooleanVector, Helper, UInt32Vector, Validity, Vector, VectorRef};
#[derive(Clone)]
pub struct ConstantVector {
@@ -83,6 +83,35 @@ impl ConstantVector {
self.length,
)))
}
pub(crate) fn take_vector(&self, indices: &UInt32Vector) -> Result<VectorRef> {
if indices.is_empty() {
return Ok(self.slice(0, 0));
}
ensure!(
indices.null_count() == 0,
error::UnsupportedOperationSnafu {
op: "taking a null index",
vector_type: self.vector_type_name(),
}
);
let len = self.len();
let arr = indices.to_arrow_array();
let indices_arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
if !arrow::compute::min_boolean(
&arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(),
)
.unwrap()
{
panic!("Array index out of bounds, cannot take index out of the length of the array: {len}");
}
Ok(Arc::new(ConstantVector::new(
self.inner().clone(),
indices.len(),
)))
}
}
impl Vector for ConstantVector {
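
A sketch of an in-crate test for the new `take_vector` path; the concrete values are illustrative, and the exact re-exports are assumptions, but the behavior (a constant vector resized to the number of indices) follows the implementation above:

```rust
// Sketch only: take_vector is pub(crate), so this would live inside the
// datatypes crate; values and import paths are illustrative.
use std::sync::Arc;

use crate::vectors::{ConstantVector, Int32Vector, UInt32Vector, Vector};

#[test]
fn take_on_constant_vector_keeps_the_constant() {
    let constant = ConstantVector::new(Arc::new(Int32Vector::from_slice([7])), 5);
    let indices = UInt32Vector::from_slice([0, 2, 4]);
    let taken = constant.take_vector(&indices).unwrap();
    // Still a constant vector, resized to the number of indices taken.
    assert_eq!(3, taken.len());
}
```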

@@ -58,7 +58,7 @@ mod tests {
#[test]
fn test_date_scalar() {
let vector = DateVector::from_slice(&[1, 2]);
let vector = DateVector::from_slice([1, 2]);
assert_eq!(2, vector.len());
assert_eq!(Some(Date::new(1)), vector.get_data(0));
assert_eq!(Some(Date::new(2)), vector.get_data(1));
@@ -66,24 +66,24 @@ mod tests {
#[test]
fn test_date_vector_builder() {
let input = DateVector::from_slice(&[1, 2, 3]);
let input = DateVector::from_slice([1, 2, 3]);
let mut builder = DateType::default().create_mutable_vector(3);
builder.push_value_ref(ValueRef::Date(Date::new(5)));
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();
let expect: VectorRef = Arc::new(DateVector::from_slice(&[5, 2, 3]));
let expect: VectorRef = Arc::new(DateVector::from_slice([5, 2, 3]));
assert_eq!(expect, vector);
}
#[test]
fn test_date_from_arrow() {
let vector = DateVector::from_slice(&[1, 2]);
let vector = DateVector::from_slice([1, 2]);
let arrow = vector.as_arrow().slice(0, vector.len());
let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
assert_eq!(vector, vector2);
@@ -91,7 +91,7 @@ mod tests {
#[test]
fn test_serialize_date_vector() {
let vector = DateVector::from_slice(&[-1, 0, 1]);
let vector = DateVector::from_slice([-1, 0, 1]);
let serialized_json = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
assert_eq!(
r#"["1969-12-31","1970-01-01","1970-01-02"]"#,

@@ -81,7 +81,7 @@ mod tests {
assert_eq!(Value::Null, v.get(1));
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
let input = DateTimeVector::from_wrapper_slice(&[
let input = DateTimeVector::from_wrapper_slice([
DateTime::new(1),
DateTime::new(2),
DateTime::new(3),
@@ -92,11 +92,11 @@ mod tests {
assert!(builder.try_push_value_ref(ValueRef::Int32(123)).is_err());
builder.extend_slice_of(&input, 1, 2).unwrap();
assert!(builder
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
.extend_slice_of(&crate::vectors::Int32Vector::from_slice([13]), 0, 1)
.is_err());
let vector = builder.to_vector();
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice(&[
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice([
DateTime::new(5),
DateTime::new(2),
DateTime::new(3),
@@ -106,7 +106,7 @@ mod tests {
#[test]
fn test_datetime_from_arrow() {
let vector = DateTimeVector::from_wrapper_slice(&[DateTime::new(1), DateTime::new(2)]);
let vector = DateTimeVector::from_wrapper_slice([DateTime::new(1), DateTime::new(2)]);
let arrow = vector.as_arrow().slice(0, vector.len());
let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
assert_eq!(vector, vector2);

@@ -167,30 +167,30 @@ mod tests {
Some("world"),
])));
assert_vector_ref_eq(Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt8Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int16Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt16Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt32Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int64Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt64Vector::from_slice(&[1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Float32Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])));
assert_vector_ref_eq(Arc::new(Float64Vector::from_slice(&[1.0, 2.0, 3.0, 4.0])));
assert_vector_ref_eq(Arc::new(Int8Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt8Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int16Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt16Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int32Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt32Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Int64Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(UInt64Vector::from_slice([1, 2, 3, 4])));
assert_vector_ref_eq(Arc::new(Float32Vector::from_slice([1.0, 2.0, 3.0, 4.0])));
assert_vector_ref_eq(Arc::new(Float64Vector::from_slice([1.0, 2.0, 3.0, 4.0])));
}
#[test]
fn test_vector_ne() {
assert_vector_ref_ne(
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
Arc::new(Int32Vector::from_slice(&[1, 2])),
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
Arc::new(Int32Vector::from_slice([1, 2])),
);
assert_vector_ref_ne(
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
Arc::new(Int8Vector::from_slice(&[1, 2, 3, 4])),
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
Arc::new(Int8Vector::from_slice([1, 2, 3, 4])),
);
assert_vector_ref_ne(
Arc::new(Int32Vector::from_slice(&[1, 2, 3, 4])),
Arc::new(Int32Vector::from_slice([1, 2, 3, 4])),
Arc::new(BooleanVector::from(vec![true, true])),
);
assert_vector_ref_ne(

@@ -264,7 +264,8 @@ impl Helper {
| ArrowDataType::Dictionary(_, _)
| ArrowDataType::Decimal128(_, _)
| ArrowDataType::Decimal256(_, _)
| ArrowDataType::Map(_, _) => {
| ArrowDataType::Map(_, _)
| ArrowDataType::RunEndEncoded(_, _) => {
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
}
})

Some files were not shown because too many files have changed in this diff.